Skip to content

Commit

Permalink
pw_tokenizer: Move config value check to .cc file
Browse files Browse the repository at this point in the history
Moving config value checks to a source file avoids running the checks
multiple times and avoids the requirement of C compatibility. Also,
migrate the pw_tokenizer config to Doxygen.

Change-Id: Icefd363d71055a17e29d42a282d1cc726bd27ef1
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/168615
Pigweed-Auto-Submit: Wyatt Hepler <[email protected]>
Reviewed-by: Taylor Cramer <[email protected]>
Commit-Queue: Auto-Submit <[email protected]>
  • Loading branch information
255 authored and CQ Bot Account committed Aug 25, 2023
1 parent 98eda46 commit f7350d3
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 46 deletions.
25 changes: 13 additions & 12 deletions docs/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -114,15 +114,15 @@ group("third_party_docs") {
]
}

_doxygen_input_files = [
# All sources with doxygen comment blocks.
# All sources with doxygen comment blocks.
_doxygen_input_files = [ # keep-sorted: start
"$dir_pw_allocator/public/pw_allocator/block.h",
"$dir_pw_allocator/public/pw_allocator/freelist.h",
"$dir_pw_async/public/pw_async/context.h",
"$dir_pw_async/public/pw_async/dispatcher.h",
"$dir_pw_async/public/pw_async/heap_dispatcher.h",
"$dir_pw_async/public/pw_async/fake_dispatcher_fixture.h",
"$dir_pw_async/public/pw_async/function_dispatcher.h",
"$dir_pw_async/public/pw_async/heap_dispatcher.h",
"$dir_pw_async/public/pw_async/task.h",
"$dir_pw_async/public/pw_async/task_function.h",
"$dir_pw_async_basic/public/pw_async_basic/dispatcher.h",
Expand All @@ -133,32 +133,32 @@ _doxygen_input_files = [
"$dir_pw_bluetooth/public/pw_bluetooth/low_energy/central.h",
"$dir_pw_bluetooth/public/pw_bluetooth/low_energy/connection.h",
"$dir_pw_bluetooth/public/pw_bluetooth/low_energy/peripheral.h",
"$dir_pw_chrono/public/pw_chrono/system_clock.h",
"$dir_pw_chrono/public/pw_chrono/system_timer.h",
"$dir_pw_containers/public/pw_containers/filtered_view.h",
"$dir_pw_containers/public/pw_containers/inline_deque.h",
"$dir_pw_containers/public/pw_containers/inline_queue.h",
"$dir_pw_chrono/public/pw_chrono/system_clock.h",
"$dir_pw_chrono/public/pw_chrono/system_timer.h",
"$dir_pw_digital_io/public/pw_digital_io/digital_io.h",
"$dir_pw_function/public/pw_function/function.h",
"$dir_pw_function/public/pw_function/pointer.h",
"$dir_pw_function/public/pw_function/scope_guard.h",
"$dir_pw_hdlc/public/pw_hdlc/decoder.h",
"$dir_pw_hdlc/public/pw_hdlc/encoder.h",
"$dir_pw_interrupt/public/pw_interrupt/context.h",
"$dir_pw_i2c/public/pw_i2c/initiator.h",
"$dir_pw_i2c_linux/public/pw_i2c_linux/initiator.h",
"$dir_pw_interrupt/public/pw_interrupt/context.h",
"$dir_pw_log_tokenized/public/pw_log_tokenized/base64.h",
"$dir_pw_log_tokenized/public/pw_log_tokenized/handler.h",
"$dir_pw_log_tokenized/public/pw_log_tokenized/metadata.h",
"$dir_pw_log_tokenized/public/pw_log_tokenized/base64.h",
"$dir_pw_protobuf/public/pw_protobuf/find.h",
"$dir_pw_random/public/pw_random/random.h",
"$dir_pw_rpc/public/pw_rpc/internal/config.h",
"$dir_pw_rpc/public/pw_rpc/synchronous_call.h",
"$dir_pw_string/public/pw_string/format.h",
"$dir_pw_string/public/pw_string/string.h",
"$dir_pw_status/public/pw_status/status.h",
"$dir_pw_stream/public/pw_stream/stream.h",
"$dir_pw_stream_uart_linux/public/pw_stream_uart_linux/stream.h",
"$dir_pw_string/public/pw_string/format.h",
"$dir_pw_string/public/pw_string/string.h",
"$dir_pw_string/public/pw_string/string_builder.h",
"$dir_pw_string/public/pw_string/util.h",
"$dir_pw_sync/public/pw_sync/binary_semaphore.h",
Expand All @@ -174,13 +174,14 @@ _doxygen_input_files = [
"$dir_pw_sync/public/pw_sync/virtual_basic_lockable.h",
"$dir_pw_sys_io/public/pw_sys_io/sys_io.h",
"$dir_pw_thread/public/pw_thread/test_thread_context.h",
"$dir_pw_tokenizer/public/pw_tokenizer/config.h",
"$dir_pw_tokenizer/public/pw_tokenizer/encode_args.h",
"$dir_pw_tokenizer/public/pw_tokenizer/tokenize.h",
"$dir_pw_tokenizer/public/pw_tokenizer/token_database.h",
"$dir_pw_tokenizer/public/pw_tokenizer/tokenize.h",
"$dir_pw_toolchain/public/pw_toolchain/no_destructor.h",
"$dir_pw_varint/public/pw_varint/varint.h",
"$dir_pw_varint/public/pw_varint/stream.h",
]
"$dir_pw_varint/public/pw_varint/varint.h",
] # keep-sorted: end

pw_python_action("generate_doxygen") {
_output_dir = "docs/doxygen"
Expand Down
5 changes: 5 additions & 0 deletions pw_tokenizer/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ Tokenization
.. doxygenfunction:: pw_tokenizer_EncodeArgs
.. doxygentypedef:: pw_tokenizer_Token

Configuration
-------------
.. doxygenfile:: pw_tokenizer/config.h
:sections: define

Detokenization
==============
.. doxygenclass:: pw::tokenizer::TokenDatabase
Expand Down
4 changes: 4 additions & 0 deletions pw_tokenizer/encode_args.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
#include "pw_preprocessor/compiler.h"
#include "pw_varint/varint.h"

// Validate the tokenizer configuration in this source file rather than in the
// config header: the check then runs once instead of in every file that
// includes the header, and it does not need to be C compatible.
static_assert((PW_TOKENIZER_CFG_ARG_TYPES_SIZE_BYTES == 4) ||
(PW_TOKENIZER_CFG_ARG_TYPES_SIZE_BYTES == 8),
"PW_TOKENIZER_CFG_ARG_TYPES_SIZE_BYTES must be 4 or 8");

namespace pw {
namespace tokenizer {
namespace {
Expand Down
66 changes: 32 additions & 34 deletions pw_tokenizer/public/pw_tokenizer/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,48 +15,46 @@
// Configuration macros for the tokenizer module.
#pragma once

#include <assert.h>
#include <stdbool.h>

// For a tokenized string that has arguments, the types of the arguments are
// encoded in either a 4-byte (uint32_t) or a 8-byte (uint64_t) value. The 4 or
// 6 least-significant bits, respectively, store the number of arguments, while
// the remaining bits encode the argument types. Argument types are encoded
// two-bits per argument, in little-endian order. Up to 14 arguments in 4 bytes
// or 29 arguments in 8 bytes are supported.
/// For a tokenized string with arguments, the types of the arguments are
/// encoded in either 4 bytes (`uint32_t`) or 8 bytes (`uint64_t`). 4 bytes
/// supports up to 14 tokenized string arguments; 8 bytes supports up to 29
/// arguments. Using 8 bytes increases code size for 32-bit machines.
///
/// Argument types are encoded two bits per argument, in little-endian order.
/// The 4 least-significant bits (4-byte encoding) or 6 least-significant bits
/// (8-byte encoding) store the number of arguments, while the remaining bits
/// encode the argument types.
#ifndef PW_TOKENIZER_CFG_ARG_TYPES_SIZE_BYTES
#define PW_TOKENIZER_CFG_ARG_TYPES_SIZE_BYTES 4
#endif // PW_TOKENIZER_CFG_ARG_TYPES_SIZE_BYTES

static_assert((bool)(PW_TOKENIZER_CFG_ARG_TYPES_SIZE_BYTES == 4) ||
(bool)(PW_TOKENIZER_CFG_ARG_TYPES_SIZE_BYTES == 8),
"PW_TOKENIZER_CFG_ARG_TYPES_SIZE_BYTES must be 4 or 8");

// Maximum number of characters to hash in C. In C code, strings shorter than
// this length are treated as if they were zero-padded up to the length. Strings
// that are the same length and share a common prefix longer than this value
// hash to the same value. Increasing PW_TOKENIZER_CFG_C_HASH_LENGTH increases
// the compilation time for C due to the complexity of the hashing macros.
//
// PW_TOKENIZER_CFG_C_HASH_LENGTH has no effect on C++ code. In C++, hashing is
// done with a constexpr function instead of a macro. There are no string length
// limitations and compilation times are unaffected by this macro.
//
// Only hash lengths for which there is a corresponding macro header
// (pw_tokenizer/internal/mash_macro_#.h) are supported. Additional macros may
// be generated with the generate_hash_macro.py function. New macro headers must
// then be added to pw_tokenizer/internal/hash.h.
//
// This MUST match the value of DEFAULT_C_HASH_LENGTH in
// pw_tokenizer/py/pw_tokenizer/tokens.py.
/// Maximum number of characters to hash in C. In C code, strings shorter than
/// this length are treated as if they were zero-padded up to the length.
/// Strings that are the same length and share a common prefix longer than this
/// value hash to the same value. Increasing `PW_TOKENIZER_CFG_C_HASH_LENGTH`
/// increases the compilation time for C due to the complexity of the hashing
/// macros.
///
/// `PW_TOKENIZER_CFG_C_HASH_LENGTH` has no effect on C++ code. In C++, hashing
/// is done with a `constexpr` function instead of a macro. There are no string
/// length limitations and compilation times are unaffected by this macro.
///
/// Only hash lengths for which there is a corresponding macro header
/// (`pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_#_hash_macro.h`)
/// are supported. Additional macros may be generated with the
/// `generate_hash_macro.py` script. New macro headers must then be added to
/// `pw_tokenizer/internal/tokenize_string.h`.
///
/// This MUST match the value of `DEFAULT_C_HASH_LENGTH` in
/// `pw_tokenizer/py/pw_tokenizer/tokens.py`.
#ifndef PW_TOKENIZER_CFG_C_HASH_LENGTH
#define PW_TOKENIZER_CFG_C_HASH_LENGTH 128
#endif // PW_TOKENIZER_CFG_C_HASH_LENGTH

// PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES is deprecated. It is used as the
// default value for pw_log_tokenized's
// PW_LOG_TOKENIZED_ENCODING_BUFFER_SIZE_BYTES. This value should not be
// configured; set PW_LOG_TOKENIZED_ENCODING_BUFFER_SIZE_BYTES instead.
/// `PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES` is deprecated. It is used as
/// the default value for pw_log_tokenized's
/// @c_macro{PW_LOG_TOKENIZED_ENCODING_BUFFER_SIZE_BYTES}. This value should not
/// be configured; set @c_macro{PW_LOG_TOKENIZED_ENCODING_BUFFER_SIZE_BYTES}
/// instead.
#ifndef PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES
#define PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES 52
#endif // PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES

0 comments on commit f7350d3

Please sign in to comment.