Skip to content

Commit

Permalink
Measure and improve performance (#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
calladoum-elastic authored Jun 10, 2024
1 parent b8ffb58 commit f016dc5
Show file tree
Hide file tree
Showing 16 changed files with 895 additions and 178 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
- {os: windows-2022, arch: win32, config: RelWithDebInfo}
- {os: windows-2022, arch: arm64, config: RelWithDebInfo}
- {os: windows-2022, arch: arm, config: RelWithDebInfo}
- {os: ubuntu-22.04, arch: x64, config: RelWithDebInfo}
- {os: ubuntu-22.04, arch: x64, config: RelWithDebInfo}
- {os: macos-13, arch: x64, config: Release}

runs-on: ${{ matrix.variants.os }}
Expand Down
15 changes: 9 additions & 6 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,20 @@ on:

jobs:
native_tests:
name: ${{ matrix.os }} / Library
name: ${{ matrix.variants.os }} / ${{ matrix.variants.config }} / Library
strategy:
fail-fast: false
matrix:
os: ["windows-latest", "ubuntu-latest", "macos-latest"]
runs-on: ${{ matrix.os }}
variants:
- {os: windows-2022, config: Debug}
- {os: ubuntu-22.04, config: Debug}
- {os: macos-13, config: Debug}
runs-on: ${{ matrix.variants.os }}
steps:
- uses: actions/checkout@v4
- run: cmake -S. -B build -DTLSH_BUILD_WITH_ASAN=ON -DTLSH_BUILD_TESTS=ON -DTLSH_BUILD_BINDINGS=OFF
- run: cmake --build ./build
- run: ctest -j6 -C Debug -T test --output-on-failure --verbose
- run: cmake --build ./build --config ${{ matrix.variants.config }}
- run: ctest --progress --build-config ${{ matrix.variants.config }} --test-action test --test-dir ./build

binding_tests:
name: ${{ matrix.os }} / Python${{ matrix.py }}
Expand All @@ -34,4 +37,4 @@ jobs:
with:
python-version: ${{ matrix.py }}
- run: python -m pip install --user --upgrade .[tests]
- run: python -m pytest -vv python/tests
- run: python -m pytest -vv python/tests
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ __pycache__
build/
.vscode/
src/inc/tlsh_version.h
Testing/
Testing/
tests/datasets/large
27 changes: 26 additions & 1 deletion python/tests/test_tlsh.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,16 +89,41 @@ def test_batch_test_base():
for x in (TEST_DATA_PATH / "base.txt").open().readlines()
]

# full read
for fname, expected_value in expected_values:
buf = (TEST_DATA_PATH / fname).open("rb").read()
tlsh_hex = tlsh.hexdigest(buf).upper()
tlsh_raw = tlsh.digest(buf)
assert (
tlsh_hex == expected_value.upper()
), f"Mismatch on {fname}: expected={expected_value.upper()} got={tlsh_hex}"
expected_value_raw = bytes.fromhex(expected_value)
assert (
tlsh_raw == expected_value_raw
), f"Mismatch on {fname}: got={tlsh_raw} expected={expected_value_raw}"

# block read
for fname, expected_value in expected_values:
bksz = 512
t = tlsh.Tlsh()
assert not t.valid
with (TEST_DATA_PATH / fname).open("rb") as fd:
while True:
buf = fd.read(bksz)
if not buf:
break
t.update(buf)
t.final()
assert t.valid
tlsh_hex = t.hexdigest(0).upper()
tlsh_raw = t.digest(0)
assert (
tlsh_hex == expected_value.upper()
), f"Mismatch on {fname}: expected={expected_value.upper()} got={tlsh_hex}"
expected_value_raw = bytes.fromhex(expected_value)
assert tlsh_raw == expected_value_raw, f"Mismatch on {fname}: got={tlsh_raw} expected={expected_value_raw}"
assert (
tlsh_raw == expected_value_raw
), f"Mismatch on {fname}: got={tlsh_raw} expected={expected_value_raw}"


def test_batch_test_extended_file_level():
Expand Down
45 changes: 31 additions & 14 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,37 @@ else()
#
# Build with ASAN if available AND debug build
#
if(BUILD_WITH_ASAN)
target_compile_definitions(tlsh
PUBLIC
$<$<CONFIG:Debug>:_DISABLE_VECTOR_ANNOTATION _DISABLE_STRING_ANNOTATION >
)
target_compile_options(tlsh
PUBLIC
$<$<CONFIG:Debug>:-fsanitize=address>
)
target_link_libraries(tlsh
PUBLIC
$<$<CONFIG:Debug>:-fsanitize=address>
)
endif(BUILD_WITH_ASAN)
# Opt-in AddressSanitizer support for the `tlsh` target.
# Flags are only added when all of the following hold:
#   - TLSH_BUILD_WITH_ASAN is set,
#   - the C++ compiler accepts -fsanitize=address (probed below),
#   - the active configuration is Debug (every flag is wrapped in $<CONFIG:Debug>).
# Everything is declared PUBLIC, so it also applies to targets linking against tlsh.
if(TLSH_BUILD_WITH_ASAN)
include(CheckCXXCompilerFlag)
# Result of the probe is stored in HAS_ASAN_AVAILABLE.
CHECK_CXX_COMPILER_FLAG("-fsanitize=address" HAS_ASAN_AVAILABLE)
if(HAS_ASAN_AVAILABLE)
if(WIN32)
# Windows: define the two _DISABLE_*_ANNOTATION macros in Debug builds.
# NOTE(review): presumably this avoids STL container-annotation mismatches when mixing
# ASAN and non-ASAN objects - confirm against the MSVC ASAN documentation.
target_compile_definitions(tlsh
PUBLIC
$<$<CONFIG:Debug>:_DISABLE_VECTOR_ANNOTATION _DISABLE_STRING_ANNOTATION >
)
# /fsanitize=address is only emitted when CMAKE_GENERATOR_PLATFORM is exactly "x64" or
# "win32"; any other platform value gets no ASAN compile flag.
# NOTE(review): $<STREQUAL> is case-sensitive, so `-A Win32` would not match "win32" - confirm
# this is intended.
target_compile_options(tlsh
PUBLIC
$<$<STREQUAL:${CMAKE_GENERATOR_PLATFORM},x64>:$<$<CONFIG:Debug>:/fsanitize=address>>
$<$<STREQUAL:${CMAKE_GENERATOR_PLATFORM},win32>:$<$<CONFIG:Debug>:/fsanitize=address>>
)
# Same platform/configuration gating for the linker option.
target_link_options(tlsh
PUBLIC
$<$<STREQUAL:${CMAKE_GENERATOR_PLATFORM},win32>:$<$<CONFIG:Debug>:/InferAsanLibs>>
$<$<STREQUAL:${CMAKE_GENERATOR_PLATFORM},x64>:$<$<CONFIG:Debug>:/InferAsanLibs>>
)
else()
# Non-Windows: -fsanitize=address is needed at both compile and link time; the link-time
# flag is passed through target_link_libraries.
target_compile_options(tlsh
PUBLIC
$<$<CONFIG:Debug>:-fsanitize=address>
)
target_link_libraries(tlsh
PUBLIC
$<$<CONFIG:Debug>:-fsanitize=address>
)
endif(WIN32)
endif(HAS_ASAN_AVAILABLE)
endif(TLSH_BUILD_WITH_ASAN)

install(TARGETS tlsh DESTINATION lib)

Expand Down
7 changes: 4 additions & 3 deletions src/inc/tlsh.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,12 @@ class TLSH_API Tlsh
void
final(u32 tlsh_option = 0);

const std::vector<u8>
std::vector<u8> const &
getHashBytes(u8 showvers = 0) const;

/* to get the hex-encoded hash code */
const std::string
getHashString(u8 showvers = 0) const;
std::string const &
getHashString(u8 showvers = 0);

/* to get the hex-encoded hash code without allocating buffer in TlshImpl - bufSize should be
* TLSH_STRING_BUFFER_LEN */
Expand Down Expand Up @@ -155,6 +155,7 @@ class TLSH_API Tlsh

private:
std::unique_ptr<TlshImpl> m_Implementation;
std::array<std::string, 10> m_stringHashes;
};

#ifdef TLSH_DISTANCE_PARAMETERS
Expand Down
9 changes: 5 additions & 4 deletions src/inc/tlsh_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,7 @@ class TlshImpl
std::vector<u8> const &
hash(u8 showvers) const;

std::vector<u8> const &
hash(std::vector<u8> &,
u8 showvers) const; // saves allocating hash string in TLSH instance - bufSize should be
// TLSH_STRING_LEN + 1

int
compare(const TlshImpl &other) const;
int
Expand Down Expand Up @@ -85,6 +82,10 @@ class TlshImpl
}

private:
// NOTE(review): the trailing remark below refers to a `bufSize`, but this declaration takes no
// buffer argument - the wording looks stale and should be confirmed against the definition.
bool
generate_hash(u8 showvers) const; // saves allocating hash string in TLSH instance - bufSize
// should be TLSH_STRING_LEN + 1

std::unique_ptr<u32[]> a_bucket;
std::array<u8, SLIDING_WND_SIZE> slide_window;
unsigned int data_len;
Expand Down
2 changes: 1 addition & 1 deletion src/inc/tlsh_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ mod_diff(unsigned int x, unsigned int y, unsigned int R);
int
h_distance(int len, const unsigned char x[], const unsigned char y[]);
void
to_hex(u8 *psrc, int len, u8 *pdest);
to_hex(const u8 *psrc, int len, u8 *pdest);
// void
// from_hex(const u8 *psrc, size_t srclen, u8 *pdest, size_t dstlen);
unsigned char
Expand Down
7 changes: 3 additions & 4 deletions src/inc/tlsh_version.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,11 @@ using i8 = char;
using u32 = uint32_t;
using i32 = int32_t;

#if defined _WINDOWS
#if defined _WIN32
using usize = size_t;
#elif defined _LINUX
#elif defined __linux__
using usize = std::size_t;
#elif defined _MACOS
#elif defined __APPLE__
#include <stddef.h>
using usize = size_t;
#endif

1 change: 1 addition & 0 deletions src/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ list(APPEND TEST_SOURCE_FILES

# Test cases
${TEST_DIR}/test_basic.cpp
${TEST_DIR}/test_batch.cpp

# Performance tests
${TEST_DIR}/perf_basic.cpp
Expand Down
Loading

0 comments on commit f016dc5

Please sign in to comment.