diff --git a/.clang-format b/.clang-format
new file mode 100644
index 00000000000..93aeded4aa9
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,135 @@
+# C
+BasedOnStyle: LLVM
+AlignEscapedNewlines: Indent
+AlignConsecutiveAssignments: true
+AlignConsecutiveDeclarations: false
+AlignConsecutiveStructMembers: true
+AlignConsecutiveMacros: true
+AlignDeclarationByPointer: true
+AlignAfterOpenBracket: true
+AlignOperands: true
+PointerAlignment: Right
+DerivePointerAlignment: false
+AlignTrailingComments: false
+AllowAllArgumentsOnNextLine: false
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: false
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AllowShortEnumsOnASingleLine: false
+AllowDesignatedInitializersOnASingleLine: false
+AlwaysBreakAfterReturnType: None
+PenaltyReturnTypeOnItsOwnLine: 20
+PenaltyBreakAssignment: 100
+PenaltyExcessCharacter: 100
+PenaltyBreakBeforeFirstCallParameter: 100
+PenaltyBreakMemberAccess: 250
+PenaltyBreakLastMemberAccess: 300
+PenaltyIndentedWhitespace: 0
+ColumnLimit: 80
+AlwaysBreakBeforeMultilineStrings: false
+BinPackArguments: true
+BinPackParameters: true
+BreakBeforeBraces: Custom
+BraceWrapping:
+ AfterClass: false
+ AfterControlStatement: false
+ AfterEnum: false
+ AfterFunction: true
+ AfterNamespace: false
+ AfterObjCDeclaration: false
+ AfterStruct: false
+ AfterUnion: false
+ AfterExternBlock: false
+ BeforeCatch: false
+ BeforeElse: false
+ IndentBraces: false
+ SplitEmptyFunction: true
+ SplitEmptyRecord: true
+ SplitEmptyNamespace: true
+BreakBeforeBinaryOperators: false
+BreakBeforeTernaryOperators: false
+BreakStringLiterals: true
+ContinuationIndentWidth: 8
+IncludeBlocks: Regroup
+IndentCaseLabels: false
+IndentWidth: 4
+KeepEmptyLinesAtTheStartOfBlocks: false
+IndentPPDirectives: None
+MaxEmptyLinesToKeep: 2
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+SpaceInEmptyParentheses: false
+SpaceBeforeParens: ControlStatementsExceptForEachMacros
+SpaceBeforeAssignmentOperators: true
+SpaceAfterCStyleCast: false
+SortIncludes: false
+ForEachMacros: ['_UCS_BITMAP_FOR_EACH_WORD',
+ 'FOR_EACH_ENTITY',
+ 'kh_foreach',
+ 'kh_foreach_key',
+ 'kh_foreach_value',
+ 'ucp_unpacked_address_for_each',
+ 'ucs_array_for_each',
+ 'UCS_BITMAP_FOR_EACH_BIT',
+ 'ucs_for_each_bit',
+ 'ucs_for_each_submask',
+ 'ucs_hlist_for_each',
+ 'ucs_hlist_for_each_extract',
+ 'ucs_hlist_for_each_extract_if',
+ 'ucs_list_for_each',
+ 'ucs_list_for_each_safe',
+ 'ucs_memory_type_for_each',
+ 'UCS_PP_FOREACH',
+ 'UCS_PP_FOREACH_SEP',
+ 'ucs_profile_for_each_location',
+ 'ucs_ptr_array_for_each',
+ 'ucs_ptr_array_locked_for_each',
+ 'ucs_queue_for_each',
+ 'ucs_queue_for_each_extract',
+ 'ucs_queue_for_each_safe',
+ 'ucs_timerq_for_each_expired',
+ 'UCT_IB_IFACE_VERBS_FOREACH_RXWQE',
+ 'UCT_RC_VERBS_IFACE_FOREACH_TXWQE',
+ 'UCS_INIT_ONCE',
+ 'UCS_TEST_F',
+ 'UCX_PERF_TEST_FOREACH']
+StatementMacros : []
+TypenameMacros: ['khash_t', 'ucs_array_t']
+WhitespaceSensitiveMacros: []
+
+# CPP
+Standard: Cpp11
+AccessModifierOffset: -4
+AlwaysBreakTemplateDeclarations: false
+BreakBeforeInheritanceComma: false
+BreakInheritanceList: AfterColon
+BreakConstructorInitializers: AfterColon
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+Cpp11BracedListStyle: true
+Cpp11BracedListLineBreak: true
+FixNamespaceComments: true
+NamespaceIndentation: None
+UseTab: Never
+ReflowComments: false
+SortIncludes: false
+IncludeCategories:
+ - Regex: '^"'
+ Priority: 1
+ - Regex: '^<'
+ Priority: 2
+SortUsingDeclarations: true
+TabWidth: 4
+SpacesInAngles: false
+SpacesBeforeTrailingComments: 1
+SpaceAfterTemplateKeyword: false
+SpacesInContainerLiterals: false
+---
+# Java
+Language: Java
+DisableFormat: true
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index a1bcd11741f..7af5d16c0d9 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -18,6 +18,7 @@ A clear and concise description of what the bug is.
### Setup and versions
- OS version (e.g Linux distro) + CPU architecture (x86_64/aarch64/ppc64le/...)
- `cat /etc/issue` or `cat /etc/redhat-release` + `uname -a`
+ - For Nvidia Bluefield SmartNIC include `cat /etc/mlnx-release` (the string identifies software and firmware setup)
- For RDMA/IB/RoCE related issues:
- Driver version:
- `rpm -q rdma-core` or `rpm -q libibverbs`
diff --git a/.gitignore b/.gitignore
index 0ffccf4851a..c712c60b0b4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -88,3 +88,8 @@ GTAGS
*.swp
compile_commands.json
.idea/
+.externalToolBuilders
+.classpath
+.vscode
+src/tools/vfs/ucx_vfs
+test/apps/test_init_mt
diff --git a/Makefile.am b/Makefile.am
index 64364aa4fbf..073ead37243 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -13,7 +13,12 @@
EXTRA_DIST =
ACLOCAL_AMFLAGS = -I config/m4
-noinst_HEADERS = src/uct/api/uct.h src/uct/api/uct_def.h src/uct/api/tl.h
+noinst_HEADERS = \
+ src/uct/api/uct.h \
+ src/uct/api/v2/uct_v2.h \
+ src/uct/api/uct_def.h \
+ src/uct/api/tl.h
+
doxygen_doc_files = $(noinst_HEADERS)
doc_dir = $(pkgdatadir)/doc
@@ -37,6 +42,7 @@ SUBDIRS += $(UCG_SUBDIR)
endif
SUBDIRS += \
+ src/tools/vfs \
src/tools/info \
src/tools/perf \
src/tools/profile \
diff --git a/NEWS b/NEWS
index 720173b1e94..d6f490cceb2 100644
--- a/NEWS
+++ b/NEWS
@@ -1,17 +1,186 @@
#
-## Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED.
+## Copyright (C) Mellanox Technologies Ltd. 2001-2021. ALL RIGHTS RESERVED.
## Copyright (C) UT-Battelle, LLC. 2014-2019. ALL RIGHTS RESERVED.
-## Copyright (C) ARM Ltd. 2017-2020. ALL RIGHTS RESERVED.
+## Copyright (C) ARM Ltd. 2017-2021. ALL RIGHTS RESERVED.
##
## See file LICENSE for terms.
##
#
## Current
-### Features: TBD
-#### UCX Core TBD
-#### UCX Java (API Preview) TBD
-### Bugfixes: TBD
+### Features:
+#### UCP
+* Added API for querying UCP library attributes
+### Bugfixes:
+
+## 1.10.0 (March 9, 2021)
+### Features:
+#### Core
+* Added support for Nvidia HPC SDK
+* Added support for latest PGI and Clang
+* Added support for ROCM-3.7+ (warning generated if older version detected)
+* Added support for GCC11
+#### Architecture
+* Added Arm SVE memcpy()
+* Redesigned Arm WFE support
+* Improved clear_cache performance for Arm
+* Added architecture detection for Zhaoxin CPU
+#### CI
+* Added release builds on CUDA 11
+* Enabled performance validation in gtest
+* Added new OS for release CI
+#### UCP
+* Added locality awareness to the transport selection logic for GPU devices
+* Added put/offload/short and put/offload/zcopy protocols
+* Added receive message nbx routine
+* Reworked AM implementation and API, which adds support for RNDV semantics
+* Added support for multi-lane connection manager over TCP
+* Added support for printing AM tls with info log level
+* Implement flush and destroy for UCT EPs on UCP worker
+* Reduced UCP request size
+* Added support for keepalive protocol
+* Added support for multi-fragment protocol
+* Added implementation for protocol progress for eager, bcopy, and multicopy
+* Improved selection logic for protocol selection
+* Added new protocols for UCP get operation
+* Added bcopy protocols with support for GPU memory
+* Added RNDV protocol implementation for GPU devices (CUDA, ROCm)
+* Set SOCKADDR_CM_ENABLE=y by default
+* Added support for fast-path short with new tag protocols
+* Added a new parameter to control the CM listener's backlog
+* Added support for sending AM RTS over short message protocol
+* Added support for shared memory multi-lane when CM is used
+* Added missing async locks
+#### UCT
+* Added API for keepalive_timeout value
+* Added uct_completion.status
+* Allowed transports to access multiple mem_types
+* Removed status arg from uct_completion_callback_t
+* Restructured uct_mem_alloc/uct_md_mem_alloc to use mem_type
+* Updated documentation for uct_listener_params
+* Lowered the log level for certain network errors
+* Added cuda_copy wakeup feature
+* Added wakeup support for shared memory
+#### UCS
+* Added "inf" and "auto" values to time units
+* Added on-stack constructors for array and string buffer
+* Added ucs_ptr_map_t data structure
+* Added bool CSWAP
+* Improved logging
+* Added optimization for namespace processing
+* Fixes for connection matching functionality
+#### CUDA
+* Added support for global IPC cache
+#### RDMA CORE (IB, ROCE, etc.)
+* Added support for auto detection of adaptive routing settings
+* Added an option to poll TX CQ every progress iteration
+* Added local and remote addresses to the reject error message
+* Added support for UAR allocation with non-cacheable memory type
+* Added support for multiple flush cancel without completion
+* Added async events callback support
+* Added detection for ConnectX-6, ConnectX-7 and BlueField-1/2 devices
+* Added support for connection matching for UD
+* Added a check for AM ordering
+* Added better support for non-4K MTU values
+#### Java (preview)
+* Added support for a different javadoc executable path for different java versions
+* Added UCS memory type constants
+* Added support for building on Java 10+
+* Added support for io-vector datatype.
+* Removed libjucx from packages.
+#### Tests
+* Added CI for CUDA 11
+* Added test_ucp_sockaddr_protocols.stream_short
+* Reimplemented tests using NBX API
+* Added flush(cancel) test
+* Added memory_wait mode to perftest
+* Added support for clang 10
+* Refactored RMA and atomic tests, add memtype support
+* Added test for uct_md_mem_query()
+* Added request interrupt support
+* Added support for connection manager fallbacks
+* Added new ucp request test checking for leaks from the ptr_map
+#### Documentation
+* Added glossaries
+
+### Bugfixes:
+#### Portability
+* Fixes in print functions to use format string like PRIx64, etc.
+* Fixes for Arm v8 cross compilation support
+#### Continuous Integration:
+* Fixes in Github release flow
+* Fixes in docker image
+#### Packaging
+* Removed deb package dependencies
+* Fixes in SPEC to make the RPM relocatable
+#### Documentation
+* Fixes in documentation for ucp_am_recv_data_nbx
+* Fixes in quick start example
+* Fixes in installation instruction
+* Fixes and updates in author list
+#### Tests
+* Fixes for failures under valgrind runtime
+* Fixes in mmap tests for 0-length RMA
+* Fixes in definition of LAST_WQE wait timeout
+* Fixes in ROCm for mem_buffer test
+* Fixes in test name printing format
+* Fixes in tcp_sockcm test
+#### UCP
+* Fixes in worker cleanup flow
+* Fixes in RNDV RTS flow
+* Fix in length check condition for RMA PUT short
+* Fixes in handling failures from AM Bcopy
+* Fix in a release flow of deferred data
+* Fixes for invalid ID and handling of status in RNDV
+* Fixes in short active message reply protocol
+#### CUDA
+* Fixes in managed memory support
+* Fixes in topology detection
+#### RDMA CORE (IB, ROCE, etc.)
+* Fixes in assert definitions
+* Fixes in printing an error about invalid AM Bcopy length for UD
+* Fixes for thread safety support
+* Fixes to get ROCE device name according to GID
+* Fixes for SL selection
+* Fixes in create STRICT_ORDER key
+* Fixes addressing performance degradation in UD transport due to excess async events
+* Fixes in QP destroy
+* Fixes for CQ creation failure using old Verbs API
+#### UGNI
+* Fixing disable logic in config
+* Fixing clang 11 warnings
+#### Java
+* Fixes in build dependencies
+* Fixes in constructing UcpRequest object on error
+* Fixes in exception handling on endpoint closure request
+* Fixes for segfault in UcpErrorHandler
+#### UCP
+* Fixes in datatype support for get_zcopy RNDV
+* Fixes in connection manager disconnect
+* Fixes in assert definitions
+* Fixes in completion flow for failed EP
+* Fixes in flush error handling flow
+* Fixes in latency calculations for wireup protocol
+* Fixes in offload completion with inlined data
+* Fixes in unpacking flow
+* Fixes in error handling for various protocols
+#### UCT
+* Fixes in flush TX
+* Fixes in checks for enabling GPU Direct RDMA
+#### UCS
+* Fixes for crashes on incorrect value set in config
+* Fixes in ptr_array
+* Fixes in maximal size for ucs_snprintf_safe()
+* Fixes in compilation warning
+* Fixes in ucs_aarch64_dsb(_op) definition
+#### TCP
+* Fixes in default route interface confirmation flow
+* Fixes in PUT protocol
+* Fixes in max connection limit and improved error reporting
+#### UCM
+* Fixing crash on prevent unload
+* Fixes in libucm_rocm
+* Fixes for a few race conditions
## 1.9.0 (September 19, 2020)
### Features:
diff --git a/README b/README
deleted file mode 100644
index 231ac05a56a..00000000000
--- a/README
+++ /dev/null
@@ -1,184 +0,0 @@
-
-
-
-
-
-
- * [Unified Communication X](#unified-communication-x)
- * [Using UCX](#using-ucx)
- * [Building and Running Internal Unit Tests](#building-and-running-internal-unit-tests)
- * [UCX Performance Test](#ucx-performance-test)
- * [Our Community](#our-community)
- * [Licenses](#licenses)
- * [Contributor Agreement and Guidelines](#contributor-agreement-and-guidelines)
- * [UCX Publications](#ucx-publications)
- * [UCX Architecture](#ucx-architecture)
- * [Supported Transports](#supported-transports)
- * [Supported CPU Architectures](#supported-cpu-architectures)
-
-
-
-# Unified Communication X
-
-Unified Communication X (UCX) provides an optimized communication
-layer for Message Passing ([MPI](https://www.mpi-forum.org/)),
-[PGAS](http://www.pgas.org/)/[OpenSHMEM](http://www.openshmem.org/)
-libraries and RPC/data-centric applications.
-
-UCX utilizes high-speed networks for inter-node communication, and
-shared memory mechanisms for efficient intra-node communication.
-
-## Using UCX
-
-### Release Builds
-
-Building UCX is typically a combination of running "configure" and "make".
-Execute the following commands to install the UCX system from within the
-directory at the top of the tree:
-
-```sh
-$ ./autogen.sh
-$ ./contrib/configure-release --prefix=/where/to/install
-$ make -j8
-$ make install
-```
-
-NOTE: Compiling support for various networks or other specific hardware may
-require additional command line flags when running configure.
-
-### Developer Builds
-
-```bash
-$ ./autogen.sh
-$ ./contrib/configure-devel --prefix=$PWD/install-debug
-```
-
-*** NOTE: Developer builds of UCX typically include a large performance
-penalty at run-time because of extra debugging code.
-
-### Running internal unit tests
-
-```sh
-$ make -C test/gtest test
-```
-
-### Build RPM package
-```bash
-$ contrib/buildrpm.sh -s -b
-```
-
-### Build DEB package
-```bash
-$ dpkg-buildpackage -us -uc
-```
-
-### Build Doxygen documentation
-```bash
-$ make docs
-```
-
-### OpenMPI and OpenSHMEM installation with UCX
-[Wiki page](http://github.com/openucx/ucx/wiki/OpenMPI-and-OpenSHMEM-installation-with-UCX)
-
-### MPICH installation with UCX
-[Wiki page](http://github.com/openucx/ucx/wiki/MPICH-installation-with-UCX)
-
-### UCX Performance Test
-
-Start server:
-
-```sh
-$ ./src/tools/perf/ucx_perftest -c 0
-```
-
-Connect client:
-
-```sh
-$ ./src/tools/perf/ucx_perftest -t tag_lat -c 1
-```
-Note: the `-c` flag sets CPU affinity. If running both commands on same host, make sure you set the affinity to different CPU cores.
-
-## Our Community
-
-* [Project Website](http://www.openucx.org/)
-* [ReadTheDocs](https://openucx.readthedocs.io/en/master/)
-* [Github](http://www.github.com/openucx/ucx/)
-* [Software Releases](http://www.github.com/openucx/ucx/releases)
-* [Mailing List](https://elist.ornl.gov/mailman/listinfo/ucx-group)
-* [Twitter](https://twitter.com/openucx)
-
-## Licenses
-
-UCX is licensed as:
-
-* [BSD3](LICENSE)
-
-## Contributor Agreement and Guidelines
-
-In order to contribute to UCX, please sign up with an appropriate
-[Contributor Agreement](http://www.openucx.org/license/).
-
-Follow these
-[instructions](https://github.com/openucx/ucx/wiki/Guidance-for-contributors)
-when submitting contributions and changes.
-
-## UCX Publications
-
-To reference UCX in a publication, please use the following entry:
-
-```bibtex
-@inproceedings{shamis2015ucx,
- title={UCX: an open source framework for HPC network APIs and beyond},
- author={Shamis, Pavel and Venkata, Manjunath Gorentla and Lopez, M Graham and Baker, Matthew B and Hernandez, Oscar and Itigin, Yossi and Dubman, Mike and Shainer, Gilad and Graham, Richard L and Liss, Liran and others},
- booktitle={2015 IEEE 23rd Annual Symposium on High-Performance Interconnects},
- pages={40--43},
- year={2015},
- organization={IEEE}
-}
-```
-
-To reference the UCX website:
-
-```bibtex
-@misc{openucx-website,
- title = {{The Unified Communication X Library}},
- key = {{{The Unified Communication X Library}},
- howpublished = {{\url{http://www.openucx.org}}}
-}
-```
-
-## UCX Architecture
-
-![](docs/doxygen/Architecture.png)
-
-| Component | Role | Description |
-| :---: | :---: | --- |
-| UCP | Protocol | Implements high-level abstractions such as tag-matching, streams, connection negotiation and establishment, multi-rail, and handling different memory types |
-| UCT | Transport | Implements low-level communication primitives such as active messages, remote memory access, and atomic operations |
-| UCS | Services | A collection of data structures, algorithms, and system utilities for common use |
-| UCM | Memory | Intercepts memory allocation and release events, used by the memory registration cache |
-
-## Supported Transports
-
-* [Infiniband](https://www.infinibandta.org/)
-* [Omni-Path](https://www.intel.com/content/www/us/en/high-performance-computing-fabrics/omni-path-driving-exascale-computing.html)
-* [RoCE](http://www.roceinitiative.org/)
-* [Cray Gemini and Aries](https://www.cray.com/)
-* [CUDA](https://developer.nvidia.com/cuda-zone)
-* [ROCm](https://rocm.github.io/)
-* Shared Memory
- * posix, sysv, [cma](https://dl.acm.org/citation.cfm?id=2616532), [knem](http://knem.gforge.inria.fr/), and [xpmem](https://github.com/hjelmn/xpmem)
-* TCP/IP
-
-## Supported CPU Architectures
-
-* [x86_64](https://en.wikipedia.org/wiki/X86-64)
-* [Power8/9](https://www.ibm.com/support/knowledgecenter/en/POWER9/p9hdx/POWER9welcome.htm)
-* [Arm v8](https://www.arm.com/products/silicon-ip-cpu)
diff --git a/README b/README
new file mode 120000
index 00000000000..42061c01a1c
--- /dev/null
+++ b/README
@@ -0,0 +1 @@
+README.md
\ No newline at end of file
diff --git a/README.md b/README.md
deleted file mode 120000
index 100b93820ad..00000000000
--- a/README.md
+++ /dev/null
@@ -1 +0,0 @@
-README
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 00000000000..f02d791cb21
--- /dev/null
+++ b/README.md
@@ -0,0 +1,220 @@
+
+
+
+
+# Unified Communication X
+
+Unified Communication X (UCX) is an
+[award winning](https://losalamosreporter.com/2019/11/07/nine-los-alamos-national-laboratory-projects-win-rd-100-awards),
+optimized, production-proven communication framework for modern, high-bandwidth
+and low-latency networks.
+
+UCX exposes a set of abstract communication primitives that utilize the best of
+available hardware resources and offloads. These include RDMA (InfiniBand and RoCE),
+TCP, GPUs, shared memory, and network atomic operations.
+
+Please visit our [documentation site](https://openucx.readthedocs.io/en/master)
+ for more details.
+
+
+
+
+
+* [Using UCX](#using-ucx)
+* [Known issues](#known-issues)
+* [Architecture](#architecture)
+* [Supported Transports](#supported-transports)
+* [Supported CPU Architectures](#supported-cpu-architectures)
+* [Licenses](#licenses)
+* [Our Community](#our-community)
+* [Contributor Agreement and Guidelines](#contributor-agreement-and-guidelines)
+* [Publications](#publications)
+
+
+
+
+## Using UCX
+
+### Release Builds
+
+Building UCX is typically a combination of running "configure" and "make".
+Execute the following commands to install the UCX system from within the
+directory at the top of the tree:
+
+```sh
+$ ./autogen.sh
+$ ./contrib/configure-release --prefix=/where/to/install
+$ make -j8
+$ make install
+```
+
+NOTE: Compiling support for various networks or other specific hardware may
+require additional command line flags when running configure.
+
+### Developer Builds
+
+```bash
+$ ./autogen.sh
+$ ./contrib/configure-devel --prefix=$PWD/install-debug
+```
+
+*** NOTE: Developer builds of UCX typically include a large performance
+penalty at run-time because of extra debugging code.
+
+### Build RPM package
+```bash
+$ contrib/buildrpm.sh -s -b
+```
+
+### Build DEB package
+```bash
+$ dpkg-buildpackage -us -uc
+```
+
+### Build Doxygen documentation
+```bash
+$ make docs
+```
+
+### OpenMPI and OpenSHMEM installation with UCX
+[Wiki page](http://github.com/openucx/ucx/wiki/OpenMPI-and-OpenSHMEM-installation-with-UCX)
+
+### MPICH installation with UCX
+[Wiki page](http://github.com/openucx/ucx/wiki/MPICH-installation-with-UCX)
+
+### UCX Performance Test
+
+Start server:
+
+```sh
+$ ./src/tools/perf/ucx_perftest -c 0
+```
+Connect client:
+
+```sh
+$ ./src/tools/perf/ucx_perftest -t tag_lat -c 1
+```
+> NOTE: the `-c` flag sets CPU affinity. If running both commands on the same host, make sure you set the affinity to different CPU cores.
+
+
+### Running internal unit tests
+
+```sh
+$ make -C test/gtest test
+```
+
+
+
+
+## Known issues
+* UCX version 1.8.0 has a bug that may cause data corruption when TCP transport
+ is used in conjunction with shared memory transport. It is advised to upgrade
+ to UCX version 1.9.0 and above. UCX versions released before 1.8.0 don't have
+ this bug.
+
+* UCX may hang with glibc versions 2.25-2.29 due to known bugs in the
+ pthread_rwlock functions. When such hangs occur, one of the UCX threads gets
+ stuck in pthread_rwlock_rdlock (which is called by ucs_rcache_get), even
+ though no other thread holds the lock. A related issue is reported in
+ [glibc Bug 23844](https://sourceware.org/bugzilla/show_bug.cgi?id=23844).
+ If this issue occurs, it is advised to use glibc version provided with your
+ OS distribution or build glibc from source using versions less than 2.25 or
+ greater than 2.29.
+
+
+
+
+## Architecture
+
+![](docs/doxygen/Architecture.png)
+
+| Component | Role | Description |
+| :---: | :---: | --- |
+| UCP | Protocol | Implements high-level abstractions such as tag-matching, streams, connection negotiation and establishment, multi-rail, and handling different memory types |
+| UCT | Transport | Implements low-level communication primitives such as active messages, remote memory access, and atomic operations |
+| UCS | Services | A collection of data structures, algorithms, and system utilities for common use |
+| UCM | Memory | Intercepts memory allocation and release events, used by the memory registration cache |
+
+
+
+## Supported Transports
+
+* [Infiniband](https://www.infinibandta.org/)
+* [Omni-Path](https://www.intel.com/content/www/us/en/high-performance-computing-fabrics/omni-path-driving-exascale-computing.html)
+* [RoCE](http://www.roceinitiative.org/)
+* [Cray Gemini and Aries](https://www.cray.com/)
+* [CUDA](https://developer.nvidia.com/cuda-zone)
+* [ROCm](https://rocm.github.io/)
+* Shared Memory
+ * posix, sysv, [cma](https://dl.acm.org/citation.cfm?id=2616532), [knem](http://knem.gforge.inria.fr/), and [xpmem](https://github.com/hjelmn/xpmem)
+* TCP/IP
+
+
+
+## Supported CPU Architectures
+
+* [x86_64](https://en.wikipedia.org/wiki/X86-64)
+* [Power8/9](https://www.ibm.com/support/knowledgecenter/en/POWER9/p9hdx/POWER9welcome.htm)
+* [Arm v8](https://www.arm.com/products/silicon-ip-cpu)
+
+
+
+## Licenses
+
+UCX is licensed as:
+
+* [BSD3](LICENSE)
+
+
+
+## Our Community
+
+* [Project Website](http://www.openucx.org/)
+* [ReadTheDocs](https://openucx.readthedocs.io/en/master/)
+* [Github](http://www.github.com/openucx/ucx/)
+* [Software Releases](http://www.github.com/openucx/ucx/releases)
+* [Mailing List](https://elist.ornl.gov/mailman/listinfo/ucx-group)
+* [Twitter](https://twitter.com/openucx)
+
+
+
+## Contributor Agreement and Guidelines
+
+In order to contribute to UCX, please sign up with an appropriate
+[Contributor Agreement](http://www.openucx.org/license/).
+
+Follow these
+[instructions](https://github.com/openucx/ucx/wiki/Guidance-for-contributors)
+when submitting contributions and changes.
+
+## Publications
+
+To reference UCX in a publication, please use the following entry:
+
+```bibtex
+@inproceedings{shamis2015ucx,
+ title={UCX: an open source framework for HPC network APIs and beyond},
+ author={Shamis, Pavel and Venkata, Manjunath Gorentla and Lopez, M Graham and Baker, Matthew B and Hernandez, Oscar and Itigin, Yossi and Dubman, Mike and Shainer, Gilad and Graham, Richard L and Liss, Liran and others},
+ booktitle={2015 IEEE 23rd Annual Symposium on High-Performance Interconnects},
+ pages={40--43},
+ year={2015},
+ organization={IEEE}
+}
+```
+
+To reference the UCX website:
+
+```bibtex
+@misc{openucx-website,
+ title = {{The Unified Communication X Library}},
+ key = {{The Unified Communication X Library}},
+ howpublished = {{\url{http://www.openucx.org}}}
+}
+```
diff --git a/bindings/java/pom.xml.in b/bindings/java/pom.xml.in
index b4311840aab..5bdeb959e4a 100644
--- a/bindings/java/pom.xml.in
+++ b/bindings/java/pom.xml.in
@@ -58,7 +58,7 @@
1.8
-
+
${java.home}/../bin/javadoc
@@ -78,7 +78,7 @@
1.9
-
+
${java.home}/../bin/javadoc
@@ -105,12 +105,11 @@
maven-compiler-plugin
- 1.9
- 1.9
+ 1.8
+ 1.8
-h
${native.dir}
- --add-exports java.base/sun.nio.ch=ALL-UNNAMED
@@ -395,6 +394,7 @@
maven-javadoc-plugin
3.2.0
+ 8
true
all,-missing
diff --git a/bindings/java/src/main/java/org/openucx/jucx/UcxException.java b/bindings/java/src/main/java/org/openucx/jucx/UcxException.java
index 8fb3554473c..4686d96d9e1 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/UcxException.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/UcxException.java
@@ -10,6 +10,8 @@
*/
public class UcxException extends RuntimeException {
+ private int status;
+
public UcxException() {
super();
}
@@ -17,4 +19,16 @@ public UcxException() {
public UcxException(String message) {
super(message);
}
+
+ /** Creates an exception carrying a message and a numeric status code. */
+ public UcxException(String message, int status) {
+ super(message);
+ this.status = status;
+ }
+
+ /** Status of this exception (the int field default, 0, when constructed without
+ * a status); compare with {@link org.openucx.jucx.ucs.UcsConstants.STATUS}. */
+ public int getStatus() {
+ return status;
+ }
}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/UcxNativeStruct.java b/bindings/java/src/main/java/org/openucx/jucx/UcxNativeStruct.java
index 2fd71cbfd54..51d0cf3151c 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/UcxNativeStruct.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/UcxNativeStruct.java
@@ -10,6 +10,30 @@
*/
public abstract class UcxNativeStruct {
private Long nativeId;
+ /**
+ * To use for hashCode and equals
+ */
+ private Long nativeIdCached;
+
+    // Equal iff same runtime class and same cached native id; uses a null-safe
+    // comparison because nativeIdCached is null until an id has been assigned.
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        UcxNativeStruct that = (UcxNativeStruct) o;
+        return java.util.Objects.equals(this.nativeIdCached, that.nativeIdCached);
+    }
+
+    // Hash of the cached native id; 0 while no id has been assigned.
+    @Override
+    public int hashCode() {
+        return java.util.Objects.hashCode(nativeIdCached);
+    }
/**
* Getter for native pointer as long.
@@ -19,11 +43,24 @@ public Long getNativeId() {
return nativeId;
}
+    // Primitive overload: a positive id is stored and cached; any other value only clears nativeId.
+    private void setNativeId(long nativeId) {
+        boolean valid = nativeId > 0;
+        this.nativeId = valid ? Long.valueOf(nativeId) : null;
+        if (valid) {
+            this.nativeIdCached = nativeId;
+        }
+    }
+
protected void setNativeId(Long nativeId) {
if (nativeId != null && nativeId < 0) {
throw new UcxException("UcxNativeStruct.setNativeId: invalid native pointer: "
+ nativeId);
}
+
+ if (nativeIdCached == null) {
+ this.nativeIdCached = nativeId;
+ }
this.nativeId = nativeId;
}
}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/UcxUtils.java b/bindings/java/src/main/java/org/openucx/jucx/UcxUtils.java
index 8f43bf0be82..5f7a4639b9c 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/UcxUtils.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/UcxUtils.java
@@ -5,38 +5,27 @@
package org.openucx.jucx;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
import java.nio.ByteBuffer;
public class UcxUtils {
- private static final Constructor> directBufferConstructor;
-
- static {
- try {
- Class> classDirectByteBuffer = Class.forName("java.nio.DirectByteBuffer");
- directBufferConstructor = classDirectByteBuffer.getDeclaredConstructor(long.class,
- int.class);
- directBufferConstructor.setAccessible(true);
- } catch (Exception e) {
- throw new UcxException(e.getMessage());
- }
- }
+ private UcxUtils() { }
/**
* Returns view of underlying memory region as a ByteBuffer.
* @param address - address of start of memory region
*/
- public static ByteBuffer getByteBufferView(long address, int length)
- throws IllegalAccessException, InvocationTargetException, InstantiationException {
- return (ByteBuffer)directBufferConstructor.newInstance(address, length);
+ public static ByteBuffer getByteBufferView(long address, long length) {
+ return getByteBufferViewNative(address, length);
}
/**
* Returns native address of the current position of a direct byte buffer.
*/
public static long getAddress(ByteBuffer buffer) {
- return ((sun.nio.ch.DirectBuffer) buffer).address() + buffer.position();
+ return getAddressNative(buffer) + buffer.position();
}
+
+ private static native long getAddressNative(ByteBuffer buffer);
+ private static native ByteBuffer getByteBufferViewNative(long address, long length);
}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/examples/UcxBenchmark.java b/bindings/java/src/main/java/org/openucx/jucx/examples/UcxBenchmark.java
index 26636e7f7aa..acb4ec525ec 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/examples/UcxBenchmark.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/examples/UcxBenchmark.java
@@ -92,9 +92,7 @@ protected static void createContextAndWorker() {
}
protected static double getBandwithGbits(long nanoTimeDelta, long size) {
- double sizeInGigabits = (double)size * 8.0 / 1e9;
- double secondsElapsed = nanoTimeDelta / 1e9;
- return sizeInGigabits / secondsElapsed;
+ return (double)size * 8.0 / nanoTimeDelta;
}
protected static void closeResources() throws IOException {
diff --git a/bindings/java/src/main/java/org/openucx/jucx/examples/UcxReadBWBenchmarkReceiver.java b/bindings/java/src/main/java/org/openucx/jucx/examples/UcxReadBWBenchmarkReceiver.java
index cc1b79c9558..288d9e6339b 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/examples/UcxReadBWBenchmarkReceiver.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/examples/UcxReadBWBenchmarkReceiver.java
@@ -42,14 +42,6 @@ public static void main(String[] args) throws Exception {
.setConnectionRequest(connRequest.get())
.setPeerErrorHandlingMode());
- // Temporary workaround until new connection establishment protocol in UCX.
- for (int i = 0; i < 10; i++) {
- worker.progress();
- try {
- Thread.sleep(10);
- } catch (Exception ignored) { }
- }
-
ByteBuffer recvBuffer = ByteBuffer.allocateDirect(4096);
UcpRequest recvRequest = worker.recvTaggedNonBlocking(recvBuffer, null);
@@ -72,13 +64,13 @@ public static void main(String[] args) throws Exception {
UcpMemory recvMemory = context.memoryMap(allocationParams);
resources.push(recvMemory);
ByteBuffer data = UcxUtils.getByteBufferView(recvMemory.getAddress(),
- (int)Math.min(Integer.MAX_VALUE, totalSize));
+ Math.min(Integer.MAX_VALUE, totalSize));
for (int i = 0; i < numIterations; i++) {
final int iterNum = i;
UcpRequest getRequest = endpoint.getNonBlocking(remoteAddress, remoteKey,
- recvMemory.getAddress(), totalSize,
+ recvMemory.getAddress(), remoteSize,
new UcxCallback() {
- long startTime = System.nanoTime();
+ final long startTime = System.nanoTime();
@Override
public void onSuccess(UcpRequest request) {
@@ -95,16 +87,8 @@ public void onSuccess(UcpRequest request) {
data.put(0, (byte)1);
}
- ByteBuffer sendBuffer = ByteBuffer.allocateDirect(100);
- sendBuffer.asCharBuffer().put("DONE");
-
- UcpRequest sent = endpoint.sendTaggedNonBlocking(sendBuffer, null);
- worker.progressRequest(sent);
-
UcpRequest closeRequest = endpoint.closeNonBlockingFlush();
worker.progressRequest(closeRequest);
- // Close request won't be return to pull automatically, since there's no callback.
- resources.push(closeRequest);
closeResources();
}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/examples/UcxReadBWBenchmarkSender.java b/bindings/java/src/main/java/org/openucx/jucx/examples/UcxReadBWBenchmarkSender.java
index 9c60206c11a..9aab66a521b 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/examples/UcxReadBWBenchmarkSender.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/examples/UcxReadBWBenchmarkSender.java
@@ -5,13 +5,12 @@
package org.openucx.jucx.examples;
-import org.openucx.jucx.UcxCallback;
-import org.openucx.jucx.ucp.UcpRequest;
+import org.openucx.jucx.UcxException;
+import org.openucx.jucx.ucp.*;
import org.openucx.jucx.UcxUtils;
-import org.openucx.jucx.ucp.UcpEndpoint;
-import org.openucx.jucx.ucp.UcpEndpointParams;
-import org.openucx.jucx.ucp.UcpMemory;
+import org.openucx.jucx.ucs.UcsConstants;
+import java.net.ConnectException;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
@@ -28,12 +27,19 @@ public static void main(String[] args) throws Exception {
String serverHost = argsMap.get("s");
UcpEndpoint endpoint = worker.newEndpoint(new UcpEndpointParams()
.setPeerErrorHandlingMode()
+ .setErrorHandler((ep, status, errorMsg) -> {
+ if (status == UcsConstants.STATUS.UCS_ERR_CONNECTION_RESET) {
+ throw new ConnectException(errorMsg);
+ } else {
+ throw new UcxException(errorMsg);
+ }
+ })
.setSocketAddress(new InetSocketAddress(serverHost, serverPort)));
UcpMemory memory = context.memoryMap(allocationParams);
resources.push(memory);
ByteBuffer data = UcxUtils.getByteBufferView(memory.getAddress(),
- (int)Math.min(Integer.MAX_VALUE, totalSize));
+ Math.min(Integer.MAX_VALUE, totalSize));
// Send worker and memory address and Rkey to receiver.
ByteBuffer rkeyBuffer = memory.getRemoteKeyBuffer();
@@ -49,22 +55,23 @@ public static void main(String[] args) throws Exception {
// Send memory metadata and wait until receiver will finish benchmark.
endpoint.sendTaggedNonBlocking(sendData, null);
- ByteBuffer recvBuffer = ByteBuffer.allocateDirect(4096);
- UcpRequest recvRequest = worker.recvTaggedNonBlocking(recvBuffer,
- new UcxCallback() {
- @Override
- public void onSuccess(UcpRequest request) {
- System.out.println("Received a message:");
- System.out.println(recvBuffer.asCharBuffer().toString().trim());
- }
- });
-
- worker.progressRequest(recvRequest);
- UcpRequest closeRequest = endpoint.closeNonBlockingFlush();
- worker.progressRequest(closeRequest);
- resources.push(closeRequest);
+ try {
+ while (true) {
+ if (worker.progress() == 0) {
+ worker.waitForEvents();
+ }
+ }
+ } catch (ConnectException ignored) {
+ } catch (Exception ex) {
+ System.err.println(ex.getMessage());
+ }
- closeResources();
+ try {
+ worker.progressRequest(endpoint.closeNonBlockingForce());
+ } catch (Exception ignored) {
+ } finally {
+ closeResources();
+ }
}
}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpAmData.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpAmData.java
new file mode 100755
index 00000000000..fb0a8588609
--- /dev/null
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpAmData.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2021. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+package org.openucx.jucx.ucp;
+
+import org.openucx.jucx.UcxCallback;
+import org.openucx.jucx.UcxException;
+import org.openucx.jucx.ucs.UcsConstants;
+
+import java.io.Closeable;
+import java.io.IOException;
+
+/**
+ * Wrapper over received Active Message data. It holds one of:
+ * - An internal UCX data descriptor: call {@link UcpAmData#receive} to receive the actual data.
+ * - The actual data: call {@link UcpAmData#close()} when it is no longer needed.
+ */
+public class UcpAmData implements Closeable {
+ private final UcpWorker worker;
+ private final long address; // data address or descriptor handle, depending on flags
+ private final long length;
+ private final long flags;
+
+ private UcpAmData(UcpWorker worker, long address, long length, long flags) { // NOTE(review): not called from Java here - presumably constructed from JNI; confirm
+ this.worker = worker;
+ this.address = address;
+ this.length = length;
+ this.flags = flags;
+ }
+
+ @Override
+ public String toString() {
+ return "UcpAmData{" +
+ "address=" + Long.toHexString(address) +
+ ", length=" + length +
+ ", received=" + isDataValid() +
+ '}';
+ }
+
+ /**
+ * Whether the actual data is already received (DATA flag set); if not, call {@link UcpAmData#receive(long, UcxCallback)}.
+ */
+ public boolean isDataValid() {
+ return (flags & UcpConstants.UCP_AM_RECV_ATTR_FLAG_DATA) != 0;
+ }
+
+ /**
+ * Get the address of the received data. Throws {@link UcxException} if the data is not received yet.
+ */
+ public long getDataAddress() {
+ if (!isDataValid()) {
+ throw new UcxException("Data is not received yet.");
+ }
+ return address;
+ }
+
+ public long getLength() {
+ return length;
+ }
+
+ /**
+ * Get the UCX data descriptor handle to pass to {@link UcpWorker#recvAmDataNonBlocking}.
+ */
+ public long getDataHandle() {
+ return address; // same field as getDataAddress(), returned without the isDataValid() check
+ }
+
+ public UcpRequest receive(long resultAddress, UcxCallback callback) { // requests the full length into resultAddress
+ return worker.recvAmDataNonBlocking(getDataHandle(), resultAddress,
+ length, callback, UcsConstants.MEMORY_TYPE.UCS_MEMORY_TYPE_UNKNOWN);
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (isDataValid()) { // release only when actual data is held; otherwise this is a no-op
+ worker.amDataRelease(address);
+ }
+ }
+}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpAmRecvCallback.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpAmRecvCallback.java
new file mode 100755
index 00000000000..cbb1a78cae9
--- /dev/null
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpAmRecvCallback.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2021. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+package org.openucx.jucx.ucp;
+
+/**
+ * Callback to process an incoming Active Message sent by the {@link UcpEndpoint#sendAmNonBlocking}
+ * routine.
+ *
+ * The callback is always called from the progress context, therefore calling
+ * {@link UcpWorker#progress()} is not allowed. It is recommended to define
+ * callbacks with relatively short execution time to avoid blocking of
+ * communication progress.
+ */
+public interface UcpAmRecvCallback {
+
+ /**
+ * The callback is always called from the progress context, therefore calling
+ * {@link UcpWorker#progress()} is not allowed. It is recommended to define
+ * callbacks with relatively short execution time to avoid blocking of communication progress.
+ * @param headerAddress - User defined active message header. Can be 0.
+ * @param headerSize - Active message header length in bytes. If this
+ * value is 0, the headerAddress is undefined and should not be accessed.
+ * @param amData - {@link UcpAmData} wrapper that holds either the received data or
+ * a data descriptor to receive with {@link UcpWorker#recvAmDataNonBlocking}
+ * @param replyEp - Endpoint, which can be used for reply to this message.
+ * @return - {@link org.openucx.jucx.ucs.UcsConstants.STATUS#UCS_OK} -
+ * the data will not persist after the callback returns.
+ * {@link org.openucx.jucx.ucs.UcsConstants.STATUS#UCS_INPROGRESS} -
+ * the data will persist after the callback has returned.
+ * To free the memory, call {@link UcpAmData#close()}.
+ */
+ int onReceive(long headerAddress, long headerSize,
+ UcpAmData amData, UcpEndpoint replyEp);
+}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpConnectionRequest.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpConnectionRequest.java
index f0e7529accf..db0ebd97155 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpConnectionRequest.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpConnectionRequest.java
@@ -6,13 +6,25 @@
import org.openucx.jucx.UcxNativeStruct;
+import java.net.InetSocketAddress;
+
/**
* A server-side handle to incoming connection request. Can be used to create an
* endpoint which connects back to the client.
*/
public class UcpConnectionRequest extends UcxNativeStruct {
- private UcpConnectionRequest(long nativeId) {
+ private InetSocketAddress clientAddress;
+
+ /**
+ * The address of the remote client that sent the connection request to the server.
+ */
+ public InetSocketAddress getClientAddress() {
+ return clientAddress;
+ }
+
+ private UcpConnectionRequest(long nativeId, InetSocketAddress clientAddress) {
setNativeId(nativeId);
+ this.clientAddress = clientAddress;
}
}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpConstants.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpConstants.java
index e47a25af70b..800d45f71f8 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpConstants.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpConstants.java
@@ -38,6 +38,7 @@ public class UcpConstants {
static long UCP_FEATURE_AMO64;
static long UCP_FEATURE_WAKEUP;
static long UCP_FEATURE_STREAM;
+ static long UCP_FEATURE_AM;
/**
* UCP worker parameters field mask.
@@ -96,8 +97,7 @@ public class UcpConstants {
/**
* The enumeration is used to specify the behavior of UcpEndpoint closeNonBlocking.
*/
- static int UCP_EP_CLOSE_MODE_FORCE;
- static int UCP_EP_CLOSE_MODE_FLUSH;
+ static int UCP_EP_CLOSE_FLAG_FORCE;
/**
* UCP memory mapping parameters field mask.
@@ -105,6 +105,8 @@ public class UcpConstants {
static long UCP_MEM_MAP_PARAM_FIELD_ADDRESS;
static long UCP_MEM_MAP_PARAM_FIELD_LENGTH;
static long UCP_MEM_MAP_PARAM_FIELD_FLAGS;
+ static long UCP_MEM_MAP_PARAM_FIELD_PROT;
+ static long UCP_MEM_MAP_PARAM_FIELD_MEMORY_TYPE;
/**
* The enumeration list describes the memory mapping flags.
@@ -113,11 +115,45 @@ public class UcpConstants {
static long UCP_MEM_MAP_ALLOCATE;
static long UCP_MEM_MAP_FIXED;
+ /**
+ * The enumeration list describes the memory mapping protections supported by
+ * {@link UcpContext#memoryMap(UcpMemMapParams)}
+ */
+ public static long UCP_MEM_MAP_PROT_LOCAL_READ;
+ public static long UCP_MEM_MAP_PROT_LOCAL_WRITE;
+ public static long UCP_MEM_MAP_PROT_REMOTE_READ;
+ public static long UCP_MEM_MAP_PROT_REMOTE_WRITE;
+
/**
* The enumeration defines behavior of
* {@link UcpEndpoint#recvStreamNonBlocking(long, long, long, UcxCallback)} function.
*/
public static long UCP_STREAM_RECV_FLAG_WAITALL;
+ /**
+ * Indicates that the data provided in {@link UcpAmRecvCallback} callback
+ * can be held by the user. If {@link org.openucx.jucx.ucs.UcsConstants.STATUS#UCS_INPROGRESS}
+ * is returned from the callback, the data parameter will persist and the user has to call
+ * {@link UcpWorker#amDataRelease } when data is no longer needed. This flag is
+ * mutually exclusive with {@link UcpConstants#UCP_AM_RECV_ATTR_FLAG_RNDV}.
+ */
+ public static long UCP_AM_RECV_ATTR_FLAG_DATA;
+
+ /**
+ * Indicates that the arriving data was sent using rendezvous protocol.
+ * In this case the amData parameter of {@link UcpAmRecvCallback#onReceive} holds
+ * the internal UCP descriptor, which can be used for obtaining the actual
+ * data by calling the {@link UcpWorker#recvAmDataNonBlocking} routine. This flag is mutually
+ * exclusive with {@link UcpConstants#UCP_AM_RECV_ATTR_FLAG_DATA}.
+ */
+ public static long UCP_AM_RECV_ATTR_FLAG_RNDV;
+
+ /**
+ * Flags dictate the behavior of {@link UcpEndpoint#sendAmNonBlocking} routine.
+ */
+ public static long UCP_AM_SEND_FLAG_REPLY;
+ public static long UCP_AM_SEND_FLAG_EAGER;
+ public static long UCP_AM_SEND_FLAG_RNDV;
+
private static native void loadConstants();
}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpContext.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpContext.java
index 50cf4de6df5..767817cc43b 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpContext.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpContext.java
@@ -44,6 +44,14 @@ public void close() {
this.setNativeId(null);
}
+ /**
+ * @return - bit mask of the memory types supported by this context; to test a specific
+ * type, see {@link org.openucx.jucx.ucs.UcsConstants.MEMORY_TYPE#isMemTypeSupported}
+ */
+ public long getMemoryTypesMask() {
+ return queryMemTypesNative(getNativeId());
+ }
+
/**
* Creates new UcpWorker on current context.
*/
@@ -83,6 +91,8 @@ public UcpMemory memoryMap(UcpMemMapParams params) {
private static native long createContextNative(UcpParams params);
+ private static native long queryMemTypesNative(long contextId);
+
private static native void cleanupContextNative(long contextId);
private native UcpMemory memoryMapNative(long conetxtId, UcpMemMapParams params);
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpEndpoint.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpEndpoint.java
index cdb5c751d27..c3fabe3c113 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpEndpoint.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpEndpoint.java
@@ -9,11 +9,20 @@
import java.io.Closeable;
import java.nio.ByteBuffer;
+import static org.openucx.jucx.ucs.UcsConstants.MEMORY_TYPE.UCS_MEMORY_TYPE_UNKNOWN;
+
public class UcpEndpoint extends UcxNativeStruct implements Closeable {
- private final String paramsString;
+ private String paramsString;
// Keep a reference to errorHandler to prevent it from GC and have valid ref
// from JNI error handler.
- private final UcpEndpointErrorHandler errorHandler;
+ private UcpEndpointErrorHandler errorHandler;
+
+ /**
+ * To construct reply endpoint for Active Messages from JNI.
+ */
+ private UcpEndpoint(long nativeId) {
+ setNativeId(nativeId);
+ }
@Override
public String toString() {
@@ -81,9 +90,15 @@ public UcpRequest putNonBlocking(ByteBuffer src, long remoteAddress, UcpRemoteKe
public UcpRequest putNonBlocking(long localAddress, long size,
long remoteAddress, UcpRemoteKey remoteKey,
UcxCallback callback) {
+ return putNonBlocking(localAddress, size, remoteAddress, remoteKey, callback,
+ UCS_MEMORY_TYPE_UNKNOWN);
+ }
+ public UcpRequest putNonBlocking(long localAddress, long size,
+ long remoteAddress, UcpRemoteKey remoteKey,
+ UcxCallback callback, int memoryType) {
return putNonBlockingNative(getNativeId(), localAddress,
- size, remoteAddress, remoteKey.getNativeId(), callback);
+ size, remoteAddress, remoteKey.getNativeId(), callback, memoryType);
}
/**
@@ -136,8 +151,16 @@ public UcpRequest getNonBlocking(long remoteAddress, UcpRemoteKey remoteKey,
public UcpRequest getNonBlocking(long remoteAddress, UcpRemoteKey remoteKey,
long localAddress, long size, UcxCallback callback) {
+ return getNonBlocking(remoteAddress, remoteKey, localAddress, size, callback,
+ UCS_MEMORY_TYPE_UNKNOWN);
+ }
+
+ public UcpRequest getNonBlocking(long remoteAddress, UcpRemoteKey remoteKey,
+ long localAddress, long size, UcxCallback callback,
+ int memoryType) {
+
return getNonBlockingNative(getNativeId(), remoteAddress, remoteKey.getNativeId(),
- localAddress, size, callback);
+ localAddress, size, callback, memoryType);
}
/**
@@ -192,7 +215,13 @@ public UcpRequest sendTaggedNonBlocking(ByteBuffer sendBuffer, long tag, UcxCall
public UcpRequest sendTaggedNonBlocking(long localAddress, long size,
long tag, UcxCallback callback) {
- return sendTaggedNonBlockingNative(getNativeId(), localAddress, size, tag, callback);
+ return sendTaggedNonBlocking(localAddress, size, tag, callback, UCS_MEMORY_TYPE_UNKNOWN);
+ }
+
+ public UcpRequest sendTaggedNonBlocking(long localAddress, long size,
+ long tag, UcxCallback callback, int memoryType) {
+ return sendTaggedNonBlockingNative(getNativeId(), localAddress, size, tag, callback,
+ memoryType);
}
/**
@@ -207,11 +236,17 @@ public UcpRequest sendTaggedNonBlocking(ByteBuffer sendBuffer, UcxCallback callb
* Iov version of non blocking send operaation
*/
public UcpRequest sendTaggedNonBlocking(long[] localAddresses, long[] sizes,
- long tag, UcxCallback callback) {
+ long tag, UcxCallback callback, int memoryType) {
UcxParams.checkArraySizes(localAddresses, sizes);
return sendTaggedIovNonBlockingNative(getNativeId(), localAddresses, sizes,
- tag, callback);
+ tag, callback, memoryType);
+ }
+
+ public UcpRequest sendTaggedNonBlocking(long[] localAddresses, long[] sizes,
+ long tag, UcxCallback callback) {
+
+ return sendTaggedNonBlocking(localAddresses, sizes, tag, callback, UCS_MEMORY_TYPE_UNKNOWN);
}
/**
@@ -222,14 +257,27 @@ public UcpRequest sendTaggedNonBlocking(long[] localAddresses, long[] sizes,
* completion of the send operation.
*/
public UcpRequest sendStreamNonBlocking(long localAddress, long size, UcxCallback callback) {
- return sendStreamNonBlockingNative(getNativeId(), localAddress, size, callback);
+ return sendStreamNonBlocking(localAddress, size, callback, UCS_MEMORY_TYPE_UNKNOWN);
+ }
+
+ public UcpRequest sendStreamNonBlocking(long localAddress, long size, UcxCallback callback,
+ int memoryType) {
+ return sendStreamNonBlockingNative(getNativeId(), localAddress, size, callback, memoryType);
}
public UcpRequest sendStreamNonBlocking(long[] localAddresses, long[] sizes,
UcxCallback callback) {
UcxParams.checkArraySizes(localAddresses, sizes);
- return sendStreamIovNonBlockingNative(getNativeId(), localAddresses, sizes, callback);
+ return sendStreamNonBlocking(localAddresses, sizes, callback, UCS_MEMORY_TYPE_UNKNOWN);
+ }
+
+ public UcpRequest sendStreamNonBlocking(long[] localAddresses, long[] sizes,
+ UcxCallback callback, int memoryType) {
+ UcxParams.checkArraySizes(localAddresses, sizes);
+
+ return sendStreamIovNonBlockingNative(getNativeId(), localAddresses, sizes, callback,
+ memoryType);
}
public UcpRequest sendStreamNonBlocking(ByteBuffer buffer, UcxCallback callback) {
@@ -244,17 +292,30 @@ public UcpRequest sendStreamNonBlocking(ByteBuffer buffer, UcxCallback callback)
* the UCP library will invoke the call-back when data is in the receive buffer
* and ready for application access.
*/
+ public UcpRequest recvStreamNonBlocking(long localAddress, long size, long flags,
+ UcxCallback callback, int memoryType) {
+ return recvStreamNonBlockingNative(getNativeId(), localAddress, size, flags, callback,
+ memoryType);
+ }
+
public UcpRequest recvStreamNonBlocking(long localAddress, long size, long flags,
UcxCallback callback) {
- return recvStreamNonBlockingNative(getNativeId(), localAddress, size, flags, callback);
+ return recvStreamNonBlocking(localAddress, size, flags, callback, UCS_MEMORY_TYPE_UNKNOWN);
}
public UcpRequest recvStreamNonBlocking(long[] localAddresses, long[] sizes, long flags,
- UcxCallback callback) {
+ UcxCallback callback, int memoryType) {
UcxParams.checkArraySizes(localAddresses, sizes);
return recvStreamIovNonBlockingNative(getNativeId(), localAddresses, sizes, flags,
- callback);
+ callback, memoryType);
+ }
+
+ public UcpRequest recvStreamNonBlocking(long[] localAddresses, long[] sizes, long flags,
+ UcxCallback callback) {
+
+ return recvStreamNonBlocking(localAddresses, sizes, flags, callback,
+ UCS_MEMORY_TYPE_UNKNOWN);
}
public UcpRequest recvStreamNonBlocking(ByteBuffer buffer, long flags, UcxCallback callback) {
@@ -262,6 +323,32 @@ public UcpRequest recvStreamNonBlocking(ByteBuffer buffer, long flags, UcxCallba
callback);
}
+ /**
+ * Send Active Message.
+ * @param activeMessageId - Active Message id. Specifies which callback registered by
+ * {@link UcpWorker#setAmRecvHandler(int, UcpAmRecvCallback)} to run.
+ * @param headerAddress - User defined Active Message header. A 0 (NULL) address is
+ * allowed if no header is needed; in this case headerLength must be 0.
+ * @param headerLength - Active message header length in bytes.
+ * @param dataAddress - Pointer to the data to be sent to the target node
+ * of the Active Message.
+ * @param dataLength - Data length size in bytes
+ * @param callback - Callback to call on a completion.
+ */
+ public UcpRequest sendAmNonBlocking(int activeMessageId, long headerAddress, long headerLength,
+ long dataAddress, long dataLength, long flags,
+ UcxCallback callback, int memoryType) {
+ return sendAmNonBlockingNative(getNativeId(), activeMessageId,
+ headerAddress, headerLength, dataAddress, dataLength, flags, callback, memoryType);
+ }
+
+ public UcpRequest sendAmNonBlocking(int activeMessageId, long headerAddress, long headerLength,
+ long dataAddress, long dataLength, long flags,
+ UcxCallback callback) {
+ return sendAmNonBlocking(activeMessageId, headerAddress, headerLength,
+ dataAddress, dataLength, flags, callback, UCS_MEMORY_TYPE_UNKNOWN);
+ }
+
/**
* This routine flushes all outstanding AMO and RMA communications on this endpoint.
* All the AMO and RMA operations issued on this endpoint prior to this call
@@ -279,14 +366,14 @@ public UcpRequest flushNonBlocking(UcxCallback callback) {
* both (local and remote) sides to avoid undefined behavior.
*/
public UcpRequest closeNonBlockingForce() {
- return closeNonBlockingNative(getNativeId(), UcpConstants.UCP_EP_CLOSE_MODE_FORCE);
+ return closeNonBlockingNative(getNativeId(), UcpConstants.UCP_EP_CLOSE_FLAG_FORCE);
}
/**
* Releases the endpoint by scheduling flushes on all outstanding operations.
*/
public UcpRequest closeNonBlockingFlush() {
- return closeNonBlockingNative(getNativeId(), UcpConstants.UCP_EP_CLOSE_MODE_FLUSH);
+ return closeNonBlockingNative(getNativeId(), 0);
}
private native long createEndpointNative(UcpEndpointParams params, long workerId);
@@ -297,7 +384,8 @@ public UcpRequest closeNonBlockingFlush() {
private static native UcpRequest putNonBlockingNative(long enpointId, long localAddress,
long size, long remoteAddr,
- long ucpRkeyId, UcxCallback callback);
+ long ucpRkeyId, UcxCallback callback,
+ int memoryType);
private static native void putNonBlockingImplicitNative(long enpointId, long localAddress,
long size, long remoteAddr,
@@ -305,7 +393,8 @@ private static native void putNonBlockingImplicitNative(long enpointId, long loc
private static native UcpRequest getNonBlockingNative(long enpointId, long remoteAddress,
long ucpRkeyId, long localAddress,
- long size, UcxCallback callback);
+ long size, UcxCallback callback,
+ int memoryType);
private static native void getNonBlockingImplicitNative(long enpointId, long remoteAddress,
long ucpRkeyId, long localAddress,
@@ -313,29 +402,41 @@ private static native void getNonBlockingImplicitNative(long enpointId, long rem
private static native UcpRequest sendTaggedNonBlockingNative(long enpointId, long localAddress,
long size, long tag,
- UcxCallback callback);
+ UcxCallback callback,
+ int memoryType);
private static native UcpRequest sendTaggedIovNonBlockingNative(long enpointId,
long[] localAddresses,
long[] sizes, long tag,
- UcxCallback callback);
+ UcxCallback callback,
+ int memoryType);
private static native UcpRequest sendStreamNonBlockingNative(long enpointId, long localAddress,
- long size, UcxCallback callback);
+ long size, UcxCallback callback,
+ int memoryType);
private static native UcpRequest sendStreamIovNonBlockingNative(long enpointId,
long[] localAddresses,
long[] sizes,
- UcxCallback callback);
+ UcxCallback callback,
+ int memoryType);
private static native UcpRequest recvStreamNonBlockingNative(long enpointId, long localAddress,
long size, long flags,
- UcxCallback callback);
+ UcxCallback callback,
+ int memoryType);
private static native UcpRequest recvStreamIovNonBlockingNative(long enpointId,
long[] localAddresses,
long[] sizes, long flags,
- UcxCallback callback);
+ UcxCallback callback,
+ int memoryType);
+
+ private static native UcpRequest sendAmNonBlockingNative(long enpointId, int activeMessageId,
+ long headerAddress, long headerLength,
+ long dataAddress, long dataLength,
+ long flags, UcxCallback callback,
+ int memoryType);
private static native UcpRequest flushNonBlockingNative(long enpointId, UcxCallback callback);
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpEndpointErrorHandler.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpEndpointErrorHandler.java
index 855e5ef5f46..e53e24d6642 100755
--- a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpEndpointErrorHandler.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpEndpointErrorHandler.java
@@ -15,5 +15,5 @@ public interface UcpEndpointErrorHandler {
* all subsequent operations on this ep will fail with
* the error code passed in {@code status}.
*/
- void onError(UcpEndpoint ep, int status, String errorMsg);
+ void onError(UcpEndpoint ep, int status, String errorMsg) throws Exception;
}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpEndpointParams.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpEndpointParams.java
index bde0f080216..1ac8a96eb39 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpEndpointParams.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpEndpointParams.java
@@ -30,7 +30,8 @@ public String toString() {
}
if (connectionRequest != 0) {
- result += "connectionRequest,";
+ result += "connectionRequest" +
+ ((clientAddress != null) ? clientAddress.toString() : "");
}
return result;
}
@@ -43,6 +44,7 @@ public UcpEndpointParams clear() {
flags = 0;
socketAddress = null;
connectionRequest = 0;
+ clientAddress = null;
errorHandler = null;
return this;
}
@@ -55,6 +57,8 @@ public UcpEndpointParams clear() {
private InetSocketAddress socketAddress;
+ private InetSocketAddress clientAddress;
+
private long connectionRequest;
UcpEndpointErrorHandler errorHandler;
@@ -107,6 +111,9 @@ public UcpEndpointParams setNoLoopbackMode() {
public UcpEndpointParams setConnectionRequest(UcpConnectionRequest connectionRequest) {
this.fieldMask |= UcpConstants.UCP_EP_PARAM_FIELD_CONN_REQUEST;
this.connectionRequest = connectionRequest.getNativeId();
+ if (connectionRequest.getClientAddress() != null) {
+ this.clientAddress = connectionRequest.getClientAddress();
+ }
return this;
}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpListener.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpListener.java
index 63c0ac003b1..00ea35eda4e 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpListener.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpListener.java
@@ -17,13 +17,18 @@
public class UcpListener extends UcxNativeStruct implements Closeable {
private InetSocketAddress address;
+ private UcpListenerConnectionHandler connectionHandler;
public UcpListener(UcpWorker worker, UcpListenerParams params) {
if (params.getSockAddr() == null) {
throw new UcxException("UcpListenerParams.sockAddr must be non-null.");
}
+ if (params.connectionHandler == null) {
+ throw new UcxException("Connection handler must be set");
+ }
+ this.connectionHandler = params.connectionHandler;
+ this.address = params.getSockAddr();
setNativeId(createUcpListener(params, worker.getNativeId()));
- address = params.getSockAddr();
}
/**
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpListenerParams.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpListenerParams.java
index 28153a0772d..94fdc8c96ad 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpListenerParams.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpListenerParams.java
@@ -14,12 +14,13 @@ public class UcpListenerParams extends UcxParams {
public UcpListenerParams clear() {
super.clear();
sockAddr = null;
+ connectionHandler = null;
return this;
}
private InetSocketAddress sockAddr;
- private UcpListenerConnectionHandler connectionHandler;
+ UcpListenerConnectionHandler connectionHandler;
/**
* An address, on which {@link UcpListener} would bind.
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpMemMapParams.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpMemMapParams.java
index 9ce96b94089..7dcbcb1a110 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpMemMapParams.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpMemMapParams.java
@@ -8,6 +8,8 @@
public class UcpMemMapParams extends UcxParams {
private long flags;
+ private long prot;
+ private int memType;
private long address;
private long length;
@@ -17,6 +19,8 @@ public UcpMemMapParams clear() {
address = 0;
length = 0;
flags = 0;
+ prot = 0;
+ memType = 0;
return this;
}
@@ -69,4 +73,39 @@ public UcpMemMapParams fixed() {
flags |= UcpConstants.UCP_MEM_MAP_FIXED;
return this;
}
+
+ /**
+ * Memory protection mode, e.g. {@link UcpConstants#UCP_MEM_MAP_PROT_LOCAL_READ}
+ * This value is optional. If it's not set, the {@link UcpContext#memoryMap(UcpMemMapParams)}
+ * routine will consider the flags as set to
+ * UCP_MEM_MAP_PROT_LOCAL_READ|UCP_MEM_MAP_PROT_LOCAL_WRITE|
+ * UCP_MEM_MAP_PROT_REMOTE_READ|UCP_MEM_MAP_PROT_REMOTE_WRITE.
+ */
+ public UcpMemMapParams setProtection(long protection) {
+ this.fieldMask |= UcpConstants.UCP_MEM_MAP_PARAM_FIELD_PROT;
+ this.prot = protection;
+ return this;
+ }
+
+ /**
+ * Memory type (for possible memory types see
+ * {@link org.openucx.jucx.ucs.UcsConstants.MEMORY_TYPE})
+ * It is an optimization hint that avoids memory type detection for the mapped buffer.
+ * The meaning of this field depends on the operation type.
+ *
+ * - Memory allocation: ({@link UcpMemMapParams#allocate()} is set) This field
+ * specifies the type of memory to allocate. If it's not set
+ * {@link org.openucx.jucx.ucs.UcsConstants.MEMORY_TYPE#UCS_MEMORY_TYPE_HOST}
+ * will be assumed by default.
+ *
+ * - Memory registration: This field specifies the type of memory which is
+ * pointed by {@link UcpMemMapParams#setAddress(long)}. If it's not set,
+ * or set to {@link org.openucx.jucx.ucs.UcsConstants.MEMORY_TYPE#UCS_MEMORY_TYPE_UNKNOWN},
+ * the memory type will be detected internally.
+ */
+ public UcpMemMapParams setMemoryType(int memoryType) {
+ this.fieldMask |= UcpConstants.UCP_MEM_MAP_PARAM_FIELD_MEMORY_TYPE;
+ this.memType = memoryType;
+ return this;
+ }
}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpMemory.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpMemory.java
index 360b33f9e3a..1f6a9882405 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpMemory.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpMemory.java
@@ -27,11 +27,17 @@ public class UcpMemory extends UcxNativeStruct implements Closeable {
private long length;
+ private int memType;
+
/**
* To prevent construct outside of JNI.
*/
- private UcpMemory(long nativeId) {
+ private UcpMemory(long nativeId, UcpContext context, long address, long length, int memType) {
setNativeId(nativeId);
+ this.address = address;
+ this.length = length;
+ this.memType = memType;
+ this.context = context;
}
/**
@@ -96,6 +102,13 @@ public long getLength() {
return length;
}
+ /**
+ * Type of allocated memory.
+ */
+ public int getMemType() {
+ return memType;
+ }
+
private static native void unmapMemoryNative(long contextId, long memoryId);
private static native ByteBuffer getRkeyBufferNative(long contextId, long memoryId);
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpParams.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpParams.java
index d4ace227ed7..8797279e8e0 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpParams.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpParams.java
@@ -102,6 +102,15 @@ public UcpParams requestTagFeature() {
return this;
}
+ /**
+ * Request Active Message support feature.
+ */
+ public UcpParams requestAmFeature() {
+ this.fieldMask |= UcpConstants.UCP_PARAM_FIELD_FEATURES;
+ this.features |= UcpConstants.UCP_FEATURE_AM;
+ return this;
+ }
+
/**
* Request remote memory access support.
*/
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpRequest.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpRequest.java
index 2761f1f4559..916e92a2d2c 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpRequest.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpRequest.java
@@ -7,6 +7,7 @@
import org.openucx.jucx.UcxCallback;
import org.openucx.jucx.UcxNativeStruct;
+import org.openucx.jucx.ucs.UcsConstants;
import java.io.Closeable;
import java.nio.ByteBuffer;
@@ -15,15 +16,17 @@
* Request object, that returns by ucp operations (GET, PUT, SEND, etc.).
* Call {@link UcpRequest#isCompleted()} to monitor completion of request.
*/
-public class UcpRequest extends UcxNativeStruct implements Closeable {
+public class UcpRequest extends UcxNativeStruct {
private long recvSize;
private long senderTag;
- private UcpRequest(long nativeId) {
- setNativeId(nativeId);
- }
+ private int status = UcsConstants.STATUS.UCS_INPROGRESS;
+
+ private long iovVector;
+
+ private UcxCallback callback;
/**
* To initialize for failed and immediately completed requests.
@@ -49,23 +52,14 @@ public long getSenderTag() {
* @return whether this request is completed.
*/
public boolean isCompleted() {
- return (getNativeId() == null) || isCompletedNative(getNativeId());
+ return status != UcsConstants.STATUS.UCS_INPROGRESS;
}
/**
- * This routine releases the non-blocking request back to the library, regardless
- * of its current state. Communications operations associated with this request
- * will make progress internally, however no further notifications or callbacks
- * will be invoked for this request.
+ * @return status of the current request
*/
- @Override
- public void close() {
- if (getNativeId() != null) {
- closeRequestNative(getNativeId());
- }
+ public int getStatus() {
+ return status;
}
- private static native boolean isCompletedNative(long ucpRequest);
-
- private static native void closeRequestNative(long ucpRequest);
}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpWorker.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpWorker.java
index 8da85f2e4d2..83611a8c6d5 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpWorker.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpWorker.java
@@ -7,9 +7,12 @@
import java.io.Closeable;
import java.nio.ByteBuffer;
+import java.util.HashMap;
import org.openucx.jucx.*;
+import static org.openucx.jucx.ucs.UcsConstants.MEMORY_TYPE.UCS_MEMORY_TYPE_UNKNOWN;
+
/**
* UCP worker is an opaque object representing the communication context. The
* worker represents an instance of a local communication resource and the
@@ -31,6 +34,13 @@
*/
public class UcpWorker extends UcxNativeStruct implements Closeable {
+ /**
+ * To keep a reference to AmRecvCallback class to prevent it from GC.
+ * Maps an Active Message id to the {callback, worker} array that
+ * {@link #setAmRecvHandler} hands to the native layer.
+ */
+ private final HashMap<Integer, Object[]> amRecvHandlers = new HashMap<>();
+
+ /**
+ * Value returned by {@link #getMaxAmHeaderSize()}. Not assigned by the Java code
+ * visible here; presumably filled in by native code - TODO confirm.
+ */
+ private long maxAmHeaderSize = 0L;
+
public UcpWorker(UcpContext context, UcpWorkerParams params) {
setNativeId(createWorkerNative(params, context.getNativeId()));
}
@@ -53,20 +63,78 @@ public UcpListener newListener(UcpListenerParams params) {
public void close() {
releaseWorkerNative(getNativeId());
setNativeId(null);
+ amRecvHandlers.clear();
+ }
+
+ /**
+ * Maximal allowed header size for {@link UcpEndpoint#sendAmNonBlocking} routine.
+ *
+ * @return current value of the {@code maxAmHeaderSize} field (initialized to 0).
+ */
+ public long getMaxAmHeaderSize() {
+ return maxAmHeaderSize;
+ }
+
+ /**
+ * Registers {@code callback} as the receive handler for Active Messages that
+ * carry id {@code amId} on this worker. Passing {@code null} removes the handler
+ * for that id, exactly as {@link #removeAmRecvHandler(int)} does.
+ *
+ * @param amId - Active Message id the handler is bound to.
+ * @param callback - Active Message callback, or null to clear the one already set.
+ */
+ public void setAmRecvHandler(int amId, UcpAmRecvCallback callback) {
+ if (callback != null) {
+ // The same array instance is kept in the map and passed to the native layer.
+ Object[] handlerContext = new Object[] {callback, this};
+ amRecvHandlers.put(amId, handlerContext);
+ setAmRecvHandlerNative(getNativeId(), amId, handlerContext);
+ } else {
+ removeAmRecvHandler(amId);
+ }
+ }
+
+ /**
+ * Clears Active Message callback.
+ *
+ * @param amId - id whose entry is dropped from this worker's handler map and
+ * whose native handler is set to null.
+ */
+ public void removeAmRecvHandler(int amId) {
+ amRecvHandlers.remove(amId);
+ setAmRecvHandlerNative(getNativeId(), amId, null);
+ }
+
+ /**
+ * This routine releases data that persisted through an Active Message
+ * callback because that callback returned UCS_INPROGRESS.
+ *
+ * @param address - value forwarded unchanged to the native release call
+ * (presumably the address of the persisted data - confirm against callers).
+ */
+ public void amDataRelease(long address) {
+ amDataReleaseNative(getNativeId(), address);
}
+ /**
+ * This routine receives a message that is described by the data descriptor
+ * {@code dataDesc}, local address {@code address} and size {@code size} on a worker.
+ * The routine is non-blocking and therefore returns immediately.
+ * The receive operation is considered completed when the message is delivered to the buffer.
+ *
+ * @param callback - passed through to the native call with the other arguments.
+ * @param memoryType - memory type of the receive buffer; presumably one of the
+ * {@code UcsConstants.MEMORY_TYPE} values (confirm).
+ * @return the request object produced by the native call.
+ */
+ public UcpRequest recvAmDataNonBlocking(long dataDesc, long address, long size,
+ UcxCallback callback, int memoryType) {
+ return recvAmDataNonBlockingNative(getNativeId(), dataDesc, address, size, callback,
+ memoryType);
+ }
+
+
/**
* This routine explicitly progresses all communication operations on a worker.
* @return Non-zero if any communication was progressed, zero otherwise.
+ * @throws Exception propagated from the native progress call, which is declared
+ * to throw it.
*/
- public int progress() {
+ public int progress() throws Exception {
return progressWorkerNative(getNativeId());
}
/**
* Blocking progress for request until it's not completed.
*/
- public void progressRequest(UcpRequest request) {
+ public void progressRequest(UcpRequest request) throws Exception {
while (!request.isCompleted()) {
progress();
}
@@ -128,14 +196,20 @@ public UcpRequest recvTaggedNonBlocking(ByteBuffer recvBuffer, long tag, long ta
if (!recvBuffer.isDirect()) {
throw new UcxException("Recv buffer must be direct.");
}
- return recvTaggedNonBlockingNative(getNativeId(), UcxUtils.getAddress(recvBuffer),
+ return recvTaggedNonBlocking(UcxUtils.getAddress(recvBuffer),
recvBuffer.remaining(), tag, tagMask, callback);
}
public UcpRequest recvTaggedNonBlocking(long localAddress, long size, long tag, long tagMask,
UcxCallback callback) {
+ return recvTaggedNonBlocking(localAddress, size, tag, tagMask, callback,
+ UCS_MEMORY_TYPE_UNKNOWN);
+ }
+
+ /**
+ * Same receive as the overload without {@code memoryType} (which delegates here
+ * with UCS_MEMORY_TYPE_UNKNOWN), but forwards the caller's {@code memoryType}
+ * to the native call.
+ */
+ public UcpRequest recvTaggedNonBlocking(long localAddress, long size, long tag, long tagMask,
+ UcxCallback callback, int memoryType) {
return recvTaggedNonBlockingNative(getNativeId(), localAddress, size,
- tag, tagMask, callback);
+ tag, tagMask, callback, memoryType);
}
/**
@@ -150,10 +224,18 @@ public UcpRequest recvTaggedNonBlocking(ByteBuffer recvBuffer, UcxCallback callb
public UcpRequest recvTaggedNonBlocking(long[] localAddresses, long[] sizes,
long tag, long tagMask,
UcxCallback callback) {
+
+ return recvTaggedNonBlocking(localAddresses, sizes, tag, tagMask, callback,
+ UCS_MEMORY_TYPE_UNKNOWN);
+ }
+
+ /**
+ * Same receive as the overload without {@code memoryType} (which delegates here
+ * with UCS_MEMORY_TYPE_UNKNOWN). The two arrays are checked with
+ * {@code UcxParams.checkArraySizes} before the native call is made.
+ */
+ public UcpRequest recvTaggedNonBlocking(long[] localAddresses, long[] sizes,
+ long tag, long tagMask,
+ UcxCallback callback, int memoryType) {
UcxParams.checkArraySizes(localAddresses, sizes);
return recvTaggedIovNonBlockingNative(getNativeId(), localAddresses, sizes, tag,
- tagMask, callback);
+ tagMask, callback, memoryType);
}
/**
@@ -201,9 +283,15 @@ public UcpTagMessage tagProbeNonBlocking(long tag, long tagMask, boolean remove)
* If the receive operation cannot be stated the routine returns an error.
*/
public UcpRequest recvTaggedMessageNonBlocking(long address, long size, UcpTagMessage message,
- UcxCallback callback) {
+ UcxCallback callback, int memoryType) {
return recvTaggedMessageNonBlockingNative(getNativeId(), address, size,
- message.getNativeId(), callback);
+ message.getNativeId(), callback, memoryType);
+ }
+
+ /**
+ * Convenience overload: delegates to the {@code memoryType} variant with
+ * UCS_MEMORY_TYPE_UNKNOWN.
+ */
+ public UcpRequest recvTaggedMessageNonBlocking(long address, long size, UcpTagMessage message,
+ UcxCallback callback) {
+ return recvTaggedMessageNonBlocking(address, size, message, callback,
+ UCS_MEMORY_TYPE_UNKNOWN);
}
public UcpRequest recvTaggedMessageNonBlocking(ByteBuffer buffer, UcpTagMessage message,
@@ -212,6 +300,7 @@ public UcpRequest recvTaggedMessageNonBlocking(ByteBuffer buffer, UcpTagMessage
message, callback);
}
+
/**
* This routine tries to cancels an outstanding communication request. After
* calling this routine, the request will be in completed or canceled (but
@@ -222,6 +311,9 @@ public UcpRequest recvTaggedMessageNonBlocking(ByteBuffer buffer, UcpTagMessage
* case it is canceled the status argument is set to UCS_ERR_CANCELED.
*/
public void cancelRequest(UcpRequest request) {
+ // Reject a request that carries no native id before that id is handed to
+ // the native cancel call.
+ if (request.getNativeId() == null) {
+ throw new UcxException("Request is not valid");
+ }
cancelRequestNative(getNativeId(), request.getNativeId());
}
@@ -243,7 +335,7 @@ public ByteBuffer getAddress() {
return result;
}
- private static native long createWorkerNative(UcpWorkerParams params, long ucpContextId);
+ private native long createWorkerNative(UcpWorkerParams params, long ucpContextId);
private static native void releaseWorkerNative(long workerId);
@@ -251,7 +343,7 @@ public ByteBuffer getAddress() {
private static native void releaseAddressNative(long workerId, ByteBuffer addressId);
- private static native int progressWorkerNative(long workerId);
+ private static native int progressWorkerNative(long workerId) throws Exception;
private static native UcpRequest flushNonBlockingNative(long workerId, UcxCallback callback);
@@ -259,22 +351,35 @@ public ByteBuffer getAddress() {
private static native void signalWorkerNative(long workerId);
+ private static native void setAmRecvHandlerNative(long workerId, int amId,
+ Object[] callbackAndWorker);
+
+ private static native UcpRequest recvAmDataNonBlockingNative(long workerId, long dataDesc,
+ long address, long size,
+ UcxCallback callback,
+ int memoryType);
+
+ private static native void amDataReleaseNative(long workerId, long dataAddress);
+
private static native UcpRequest recvTaggedNonBlockingNative(long workerId, long localAddress,
long size, long tag, long tagMask,
- UcxCallback callback);
+ UcxCallback callback,
+ int memoryType);
private static native UcpRequest recvTaggedIovNonBlockingNative(long workerId,
long[] localAddresses,
long[] sizes,
long tag, long tagMask,
- UcxCallback callback);
+ UcxCallback callback,
+ int memoryType);
private static native UcpTagMessage tagProbeNonBlockingNative(long workerId, long tag,
long tagMask, boolean remove);
private static native UcpRequest recvTaggedMessageNonBlockingNative(long workerId, long address,
long size, long tagMsgId,
- UcxCallback callback);
+ UcxCallback callback,
+ int memoryType);
private static native void cancelRequestNative(long workerId, long requestId);
}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucs/UcsConstants.java b/bindings/java/src/main/java/org/openucx/jucx/ucs/UcsConstants.java
index b22f0b1da60..58a495c3a35 100644
--- a/bindings/java/src/main/java/org/openucx/jucx/ucs/UcsConstants.java
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucs/UcsConstants.java
@@ -6,6 +6,7 @@
package org.openucx.jucx.ucs;
import org.openucx.jucx.NativeLibs;
+import org.openucx.jucx.ucp.UcpContext;
public class UcsConstants {
static {
@@ -22,6 +23,79 @@ public static class ThreadMode {
public static int UCS_THREAD_MODE_MULTI;
}
+ /**
+ * Status codes.
+ * None of the fields has a Java initializer; the static block calls
+ * {@code load()} when this class is initialized.
+ */
+ public static class STATUS {
+ static {
+ load();
+ }
+
+ /* Operation completed successfully */
+ public static int UCS_OK;
+
+ /* Operation is queued and still in progress */
+ public static int UCS_INPROGRESS;
+
+ /* Failure codes */
+ public static int UCS_ERR_NO_MESSAGE;
+ public static int UCS_ERR_NO_RESOURCE;
+ public static int UCS_ERR_IO_ERROR;
+ public static int UCS_ERR_NO_MEMORY;
+ public static int UCS_ERR_INVALID_PARAM;
+ public static int UCS_ERR_UNREACHABLE;
+ public static int UCS_ERR_INVALID_ADDR;
+ public static int UCS_ERR_NOT_IMPLEMENTED;
+ public static int UCS_ERR_MESSAGE_TRUNCATED;
+ public static int UCS_ERR_NO_PROGRESS;
+ public static int UCS_ERR_BUFFER_TOO_SMALL;
+ public static int UCS_ERR_NO_ELEM;
+ public static int UCS_ERR_SOME_CONNECTS_FAILED;
+ public static int UCS_ERR_NO_DEVICE;
+ public static int UCS_ERR_BUSY;
+ public static int UCS_ERR_CANCELED;
+ public static int UCS_ERR_SHMEM_SEGMENT;
+ public static int UCS_ERR_ALREADY_EXISTS;
+ public static int UCS_ERR_OUT_OF_RANGE;
+ public static int UCS_ERR_TIMED_OUT;
+ public static int UCS_ERR_EXCEEDS_LIMIT;
+ public static int UCS_ERR_UNSUPPORTED;
+ public static int UCS_ERR_REJECTED;
+ public static int UCS_ERR_NOT_CONNECTED;
+ public static int UCS_ERR_CONNECTION_RESET;
+
+ /* Range markers for link and endpoint failure codes (judging by the names) */
+ public static int UCS_ERR_FIRST_LINK_FAILURE;
+ public static int UCS_ERR_LAST_LINK_FAILURE;
+ public static int UCS_ERR_FIRST_ENDPOINT_FAILURE;
+ public static int UCS_ERR_ENDPOINT_TIMEOUT;
+ public static int UCS_ERR_LAST_ENDPOINT_FAILURE;
+
+ public static int UCS_ERR_LAST;
+ }
+
+ /**
+ * Memory type identifiers. The fields carry no Java initializer; the static
+ * block calls {@code load()} when this class is initialized.
+ */
+ public static class MEMORY_TYPE {
+ static {
+ load();
+ }
+
+ /**
+ * Checks whether context's memory type mask
+ * (received via {@link UcpContext#getMemoryTypesMask()})
+ * supports particular memory type.
+ *
+ * @param mask - bit mask with one bit per memory type.
+ * @param memoryType - memory type value, used as the bit index into the mask.
+ * @return true if bit {@code memoryType} is set in {@code mask}; false if it is
+ * clear or if {@code memoryType} is not a valid bit index of a long.
+ */
+ public static boolean isMemTypeSupported(long mask, int memoryType) {
+ // A long shift only uses the low 6 bits of its distance, so an index outside
+ // [0, 63] would silently test the bit of a different memory type.
+ if (memoryType < 0 || memoryType >= Long.SIZE) {
+ return false;
+ }
+ return ((1L << memoryType) & mask) != 0;
+ }
+
+ public static int UCS_MEMORY_TYPE_HOST; // Default system memory
+ public static int UCS_MEMORY_TYPE_CUDA; // NVIDIA CUDA memory
+ public static int UCS_MEMORY_TYPE_CUDA_MANAGED; // NVIDIA CUDA managed (or unified) memory
+ public static int UCS_MEMORY_TYPE_ROCM; // AMD ROCM memory
+ public static int UCS_MEMORY_TYPE_ROCM_MANAGED; // AMD ROCM managed system memory
+ public static int UCS_MEMORY_TYPE_LAST;
+ public static int UCS_MEMORY_TYPE_UNKNOWN;
+ }
+
private static void load() {
NativeLibs.load();
loadConstants();
diff --git a/bindings/java/src/main/native/Makefile.am b/bindings/java/src/main/native/Makefile.am
index 67ce262f6a7..57f21ddd539 100644
--- a/bindings/java/src/main/native/Makefile.am
+++ b/bindings/java/src/main/native/Makefile.am
@@ -22,11 +22,11 @@ JUCX_GENERATED_H_FILES = org_openucx_jucx_ucp_UcpConstants.h \
org_openucx_jucx_ucp_UcpEndpoint.h \
org_openucx_jucx_ucp_UcpListener.h \
org_openucx_jucx_ucp_UcpMemory.h \
- org_openucx_jucx_ucp_UcpRequest.h \
org_openucx_jucx_ucp_UcpRemoteKey.h \
org_openucx_jucx_ucp_UcpWorker.h \
org_openucx_jucx_ucs_UcsConstants_ThreadMode.h \
- org_openucx_jucx_ucs_UcsConstants.h
+ org_openucx_jucx_ucs_UcsConstants.h \
+ org_openucx_jucx_UcxUtils.h
BUILT_SOURCES = $(JUCX_GENERATED_H_FILES)
@@ -40,7 +40,8 @@ MOSTLYCLEANFILES = $(JUCX_GENERATED_H_FILES) $(STAMP_FILE)
#
$(STAMP_FILE): \
$(javadir)/src/main/java/org/openucx/jucx/ucs/*.java \
- $(javadir)/src/main/java/org/openucx/jucx/ucp/*.java
+ $(javadir)/src/main/java/org/openucx/jucx/ucp/*.java \
+ $(javadir)/src/main/java/org/openucx/jucx/examples/*.java
$(MVNCMD) compile
touch $(STAMP_FILE)
@@ -58,12 +59,11 @@ libjucx_la_SOURCES = context.cc \
jucx_common_def.cc \
listener.cc \
memory.cc \
- request.cc \
ucp_constants.cc \
ucs_constants.cc \
worker.cc
-libjucx_la_CXXFLAGS = -fPIC -DPIC -Werror -std=gnu++98
+libjucx_la_CXXFLAGS = $(BASE_CXXFLAGS) -std=gnu++98
libjucx_la_LIBADD = $(topdir)/src/ucs/libucs.la \
$(topdir)/src/uct/libuct.la \
@@ -73,7 +73,7 @@ libjucx_la_LIBADD = $(topdir)/src/ucs/libucs.la \
libjucx_la_DEPENDENCIES = Makefile.am Makefile.in Makefile
# Compile Java source code and pack to jar
-$(jarfile):
+$(jarfile): libjucx.la
$(MVNCMD) package -DskipTests
package : $(jarfile)
diff --git a/bindings/java/src/main/native/context.cc b/bindings/java/src/main/native/context.cc
index e68eee74974..d9bb5e45b3d 100644
--- a/bindings/java/src/main/native/context.cc
+++ b/bindings/java/src/main/native/context.cc
@@ -5,9 +5,6 @@
#include "jucx_common_def.h"
#include "org_openucx_jucx_ucp_UcpContext.h"
-extern "C" {
-#include
-}
/**
* Iterates through entries of java's hash map and apply
@@ -91,11 +88,6 @@ Java_org_openucx_jucx_ucp_UcpContext_createContextNative(JNIEnv *env, jclass cls
field);
}
- ucp_params.field_mask |= UCP_PARAM_FIELD_REQUEST_INIT |
- UCP_PARAM_FIELD_REQUEST_SIZE;
- ucp_params.request_size = sizeof(struct jucx_context);
- ucp_params.request_init = jucx_request_init;
-
ucp_config_t *config = NULL;
ucs_status_t status;
@@ -160,30 +152,54 @@ Java_org_openucx_jucx_ucp_UcpContext_memoryMapNative(JNIEnv *env, jobject ctx,
params.flags = env->GetLongField(jucx_mmap_params, field);;
}
+ if (params.field_mask & UCP_MEM_MAP_PARAM_FIELD_PROT) {
+ field = env->GetFieldID(jucx_mmap_class, "prot", "J");
+ params.prot = env->GetLongField(jucx_mmap_params, field);
+ }
+
+ if (params.field_mask & UCP_MEM_MAP_PARAM_FIELD_MEMORY_TYPE) {
+ field = env->GetFieldID(jucx_mmap_class, "memType", "I");
+ params.memory_type =
+ static_cast<ucs_memory_type_t>(env->GetIntField(jucx_mmap_params, field));
+ }
+
ucs_status_t status = ucp_mem_map((ucp_context_h)ucp_context_ptr, &params, &memh);
if (status != UCS_OK) {
JNU_ThrowExceptionByStatus(env, status);
+ // A JNI exception stays pending until control returns to Java, so stop
+ // here: memh was not produced by a successful map and must not be
+ // queried or wrapped below.
+ return NULL;
}
- // Construct UcpMemory class
- jclass jucx_mem_cls = env->FindClass("org/openucx/jucx/ucp/UcpMemory");
- jmethodID constructor = env->GetMethodID(jucx_mem_cls, "<init>", "(J)V");
- jobject jucx_mem = env->NewObject(jucx_mem_cls, constructor, (native_ptr)memh);
+ ucp_mem_attr_t attr = {0};
- // Set UcpContext pointer
- field = env->GetFieldID(jucx_mem_cls, "context", "Lorg/openucx/jucx/ucp/UcpContext;");
- env->SetObjectField(jucx_mem, field, ctx);
+ attr.field_mask = UCP_MEM_ATTR_FIELD_ADDRESS | UCP_MEM_ATTR_FIELD_LENGTH |
+ UCP_MEM_ATTR_FIELD_MEM_TYPE;
- // Set address
- field = env->GetFieldID(jucx_mem_cls, "address", "J");
- env->SetLongField(jucx_mem, field, (native_ptr)memh->address);
+ status = ucp_mem_query(memh, &attr);
+ if (status != UCS_OK) {
+ // Without the attributes no UcpMemory can be built; release the mapping
+ // rather than leave it without an owner.
+ ucp_mem_unmap((ucp_context_h)ucp_context_ptr, memh);
+ JNU_ThrowExceptionByStatus(env, status);
+ return NULL;
+ }
- // Set length
- field = env->GetFieldID(jucx_mem_cls, "length", "J");
- env->SetLongField(jucx_mem, field, memh->length);
+ // Construct UcpMemory class
+ jclass jucx_mem_cls = env->FindClass("org/openucx/jucx/ucp/UcpMemory");
+ jmethodID constructor = env->GetMethodID(jucx_mem_cls, "<init>",
+ "(JLorg/openucx/jucx/ucp/UcpContext;JJI)V");
+ // Each variadic argument is cast to the JNI type named in the signature above.
+ jobject jucx_mem = env->NewObject(jucx_mem_cls, constructor, (native_ptr)memh, ctx,
+ (native_ptr)attr.address, (jlong)attr.length,
+ (jint)attr.mem_type);
/* Coverity thinks that memh is a leaked object here,
* but it's stored in a UcpMemory object */
/* coverity[leaked_storage] */
return jucx_mem;
}
+
+/**
+ * Queries the UCP context for UCP_ATTR_FIELD_MEMORY_TYPES and returns the
+ * resulting memory_types mask. On a failed query an exception is raised through
+ * JNU_ThrowExceptionByStatus and 0 is returned.
+ */
+JNIEXPORT jlong JNICALL
+Java_org_openucx_jucx_ucp_UcpContext_queryMemTypesNative(JNIEnv *env, jclass cls,
+ jlong ucp_context_ptr)
+{
+ ucp_context_attr_t params;
+
+ params.field_mask = UCP_ATTR_FIELD_MEMORY_TYPES;
+
+ ucs_status_t status = ucp_context_query((ucp_context_h)ucp_context_ptr, &params);
+ if (status != UCS_OK) {
+ JNU_ThrowExceptionByStatus(env, status);
+ // memory_types was not filled in by a successful query; do not read it.
+ return 0;
+ }
+
+ return params.memory_types;
+}
diff --git a/bindings/java/src/main/native/endpoint.cc b/bindings/java/src/main/native/endpoint.cc
index 9f1151b398e..e756e9c0355 100644
--- a/bindings/java/src/main/native/endpoint.cc
+++ b/bindings/java/src/main/native/endpoint.cc
@@ -110,9 +110,18 @@ JNIEXPORT jobject JNICALL
Java_org_openucx_jucx_ucp_UcpEndpoint_closeNonBlockingNative(JNIEnv *env, jclass cls,
jlong ep_ptr, jint mode)
{
- ucs_status_ptr_t request = ucp_ep_close_nb((ucp_ep_h)ep_ptr, mode);
+ // Closes the endpoint through ucp_ep_close_nbx, passing `mode` as the request
+ // flags, and returns the Java request object allocated for the operation
+ // (allocated with a NULL callback).
+ ucp_request_param_t param = {0};
- return process_request(request, NULL);
+ jobject jucx_request = jucx_request_allocate(env, NULL, &param, UCS_MEMORY_TYPE_UNKNOWN);
+
+ param.op_attr_mask |= UCP_OP_ATTR_FIELD_FLAGS;
+ param.flags = mode;
+ param.cb.send = jucx_request_callback;
+
+ ucs_status_ptr_t status = ucp_ep_close_nbx((ucp_ep_h)ep_ptr, &param);
+ process_request(env, jucx_request, status);
+
+ return jucx_request;
}
JNIEXPORT jobject JNICALL
@@ -138,14 +147,22 @@ JNIEXPORT jobject JNICALL
Java_org_openucx_jucx_ucp_UcpEndpoint_putNonBlockingNative(JNIEnv *env, jclass cls,
jlong ep_ptr, jlong laddr,
jlong size, jlong raddr,
- jlong rkey_ptr, jobject callback)
+ jlong rkey_ptr, jobject callback,
+ jint memory_type)
{
- ucs_status_ptr_t request = ucp_put_nb((ucp_ep_h)ep_ptr, (void *)laddr, size, raddr,
- (ucp_rkey_h)rkey_ptr, jucx_request_callback);
+ // Issues the put through ucp_put_nbx with jucx_request_callback as the send
+ // callback and returns the Java request object allocated for it.
+ ucp_request_param_t param = {0};
+ jobject jucx_request = jucx_request_allocate(env, callback, &param, memory_type);
+
+ param.cb.send = jucx_request_callback;
+
+ ucs_status_ptr_t status = ucp_put_nbx((ucp_ep_h)ep_ptr, (void *)laddr, size, raddr,
+ (ucp_rkey_h)rkey_ptr, &param);
- ucs_trace_req("JUCX: put_nb request %p, of size: %zu, raddr: %zu",
- request, size, raddr);
- return process_request(request, callback);
+ // Trace before the request is processed, as the sibling operations do.
+ ucs_trace_req("JUCX: put_nb request %p, of size: %zu, raddr: %zu", status, size, raddr);
+
+ process_request(env, jucx_request, status);
+
+ return jucx_request;
}
JNIEXPORT void JNICALL
@@ -166,14 +183,22 @@ JNIEXPORT jobject JNICALL
Java_org_openucx_jucx_ucp_UcpEndpoint_getNonBlockingNative(JNIEnv *env, jclass cls,
jlong ep_ptr, jlong raddr,
jlong rkey_ptr, jlong laddr,
- jlong size, jobject callback)
+ jlong size, jobject callback,
+ jint memory_type)
{
- ucs_status_ptr_t request = ucp_get_nb((ucp_ep_h)ep_ptr, (void *)laddr, size,
- raddr, (ucp_rkey_h)rkey_ptr, jucx_request_callback);
+ // Issues the get through ucp_get_nbx with jucx_request_callback as the send
+ // callback and returns the Java request object allocated for it.
+ ucp_request_param_t param = {0};
+
+ jobject jucx_request = jucx_request_allocate(env, callback, &param, memory_type);
+ param.cb.send = jucx_request_callback;
+
+ ucs_status_ptr_t status = ucp_get_nbx((ucp_ep_h)ep_ptr, (void *)laddr, size,
+ raddr, (ucp_rkey_h)rkey_ptr, &param);
ucs_trace_req("JUCX: get_nb request %p, raddr: %zu, size: %zu, result address: %zu",
- request, raddr, size, laddr);
- return process_request(request, callback);
+ status, raddr, size, laddr);
+
+ process_request(env, jucx_request, status);
+ return jucx_request;
}
JNIEXPORT void JNICALL
@@ -194,145 +219,201 @@ JNIEXPORT jobject JNICALL
Java_org_openucx_jucx_ucp_UcpEndpoint_sendTaggedNonBlockingNative(JNIEnv *env, jclass cls,
jlong ep_ptr, jlong addr,
jlong size, jlong tag,
- jobject callback)
+ jobject callback, jint memory_type)
{
- ucs_status_ptr_t request = ucp_tag_send_nb((ucp_ep_h)ep_ptr, (void *)addr, size,
- ucp_dt_make_contig(1), tag, jucx_request_callback);
+ // Sends `size` bytes at `addr` with tag `tag` through ucp_tag_send_nbx and
+ // returns the Java request object allocated for the operation.
+ ucp_request_param_t param = {0};
- ucs_trace_req("JUCX: send_tag_nb request %p, size: %zu, tag: %ld",
- request, size, tag);
- return process_request(request, callback);
+ jobject jucx_request = jucx_request_allocate(env, callback, &param, memory_type);
+
+ param.cb.send = jucx_request_callback;
+
+ ucs_status_ptr_t status = ucp_tag_send_nbx((ucp_ep_h)ep_ptr, (void *)addr, size, tag, &param);
+ ucs_trace_req("JUCX: send_tag_nb request %p, size: %zu, tag: %ld", status, size, tag);
+
+ process_request(env, jucx_request, status);
+ return jucx_request;
}
JNIEXPORT jobject JNICALL
Java_org_openucx_jucx_ucp_UcpEndpoint_sendTaggedIovNonBlockingNative(JNIEnv *env, jclass cls,
jlong ep_ptr, jlongArray addresses,
jlongArray sizes, jlong tag,
- jobject callback)
+ jobject callback, jint memory_type)
{
int iovcnt;
+ // Tagged send of an iov list through ucp_tag_send_nbx; returns the Java request
+ // object, or NULL when the iov could not be built.
+ ucp_request_param_t param = {0};
ucp_dt_iov_t* iovec = get_ucp_iov(env, addresses, sizes, iovcnt);
if (iovec == NULL) {
return NULL;
}
+ // Allocate the request only once the iov exists, so the early return above
+ // does not abandon whatever jucx_request_allocate sets up.
+ jobject jucx_request = jucx_request_allocate(env, callback, &param, memory_type);
- ucs_status_ptr_t request = ucp_tag_send_nb((ucp_ep_h)ep_ptr, iovec, iovcnt,
- ucp_dt_make_iov(), tag, jucx_request_callback);
+ jucx_request_set_iov(env, jucx_request, iovec);
- if (UCS_PTR_IS_PTR(request)) {
- struct jucx_context *ctx = (struct jucx_context *)request;
- ctx->iovec = iovec;
- } else {
- ucs_free(iovec);
- }
+ param.op_attr_mask |= UCP_OP_ATTR_FIELD_DATATYPE;
+ param.cb.send = jucx_request_callback;
+ param.datatype = ucp_dt_make_iov();
+
+ ucs_status_ptr_t status = ucp_tag_send_nbx((ucp_ep_h)ep_ptr, iovec, iovcnt, tag, &param);
+ ucs_trace_req("JUCX: send_tag_iov_nb request %p, tag: %ld", status, tag);
- ucs_trace_req("JUCX: send_tag_iov_nb request %p, tag: %ld", request, tag);
- return process_request(request, callback);
+ process_request(env, jucx_request, status);
+
+ return jucx_request;
}
JNIEXPORT jobject JNICALL
Java_org_openucx_jucx_ucp_UcpEndpoint_sendStreamNonBlockingNative(JNIEnv *env, jclass cls,
jlong ep_ptr, jlong addr,
- jlong size, jobject callback)
+ jlong size, jobject callback,
+ jint memory_type)
{
- ucs_status_ptr_t request = ucp_stream_send_nb((ucp_ep_h)ep_ptr, (void *)addr, size,
- ucp_dt_make_contig(1), jucx_request_callback, 0);
+ // Stream send of `size` bytes at `addr` through ucp_stream_send_nbx; returns
+ // the Java request object allocated for the operation.
+ ucp_request_param_t param = {0};
+
+ jobject jucx_request = jucx_request_allocate(env, callback, &param, memory_type);
+
+ param.cb.send = jucx_request_callback;
+
+ ucs_status_ptr_t status = ucp_stream_send_nbx((ucp_ep_h)ep_ptr, (void *)addr, size, &param);
+ ucs_trace_req("JUCX: send_stream_nb request %p, size: %zu", status, size);
- ucs_trace_req("JUCX: send_stream_nb request %p, size: %zu", request, size);
- return process_request(request, callback);
+ process_request(env, jucx_request, status);
+ return jucx_request;
}
JNIEXPORT jobject JNICALL
Java_org_openucx_jucx_ucp_UcpEndpoint_sendStreamIovNonBlockingNative(JNIEnv *env, jclass cls,
jlong ep_ptr, jlongArray addresses,
- jlongArray sizes,
- jobject callback)
+ jlongArray sizes, jobject callback,
+ jint memory_type)
{
int iovcnt;
+ // Stream send of an iov list through ucp_stream_send_nbx; returns the Java
+ // request object, or NULL when the iov could not be built.
+ ucp_request_param_t param = {0};
ucp_dt_iov_t* iovec = get_ucp_iov(env, addresses, sizes, iovcnt);
if (iovec == NULL) {
return NULL;
}
+ // Allocate the request only once the iov exists, so the early return above
+ // does not abandon whatever jucx_request_allocate sets up.
+ jobject jucx_request = jucx_request_allocate(env, callback, &param, memory_type);
- ucs_status_ptr_t request = ucp_stream_send_nb((ucp_ep_h)ep_ptr, iovec, iovcnt,
- ucp_dt_make_iov(), jucx_request_callback, 0);
+ jucx_request_set_iov(env, jucx_request, iovec);
- if (UCS_PTR_IS_PTR(request)) {
- struct jucx_context *ctx = (struct jucx_context *)request;
- ctx->iovec = iovec;
- } else {
- ucs_free(iovec);
- }
+ param.op_attr_mask |= UCP_OP_ATTR_FIELD_DATATYPE;
+ param.cb.send = jucx_request_callback;
+ param.datatype = ucp_dt_make_iov();
+
+ ucs_status_ptr_t status = ucp_stream_send_nbx((ucp_ep_h)ep_ptr, iovec, iovcnt, &param);
+ ucs_trace_req("JUCX: send_stream_iov_nb request %p", status);
- ucs_trace_req("JUCX: send_stream_iov_nb request %p", request);
- return process_request(request, callback);
+ process_request(env, jucx_request, status);
+ return jucx_request;
}
JNIEXPORT jobject JNICALL
Java_org_openucx_jucx_ucp_UcpEndpoint_recvStreamNonBlockingNative(JNIEnv *env, jclass cls,
jlong ep_ptr, jlong addr,
jlong size, jlong flags,
- jobject callback)
+ jobject callback,
+ jint memory_type)
{
size_t rlength;
- ucs_status_ptr_t request = ucp_stream_recv_nb((ucp_ep_h)ep_ptr, (void *)addr, size,
- ucp_dt_make_contig(1), stream_recv_callback,
- &rlength, flags);
+ // Stream receive into `addr` through ucp_stream_recv_nbx; returns the Java
+ // request object allocated for the operation.
+ ucp_request_param_t param = {0};
+ jobject jucx_request = jucx_request_allocate(env, callback, &param, memory_type);
+
+ param.op_attr_mask |= UCP_OP_ATTR_FIELD_FLAGS;
+ param.cb.recv_stream = stream_recv_callback;
+ param.flags = flags;
- ucs_trace_req("JUCX: recv_stream_nb request %p, size: %zu", request, size);
+ ucs_status_ptr_t status = ucp_stream_recv_nbx((ucp_ep_h)ep_ptr, (void *)addr, size,
+ &rlength, &param);
+ ucs_trace_req("JUCX: recv_stream_nb request %p, size: %zu", status, size);
- if (request == NULL) {
- // If request completed immidiately.
- return process_completed_stream_recv(rlength, callback);
+ // rlength is copied into the request only when the call returned NULL.
+ if (status == NULL) {
+ jucx_request_update_recv_length(env, jucx_request, rlength);
}
- return process_request(request, callback);
+ process_request(env, jucx_request, status);
+
+ return jucx_request;
}
JNIEXPORT jobject JNICALL
Java_org_openucx_jucx_ucp_UcpEndpoint_recvStreamIovNonBlockingNative(JNIEnv *env, jclass cls,
jlong ep_ptr,
jlongArray addresses, jlongArray sizes,
- jlong flags, jobject callback)
+ jlong flags, jobject callback,
+ jint memory_type)
{
size_t rlength;
-
int iovcnt;
+ // Stream receive into an iov list through ucp_stream_recv_nbx; returns the
+ // Java request object, or NULL when the iov could not be built.
+ ucp_request_param_t param = {0};
ucp_dt_iov_t* iovec = get_ucp_iov(env, addresses, sizes, iovcnt);
if (iovec == NULL) {
return NULL;
}
+ // Allocate the request only once the iov exists, so the early return above
+ // does not abandon whatever jucx_request_allocate sets up.
+ jobject jucx_request = jucx_request_allocate(env, callback, &param, memory_type);
- ucs_status_ptr_t request = ucp_stream_recv_nb((ucp_ep_h)ep_ptr, iovec, iovcnt,
- ucp_dt_make_iov(), stream_recv_callback,
- &rlength, flags);
+ jucx_request_set_iov(env, jucx_request, iovec);
- ucs_trace_req("JUCX: recv_stream_iov_nb request %p", request);
+ param.op_attr_mask |= UCP_OP_ATTR_FIELD_FLAGS |
+ UCP_OP_ATTR_FIELD_DATATYPE;
+ param.cb.recv_stream = stream_recv_callback;
+ param.datatype = ucp_dt_make_iov();
+ param.flags = flags;
- if (UCS_PTR_IS_PTR(request)) {
- struct jucx_context *ctx = (struct jucx_context *)request;
- ctx->iovec = iovec;
- } else {
- ucs_free(iovec);
- }
+ ucs_status_ptr_t status = ucp_stream_recv_nbx((ucp_ep_h)ep_ptr, iovec, iovcnt, &rlength,
+ &param);
+ ucs_trace_req("JUCX: recv_stream_iov_nb request %p", status);
- if (request == NULL) {
- // If request completed immidiately.
- return process_completed_stream_recv(rlength, callback);
+ // rlength is copied into the request only when the call returned NULL.
+ if (status == NULL) {
+ jucx_request_update_recv_length(env, jucx_request, rlength);
}
- return process_request(request, callback);
+ process_request(env, jucx_request, status);
+
+ return jucx_request;
}
JNIEXPORT jobject JNICALL
Java_org_openucx_jucx_ucp_UcpEndpoint_flushNonBlockingNative(JNIEnv *env, jclass cls,
- jlong ep_ptr,
- jobject callback)
+ jlong ep_ptr, jobject callback)
{
- ucs_status_ptr_t request = ucp_ep_flush_nb((ucp_ep_h)ep_ptr, 0, jucx_request_callback);
+ // Flushes the endpoint through ucp_ep_flush_nbx and returns the Java request
+ // object allocated for the operation.
+ ucp_request_param_t param = {0};
+
+ jobject jucx_request = jucx_request_allocate(env, callback, &param, UCS_MEMORY_TYPE_UNKNOWN);
+
+ param.cb.send = jucx_request_callback;
+
+ ucs_status_ptr_t status = ucp_ep_flush_nbx((ucp_ep_h)ep_ptr, &param);
+ ucs_trace_req("JUCX: ucp_ep_flush_nbx request %p", status);
+
+ process_request(env, jucx_request, status);
+
+ return jucx_request;
+}
+
+/**
+ * Sends an Active Message with id am_id through ucp_am_send_nbx, using the given
+ * header and data address/length pairs and `flags` as the request flags.
+ * Returns the Java request object allocated for the operation.
+ */
+JNIEXPORT jobject JNICALL
+Java_org_openucx_jucx_ucp_UcpEndpoint_sendAmNonBlockingNative(JNIEnv *env, jclass cls,
+ jlong ep_ptr, jint am_id,
+ jlong header_addr, jlong header_length,
+ jlong data_address, jlong data_length,
+ jlong flags, jobject callback,
+ jint memory_type)
+{
+ ucp_request_param_t param = {0};
+
+ jobject jucx_request = jucx_request_allocate(env, callback, &param, memory_type);
+
+ param.op_attr_mask |= UCP_OP_ATTR_FIELD_FLAGS;
+ param.cb.send = jucx_request_callback;
+ param.flags = flags;
+
+ ucs_status_ptr_t status = ucp_am_send_nbx((ucp_ep_h)ep_ptr, am_id, (void*)header_addr, header_length,
+ (void*)data_address, data_length, &param);
+ ucs_trace_req("JUCX: ucp_am_send_nbx request %p", status);
- return process_request(request, callback);
+ process_request(env, jucx_request, status);
+ return jucx_request;
}
diff --git a/bindings/java/src/main/native/jucx_common_def.cc b/bindings/java/src/main/native/jucx_common_def.cc
index 913edc66dfd..ea5407ee926 100644
--- a/bindings/java/src/main/native/jucx_common_def.cc
+++ b/bindings/java/src/main/native/jucx_common_def.cc
@@ -7,7 +7,7 @@
extern "C" {
#include
#include
- #include
+ #include
}
#include /* inet_addr */
@@ -17,15 +17,27 @@ extern "C" {
static JavaVM *jvm_global;
static jclass jucx_request_cls;
+static jclass jucx_endpoint_cls;
+static jclass jucx_am_data_cls;
+static jclass ucp_rkey_cls;
+static jclass ucp_tag_msg_cls;
+
static jfieldID native_id_field;
static jfieldID recv_size_field;
static jfieldID sender_tag_field;
-static jmethodID on_success;
+static jfieldID request_callback;
+static jfieldID request_status;
+static jfieldID request_iov_vec;
+
static jmethodID jucx_request_constructor;
-static jclass ucp_rkey_cls;
+static jmethodID jucx_endpoint_constructor;
+static jmethodID jucx_am_data_constructor;
static jmethodID ucp_rkey_cls_constructor;
-static jclass ucp_tag_msg_cls;
static jmethodID ucp_tag_msg_cls_constructor;
+static jmethodID on_success;
+static jmethodID on_am_receive;
+static jmethodID jucx_set_native_id;
+
extern "C" JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *jvm, void* reserved) {
setlocale(LC_NUMERIC, "C");
@@ -37,21 +49,37 @@ extern "C" JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *jvm, void* reserved) {
}
jclass jucx_request_cls_local = env->FindClass("org/openucx/jucx/ucp/UcpRequest");
- jucx_request_cls = (jclass) env->NewGlobalRef(jucx_request_cls_local);
jclass jucx_callback_cls = env->FindClass("org/openucx/jucx/UcxCallback");
+ jclass ucp_rkey_cls_local = env->FindClass("org/openucx/jucx/ucp/UcpRemoteKey");
+ jclass ucp_tag_msg_cls_local = env->FindClass("org/openucx/jucx/ucp/UcpTagMessage");
+ jclass jucx_endpoint_cls_local = env->FindClass("org/openucx/jucx/ucp/UcpEndpoint");
+ jclass jucx_am_data_cls_local = env->FindClass("org/openucx/jucx/ucp/UcpAmData");
+ jclass jucx_am_recv_callback_cls_local = env->FindClass("org/openucx/jucx/ucp/UcpAmRecvCallback");
+
+ jucx_request_cls = (jclass) env->NewGlobalRef(jucx_request_cls_local);
+ ucp_rkey_cls = (jclass) env->NewGlobalRef(ucp_rkey_cls_local);
+ ucp_tag_msg_cls = (jclass) env->NewGlobalRef(ucp_tag_msg_cls_local);
+ jucx_endpoint_cls = (jclass) env->NewGlobalRef(jucx_endpoint_cls_local);
+ jucx_am_data_cls = (jclass) env->NewGlobalRef(jucx_am_data_cls_local);
+
native_id_field = env->GetFieldID(jucx_request_cls, "nativeId", "Ljava/lang/Long;");
+ request_callback = env->GetFieldID(jucx_request_cls, "callback", "Lorg/openucx/jucx/UcxCallback;");
+ request_status = env->GetFieldID(jucx_request_cls, "status", "I");
recv_size_field = env->GetFieldID(jucx_request_cls, "recvSize", "J");
+ request_iov_vec = env->GetFieldID(jucx_request_cls, "iovVector", "J");
sender_tag_field = env->GetFieldID(jucx_request_cls, "senderTag", "J");
+
+ jucx_set_native_id = env->GetMethodID(jucx_request_cls, "setNativeId", "(J)V");
on_success = env->GetMethodID(jucx_callback_cls, "onSuccess",
"(Lorg/openucx/jucx/ucp/UcpRequest;)V");
- jucx_request_constructor = env->GetMethodID(jucx_request_cls, "", "(J)V");
-
- jclass ucp_rkey_cls_local = env->FindClass("org/openucx/jucx/ucp/UcpRemoteKey");
- ucp_rkey_cls = (jclass) env->NewGlobalRef(ucp_rkey_cls_local);
+ on_am_receive = env->GetMethodID(jucx_am_recv_callback_cls_local, "onReceive",
+ "(JJLorg/openucx/jucx/ucp/UcpAmData;Lorg/openucx/jucx/ucp/UcpEndpoint;)I");
+ jucx_endpoint_constructor = env->GetMethodID(jucx_endpoint_cls, "", "(J)V");
+ jucx_am_data_constructor = env->GetMethodID(jucx_am_data_cls, "", "(Lorg/openucx/jucx/ucp/UcpWorker;JJJ)V");
+ jucx_request_constructor = env->GetMethodID(jucx_request_cls, "", "()V");
ucp_rkey_cls_constructor = env->GetMethodID(ucp_rkey_cls, "", "(J)V");
- jclass ucp_tag_msg_cls_local = env->FindClass("org/openucx/jucx/ucp/UcpTagMessage");
- ucp_tag_msg_cls = (jclass) env->NewGlobalRef(ucp_tag_msg_cls_local);
ucp_tag_msg_cls_constructor = env->GetMethodID(ucp_tag_msg_cls, "", "(JJJ)V");
+
return JNI_VERSION_1_1;
}
@@ -64,6 +92,44 @@ extern "C" JNIEXPORT void JNICALL JNI_OnUnload(JavaVM *jvm, void *reserved) {
if (jucx_request_cls != NULL) {
env->DeleteGlobalRef(jucx_request_cls);
}
+
+ if (jucx_endpoint_cls != NULL) {
+ env->DeleteGlobalRef(jucx_endpoint_cls);
+ }
+
+ if (jucx_am_data_cls != NULL) {
+ env->DeleteGlobalRef(jucx_am_data_cls);
+ }
+}
+
+/**
+ * Convert a native socket address into a java.net.InetSocketAddress object.
+ *
+ * @param env JNI environment of the calling thread.
+ * @param ss  Native address to convert; only AF_INET and AF_INET6 are handled.
+ *
+ * @return Local reference to the new InetSocketAddress, or NULL when the
+ *         address family is not supported or the byte array holding the raw
+ *         address could not be allocated.
+ */
+jobject c2jInetSockAddr(JNIEnv *env, const sockaddr_storage* ss)
+{
+    const void *raw_addr;
+    jsize addr_len;
+    int port = 0;
+
+    // Pick the raw address bytes and the port according to the address family
+    if (ss->ss_family == AF_INET6) {
+        const sockaddr_in6* sin6 = reinterpret_cast<const sockaddr_in6*>(ss);
+        raw_addr                 = &sin6->sin6_addr.s6_addr;
+        addr_len                 = 16;
+        port                     = ntohs(sin6->sin6_port);
+    } else if (ss->ss_family == AF_INET) {
+        const sockaddr_in* sin = reinterpret_cast<const sockaddr_in*>(ss);
+        raw_addr               = &sin->sin_addr;
+        addr_len               = 4;
+        port                   = ntohs(sin->sin_port);
+    } else {
+        // Unknown family: do not reinterpret the storage as an IPv4 address
+        return NULL;
+    }
+
+    // 1. Construct InetAddress object
+    jclass inet_address_cls = env->FindClass("java/net/InetAddress");
+    jmethodID getByAddress  = env->GetStaticMethodID(inet_address_cls, "getByAddress",
+                                                     "([B)Ljava/net/InetAddress;");
+    jbyteArray buff = env->NewByteArray(addr_len);
+    if (buff == NULL) {
+        // Allocation failed; NewByteArray returns NULL in that case
+        return NULL;
+    }
+
+    env->SetByteArrayRegion(buff, 0, addr_len, static_cast<const jbyte*>(raw_addr));
+    jobject inet_address_obj = env->CallStaticObjectMethod(inet_address_cls, getByAddress, buff);
+
+    // 2. Construct InetSocketAddress object from InetAddress, port
+    jclass inet_socket_address_cls = env->FindClass("java/net/InetSocketAddress");
+    jmethodID inetSocketAddress_constructor = env->GetMethodID(inet_socket_address_cls,
+                                                               "<init>", "(Ljava/net/InetAddress;I)V");
+
+    return env->NewObject(inet_socket_address_cls, inetSocketAddress_constructor, inet_address_obj, port);
}
bool j2cInetSockAddr(JNIEnv *env, jobject sock_addr, sockaddr_storage& ss, socklen_t& sa_len)
@@ -147,49 +213,33 @@ bool j2cInetSockAddr(JNIEnv *env, jobject sock_addr, sockaddr_storage& ss, sock
return false;
}
-static inline void jucx_context_reset(struct jucx_context* ctx)
+/**
+ * Attach the calling thread to the JVM kept in jvm_global and return the JNI
+ * environment for it. The attach result is asserted to be JNI_OK.
+ */
+JNIEnv* get_jni_env()
{
- ctx->callback = NULL;
- ctx->jucx_request = NULL;
- ctx->status = UCS_INPROGRESS;
- ctx->length = 0;
- ctx->iovec = NULL;
- ctx->sender_tag = 0;
+    void *attached_env;
+    const jint rs = jvm_global->AttachCurrentThread(&attached_env, NULL);
+
+    ucs_assert_always(rs == JNI_OK);
+    return static_cast<JNIEnv*>(attached_env);
}
-void jucx_request_init(void *request)
+/**
+ * Store the address of a native iov array in the request's iovVector field.
+ */
+void jucx_request_set_iov(JNIEnv *env, jobject jucx_request, ucp_dt_iov_t* iovec)
{
- struct jucx_context *ctx = (struct jucx_context *)request;
- jucx_context_reset(ctx);
- ucs_recursive_spinlock_init(&ctx->lock, 0);
+    const native_ptr iov_address = (native_ptr)iovec;
+
+    env->SetLongField(jucx_request, request_iov_vec, iov_address);
}
-JNIEnv* get_jni_env()
+/**
+ * Write a UCS status code into the integer status field of a JUCX request.
+ */
+void jucx_request_update_status(JNIEnv *env, jobject jucx_request, ucs_status_t status)
{
- void *env;
- jint rs = jvm_global->AttachCurrentThread(&env, NULL);
- ucs_assert_always(rs == JNI_OK);
- return (JNIEnv*)env;
+    const jint status_code = static_cast<jint>(status);
+
+    env->SetIntField(jucx_request, request_status, status_code);
}
-static inline void set_jucx_request_completed(JNIEnv *env, jobject jucx_request,
- struct jucx_context *ctx)
+/**
+ * Mark a JUCX request as completed: clear its native id, record the final
+ * status and release the iov array attached to the request, if any.
+ *
+ * @param env          JNI environment of the calling thread.
+ * @param jucx_request Java request object to update.
+ * @param status       Completion status stored in the request.
+ */
+static inline void set_jucx_request_completed(JNIEnv *env, jobject jucx_request, ucs_status_t status)
{
env->SetObjectField(jucx_request, native_id_field, NULL);
- if (ctx != NULL) {
- /* sender_tag and length are initialized to 0,
- * so try to avoid the overhead of setting them again */
- if (ctx->sender_tag != 0) {
- env->SetLongField(jucx_request, sender_tag_field, ctx->sender_tag);
- }
-
- if (ctx->length > 0) {
- env->SetLongField(jucx_request, recv_size_field, ctx->length);
- }
+    jucx_request_update_status(env, jucx_request, status);
+    // jlong (not long) so the stored pointer is not truncated where long is 32 bits
+    jlong iov_vec = env->GetLongField(jucx_request, request_iov_vec);
- if (ctx->iovec != NULL) {
- ucs_free(ctx->iovec);
- }
+    if (iov_vec != 0L) {
+        ucp_dt_iov_t* iovec = reinterpret_cast<ucp_dt_iov_t*>(iov_vec);
+        ucs_free(iovec);
+        // Drop the stale address so the array can never be released twice
+        env->SetLongField(jucx_request, request_iov_vec, 0L);
}
}
@@ -224,124 +274,150 @@ static inline void jucx_call_callback(jobject callback, jobject jucx_request,
}
}
-UCS_PROFILE_FUNC_VOID(jucx_request_callback, (request, status), void *request, ucs_status_t status)
+/**
+ * Completion callback for UCP operations submitted with a JUCX request.
+ *
+ * @param request   UCP request handle; released here with ucp_request_free.
+ * @param status    Completion status of the operation.
+ * @param user_data Global reference to the Java request object; deleted here.
+ */
+UCS_PROFILE_FUNC_VOID(jucx_request_callback, (request, status, user_data), void *request,
+                      ucs_status_t status, void *user_data)
{
- struct jucx_context *ctx = (struct jucx_context *)request;
- ucs_recursive_spin_lock(&ctx->lock);
- if (ctx->jucx_request == NULL) {
- // here because 1 of 2 reasons:
- // 1. progress is in another thread and got here earlier then process_request happened.
- // 2. this callback is inside ucp_tag_recv_nb function.
- ctx->status = status;
- ucs_recursive_spin_unlock(&ctx->lock);
- return;
- }
+    jobject jucx_request = reinterpret_cast<jobject>(user_data);
JNIEnv *env = get_jni_env();
- set_jucx_request_completed(env, ctx->jucx_request, ctx);
- if (ctx->callback != NULL) {
- jucx_call_callback(ctx->callback, ctx->jucx_request, status);
- env->DeleteGlobalRef(ctx->callback);
+    // status is already a ucs_status_t, so it is passed as is rather than
+    // being decoded with the pointer-oriented UCS_PTR_STATUS macro
+    set_jucx_request_completed(env, jucx_request, status);
+    ucp_request_free(request);
+
+    jobject callback = env->GetObjectField(jucx_request, request_callback);
+
+    if (callback != NULL) {
+        jucx_call_callback(callback, jucx_request, status);
+        // Remove callback reference from request.
+        env->SetObjectField(jucx_request, request_callback, NULL);
}
- env->DeleteGlobalRef(ctx->jucx_request);
- jucx_context_reset(ctx);
- ucp_request_free(request);
- ucs_recursive_spin_unlock(&ctx->lock);
+    env->DeleteGlobalRef(jucx_request);
}
-void recv_callback(void *request, ucs_status_t status, ucp_tag_recv_info_t *info)
+/**
+ * Store the number of received bytes in the recvSize field of a JUCX request.
+ */
+void jucx_request_update_recv_length(JNIEnv *env, jobject jucx_request,
+                                     size_t rlength)
{
- struct jucx_context *ctx = (struct jucx_context *)request;
- ctx->length = info->length;
- ctx->sender_tag = info->sender_tag;
- jucx_request_callback(request, status);
+    const jlong received_bytes = static_cast<jlong>(rlength);
+
+    env->SetLongField(jucx_request, recv_size_field, received_bytes);
}
-void stream_recv_callback(void *request, ucs_status_t status, size_t length)
+/**
+ * Store the tag of the sender in the senderTag field of a JUCX request.
+ */
+void jucx_request_update_sender_tag(JNIEnv *env, jobject jucx_request,
+                                    ucp_tag_t sender_tag)
{
- struct jucx_context *ctx = (struct jucx_context *)request;
- ctx->length = length;
- jucx_request_callback(request, status);
+    const jlong tag_value = static_cast<jlong>(sender_tag);
+
+    env->SetLongField(jucx_request, sender_tag_field, tag_value);
}
-UCS_PROFILE_FUNC(jobject, process_request, (request, callback), void *request, jobject callback)
+/**
+ * Tag receive completion callback: copies the sender tag and the received
+ * length from info into the Java request, then runs the common completion
+ * path in jucx_request_callback.
+ *
+ * @param request   UCP request handle.
+ * @param status    Completion status of the receive.
+ * @param info      Tag receive information (sender tag, length).
+ * @param user_data Reference to the Java request object.
+ */
+void recv_callback(void *request, ucs_status_t status,
+                   const ucp_tag_recv_info_t *info, void *user_data)
{
JNIEnv *env = get_jni_env();
- jobject jucx_request;
-
- if (UCS_PTR_IS_PTR(request)) {
- jucx_request = env->NewObject(jucx_request_cls, jucx_request_constructor,
- (native_ptr)request);
- struct jucx_context *ctx = (struct jucx_context *)request;
- ucs_recursive_spin_lock(&ctx->lock);
- if (ctx->status == UCS_INPROGRESS) {
- // request not completed yet, install user callback
- if (callback != NULL) {
- ctx->callback = env->NewGlobalRef(callback);
- }
- ctx->jucx_request = env->NewGlobalRef(jucx_request);
- } else {
- // request was completed whether by progress in other thread or inside
- // ucp_tag_recv_nb function call.
- set_jucx_request_completed(env, jucx_request, ctx);
- if (callback != NULL) {
- jucx_call_callback(callback, jucx_request, ctx->status);
- }
- jucx_context_reset(ctx);
- ucp_request_free(request);
- }
- ucs_recursive_spin_unlock(&ctx->lock);
- } else {
- jmethodID empty_constructor = env->GetMethodID(jucx_request_cls, "", "()V");
- jucx_request = env->NewObject(jucx_request_cls, empty_constructor);
- set_jucx_request_completed(env, jucx_request, NULL);
- if (UCS_PTR_IS_ERR(request)) {
- JNU_ThrowExceptionByStatus(env, UCS_PTR_STATUS(request));
- if (callback != NULL) {
- call_on_error(callback, UCS_PTR_STATUS(request));
- }
- } else if (callback != NULL) {
- call_on_success(callback, jucx_request);
- }
- }
- return jucx_request;
+    jobject jucx_request = reinterpret_cast<jobject>(user_data);
+
+    // Publish receive details before the Java callback can observe the request
+    jucx_request_update_sender_tag(env, jucx_request, info->sender_tag);
+    jucx_request_update_recv_length(env, jucx_request, info->length);
+    jucx_request_callback(request, status, user_data);
}
-jobject process_completed_stream_recv(size_t length, jobject callback)
+/**
+ * Stream receive completion callback: records the received length in the
+ * Java request, then runs the common completion path in
+ * jucx_request_callback.
+ *
+ * @param request   UCP request handle.
+ * @param status    Completion status of the receive.
+ * @param length    Number of bytes received.
+ * @param user_data Reference to the Java request object.
+ */
+void stream_recv_callback(void *request, ucs_status_t status, size_t length,
+                          void *user_data)
{
JNIEnv *env = get_jni_env();
- jobject jucx_request = env->NewObject(jucx_request_cls, jucx_request_constructor, NULL);
- env->SetObjectField(jucx_request, native_id_field, NULL);
- env->SetLongField(jucx_request, recv_size_field, length);
+    jobject jucx_request = reinterpret_cast<jobject>(user_data);
+    jucx_request_update_recv_length(env, jucx_request, length);
+
+    jucx_request_callback(request, status, user_data);
+}
+
+/**
+ * Active message receive handler that forwards the message to Java.
+ *
+ * @param arg           Object array reference: element 0 is the Java receive
+ *                      callback, element 1 is the Java worker.
+ * @param header        Address of the active message header.
+ * @param header_length Length of the header.
+ * @param data          Address of the message data.
+ * @param length        Length of the message data.
+ * @param param         Receive parameters (recv_attr, optional reply_ep).
+ *
+ * @return The integer returned by the Java onReceive method, converted to
+ *         ucs_status_t.
+ */
+ucs_status_t am_recv_callback(void *arg, const void *header, size_t header_length,
+                              void *data, size_t length, const ucp_am_recv_param_t *param)
+{
+    JNIEnv *env           = get_jni_env();
+    jobject jucx_endpoint = NULL;
+
+    jobjectArray callback_and_worker = reinterpret_cast<jobjectArray>(arg);
+
+    jobject callback = env->GetObjectArrayElement(callback_and_worker, 0);
+    jobject worker   = env->GetObjectArrayElement(callback_and_worker, 1);
+
+    jobject jucx_am_data = env->NewObject(jucx_am_data_cls, jucx_am_data_constructor,
+                                          worker, (native_ptr)data, length, param->recv_attr);
+
+    if (param->recv_attr & UCP_AM_RECV_ATTR_FIELD_REPLY_EP) {
+        // The constructor takes a long ("(J)V"): pass the endpoint handle as
+        // an integer, like every other native pointer in this file
+        jucx_endpoint = env->NewObject(jucx_endpoint_cls, jucx_endpoint_constructor,
+                                       (native_ptr)param->reply_ep);
+    }
+
+    return static_cast<ucs_status_t>(env->CallIntMethod(callback, on_am_receive, (native_ptr)header, header_length,
+                                                        jucx_am_data, jucx_endpoint));
+}
+
+/**
+ * Create a Java request object and prepare the UCP request parameters that
+ * tie a native operation to it.
+ *
+ * Sets op_attr_mask to USER_DATA | CALLBACK | MEMORY_TYPE (overwriting any
+ * previous mask), stores a new global reference to the request in
+ * param->user_data and records the memory type. The caller still has to set
+ * the matching param->cb member.
+ *
+ * NOTE(review): the global reference is deleted in jucx_request_callback;
+ * confirm it is also released when the operation completes immediately and
+ * the callback never runs.
+ *
+ * @param env         JNI environment of the calling thread.
+ * @param callback    Java callback stored in the request, may be NULL.
+ * @param param       UCP request parameters to fill.
+ * @param memory_type Memory type value, converted to ucs_memory_type_t.
+ *
+ * @return Local reference to the new Java request object.
+ */
+jobject jucx_request_allocate(JNIEnv *env, const jobject callback,
+                              ucp_request_param_t *param, jint memory_type)
+{
+    jobject jucx_request = env->NewObject(jucx_request_cls, jucx_request_constructor);
+
+    param->op_attr_mask = UCP_OP_ATTR_FIELD_USER_DATA |
+                          UCP_OP_ATTR_FIELD_CALLBACK  |
+                          UCP_OP_ATTR_FIELD_MEMORY_TYPE;
+    param->user_data    = env->NewGlobalRef(jucx_request);
+    param->memory_type  = static_cast<ucs_memory_type_t>(memory_type);
+
if (callback != NULL) {
- jucx_call_callback(callback, jucx_request, UCS_OK);
+        env->SetObjectField(jucx_request, request_callback, callback);
}
+
return jucx_request;
}
+/**
+ * Handle the value returned by a non-blocking UCP call for a JUCX request.
+ *
+ * If status is a request pointer, it is stored in the Java request through
+ * setNativeId. Otherwise the request is marked completed with the raw status
+ * and the Java callback stored in the request, if any, is invoked and then
+ * cleared.
+ *
+ * NOTE(review): for an error status the Java exception is raised first and
+ * the JNI calls of the completion branch still run afterwards - confirm this
+ * ordering is intended.
+ *
+ * @param env          JNI environment of the calling thread.
+ * @param jucx_request Java request object associated with the operation.
+ * @param status       Return value of the UCP call (pointer or status).
+ */
+void process_request(JNIEnv *env, jobject jucx_request, ucs_status_ptr_t status)
+{
+ // If status is error - throw an exception in java.
+ if (UCS_PTR_IS_ERR(status)) {
+ JNU_ThrowExceptionByStatus(env, UCS_PTR_STATUS(status));
+ }
+
+ if (UCS_PTR_IS_PTR(status)) {
+ // Operation is still in flight: remember the native request handle.
+ env->CallVoidMethod(jucx_request, jucx_set_native_id, (native_ptr)status);
+ } else {
+ // Request completed immediately. Call jucx callback.
+ set_jucx_request_completed(env, jucx_request, UCS_PTR_RAW_STATUS(status));
+ jobject callback = env->GetObjectField(jucx_request, request_callback);
+ if (callback != NULL) {
+ jucx_call_callback(callback, jucx_request, UCS_PTR_RAW_STATUS(status));
+ // Remove callback reference from request.
+ env->SetObjectField(jucx_request, request_callback, NULL);
+ }
+ }
+}
+
+/**
+ * Listener connection callback: wraps the native connection request in a
+ * Java UcpConnectionRequest and passes it to the Java handler's
+ * onConnectionRequest method.
+ *
+ * The client address is queried from the connection request; when the query
+ * does not return UCS_OK the Java object is built with a null address.
+ *
+ * @param conn_request Native connection request handle.
+ * @param arg          Reference to the Java UcpListenerConnectionHandler.
+ */
void jucx_connection_handler(ucp_conn_request_h conn_request, void *arg)
{
- jobject jucx_conn_handler = reinterpret_cast(arg);
+    jobject client_address    = NULL;
+    jobject jucx_conn_handler = reinterpret_cast<jobject>(arg);
JNIEnv *env = get_jni_env();
+    ucp_conn_request_attr_t attr;
+    // Only the client address is requested, so only that field is read back
+    attr.field_mask     = UCP_CONN_REQUEST_ATTR_FIELD_CLIENT_ADDR;
+    ucs_status_t status = ucp_conn_request_query(conn_request, &attr);
+
+    if (status == UCS_OK) {
+        client_address = c2jInetSockAddr(env, &attr.client_address);
+    }
// Construct connection request class instance
jclass conn_request_cls = env->FindClass("org/openucx/jucx/ucp/UcpConnectionRequest");
- jmethodID conn_request_constructor = env->GetMethodID(conn_request_cls, "", "(J)V");
+    jmethodID conn_request_constructor = env->GetMethodID(conn_request_cls, "<init>",
+                                                          "(JLjava/net/InetSocketAddress;)V");
jobject jucx_conn_request = env->NewObject(conn_request_cls, conn_request_constructor,
- (native_ptr)conn_request);
+                                               (native_ptr)conn_request, client_address);
// Call onConnectionRequest method
jclass jucx_conn_hndl_cls = env->FindClass("org/openucx/jucx/ucp/UcpListenerConnectionHandler");
jmethodID on_conn_request = env->GetMethodID(jucx_conn_hndl_cls, "onConnectionRequest",
- "(Lorg/openucx/jucx/ucp/UcpConnectionRequest;)V");
+                                                 "(Lorg/openucx/jucx/ucp/UcpConnectionRequest;)V");
env->CallVoidMethod(jucx_conn_handler, on_conn_request, jucx_conn_request);
- env->DeleteGlobalRef(jucx_conn_handler);
}
-
jobject new_rkey_instance(JNIEnv *env, ucp_rkey_h rkey)
{
return env->NewObject(ucp_rkey_cls, ucp_rkey_cls_constructor, (native_ptr)rkey);
diff --git a/bindings/java/src/main/native/jucx_common_def.h b/bindings/java/src/main/native/jucx_common_def.h
index 6e83266d001..e225ba65016 100644
--- a/bindings/java/src/main/native/jucx_common_def.h
+++ b/bindings/java/src/main/native/jucx_common_def.h
@@ -42,7 +42,12 @@ typedef uintptr_t native_ptr;
} while(0)
+/**
+ * Throw org.openucx.jucx.UcxException built from a UCS status: the message is
+ * ucs_status_string(_status) and the status code is passed as the second
+ * constructor argument.
+ */
#define JNU_ThrowExceptionByStatus(_env, _status) do { \
- JNU_ThrowException(_env, ucs_status_string(_status)); \
+    jclass _cls = (_env)->FindClass("org/openucx/jucx/UcxException"); \
+    jmethodID _constr = (_env)->GetMethodID(_cls, "<init>", "(Ljava/lang/String;I)V"); \
+    jstring _error_msg = (_env)->NewStringUTF(ucs_status_string(_status)); \
+    jthrowable _ex = \
+            static_cast<jthrowable>((_env)->NewObject(_cls, _constr, _error_msg, (jint)(_status))); \
+    (_env)->Throw(_ex); \
} while(0)
/**
@@ -52,18 +57,6 @@ typedef uintptr_t native_ptr;
*/
bool j2cInetSockAddr(JNIEnv *env, jobject sock_addr, sockaddr_storage& ss, socklen_t& sa_len);
-struct jucx_context {
- jobject callback;
- volatile jobject jucx_request;
- ucs_status_t status;
- ucs_recursive_spinlock_t lock;
- size_t length;
- ucp_dt_iov_t* iovec;
- ucp_tag_t sender_tag;
-};
-
-void jucx_request_init(void *request);
-
/**
* @brief Get the jni env object. To be able to call java methods from ucx async callbacks.
*/
@@ -72,29 +65,60 @@ JNIEnv* get_jni_env();
/**
* @brief Send callback used to invoke java callback class on completion of ucp operations.
*/
-void jucx_request_callback(void *request, ucs_status_t status);
+void jucx_request_callback(void *request, ucs_status_t status, void *user_data);
/**
* @brief Recv callback used to invoke java callback class on completion of ucp tag_recv_nb operation.
*/
-void recv_callback(void *request, ucs_status_t status, ucp_tag_recv_info_t *info);
+void recv_callback(void *request, ucs_status_t status, const ucp_tag_recv_info_t *info,
+ void *user_data);
/**
* @brief Recv callback used to invoke java callback class on completion of ucp stream_recv_nb operation.
*/
-void stream_recv_callback(void *request, ucs_status_t status, size_t length);
+void stream_recv_callback(void *request, ucs_status_t status, size_t length, void *user_data);
+
+/**
+ * @brief Active message receive callback.
+ */
+ucs_status_t am_recv_callback(void *arg, const void *header, size_t header_length, void *data, size_t length,
+ const ucp_am_recv_param_t *param);
+
+/**
+ * @ingroup JUCX_REQ
+ * @brief Utility to allocate jucx request and set appropriate java callback in it.
+ */
+jobject jucx_request_allocate(JNIEnv *env, jobject callback, ucp_request_param_t *param,
+ jint memory_type);
+
+/**
+ * @ingroup JUCX_REQ
+ * @brief Utility to set iov vector in jucx_request, to release it on completion.
+ */
+void jucx_request_set_iov(JNIEnv *env, jobject request, ucp_dt_iov_t* iovec);
+
+/**
+ * @ingroup JUCX_REQ
+ * @brief Utility to update status of JUCX request to corresponding ucx request.
+ */
+void jucx_request_update_status(JNIEnv *env, jobject jucx_request, ucs_status_t status);
+
+/**
+ * @ingroup JUCX_REQ
+ * @brief Utility to set recv length in JUCX request.
+ */
+void jucx_request_update_recv_length(JNIEnv *env, jobject jucx_request, size_t rlength);
/**
- * @brief Utility to process request logic: if request is pointer - set callback to request context.
- * If request is status - call callback directly.
- * Returns jucx_request object, that could be monitored on completion.
+ * @ingroup JUCX_REQ
+ * @brief Utility to set sender tag in JUCX request.
*/
-jobject process_request(void *request, jobject callback);
+void jucx_request_update_sender_tag(JNIEnv *env, jobject jucx_request, ucp_tag_t sender_tag);
/**
- * @brief Call java callback on completed stream recv operation, that didn't invoke callback.
+ * @brief Function to handle result of ucx function submission, to handle immediate completion.
*/
-jobject process_completed_stream_recv(size_t length, jobject callback);
+void process_request(JNIEnv *env, jobject request, ucs_status_ptr_t status);
void jucx_connection_handler(ucp_conn_request_h conn_request, void *arg);
diff --git a/bindings/java/src/main/native/listener.cc b/bindings/java/src/main/native/listener.cc
index 3114e71488f..062b08028f2 100644
--- a/bindings/java/src/main/native/listener.cc
+++ b/bindings/java/src/main/native/listener.cc
@@ -44,7 +44,7 @@ Java_org_openucx_jucx_ucp_UcpListener_createUcpListener(JNIEnv *env, jclass cls,
field = env->GetFieldID(jucx_listener_param_class,
"connectionHandler", "Lorg/openucx/jucx/ucp/UcpListenerConnectionHandler;");
jobject jucx_conn_handler = env->GetObjectField(ucp_listener_params, field);
- params.conn_handler.arg = env->NewGlobalRef(jucx_conn_handler);
+ params.conn_handler.arg = env->NewWeakGlobalRef(jucx_conn_handler);
params.conn_handler.cb = jucx_connection_handler;
}
diff --git a/bindings/java/src/main/native/memory.cc b/bindings/java/src/main/native/memory.cc
index 8627aca89b5..239a071c2a4 100644
--- a/bindings/java/src/main/native/memory.cc
+++ b/bindings/java/src/main/native/memory.cc
@@ -5,6 +5,7 @@
#include "jucx_common_def.h"
#include "org_openucx_jucx_ucp_UcpMemory.h"
#include "org_openucx_jucx_ucp_UcpRemoteKey.h"
+#include "org_openucx_jucx_UcxUtils.h"
JNIEXPORT void JNICALL
@@ -43,3 +44,16 @@ Java_org_openucx_jucx_ucp_UcpRemoteKey_rkeyDestroy(JNIEnv *env, jclass cls, jlon
{
ucp_rkey_destroy((ucp_rkey_h) rkey_ptr);
}
+
+/**
+ * Return the native address of a direct buffer, as reported by
+ * GetDirectBufferAddress, converted to an integer.
+ */
+JNIEXPORT jlong JNICALL
+Java_org_openucx_jucx_UcxUtils_getAddressNative(JNIEnv *env, jclass cls, jobject buffer)
+{
+    void *direct_address = env->GetDirectBufferAddress(buffer);
+
+    return (native_ptr)direct_address;
+}
+
+/**
+ * Wrap the memory region [address, address + size) in a direct ByteBuffer
+ * created with NewDirectByteBuffer.
+ */
+JNIEXPORT jobject JNICALL
+Java_org_openucx_jucx_UcxUtils_getByteBufferViewNative(JNIEnv *env, jclass cls,
+                                                       jlong address, jlong size)
+{
+    void *region_start = (void*)address;
+
+    return env->NewDirectByteBuffer(region_start, size);
+}
diff --git a/bindings/java/src/main/native/request.cc b/bindings/java/src/main/native/request.cc
deleted file mode 100644
index d65619b922e..00000000000
--- a/bindings/java/src/main/native/request.cc
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
- * See file LICENSE for terms.
- */
-
-#include "org_openucx_jucx_ucp_UcpRequest.h"
-
-#include
-#include
-
-JNIEXPORT jboolean JNICALL
-Java_org_openucx_jucx_ucp_UcpRequest_isCompletedNative(JNIEnv *env, jclass cls,
- jlong ucp_req_ptr)
-{
- return ucp_request_check_status((void *)ucp_req_ptr) != UCS_INPROGRESS;
-}
-
-JNIEXPORT void JNICALL
-Java_org_openucx_jucx_ucp_UcpRequest_closeRequestNative(JNIEnv *env, jclass cls,
- jlong ucp_req_ptr)
-{
- ucp_request_free((void *)ucp_req_ptr);
-}
diff --git a/bindings/java/src/main/native/ucp_constants.cc b/bindings/java/src/main/native/ucp_constants.cc
index c156aae4aea..ee5eeb2642c 100644
--- a/bindings/java/src/main/native/ucp_constants.cc
+++ b/bindings/java/src/main/native/ucp_constants.cc
@@ -30,6 +30,7 @@ Java_org_openucx_jucx_ucp_UcpConstants_loadConstants(JNIEnv *env, jclass cls)
JUCX_DEFINE_LONG_CONSTANT(UCP_FEATURE_AMO64);
JUCX_DEFINE_LONG_CONSTANT(UCP_FEATURE_WAKEUP);
JUCX_DEFINE_LONG_CONSTANT(UCP_FEATURE_STREAM);
+ JUCX_DEFINE_LONG_CONSTANT(UCP_FEATURE_AM);
// UCP worker parameters
JUCX_DEFINE_LONG_CONSTANT(UCP_WORKER_PARAM_FIELD_THREAD_MODE);
@@ -65,8 +66,7 @@ Java_org_openucx_jucx_ucp_UcpConstants_loadConstants(JNIEnv *env, jclass cls)
JUCX_DEFINE_INT_CONSTANT(UCP_ERR_HANDLING_MODE_PEER);
// UCP endpoint close non blocking mode.
- JUCX_DEFINE_INT_CONSTANT(UCP_EP_CLOSE_MODE_FORCE);
- JUCX_DEFINE_INT_CONSTANT(UCP_EP_CLOSE_MODE_FLUSH);
+ JUCX_DEFINE_INT_CONSTANT(UCP_EP_CLOSE_FLAG_FORCE);
// The enumeration list describes the endpoint's parameters flags
JUCX_DEFINE_LONG_CONSTANT(UCP_EP_PARAMS_FLAGS_CLIENT_SERVER);
@@ -76,12 +76,30 @@ Java_org_openucx_jucx_ucp_UcpConstants_loadConstants(JNIEnv *env, jclass cls)
JUCX_DEFINE_LONG_CONSTANT(UCP_MEM_MAP_PARAM_FIELD_ADDRESS);
JUCX_DEFINE_LONG_CONSTANT(UCP_MEM_MAP_PARAM_FIELD_LENGTH);
JUCX_DEFINE_LONG_CONSTANT(UCP_MEM_MAP_PARAM_FIELD_FLAGS);
+ JUCX_DEFINE_LONG_CONSTANT(UCP_MEM_MAP_PARAM_FIELD_PROT);
+ JUCX_DEFINE_LONG_CONSTANT(UCP_MEM_MAP_PARAM_FIELD_MEMORY_TYPE);
// The enumeration list describes the memory mapping flags
JUCX_DEFINE_LONG_CONSTANT(UCP_MEM_MAP_NONBLOCK);
JUCX_DEFINE_LONG_CONSTANT(UCP_MEM_MAP_ALLOCATE);
JUCX_DEFINE_LONG_CONSTANT(UCP_MEM_MAP_FIXED);
+ // The enumeration list describes the memory mapping protections
+ JUCX_DEFINE_LONG_CONSTANT(UCP_MEM_MAP_PROT_LOCAL_READ);
+ JUCX_DEFINE_LONG_CONSTANT(UCP_MEM_MAP_PROT_LOCAL_WRITE);
+ JUCX_DEFINE_LONG_CONSTANT(UCP_MEM_MAP_PROT_REMOTE_READ);
+ JUCX_DEFINE_LONG_CONSTANT(UCP_MEM_MAP_PROT_REMOTE_WRITE);
+
// The enumeration defines behavior of @ref ucp_stream_recv_nb function
JUCX_DEFINE_LONG_CONSTANT(UCP_STREAM_RECV_FLAG_WAITALL);
+
+ // The enumeration allows specifying which fields in @ref ucp_am_recv_param_t
+ // are present and receive operation flags are used.
+ JUCX_DEFINE_LONG_CONSTANT(UCP_AM_RECV_ATTR_FLAG_DATA);
+ JUCX_DEFINE_LONG_CONSTANT(UCP_AM_RECV_ATTR_FLAG_RNDV);
+
+ // Flags dictate the behavior of @ref ucp_am_send_nbx routine.
+ JUCX_DEFINE_LONG_CONSTANT(UCP_AM_SEND_FLAG_REPLY);
+ JUCX_DEFINE_LONG_CONSTANT(UCP_AM_SEND_FLAG_EAGER);
+ JUCX_DEFINE_LONG_CONSTANT(UCP_AM_SEND_FLAG_RNDV);
}
diff --git a/bindings/java/src/main/native/ucs_constants.cc b/bindings/java/src/main/native/ucs_constants.cc
index 28507b05c66..3461748959f 100644
--- a/bindings/java/src/main/native/ucs_constants.cc
+++ b/bindings/java/src/main/native/ucs_constants.cc
@@ -9,9 +9,59 @@
#include
+/*
+ * Load native UCS constant values into the Java UcsConstants nested classes:
+ * the thread mode into UcsConstants$ThreadMode, then status codes and memory
+ * types through JUCX_DEFINE_INT_CONSTANT.
+ *
+ * NOTE(review): JUCX_DEFINE_INT_CONSTANT is defined outside this hunk; it
+ * presumably stores each value into the static field of the same name on the
+ * class held in the local variable cls - confirm against the macro.
+ */
JNIEXPORT void JNICALL
-Java_org_openucx_jucx_ucs_UcsConstants_loadConstants(JNIEnv *env, jclass cls)
+Java_org_openucx_jucx_ucs_UcsConstants_loadConstants(JNIEnv *env, jclass ucs_class)
{
jclass thread_mode = env->FindClass("org/openucx/jucx/ucs/UcsConstants$ThreadMode");
jfieldID field = env->GetStaticFieldID(thread_mode, "UCS_THREAD_MODE_MULTI", "I");
env->SetStaticIntField(thread_mode, field, UCS_THREAD_MODE_MULTI);
+
+ // Status codes are looked up on the STATUS nested class.
+ jclass cls = env->FindClass("org/openucx/jucx/ucs/UcsConstants$STATUS");
+
+ /* Operation completed successfully */
+ JUCX_DEFINE_INT_CONSTANT(UCS_OK);
+
+ /* Operation is queued and still in progress */
+ JUCX_DEFINE_INT_CONSTANT(UCS_INPROGRESS);
+ /* Failure codes */
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_NO_MESSAGE);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_NO_RESOURCE);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_IO_ERROR);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_NO_MEMORY);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_INVALID_PARAM);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_UNREACHABLE);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_INVALID_ADDR);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_NOT_IMPLEMENTED);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_MESSAGE_TRUNCATED);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_NO_PROGRESS);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_BUFFER_TOO_SMALL);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_NO_ELEM);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_SOME_CONNECTS_FAILED);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_NO_DEVICE);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_BUSY);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_CANCELED);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_SHMEM_SEGMENT);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_ALREADY_EXISTS);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_OUT_OF_RANGE);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_TIMED_OUT);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_EXCEEDS_LIMIT);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_UNSUPPORTED);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_REJECTED);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_NOT_CONNECTED);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_CONNECTION_RESET);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_FIRST_LINK_FAILURE);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_LAST_LINK_FAILURE);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_FIRST_ENDPOINT_FAILURE);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_ENDPOINT_TIMEOUT);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_LAST_ENDPOINT_FAILURE);
+ JUCX_DEFINE_INT_CONSTANT(UCS_ERR_LAST);
+
+ // Memory type: cls is re-pointed at the MEMORY_TYPE nested class
+ cls = env->FindClass("org/openucx/jucx/ucs/UcsConstants$MEMORY_TYPE");
+ JUCX_DEFINE_INT_CONSTANT(UCS_MEMORY_TYPE_HOST);
+ JUCX_DEFINE_INT_CONSTANT(UCS_MEMORY_TYPE_CUDA);
+ JUCX_DEFINE_INT_CONSTANT(UCS_MEMORY_TYPE_CUDA_MANAGED);
+ JUCX_DEFINE_INT_CONSTANT(UCS_MEMORY_TYPE_ROCM);
+ JUCX_DEFINE_INT_CONSTANT(UCS_MEMORY_TYPE_ROCM_MANAGED);
+ JUCX_DEFINE_INT_CONSTANT(UCS_MEMORY_TYPE_LAST);
+ JUCX_DEFINE_INT_CONSTANT(UCS_MEMORY_TYPE_UNKNOWN);
}
diff --git a/bindings/java/src/main/native/worker.cc b/bindings/java/src/main/native/worker.cc
index cc1c4af45de..c72f49fc03b 100644
--- a/bindings/java/src/main/native/worker.cc
+++ b/bindings/java/src/main/native/worker.cc
@@ -10,7 +10,7 @@
* Bridge method for creating ucp_worker from java
*/
JNIEXPORT jlong JNICALL
-Java_org_openucx_jucx_ucp_UcpWorker_createWorkerNative(JNIEnv *env, jclass cls,
+Java_org_openucx_jucx_ucp_UcpWorker_createWorkerNative(JNIEnv *env, jobject jucx_worker,
jobject jucx_worker_params,
jlong context_ptr)
{
@@ -63,7 +63,20 @@ Java_org_openucx_jucx_ucp_UcpWorker_createWorkerNative(JNIEnv *env, jclass cls,
ucs_status_t status = ucp_worker_create(ucp_context, &worker_params, &ucp_worker);
if (status != UCS_OK) {
JNU_ThrowExceptionByStatus(env, status);
+ return -1L;
}
+
+ ucp_worker_attr_t attr = {0};
+ attr.field_mask = UCP_WORKER_ATTR_FIELD_MAX_AM_HEADER;
+
+ status = ucp_worker_query(ucp_worker, &attr);
+ if (status != UCS_OK) {
+ JNU_ThrowExceptionByStatus(env, status);
+ }
+
+ field = env->GetFieldID(env->GetObjectClass(jucx_worker), "maxAmHeaderSize", "J");
+ env->SetLongField(jucx_worker, field, attr.max_am_header);
+
return (native_ptr)ucp_worker;
}
@@ -114,10 +127,18 @@ Java_org_openucx_jucx_ucp_UcpWorker_flushNonBlockingNative(JNIEnv *env, jclass c
jlong ucp_worker_ptr,
jobject callback)
{
- ucs_status_ptr_t request = ucp_worker_flush_nb((ucp_worker_h)ucp_worker_ptr, 0,
- jucx_request_callback);
+ ucp_request_param_t param;
- return process_request(request, callback);
+ jobject jucx_request = jucx_request_allocate(env, callback, ¶m, UCS_MEMORY_TYPE_UNKNOWN);
+
+ param.cb.send = jucx_request_callback;
+
+ ucs_status_ptr_t status = ucp_worker_flush_nbx((ucp_worker_h)ucp_worker_ptr, ¶m);
+ ucs_trace_req("JUCX: ucp_worker_flush_nbx request %p", status);
+
+ process_request(env, jucx_request, status);
+
+ return jucx_request;
}
JNIEXPORT void JNICALL
@@ -145,46 +166,68 @@ Java_org_openucx_jucx_ucp_UcpWorker_recvTaggedNonBlockingNative(JNIEnv *env, jcl
jlong ucp_worker_ptr,
jlong laddr, jlong size,
jlong tag, jlong tag_mask,
- jobject callback)
+ jobject callback, jint memory_type)
{
- ucs_status_ptr_t request = ucp_tag_recv_nb((ucp_worker_h)ucp_worker_ptr,
- (void *)laddr, size,
- ucp_dt_make_contig(1), tag, tag_mask,
- recv_callback);
+ ucp_request_param_t param = {0};
+ ucp_tag_recv_info_t recv_info = {0};
+
+ jobject jucx_request = jucx_request_allocate(env, callback, ¶m, memory_type);
+
+ param.op_attr_mask |= UCP_OP_ATTR_FIELD_RECV_INFO;
+ param.cb.recv = recv_callback;
+ param.recv_info.tag_info = &recv_info;
- ucs_trace_req("JUCX: tag_recv_nb request %p, msg size: %zu, tag: %ld", request, size, tag);
+ ucs_status_ptr_t status = ucp_tag_recv_nbx((ucp_worker_h)ucp_worker_ptr,
+ (void *)laddr, size, tag, tag_mask, ¶m);
+ ucs_trace_req("JUCX: tag_recv_nb request %p, msg size: %zu, tag: %ld", status, size, tag);
- return process_request(request, callback);
+ if (UCS_PTR_STATUS(status) == UCS_OK) {
+ jucx_request_update_recv_length(env, jucx_request, recv_info.length);
+ jucx_request_update_sender_tag(env, jucx_request, recv_info.sender_tag);
+ }
+
+ process_request(env, jucx_request, status);
+
+ return jucx_request;
}
+/**
+ * Post a non-blocking tagged receive into a scatter list of buffers.
+ *
+ * The native iov array built from addresses/sizes is attached to the Java
+ * request so that it is released when the request completes. When the
+ * operation returns UCS_OK right away, the received length and sender tag
+ * are copied into the request before it is processed.
+ *
+ * @return The Java request object, or NULL when the iov array could not be
+ *         built.
+ */
JNIEXPORT jobject JNICALL
Java_org_openucx_jucx_ucp_UcpWorker_recvTaggedIovNonBlockingNative(JNIEnv *env, jclass cls,
jlong ucp_worker_ptr,
- jlongArray addresses, jlongArray sizes,
- jlong tag, jlong tag_mask,
- jobject callback)
+                                                                   jlongArray addresses,
+                                                                   jlongArray sizes, jlong tag,
+                                                                   jlong tag_mask, jobject callback,
+                                                                   jint memory_type)
{
int iovcnt;
+    ucp_request_param_t param     = {0};
+    ucp_tag_recv_info_t recv_info = {0};
+
ucp_dt_iov_t* iovec = get_ucp_iov(env, addresses, sizes, iovcnt);
if (iovec == NULL) {
return NULL;
}
- ucs_status_ptr_t request = ucp_tag_recv_nb((ucp_worker_h)ucp_worker_ptr,
- iovec, iovcnt,
- ucp_dt_make_iov(), tag, tag_mask,
- recv_callback);
+    // Allocate the request only once the iov array exists: allocation creates
+    // a global reference that the early return above would never release
+    jobject jucx_request = jucx_request_allocate(env, callback, &param, memory_type);
+    jucx_request_set_iov(env, jucx_request, iovec);
- if (UCS_PTR_IS_PTR(request)) {
- struct jucx_context *ctx = (struct jucx_context *)request;
- ctx->iovec = iovec;
- } else {
- ucs_free(iovec);
- }
+    param.op_attr_mask      |= UCP_OP_ATTR_FIELD_RECV_INFO |
+                               UCP_OP_ATTR_FIELD_DATATYPE;
+    param.cb.recv            = recv_callback;
+    param.datatype           = ucp_dt_make_iov();
+    param.recv_info.tag_info = &recv_info;
+
+    ucs_status_ptr_t status = ucp_tag_recv_nbx((ucp_worker_h)ucp_worker_ptr,
+                                               iovec, iovcnt, tag, tag_mask, &param);
+    ucs_trace_req("JUCX: tag_recv_iov_nb request %p, tag: %ld", status, tag);
- ucs_trace_req("JUCX: tag_recv_iov_nb request %p, tag: %ld", request, tag);
+    if (UCS_PTR_STATUS(status) == UCS_OK) {
+        // Completed in place: recv_info was filled by the call itself
+        jucx_request_update_recv_length(env, jucx_request, recv_info.length);
+        jucx_request_update_sender_tag(env, jucx_request, recv_info.sender_tag);
+    }
+    process_request(env, jucx_request, status);
- return process_request(request, callback);
+    return jucx_request;
}
JNIEXPORT jobject JNICALL
@@ -210,18 +253,33 @@ Java_org_openucx_jucx_ucp_UcpWorker_recvTaggedMessageNonBlockingNative(JNIEnv *e
jlong ucp_worker_ptr,
jlong laddr, jlong size,
jlong msg_ptr,
- jobject callback)
+ jobject callback,
+ jint memory_type)
{
- ucs_status_ptr_t request = ucp_tag_msg_recv_nb((ucp_worker_h)ucp_worker_ptr,
+ ucp_request_param_t param = {0};
+ ucp_tag_recv_info_t recv_info = {0};
+
+ jobject jucx_request = jucx_request_allocate(env, callback, &param, memory_type);
+
+ param.op_attr_mask |= UCP_OP_ATTR_FIELD_RECV_INFO;
+ param.cb.recv = recv_callback;
+ param.recv_info.tag_info = &recv_info;
+
+ ucs_status_ptr_t status = ucp_tag_msg_recv_nbx((ucp_worker_h)ucp_worker_ptr,
(void *)laddr, size,
- ucp_dt_make_contig(1),
(ucp_tag_message_h)msg_ptr,
- recv_callback);
-
- ucs_trace_req("JUCX: tag_msg_recv_nb request %p, msg size: %zu, msg: %p", request, size,
+ &param);
+ ucs_trace_req("JUCX: tag_msg_recv_nb request %p, msg size: %zu, msg: %p", status, size,
(ucp_tag_message_h)msg_ptr);
- return process_request(request, callback);
+ if (UCS_PTR_STATUS(status) == UCS_OK) {
+ jucx_request_update_recv_length(env, jucx_request, recv_info.length);
+ jucx_request_update_sender_tag(env, jucx_request, recv_info.sender_tag);
+ }
+
+ process_request(env, jucx_request, status);
+
+ return jucx_request;
}
JNIEXPORT void JNICALL
@@ -231,3 +289,62 @@ Java_org_openucx_jucx_ucp_UcpWorker_cancelRequestNative(JNIEnv *env, jclass cls,
{
ucp_request_cancel((ucp_worker_h)ucp_worker_ptr, (void *)ucp_request_ptr);
}
+
+/**
+ * Installs am_recv_callback as the active-message receive handler for one
+ * AM id on a UCP worker.
+ *
+ * @param env               JNI environment of the calling thread.
+ * @param cls               Calling Java class (unused).
+ * @param ucp_worker_ptr    Native ucp_worker_h handle carried in a jlong.
+ * @param amId              Active message id the handler is bound to.
+ * @param callbackAndWorker Java array passed to am_recv_callback as its
+ *                          user argument, wrapped in a weak global reference.
+ *
+ * A non-UCS_OK status from ucp_worker_set_am_recv_handler is reported to
+ * Java through JNU_ThrowExceptionByStatus.
+ */
+JNIEXPORT void JNICALL
+Java_org_openucx_jucx_ucp_UcpWorker_setAmRecvHandlerNative(JNIEnv *env, jclass cls,
+                                                           jlong ucp_worker_ptr, jint amId,
+                                                           jobjectArray callbackAndWorker)
+{
+    ucp_am_handler_param_t param = {0};
+
+    param.field_mask = UCP_AM_HANDLER_PARAM_FIELD_ID |
+                       UCP_AM_HANDLER_PARAM_FIELD_FLAGS |
+                       UCP_AM_HANDLER_PARAM_FIELD_CB |
+                       UCP_AM_HANDLER_PARAM_FIELD_ARG;
+    param.id         = amId;
+    param.flags      = UCP_AM_FLAG_WHOLE_MSG;
+    param.cb         = am_recv_callback;
+    // NOTE(review): this weak global reference is not released anywhere in
+    // this function; confirm the Java side keeps callbackAndWorker strongly
+    // reachable and that the reference is deleted when the handler changes.
+    param.arg        = env->NewWeakGlobalRef(callbackAndWorker);
+
+    ucs_status_t status = ucp_worker_set_am_recv_handler((ucp_worker_h)ucp_worker_ptr, &param);
+
+    if (status != UCS_OK) {
+        JNU_ThrowExceptionByStatus(env, status);
+    }
+}
+
+/**
+ * Posts a non-blocking receive for the payload of an active message via
+ * ucp_am_recv_data_nbx and returns the Java request object tracking it.
+ *
+ * @param env            JNI environment of the calling thread.
+ * @param cls            Calling Java class (unused).
+ * @param ucp_worker_ptr Native ucp_worker_h handle carried in a jlong.
+ * @param data_descr_ptr Data descriptor pointer passed to UCX as-is.
+ * @param address        Destination buffer address.
+ * @param length         Destination buffer length in bytes.
+ * @param callback       Java callback handed to jucx_request_allocate.
+ * @param memory_type    Memory type handed to jucx_request_allocate.
+ *
+ * @return The Java request object created by jucx_request_allocate.
+ */
+JNIEXPORT jobject JNICALL
+Java_org_openucx_jucx_ucp_UcpWorker_recvAmDataNonBlockingNative(JNIEnv *env, jclass cls,
+                                                                jlong ucp_worker_ptr,
+                                                                jlong data_descr_ptr,
+                                                                jlong address, jlong length,
+                                                                jobject callback, jint memory_type)
+{
+    ucp_request_param_t param = {0};
+    // Initialized so the value is defined even if UCX leaves it untouched.
+    size_t recv_length        = 0;
+
+    jobject jucx_request = jucx_request_allocate(env, callback, &param, memory_type);
+
+    param.op_attr_mask    |= UCP_OP_ATTR_FIELD_RECV_INFO;
+    // NOTE(review): reuses stream_recv_callback for the AM data callback;
+    // presumably both share the same signature - confirm.
+    param.cb.recv_am       = stream_recv_callback;
+    param.recv_info.length = &recv_length;
+
+    ucs_status_ptr_t status = ucp_am_recv_data_nbx((ucp_worker_h)ucp_worker_ptr,
+                                                   (void*)data_descr_ptr,
+                                                   (void*)address, length, &param);
+    // length is a jlong; cast so the argument matches the %zu conversion.
+    ucs_trace_req("JUCX: ucp_am_recv_data_nbx request %p, msg size: %zu, data: %p", status,
+                  (size_t)length, (void*)data_descr_ptr);
+
+    // Only an immediately completed operation has the length available here.
+    if (UCS_PTR_STATUS(status) == UCS_OK) {
+        jucx_request_update_recv_length(env, jucx_request, recv_length);
+    }
+
+    process_request(env, jucx_request, status);
+    return jucx_request;
+}
+
+/**
+ * Hands an active-message data descriptor back to UCX by calling
+ * ucp_am_data_release on the given worker.
+ *
+ * @param ucp_worker_ptr Native ucp_worker_h handle carried in a jlong.
+ * @param data_descr_ptr Data descriptor pointer carried in a jlong.
+ */
+JNIEXPORT void JNICALL
+Java_org_openucx_jucx_ucp_UcpWorker_amDataReleaseNative(JNIEnv *env, jclass cls,
+                                                        jlong ucp_worker_ptr, jlong data_descr_ptr)
+{
+    ucp_worker_h worker = (ucp_worker_h)ucp_worker_ptr;
+    void *data_descr    = (void*)data_descr_ptr;
+
+    ucp_am_data_release(worker, data_descr);
+}
diff --git a/bindings/java/src/test/java/org/openucx/jucx/UcpContextTest.java b/bindings/java/src/test/java/org/openucx/jucx/UcpContextTest.java
index 8450604083d..1b67854bf50 100644
--- a/bindings/java/src/test/java/org/openucx/jucx/UcpContextTest.java
+++ b/bindings/java/src/test/java/org/openucx/jucx/UcpContextTest.java
@@ -7,28 +7,31 @@
import org.junit.Test;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
import org.openucx.jucx.ucp.UcpContext;
import org.openucx.jucx.ucp.UcpParams;
+import org.openucx.jucx.ucs.UcsConstants;
+
+import static org.junit.Assert.*;
public class UcpContextTest {
public static UcpContext createContext(UcpParams contextParams) {
UcpContext context = new UcpContext(contextParams);
assertTrue(context.getNativeId() > 0);
+ assertTrue(UcsConstants.MEMORY_TYPE.isMemTypeSupported(context.getMemoryTypesMask(),
+ UcsConstants.MEMORY_TYPE.UCS_MEMORY_TYPE_HOST));
return context;
}
public static void closeContext(UcpContext context) {
context.close();
- assertEquals(context.getNativeId(), null);
+ assertNull(context.getNativeId());
}
@Test
public void testCreateSimpleUcpContext() {
- UcpParams contextParams = new UcpParams().requestTagFeature();
+ UcpParams contextParams = new UcpParams().requestTagFeature()
+ .requestAmFeature();
UcpContext context = createContext(contextParams);
closeContext(context);
}
diff --git a/bindings/java/src/test/java/org/openucx/jucx/UcpEndpointTest.java b/bindings/java/src/test/java/org/openucx/jucx/UcpEndpointTest.java
index ea5ffec6188..575f33afc88 100644
--- a/bindings/java/src/test/java/org/openucx/jucx/UcpEndpointTest.java
+++ b/bindings/java/src/test/java/org/openucx/jucx/UcpEndpointTest.java
@@ -6,17 +6,40 @@
package org.openucx.jucx;
import org.junit.Test;
+import org.junit.experimental.theories.DataPoints;
+import org.junit.experimental.theories.Theories;
+import org.junit.experimental.theories.Theory;
+import org.junit.runner.RunWith;
import org.openucx.jucx.ucp.*;
+import org.openucx.jucx.ucs.UcsConstants;
import java.nio.ByteBuffer;
-import java.util.Collections;
-import java.util.HashMap;
+import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import static org.junit.Assert.*;
+@RunWith(Theories.class)
public class UcpEndpointTest extends UcxTest {
+
+    /**
+     * Data points for the {@code @Theory} tests in this class: host memory is
+     * always included, and CUDA / CUDA-managed memory are added when the mask
+     * reported by a probe {@link UcpContext} says they are supported.
+     *
+     * @return memory type constants each theory is run with.
+     */
+    @DataPoints
+    public static ArrayList<Integer> memTypes() {
+        ArrayList<Integer> result = new ArrayList<>();
+        result.add(UcsConstants.MEMORY_TYPE.UCS_MEMORY_TYPE_HOST);
+        UcpContext testContext = new UcpContext(new UcpParams().requestTagFeature());
+        long memTypeMask = testContext.getMemoryTypesMask();
+        // The probe context is only needed for its mask; release it right away.
+        testContext.close();
+        if (UcsConstants.MEMORY_TYPE.isMemTypeSupported(memTypeMask,
+            UcsConstants.MEMORY_TYPE.UCS_MEMORY_TYPE_CUDA)) {
+            result.add(UcsConstants.MEMORY_TYPE.UCS_MEMORY_TYPE_CUDA);
+        }
+        if (UcsConstants.MEMORY_TYPE.isMemTypeSupported(memTypeMask,
+            UcsConstants.MEMORY_TYPE.UCS_MEMORY_TYPE_CUDA_MANAGED)) {
+            result.add(UcsConstants.MEMORY_TYPE.UCS_MEMORY_TYPE_CUDA_MANAGED);
+        }
+        return result;
+    }
+
@Test
public void testConnectToListenerByWorkerAddr() {
UcpContext context = new UcpContext(new UcpParams().requestStreamFeature());
@@ -30,8 +53,9 @@ public void testConnectToListenerByWorkerAddr() {
closeResources();
}
- @Test
- public void testGetNB() {
+ @Theory
+ public void testGetNB(int memType) throws Exception {
+ System.out.println("Running testGetNB with memType: " + memType);
// Crerate 2 contexts + 2 workers
UcpParams params = new UcpParams().requestRmaFeature();
UcpWorkerParams rdmaWorkerParams = new UcpWorkerParams().requestWakeupRMA();
@@ -46,47 +70,35 @@ public void testGetNB() {
UcpEndpoint endpoint = worker1.newEndpoint(epParams);
// Allocate 2 source and 2 destination buffers, to perform 2 RDMA Read operations
- ByteBuffer src1 = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE);
- ByteBuffer src2 = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE);
- ByteBuffer dst1 = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE);
- ByteBuffer dst2 = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE);
- src1.asCharBuffer().put(UcpMemoryTest.RANDOM_TEXT);
- src2.asCharBuffer().put(UcpMemoryTest.RANDOM_TEXT + UcpMemoryTest.RANDOM_TEXT);
+ MemoryBlock src1 = allocateMemory(context2, worker2, memType, UcpMemoryTest.MEM_SIZE);
+ MemoryBlock src2 = allocateMemory(context2, worker2, memType, UcpMemoryTest.MEM_SIZE);
+ MemoryBlock dst1 = allocateMemory(context1, worker1, memType, UcpMemoryTest.MEM_SIZE);
+ MemoryBlock dst2 = allocateMemory(context1, worker1, memType, UcpMemoryTest.MEM_SIZE);
+
+ src1.setData(UcpMemoryTest.RANDOM_TEXT);
+ src2.setData(UcpMemoryTest.RANDOM_TEXT + UcpMemoryTest.RANDOM_TEXT);
// Register source buffers on context2
- UcpMemory memory1 = context2.registerMemory(src1);
- UcpMemory memory2 = context2.registerMemory(src2);
+ UcpMemory memory1 = src1.getMemory();
+ UcpMemory memory2 = src2.getMemory();
UcpRemoteKey rkey1 = endpoint.unpackRemoteKey(memory1.getRemoteKeyBuffer());
UcpRemoteKey rkey2 = endpoint.unpackRemoteKey(memory2.getRemoteKeyBuffer());
AtomicInteger numCompletedRequests = new AtomicInteger(0);
- HashMap requestToData = new HashMap<>();
+
UcxCallback callback = new UcxCallback() {
@Override
public void onSuccess(UcpRequest request) {
- // Here thread safety is guaranteed since worker progress is called after
- // request added to map. In multithreaded environment could be an issue that
- // callback is called, but request wasn't added yet to map.
- if (requestToData.get(request) == dst1) {
- assertEquals(UcpMemoryTest.RANDOM_TEXT, dst1.asCharBuffer().toString().trim());
- memory1.deregister();
- } else {
- assertEquals(UcpMemoryTest.RANDOM_TEXT + UcpMemoryTest.RANDOM_TEXT,
- dst2.asCharBuffer().toString().trim());
- memory2.deregister();
- }
numCompletedRequests.incrementAndGet();
}
};
// Submit 2 get requests
- UcpRequest request1 = endpoint.getNonBlocking(memory1.getAddress(), rkey1, dst1, callback);
- UcpRequest request2 = endpoint.getNonBlocking(memory2.getAddress(), rkey2, dst2, callback);
-
- // Map each request to corresponding data buffer.
- requestToData.put(request1, dst1);
- requestToData.put(request2, dst2);
+ UcpRequest request1 = endpoint.getNonBlocking(memory1.getAddress(), rkey1,
+ dst1.getMemory().getAddress(), dst1.getMemory().getLength(), callback);
+ UcpRequest request2 = endpoint.getNonBlocking(memory2.getAddress(), rkey2,
+ dst2.getMemory().getAddress(), dst2.getMemory().getLength(), callback);
// Wait for 2 get operations to complete
while (numCompletedRequests.get() != 2) {
@@ -94,15 +106,18 @@ public void onSuccess(UcpRequest request) {
worker2.progress();
}
+ assertEquals(src1.getData().asCharBuffer(), dst1.getData().asCharBuffer());
+ assertEquals(src2.getData().asCharBuffer(), dst2.getData().asCharBuffer());
assertTrue(request1.isCompleted() && request2.isCompleted());
Collections.addAll(resources, context2, context1, worker2, worker1, endpoint, rkey2,
- rkey1);
+ rkey1, src1, src2, dst1, dst2);
closeResources();
}
- @Test
- public void testPutNB() {
+ @Theory
+ public void testPutNB(int memType) throws Exception {
+ System.out.println("Running testPutNB with memType: " + memType);
// Crerate 2 contexts + 2 workers
UcpParams params = new UcpParams().requestRmaFeature();
UcpWorkerParams rdmaWorkerParams = new UcpWorkerParams().requestWakeupRMA();
@@ -111,28 +126,29 @@ public void testPutNB() {
UcpWorker worker1 = context1.newWorker(rdmaWorkerParams);
UcpWorker worker2 = context2.newWorker(rdmaWorkerParams);
- ByteBuffer src = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE);
- ByteBuffer dst = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE);
- src.asCharBuffer().put(UcpMemoryTest.RANDOM_TEXT);
+ MemoryBlock src = allocateMemory(context1, worker1, memType, UcpMemoryTest.MEM_SIZE);
+ MemoryBlock dst = allocateMemory(context2, worker2, memType, UcpMemoryTest.MEM_SIZE);
+
+ src.setData(UcpMemoryTest.RANDOM_TEXT);
- // Register destination buffer on context2
- UcpMemory memory = context2.registerMemory(dst);
UcpEndpoint ep =
worker1.newEndpoint(new UcpEndpointParams().setUcpAddress(worker2.getAddress()));
- UcpRemoteKey rkey = ep.unpackRemoteKey(memory.getRemoteKeyBuffer());
- ep.putNonBlocking(src, memory.getAddress(), rkey, null);
+ UcpRemoteKey rkey = ep.unpackRemoteKey(dst.getMemory().getRemoteKeyBuffer());
+ ep.putNonBlocking(src.getMemory().getAddress(), UcpMemoryTest.MEM_SIZE,
+ dst.getMemory().getAddress(), rkey, null);
worker1.progressRequest(worker1.flushNonBlocking(null));
- assertEquals(UcpMemoryTest.RANDOM_TEXT, dst.asCharBuffer().toString().trim());
+ assertEquals(UcpMemoryTest.RANDOM_TEXT, dst.getData().asCharBuffer().toString().trim());
- Collections.addAll(resources, context2, context1, worker2, worker1, rkey, ep, memory);
+ Collections.addAll(resources, context2, context1, worker2, worker1, rkey, ep, src, dst);
closeResources();
}
- @Test
- public void testSendRecv() throws Exception {
+ @Theory
+ public void testSendRecv(int memType) throws Exception {
+ System.out.println("Running testSendRecv with memType: " + memType);
// Crerate 2 contexts + 2 workers
UcpParams params = new UcpParams().requestRmaFeature().requestTagFeature();
UcpWorkerParams rdmaWorkerParams = new UcpWorkerParams().requestWakeupRMA();
@@ -141,47 +157,48 @@ public void testSendRecv() throws Exception {
UcpWorker worker1 = context1.newWorker(rdmaWorkerParams);
UcpWorker worker2 = context2.newWorker(rdmaWorkerParams);
- // Allocate 2 source and 2 destination buffers, to perform 2 RDMA Read operations
- UcpMemMapParams allocationParams = new UcpMemMapParams().allocate()
- .setLength(UcpMemoryTest.MEM_SIZE);
- UcpMemory memory1 = context1.memoryMap(allocationParams);
- UcpMemory memory2 = context1.memoryMap(allocationParams);
- ByteBuffer src1 = UcxUtils.getByteBufferView(memory1.getAddress(), UcpMemoryTest.MEM_SIZE);
- ByteBuffer src2 = UcxUtils.getByteBufferView(memory1.getAddress(), UcpMemoryTest.MEM_SIZE);
- ByteBuffer dst1 = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE);
- ByteBuffer dst2 = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE);
- src1.asCharBuffer().put(UcpMemoryTest.RANDOM_TEXT);
- src2.asCharBuffer().put(UcpMemoryTest.RANDOM_TEXT + UcpMemoryTest.RANDOM_TEXT);
+ MemoryBlock src1 = allocateMemory(context1, worker1, memType, UcpMemoryTest.MEM_SIZE);
+ MemoryBlock src2 = allocateMemory(context1, worker1, memType, UcpMemoryTest.MEM_SIZE);
+
+ MemoryBlock dst1 = allocateMemory(context2, worker2, memType, UcpMemoryTest.MEM_SIZE);
+ MemoryBlock dst2 = allocateMemory(context2, worker2, memType, UcpMemoryTest.MEM_SIZE);
+
+ src1.setData(UcpMemoryTest.RANDOM_TEXT);
+ src2.setData(UcpMemoryTest.RANDOM_TEXT + UcpMemoryTest.RANDOM_TEXT);
AtomicInteger receivedMessages = new AtomicInteger(0);
- worker2.recvTaggedNonBlocking(dst1, 0, 0, new UcxCallback() {
- @Override
- public void onSuccess(UcpRequest request) {
- assertEquals(dst1, src1);
- receivedMessages.incrementAndGet();
- }
- });
+ worker2.recvTaggedNonBlocking(dst1.getMemory().getAddress(), UcpMemoryTest.MEM_SIZE, 0, 0,
+ new UcxCallback() {
+ @Override
+ public void onSuccess(UcpRequest request) {
+ receivedMessages.incrementAndGet();
+ }
+ });
- worker2.recvTaggedNonBlocking(dst2, 1, -1, new UcxCallback() {
- @Override
- public void onSuccess(UcpRequest request) {
- assertEquals(dst2, src2);
- receivedMessages.incrementAndGet();
- }
- });
+ worker2.recvTaggedNonBlocking(dst2.getMemory().getAddress(), UcpMemoryTest.MEM_SIZE,
+ 1, -1, new UcxCallback() {
+ @Override
+ public void onSuccess(UcpRequest request) {
+ receivedMessages.incrementAndGet();
+ }
+ });
UcpEndpoint ep = worker1.newEndpoint(new UcpEndpointParams()
.setUcpAddress(worker2.getAddress()));
- ep.sendTaggedNonBlocking(src1, 0, null);
- ep.sendTaggedNonBlocking(src2, 1, null);
+ ep.sendTaggedNonBlocking(src1.getMemory().getAddress(), UcpMemoryTest.MEM_SIZE, 0, null);
+ ep.sendTaggedNonBlocking(src2.getMemory().getAddress(), UcpMemoryTest.MEM_SIZE, 1, null);
while (receivedMessages.get() != 2) {
worker1.progress();
worker2.progress();
}
- Collections.addAll(resources, context2, context1, worker2, worker1, memory2, memory1, ep);
+ assertEquals(src1.getData().asCharBuffer(), dst1.getData().asCharBuffer());
+ assertEquals(src2.getData().asCharBuffer(), dst2.getData().asCharBuffer());
+
+ Collections.addAll(resources, context2, context1, worker2, worker1, ep,
+ src1, src2, dst1, dst2);
closeResources();
}
@@ -200,6 +217,7 @@ public void testRecvAfterSend() {
UcpEndpoint ep = worker1.newEndpoint(new UcpEndpointParams()
.setPeerErrorHandlingMode()
+ .setErrorHandler((errEp, status, errorMsg) -> { })
.setUcpAddress(worker2.getAddress()));
ByteBuffer src1 = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE);
@@ -211,8 +229,13 @@ public void testRecvAfterSend() {
@Override
public void run() {
while (!isInterrupted()) {
- worker1.progress();
- worker2.progress();
+ try {
+ worker1.progress();
+ worker2.progress();
+ } catch (Exception ex) {
+ System.err.println(ex.getMessage());
+ ex.printStackTrace();
+ }
}
}
};
@@ -248,8 +271,6 @@ public void onSuccess(UcpRequest request) {
Thread.sleep(10);
} catch (InterruptedException e) {
e.printStackTrace();
- } finally {
- closeRequest.close();
}
}
@@ -263,7 +284,7 @@ public void onSuccess(UcpRequest request) {
}
@Test
- public void testBufferOffset() {
+ public void testBufferOffset() throws Exception {
int msgSize = 200;
int offset = 100;
// Crerate 2 contexts + 2 workers
@@ -311,7 +332,7 @@ public void testBufferOffset() {
}
@Test
- public void testFlushEp() {
+ public void testFlushEp() throws Exception {
int numRequests = 10;
// Crerate 2 contexts + 2 workers
UcpParams params = new UcpParams().requestRmaFeature();
@@ -356,7 +377,7 @@ public void onSuccess(UcpRequest request) {
}
@Test
- public void testRecvSize() {
+ public void testRecvSize() throws Exception {
UcpContext context1 = new UcpContext(new UcpParams().requestTagFeature());
UcpContext context2 = new UcpContext(new UcpParams().requestTagFeature());
@@ -386,7 +407,7 @@ public void testRecvSize() {
}
@Test
- public void testStreamingAPI() {
+ public void testStreamingAPI() throws Exception {
UcpParams params = new UcpParams().requestStreamFeature().requestRmaFeature();
UcpContext context1 = new UcpContext(params);
UcpContext context2 = new UcpContext(params);
@@ -401,7 +422,7 @@ public void testStreamingAPI() {
new UcpEndpointParams().setUcpAddress(worker1.getAddress()));
ByteBuffer sendBuffer = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE);
- sendBuffer.put(0, (byte)1);
+ sendBuffer.put(0, (byte) 1);
ByteBuffer recvBuffer = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE * 2);
UcpRequest[] sends = new UcpRequest[2];
@@ -421,7 +442,7 @@ public void onSuccess(UcpRequest request) {
AtomicBoolean received = new AtomicBoolean(false);
serverToClient.recvStreamNonBlocking(
- UcxUtils.getAddress(recvBuffer), UcpMemoryTest.MEM_SIZE * 2,
+ UcxUtils.getAddress(recvBuffer), UcpMemoryTest.MEM_SIZE * 2L,
UcpConstants.UCP_STREAM_RECV_FLAG_WAITALL,
new UcxCallback() {
@Override
@@ -443,12 +464,12 @@ public void onSuccess(UcpRequest request) {
closeResources();
}
- @Test
- public void testIovOperations() throws Exception {
+ @Theory
+ public void testIovOperations(int memType) throws Exception {
+ System.out.println("Running testIovOperations with memType: " + memType);
int NUM_IOV = 6;
long buffMultiplier = 10L;
- UcpMemMapParams memMapParams = new UcpMemMapParams().allocate();
// Crerate 2 contexts + 2 workers
UcpParams params = new UcpParams().requestTagFeature().requestStreamFeature();
UcpWorkerParams workerParams = new UcpWorkerParams();
@@ -463,10 +484,12 @@ public void testIovOperations() throws Exception {
UcpEndpoint recvEp = worker2.newEndpoint(new UcpEndpointParams()
.setUcpAddress(worker1.getAddress()));
+ MemoryBlock[] sendMemory = new MemoryBlock[NUM_IOV];
UcpMemory[] sendBuffers = new UcpMemory[NUM_IOV];
long[] sendAddresses = new long[NUM_IOV];
long[] sizes = new long[NUM_IOV];
+ MemoryBlock[] recvMemory = new MemoryBlock[NUM_IOV];
UcpMemory[] recvBuffers = new UcpMemory[NUM_IOV];
long[] recvAddresses = new long[NUM_IOV];
@@ -475,16 +498,16 @@ public void testIovOperations() throws Exception {
for (int i = 0; i < NUM_IOV; i++) {
long bufferSize = (i + 1) * buffMultiplier;
totalSize += bufferSize;
- memMapParams.setLength(bufferSize);
- sendBuffers[i] = context1.memoryMap(memMapParams);
+ sendMemory[i] = allocateMemory(context1, worker1, memType, bufferSize);
+ sendBuffers[i] = sendMemory[i].getMemory();
sendAddresses[i] = sendBuffers[i].getAddress();
sizes[i] = bufferSize;
- ByteBuffer buf = UcxUtils.getByteBufferView(sendAddresses[i], (int)bufferSize);
- buf.putInt(0, (i + 1));
+ sendMemory[i].setData(String.valueOf(i + 1));
- recvBuffers[i] = context2.memoryMap(memMapParams);
+ recvMemory[i] = allocateMemory(context2, worker2, memType, bufferSize);
+ recvBuffers[i] = recvMemory[i].getMemory();
recvAddresses[i] = recvBuffers[i].getAddress();
}
@@ -499,12 +522,13 @@ public void testIovOperations() throws Exception {
assertEquals(totalSize, recv.getRecvSize());
for (int i = 0; i < NUM_IOV; i++) {
- ByteBuffer buf = UcxUtils.getByteBufferView(recvAddresses[i], (int)sizes[i]);
- assertEquals((i + 1), buf.getInt(0));
- recvBuffers[i].deregister();
+ assertEquals(String.valueOf(i + 1),
+ recvMemory[i].getData().asCharBuffer().toString().trim());
+ recvMemory[i].close();
}
// Test 6 send IOV to 3 recv IOV
+ recvMemory = new MemoryBlock[NUM_IOV / 2];
recvBuffers = new UcpMemory[NUM_IOV / 2];
recvAddresses = new long[NUM_IOV / 2];
long[] recvSizes = new long[NUM_IOV / 2];
@@ -513,7 +537,8 @@ public void testIovOperations() throws Exception {
for (int i = 0; i < NUM_IOV / 2; i++) {
long bufferLength = (i + 1) * buffMultiplier * 2;
totalSize += bufferLength;
- recvBuffers[i] = context2.memoryMap(memMapParams.setLength(bufferLength));
+ recvMemory[i] = allocateMemory(context2, worker2, memType, bufferLength);
+ recvBuffers[i] = recvMemory[i].getMemory();
recvAddresses[i] = recvBuffers[i].getAddress();
recvSizes[i] = bufferLength;
}
@@ -527,17 +552,16 @@ public void testIovOperations() throws Exception {
}
assertEquals(totalSize, recv.getRecvSize());
- ByteBuffer buf = UcxUtils.getByteBufferView(recvAddresses[0], (int)recvSizes[0]);
- assertEquals(1, buf.getInt(0));
+ assertEquals('1', recvMemory[0].getData().asCharBuffer().get(0));
Collections.addAll(resources, context1, context2, worker1, worker2, ep);
- Collections.addAll(resources, sendBuffers);
- Collections.addAll(resources, recvBuffers);
+ Collections.addAll(resources, sendMemory);
+ Collections.addAll(resources, recvMemory);
closeResources();
}
@Test
- public void testEpErrorHandler() {
+ public void testEpErrorHandler() throws Exception {
// Crerate 2 contexts + 2 workers
UcpParams params = new UcpParams().requestTagFeature();
UcpWorkerParams workerParams = new UcpWorkerParams();
@@ -591,4 +615,124 @@ public void onError(int ucsStatus, String errorMsg) {
worker1.close();
context1.close();
}
+
+    /**
+     * Exercises active messages between two workers for the given memory type.
+     *
+     * Six requests are tracked in {@code requests}:
+     * [0] AM send with the RNDV flag (id 0), [1] tagged receive of the header
+     * echoed back over the reply endpoint, [2] that tagged reply send,
+     * [3] receive of the data for AM id 0, [4] AM send with the EAGER flag
+     * (id 1), [5] receive of the data for AM id 1. Slots 2, 3 and 5 are filled
+     * from inside the AM handlers, so the progress loop also waits for them
+     * to become non-null.
+     */
+    @Theory
+    public void testActiveMessages(int memType) throws Exception {
+        System.out.println("Running testActiveMessages with memType: " + memType);
+        UcpParams params = new UcpParams().requestAmFeature().requestTagFeature();
+        UcpContext context1 = new UcpContext(params);
+        UcpContext context2 = new UcpContext(params);
+
+        UcpWorker worker1 = context1.newWorker(new UcpWorkerParams());
+        UcpWorker worker2 = context2.newWorker(new UcpWorkerParams());
+
+        String headerString = "Hello";
+        String dataString = "Active messages";
+        // The header is written through a CharBuffer view: 2 bytes per char.
+        long headerSize = headerString.length() * 2;
+        long dataSize = UcpMemoryTest.MEM_SIZE;
+        assertTrue(headerSize < worker1.getMaxAmHeaderSize());
+
+        ByteBuffer header = ByteBuffer.allocateDirect((int) headerSize);
+        header.asCharBuffer().append(headerString);
+
+        header.rewind();
+
+        MemoryBlock sendData = allocateMemory(context2, worker2, memType, dataSize);
+        sendData.setData(dataString);
+
+        MemoryBlock recvData = allocateMemory(context1, worker1, memType, dataSize);
+        MemoryBlock recvEagerData = allocateMemory(context1, worker1, memType, dataSize);
+        ByteBuffer recvHeader = ByteBuffer.allocateDirect((int) headerSize);
+        UcpRequest[] requests = new UcpRequest[6];
+
+        UcpEndpoint ep = worker2.newEndpoint(
+            new UcpEndpointParams().setUcpAddress(worker1.getAddress()));
+
+        // Holds the reply endpoint seen by the first handler invocation so the
+        // second one can check that the same endpoint object is handed out.
+        Set<UcpEndpoint> cachedEp = new HashSet<>();
+
+        // Test rndv flow
+        worker1.setAmRecvHandler(0, (headerAddress, amHeaderSize, amData, replyEp) -> {
+            assertFalse(amData.isDataValid());
+            try {
+                assertEquals(headerString,
+                    UcxUtils.getByteBufferView(headerAddress, (int) amHeaderSize)
+                        .asCharBuffer().toString().trim());
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+
+            requests[2] = replyEp.sendTaggedNonBlocking(header, null);
+            requests[3] = amData.receive(recvData.getMemory().getAddress(), null);
+
+            if (!cachedEp.isEmpty()) {
+                assertTrue(cachedEp.contains(replyEp));
+            } else {
+                cachedEp.add(replyEp);
+            }
+
+            return UcsConstants.STATUS.UCS_OK;
+        });
+
+        // Test eager flow
+        worker1.setAmRecvHandler(1, (headerAddress, amHeaderSize, amData, replyEp) -> {
+            assertTrue(amData.isDataValid());
+            try {
+                assertEquals(dataString,
+                    UcxUtils.getByteBufferView(amData.getDataAddress(), (int) amData.getLength())
+                        .asCharBuffer().toString().trim());
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+
+            if (!cachedEp.isEmpty()) {
+                assertTrue(cachedEp.contains(replyEp));
+            } else {
+                cachedEp.add(replyEp);
+            }
+
+            requests[5] = amData.receive(recvEagerData.getMemory().getAddress(), null);
+
+            return UcsConstants.STATUS.UCS_OK;
+        });
+
+        requests[0] = ep.sendAmNonBlocking(0,
+            UcxUtils.getAddress(header), headerSize,
+            sendData.getMemory().getAddress(), sendData.getMemory().getLength(),
+            UcpConstants.UCP_AM_SEND_FLAG_REPLY | UcpConstants.UCP_AM_SEND_FLAG_RNDV,
+            new UcxCallback() {
+                @Override
+                public void onSuccess(UcpRequest request) {
+                    assertTrue(request.isCompleted());
+                }
+            });
+
+        requests[1] = worker2.recvTaggedNonBlocking(recvHeader, null);
+        requests[4] = ep.sendAmNonBlocking(1, 0L, 0L,
+            sendData.getMemory().getAddress(), dataSize,
+            UcpConstants.UCP_AM_SEND_FLAG_REPLY | UcpConstants.UCP_AM_SEND_FLAG_EAGER, null);
+
+        // Progress both workers until every slot is filled and completed.
+        while (!Arrays.stream(requests).allMatch(r -> (r != null) && r.isCompleted())) {
+            worker1.progress();
+            worker2.progress();
+        }
+
+        assertEquals(dataString,
+            recvData.getData().asCharBuffer().toString().trim());
+
+        assertEquals(dataString,
+            recvEagerData.getData().asCharBuffer().toString().trim());
+
+        assertEquals(headerString,
+            recvHeader.asCharBuffer().toString().trim());
+
+        // Reset AM callback
+        worker1.removeAmRecvHandler(0);
+        worker1.removeAmRecvHandler(1);
+
+        Collections.addAll(resources, context1, context2, worker1, worker2, ep,
+            cachedEp.iterator().next(), sendData, recvData, recvEagerData);
+        closeResources();
+        cachedEp.clear();
+    }
}
diff --git a/bindings/java/src/test/java/org/openucx/jucx/UcpListenerTest.java b/bindings/java/src/test/java/org/openucx/jucx/UcpListenerTest.java
index 658a6019700..54b4c8c9c0c 100644
--- a/bindings/java/src/test/java/org/openucx/jucx/UcpListenerTest.java
+++ b/bindings/java/src/test/java/org/openucx/jucx/UcpListenerTest.java
@@ -6,6 +6,7 @@
import org.junit.Test;
import org.openucx.jucx.ucp.*;
+import org.openucx.jucx.ucs.UcsConstants;
import java.net.InetAddress;
import java.net.InetSocketAddress;
@@ -24,32 +25,6 @@ public class UcpListenerTest extends UcxTest {
static final int port = Integer.parseInt(
System.getenv().getOrDefault("JUCX_TEST_PORT", "55321"));
- @Test
- public void testCreateUcpListener() {
- UcpContext context = new UcpContext(new UcpParams().requestStreamFeature());
- UcpWorker worker = context.newWorker(new UcpWorkerParams());
- InetSocketAddress ipv4 = new InetSocketAddress("0.0.0.0", port);
- try {
- UcpListener ipv4Listener = worker.newListener(
- new UcpListenerParams().setSockAddr(ipv4));
-
- assertNotNull(ipv4Listener);
- ipv4Listener.close();
- } catch (UcxException ignored) { }
-
- try {
- InetSocketAddress ipv6 = new InetSocketAddress("::", port);
- UcpListener ipv6Listener = worker.newListener(
- new UcpListenerParams().setSockAddr(ipv6));
-
- assertNotNull(ipv6Listener);
- ipv6Listener.close();
- } catch (UcxException ignored) { }
-
- worker.close();
- context.close();
- }
-
static Stream<NetworkInterface> getInterfaces() {
try {
return Collections.list(NetworkInterface.getNetworkInterfaces()).stream()
@@ -74,19 +49,27 @@ static UcpListener tryBindListener(UcpWorker worker, UcpListenerParams params) {
List<InetAddress> addresses = getInterfaces().flatMap(iface ->
Collections.list(iface.getInetAddresses()).stream())
.collect(Collectors.toList());
+ Collections.reverse(addresses);
for (InetAddress address : addresses) {
- try {
- result = worker.newListener(
- params.setSockAddr(new InetSocketAddress(address, port)));
- break;
- } catch (UcxException ignored) { }
+ for (int i = 0; i < 10; i++) {
+ try {
+ result = worker.newListener(
+ params.setSockAddr(new InetSocketAddress(address, port + i)));
+ break;
+ } catch (UcxException ex) {
+ if (ex.getStatus() != UcsConstants.STATUS.UCS_ERR_BUSY) {
+ break;
+ }
+ }
+ }
}
assertNotNull("Could not find socket address to start UcpListener", result);
+ System.out.println("Bound UcpListner on: " + result.getAddress());
return result;
}
@Test
- public void testConnectionHandler() {
+ public void testConnectionHandler() throws Exception {
UcpContext context1 = new UcpContext(new UcpParams().requestStreamFeature()
.requestRmaFeature());
UcpContext context2 = new UcpContext(new UcpParams().requestStreamFeature()
@@ -100,28 +83,58 @@ public void testConnectionHandler() {
// Create listener and set connection handler
UcpListenerParams listenerParams = new UcpListenerParams()
.setConnectionHandler(conRequest::set);
- UcpListener listener = tryBindListener(serverWorker1, listenerParams);
+ UcpListener serverListener = tryBindListener(serverWorker1, listenerParams);
+ UcpListener clientListener = tryBindListener(clientWorker, listenerParams);
UcpEndpoint clientToServer = clientWorker.newEndpoint(new UcpEndpointParams()
- .setSocketAddress(listener.getAddress()));
+ .setErrorHandler((ep, status, errorMsg) ->
+ System.err.println("clientToServer error: " + errorMsg))
+ .setPeerErrorHandlingMode().setSocketAddress(serverListener.getAddress()));
while (conRequest.get() == null) {
serverWorker1.progress();
clientWorker.progress();
}
+ assertNotNull(conRequest.get().getClientAddress());
+ UcpEndpoint serverToClientListener = serverWorker2.newEndpoint(
+ new UcpEndpointParams().setSocketAddress(conRequest.get().getClientAddress())
+ .setPeerErrorHandlingMode()
+ .setErrorHandler((errEp, status, errorMsg) ->
+ System.err.println("serverToClientListener error: " +
+ errorMsg)));
+ serverWorker2.progressRequest(serverToClientListener.closeNonBlockingForce());
+
// Create endpoint from another worker from pool.
UcpEndpoint serverToClient = serverWorker2.newEndpoint(
new UcpEndpointParams().setConnectionRequest(conRequest.get()));
-
- // Temporary workaround until new connection establishment protocol in UCX.
+
+ // Test connection handler persists
for (int i = 0; i < 10; i++) {
- serverWorker1.progress();
- serverWorker2.progress();
- clientWorker.progress();
- try {
- Thread.sleep(10);
- } catch (Exception ignored) { }
+ conRequest.set(null);
+ UcpEndpoint tmpEp = clientWorker.newEndpoint(new UcpEndpointParams()
+ .setSocketAddress(serverListener.getAddress()).setPeerErrorHandlingMode()
+ .setErrorHandler((ep, status, errorMsg) ->
+ System.err.println("tmpEp error: " + errorMsg)));
+
+ while (conRequest.get() == null) {
+ serverWorker1.progress();
+ serverWorker2.progress();
+ clientWorker.progress();
+ }
+
+ UcpEndpoint tmpEp2 = serverWorker2.newEndpoint(
+ new UcpEndpointParams().setPeerErrorHandlingMode()
+ .setConnectionRequest(conRequest.get()));
+
+ UcpRequest close1 = tmpEp.closeNonBlockingFlush();
+ UcpRequest close2 = tmpEp2.closeNonBlockingFlush();
+
+ while (!close1.isCompleted() || !close2.isCompleted()) {
+ serverWorker1.progress();
+ serverWorker2.progress();
+ clientWorker.progress();
+ }
}
UcpRequest sent = serverToClient.sendStreamNonBlocking(
@@ -142,13 +155,22 @@ public void testConnectionHandler() {
while (!sent.isCompleted() || !recv.isCompleted()) {
serverWorker1.progress();
+ serverWorker2.progress();
clientWorker.progress();
}
assertEquals(UcpMemoryTest.MEM_SIZE, recv.getRecvSize());
+ UcpRequest serverClose = serverToClient.closeNonBlockingFlush();
+ UcpRequest clientClose = clientToServer.closeNonBlockingFlush();
+
+ while (!serverClose.isCompleted() || !clientClose.isCompleted()) {
+ serverWorker2.progress();
+ clientWorker.progress();
+ }
+
Collections.addAll(resources, context2, context1, clientWorker, serverWorker1,
- serverWorker2, listener, serverToClient, clientToServer);
+ serverWorker2, serverListener, clientListener);
closeResources();
}
}
diff --git a/bindings/java/src/test/java/org/openucx/jucx/UcpMemoryTest.java b/bindings/java/src/test/java/org/openucx/jucx/UcpMemoryTest.java
index 01668d003ee..aca3c51166a 100644
--- a/bindings/java/src/test/java/org/openucx/jucx/UcpMemoryTest.java
+++ b/bindings/java/src/test/java/org/openucx/jucx/UcpMemoryTest.java
@@ -7,18 +7,21 @@
import org.junit.Test;
import org.openucx.jucx.ucp.*;
+import org.openucx.jucx.ucs.UcsConstants;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.util.Collections;
import java.util.UUID;
import static java.nio.file.StandardOpenOption.*;
import static org.junit.Assert.*;
+import static org.junit.Assume.assumeTrue;
-public class UcpMemoryTest {
+public class UcpMemoryTest extends UcxTest {
static int MEM_SIZE = 4096;
static String RANDOM_TEXT = UUID.randomUUID().toString();
@@ -39,7 +42,8 @@ public void testMmapFile() throws Exception {
// 3. Test allocation
UcpMemory allocatedMemory = context.memoryMap(new UcpMemMapParams()
- .allocate().setLength(MEM_SIZE).nonBlocking());
+ .allocate().setProtection(UcpConstants.UCP_MEM_MAP_PROT_LOCAL_READ)
+ .setLength(MEM_SIZE).nonBlocking());
assertEquals(allocatedMemory.getLength(), MEM_SIZE);
allocatedMemory.deregister();
@@ -71,11 +75,8 @@ public void testRemoteKeyUnpack() {
UcpMemory mem = context.registerMemory(buf);
UcpRemoteKey rkey = endpoint.unpackRemoteKey(mem.getRemoteKeyBuffer());
assertNotNull(rkey.getNativeId());
- rkey.close();
- mem.deregister();
- endpoint.close();
- worker1.close();
- worker2.close();
- context.close();
+
+ Collections.addAll(resources, context, worker1, worker2, endpoint, mem, rkey);
+ closeResources();
}
}
diff --git a/bindings/java/src/test/java/org/openucx/jucx/UcpRequestTest.java b/bindings/java/src/test/java/org/openucx/jucx/UcpRequestTest.java
index 0ac1fc6327c..bdca94fc67d 100644
--- a/bindings/java/src/test/java/org/openucx/jucx/UcpRequestTest.java
+++ b/bindings/java/src/test/java/org/openucx/jucx/UcpRequestTest.java
@@ -6,13 +6,14 @@
import org.junit.Test;
import org.openucx.jucx.ucp.*;
+import org.openucx.jucx.ucs.UcsConstants;
import java.nio.ByteBuffer;
import static org.junit.Assert.*;
public class UcpRequestTest {
@Test
- public void testCancelRequest() {
+ public void testCancelRequest() throws Exception {
UcpContext context = new UcpContext(new UcpParams().requestTagFeature());
UcpWorker worker = context.newWorker(new UcpWorkerParams());
UcpRequest recv = worker.recvTaggedNonBlocking(ByteBuffer.allocateDirect(100), null);
@@ -22,6 +23,7 @@ public void testCancelRequest() {
worker.progress();
}
+ assertEquals(UcsConstants.STATUS.UCS_ERR_CANCELED, recv.getStatus());
assertTrue(recv.isCompleted());
assertNull(recv.getNativeId());
diff --git a/bindings/java/src/test/java/org/openucx/jucx/UcpWorkerTest.java b/bindings/java/src/test/java/org/openucx/jucx/UcpWorkerTest.java
index d896898a038..da6596dd07f 100644
--- a/bindings/java/src/test/java/org/openucx/jucx/UcpWorkerTest.java
+++ b/bindings/java/src/test/java/org/openucx/jucx/UcpWorkerTest.java
@@ -19,7 +19,7 @@ public class UcpWorkerTest extends UcxTest {
private static int numWorkers = Runtime.getRuntime().availableProcessors();
@Test
- public void testSingleWorker() {
+ public void testSingleWorker() throws Exception {
UcpContext context = new UcpContext(new UcpParams().requestTagFeature());
assertEquals(2, UcsConstants.ThreadMode.UCS_THREAD_MODE_MULTI);
assertNotEquals(context.getNativeId(), null);
@@ -99,8 +99,12 @@ public void testWorkerSleepWakeup() throws InterruptedException {
@Override
public void run() {
while (!isInterrupted()) {
- if (worker.progress() == 0) {
- worker.waitForEvents();
+ try {
+ if (worker.progress() == 0) {
+ worker.waitForEvents();
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
}
}
success.set(true);
@@ -120,7 +124,7 @@ public void run() {
}
@Test
- public void testFlushWorker() {
+ public void testFlushWorker() throws Exception {
int numRequests = 10;
// Crerate 2 contexts + 2 workers
UcpParams params = new UcpParams().requestRmaFeature();
@@ -166,7 +170,7 @@ public void onSuccess(UcpRequest request) {
}
@Test
- public void testTagProbe() {
+ public void testTagProbe() throws Exception {
UcpParams params = new UcpParams().requestTagFeature();
UcpContext context1 = new UcpContext(params);
UcpContext context2 = new UcpContext(params);
diff --git a/bindings/java/src/test/java/org/openucx/jucx/UcxTest.java b/bindings/java/src/test/java/org/openucx/jucx/UcxTest.java
index 5d40f2da2bd..42de8ad8e2f 100644
--- a/bindings/java/src/test/java/org/openucx/jucx/UcxTest.java
+++ b/bindings/java/src/test/java/org/openucx/jucx/UcxTest.java
@@ -5,11 +5,72 @@
package org.openucx.jucx;
+import org.openucx.jucx.ucp.*;
+import org.openucx.jucx.ucs.UcsConstants;
+
import java.io.Closeable;
import java.io.IOException;
+import java.nio.ByteBuffer;
import java.util.Stack;
abstract class UcxTest {
+    /**
+     * Test helper that owns a mapped {@link UcpMemory} region and exposes the same
+     * read/write API for host and CUDA memory.
+     * <p>
+     * For CUDA memory an endpoint connected to the worker's own address is created together
+     * with a remote key unpacked on it, and data is moved with put/get operations.
+     * For every other memory type the region is accessed through a {@link ByteBuffer} view.
+     */
+    protected static class MemoryBlock implements Closeable {
+        private final UcpMemory memory;
+        // Endpoint to the worker itself; set only for CUDA memory.
+        private UcpEndpoint selfEp;
+        // View of the mapped region; set only for non-CUDA memory.
+        private ByteBuffer buffer;
+        private final UcpWorker worker;
+        // Remote key unpacked on selfEp; set only for CUDA memory.
+        private UcpRemoteKey rkey;
+
+        /**
+         * @param worker worker used to create the self endpoint and to progress requests.
+         * @param memory mapped memory region wrapped by this block.
+         */
+        protected MemoryBlock(UcpWorker worker, UcpMemory memory) {
+            this.memory = memory;
+            this.worker = worker;
+            if (memory.getMemType() == UcsConstants.MEMORY_TYPE.UCS_MEMORY_TYPE_CUDA) {
+                this.selfEp = worker.newEndpoint(
+                    new UcpEndpointParams().setUcpAddress(worker.getAddress()));
+                rkey = selfEp.unpackRemoteKey(memory.getRemoteKeyBuffer());
+            } else {
+                buffer = UcxUtils.getByteBufferView(memory.getAddress(), memory.getLength());
+            }
+        }
+
+        /** Returns the wrapped memory region. */
+        public UcpMemory getMemory() {
+            return memory;
+        }
+
+        /**
+         * Stores {@code data} at the beginning of the block as 2-byte chars.
+         *
+         * @param data text to store.
+         * @throws Exception propagated from progressing the put request (CUDA memory only).
+         */
+        public void setData(String data) throws Exception {
+            if (memory.getMemType() == UcsConstants.MEMORY_TYPE.UCS_MEMORY_TYPE_CUDA) {
+                // The char view holds capacity/2 chars, so the staging buffer must provide
+                // 2 bytes per char; sizing it to data.length() bytes overflows on put().
+                ByteBuffer srcBuffer = ByteBuffer.allocateDirect(data.length() * Character.BYTES);
+                srcBuffer.asCharBuffer().put(data);
+                worker.progressRequest(selfEp.putNonBlocking(srcBuffer, memory.getAddress(), rkey,
+                    null));
+            } else {
+                buffer.asCharBuffer().put(data);
+            }
+        }
+
+        /**
+         * Returns the block contents. For CUDA memory this is a newly allocated direct buffer
+         * of {@code memory.getLength()} bytes filled by a get operation; otherwise it is the
+         * buffer view of the region itself.
+         *
+         * @throws Exception propagated from progressing the get request (CUDA memory only).
+         */
+        public ByteBuffer getData() throws Exception {
+            if (memory.getMemType() == UcsConstants.MEMORY_TYPE.UCS_MEMORY_TYPE_CUDA) {
+                ByteBuffer dstBuffer = ByteBuffer.allocateDirect((int)memory.getLength());
+                worker.progressRequest(selfEp.getNonBlocking(memory.getAddress(), rkey,
+                    dstBuffer, null));
+                return dstBuffer;
+            } else {
+                return buffer;
+            }
+        }
+
+        /** Closes the remote key (if any), then the memory region, then the endpoint (if any). */
+        @Override
+        public void close() {
+            if (rkey != null) {
+                rkey.close();
+            }
+            memory.close();
+            if (selfEp != null) {
+                selfEp.close();
+            }
+        }
+    }
+
// Stack of closable resources (context, worker, etc.) to be closed at the end.
protected static Stack resources = new Stack<>();
@@ -22,4 +83,11 @@ protected void closeResources() {
}
}
}
+
+    /**
+     * Maps a newly allocated region of the requested length and memory type through
+     * {@code context} and wraps it in a {@link MemoryBlock} that uses {@code worker}.
+     */
+    protected static MemoryBlock allocateMemory(UcpContext context, UcpWorker worker, int memType,
+                                                long length) {
+        UcpMemory allocated = context.memoryMap(
+            new UcpMemMapParams().allocate().setLength(length).setMemoryType(memType));
+        return new MemoryBlock(worker, allocated);
+    }
}
diff --git a/buildlib/az-distro-release.yml b/buildlib/az-distro-release.yml
index 720ff6ad6de..8e35231ee2b 100644
--- a/buildlib/az-distro-release.yml
+++ b/buildlib/az-distro-release.yml
@@ -1,6 +1,12 @@
jobs:
- job: distro_release
+ condition: eq(stageDependencies.Check_Commit.Check.outputs['Commit.Launch'], 'Yes')
displayName: distro
+ variables:
+ ${{ if eq(variables['Build.Reason'], 'ResourceTrigger') }}:
+ POSTFIX: ucx-${{ replace(variables['Build.SourceBranch'], 'refs/tags/', '') }}
+ ${{ if eq(variables['Build.Reason'], 'PullRequest') }}:
+ POSTFIX: ucx-test
pool:
name: MLNX
@@ -11,34 +17,53 @@ jobs:
matrix:
centos7_cuda10_1:
build_container: centos7_cuda10_1
- artifact_name: ucx-${{ replace(variables['Build.SourceBranch'], 'refs/tags/', '') }}-centos7-mofed5.0-cuda10.1.tar.bz2
+ artifact_name: $(POSTFIX)-centos7-mofed5.x-cuda10.1.tar.bz2
centos7_cuda10_2:
build_container: centos7_cuda10_2
- artifact_name: ucx-${{ replace(variables['Build.SourceBranch'], 'refs/tags/', '') }}-centos7-mofed5.0-cuda10.2.tar.bz2
+ artifact_name: $(POSTFIX)-centos7-mofed5.x-cuda10.2.tar.bz2
centos7_cuda11_0:
build_container: centos7_cuda11_0
- artifact_name: ucx-${{ replace(variables['Build.SourceBranch'], 'refs/tags/', '') }}-centos7-mofed5.0-cuda11.0.tar.bz2
+ artifact_name: $(POSTFIX)-centos7-mofed5.x-cuda11.0.tar.bz2
+ centos7_cuda11_2:
+ build_container: centos7_cuda11_2
+ artifact_name: $(POSTFIX)-centos7-mofed5.x-cuda11.2.tar.bz2
+ centos8_cuda11_0:
+ build_container: centos8_cuda11_0
+ artifact_name: $(POSTFIX)-centos8-mofed5.x-cuda11.0.tar.bz2
+ centos8_cuda11_2:
+ build_container: centos8_cuda11_2
+ artifact_name: $(POSTFIX)-centos8-mofed5.x-cuda11.2.tar.bz2
ubuntu16_cuda10_1:
build_container: ubuntu16_cuda10_1
- artifact_name: ucx-${{ replace(variables['Build.SourceBranch'], 'refs/tags/', '') }}-ubuntu16.04-mofed5.0-cuda10.1.deb
+ artifact_name: $(POSTFIX)-ubuntu16.04-mofed5.x-cuda10.1.deb
ubuntu16_cuda10_2:
build_container: ubuntu16_cuda10_2
- artifact_name: ucx-${{ replace(variables['Build.SourceBranch'], 'refs/tags/', '') }}-ubuntu16.04-mofed5.0-cuda10.2.deb
+ artifact_name: $(POSTFIX)-ubuntu16.04-mofed5.x-cuda10.2.deb
ubuntu18_cuda10_1:
build_container: ubuntu18_cuda10_1
- artifact_name: ucx-${{ replace(variables['Build.SourceBranch'], 'refs/tags/', '') }}-ubuntu18.04-mofed5.0-cuda10.1.deb
+ artifact_name: $(POSTFIX)-ubuntu18.04-mofed5.x-cuda10.1.deb
ubuntu18_cuda10_2:
build_container: ubuntu18_cuda10_2
- artifact_name: ucx-${{ replace(variables['Build.SourceBranch'], 'refs/tags/', '') }}-ubuntu18.04-mofed5.0-cuda10.2.deb
- ubuntu18_cuda11.0:
- build_container: ubuntu18_cuda11.0
- artifact_name: ucx-${{ replace(variables['Build.SourceBranch'], 'refs/tags/', '') }}-ubuntu18.04-mofed5.0-cuda11.0.deb
+ artifact_name: $(POSTFIX)-ubuntu18.04-mofed5.x-cuda10.2.deb
+ ubuntu18_cuda11_0:
+ build_container: ubuntu18_cuda11_0
+ artifact_name: $(POSTFIX)-ubuntu18.04-mofed5.x-cuda11.0.deb
+ ubuntu18_cuda11_2:
+ build_container: ubuntu18_cuda11_2
+ artifact_name: $(POSTFIX)-ubuntu18.04-mofed5.x-cuda11.2.deb
+ ubuntu20_cuda11_0:
+ build_container: ubuntu20_cuda11_0
+ artifact_name: $(POSTFIX)-ubuntu20.04-mofed5.x-cuda11.0.deb
+ ubuntu20_cuda11_2:
+ build_container: ubuntu20_cuda11_2
+ artifact_name: $(POSTFIX)-ubuntu20.04-mofed5.x-cuda11.2.deb
container: $[ variables['build_container'] ]
steps:
- checkout: self
clean: true
+ fetchDepth: 100
path: "we/need/to/go/deeper"
# ^ Avoid rpmbuild error: Dest dir longer than base dir is not supported
@@ -47,13 +72,13 @@ jobs:
./autogen.sh
mkdir pkg-build
cd pkg-build
- ../contrib/configure-release --with-cuda
+ ../contrib/configure-release --with-cuda --with-java=no
displayName: Configure
- bash: |
set -eE
cd pkg-build
- ../contrib/buildrpm.sh -s -t -b --strict-ibverbs-dep
+ ../contrib/buildrpm.sh -s -t -b
cd rpm-dist/`uname -m`
tar -cjf "../../../${AZ_ARTIFACT_NAME}" *.rpm
cd ../../..
@@ -76,6 +101,7 @@ jobs:
AZ_ARTIFACT_NAME: $(artifact_name)
- task: GithubRelease@0
+ condition: eq(variables['Build.Reason'], 'ResourceTrigger')
displayName: Upload artifacts to draft release
inputs:
githubConnection: release
diff --git a/buildlib/az-helpers.sh b/buildlib/az-helpers.sh
index 286881ef591..77e504e33a0 100644
--- a/buildlib/az-helpers.sh
+++ b/buildlib/az-helpers.sh
@@ -38,6 +38,30 @@ function azure_log_issue() {
echo "##vso[task.complete result=Failed;]"
}
+# Emit an Azure "task.logissue" command of type "error" carrying the message
+# given as $1. Does nothing when RUNNING_IN_AZURE is "no".
+# Note: xtrace is switched off and stays off after the call.
+function azure_log_error() {
+    if [ "x$RUNNING_IN_AZURE" = "xno" ]; then
+        return
+    fi
+    msg=$1
+    set +x
+    echo "##vso[task.logissue type=error]${msg}"
+}
+
+# Emit an Azure "task.logissue" command of type "warning" carrying the message
+# given as $1. Does nothing when RUNNING_IN_AZURE is "no".
+# Note: xtrace is switched off and stays off after the call.
+function azure_log_warning() {
+    if [ "x$RUNNING_IN_AZURE" = "xno" ]; then
+        return
+    fi
+    msg=$1
+    set +x
+    echo "##vso[task.logissue type=warning]${msg}"
+}
+
+# Complete the task as "succeeded with issues".
+# $1 is an optional message; when given it is appended after "DONE", separated
+# by a space, so the two do not run together in the pipeline log.
+# Does nothing when RUNNING_IN_AZURE is "no". xtrace is switched off and stays
+# off after the call.
+function azure_complete_with_issues() {
+    test "x$RUNNING_IN_AZURE" = "xno" && return
+    msg=$1
+    set +x
+    echo "##vso[task.complete result=SucceededWithIssues;]DONE${msg:+ ${msg}}"
+}
+
# Get IPv4 address of an interface
function get_ip() {
iface=$1
@@ -45,6 +69,11 @@ function get_ip() {
echo "$ip"
}
+# Print, on one line, the fifth field of every "ibdev2netdev" output line that
+# contains "Up" (presumably the network interface names of active RDMA ports).
+function get_rdma_interfaces() {
+    echo $(ibdev2netdev | awk '/Up/ { print $5 }')
+}
+
# Prepend each line with a timestamp
function add_timestamp() {
set +x
@@ -52,3 +81,68 @@ function add_timestamp() {
echo "$(date -u +"%Y-%m-%dT%T.%NZ") $line"
done
}
+
+# Source the environment-modules profile script and put the site modulefiles
+# directory at the front of MODULEPATH.
+function az_init_modules() {
+    source /etc/profile.d/modules.sh
+    MODULEPATH="/hpc/local/etc/modulefiles:$MODULEPATH"
+    export MODULEPATH
+}
+
+#
+# Test if an environment module exists and load it if yes.
+# Otherwise, print diagnostics, log an Azure warning and return error code.
+#
+# $1 - exact module name as listed by "module avail -t"
+# Returns 0 if the module is listed, 1 otherwise.
+#
+function az_module_load() {
+    module=$1
+
+    # Match the name literally and against the whole line: module names contain
+    # dots (e.g. dev/cuda11.1.1) which must not act as regex wildcards.
+    if module avail -t 2>&1 | grep -qxF -- "$module"
+    then
+        module load "$module"
+        return 0
+    else
+        # Dump the search path and the available modules to help debugging
+        echo "MODULEPATH='${MODULEPATH}'"
+        module avail || true
+        azure_log_warning "Module $module cannot be loaded"
+        return 1
+    fi
+}
+
+#
+# Unload the environment module named by $1, ignoring any failure of
+# "module unload" (for example when the module does not exist).
+#
+function az_module_unload() {
+    module=$1
+    if ! module unload "${module}"; then
+        true
+    fi
+}
+
+
+#
+# try load cuda modules if nvidia driver is installed
+#
+try_load_cuda_env() {
+ num_gpus=0
+ have_cuda=no
+ have_gdrcopy=no
+ if [ -f "/proc/driver/nvidia/version" ]; then
+ have_cuda=yes
+ have_gdrcopy=yes
+ az_module_load dev/cuda11.1.1 || have_cuda=no
+ az_module_load dev/gdrcopy2.1_cuda11.1.1 || have_gdrcopy=no
+ num_gpus=$(nvidia-smi -L | wc -l)
+ fi
+}
+
+
+check_commit_message() {
+ git_id=$1
+ title_mask=$2
+ build_reason=$3
+ echo "Get commit message target $git_id"
+ title=`git log -1 --format="%s" $git_id`
+
+ if [[ ( "$build_reason" == "IndividualCI" ) || ( "$title" == "$title_mask"* && "$build_reason" == "PullRequest" ) ]]
+ then
+ echo "##vso[task.setvariable variable=Launch;isOutput=true]Yes"
+ else
+ echo "##vso[task.setvariable variable=Launch;isOutput=true]No"
+ fi
+}
diff --git a/buildlib/az-io_demo.sh b/buildlib/az-io_demo.sh
deleted file mode 100755
index 32e67fa3cd5..00000000000
--- a/buildlib/az-io_demo.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/bin/bash -leE
-
-# avoid Azure error: TERM environment variable not set
-export TERM=xterm
-
-basedir=$(cd $(dirname $0) && pwd)
-workspace=${WORKSPACE:="$basedir"}
-cd "$workspace"
-
-echo "Running $0 $*..."
-eval "$*"
-source "${workspace}/az-helpers.sh"
-
-server_ip=${server_ip:=""}
-duration=${duration:=2}
-iface=${iface:="bond0"}
-
-export UCX_MAX_EAGER_LANES=2
-export UCX_TLS=rc
-export UCX_IB_SEG_SIZE=2k
-export UCX_IB_RX_QUEUE_LEN=1024
-export UCX_RC_MAX_RD_ATOMIC=16
-export UCX_RC_ROCE_PATH_FACTOR=2
-export UCX_SOCKADDR_CM_ENABLE=y
-export UCX_RC_MAX_GET_ZCOPY=32k
-export UCX_RC_TX_NUM_GET_BYTES=256K
-
-## run server
-if [ "x$server_ip" = "x" ]; then
- ip addr show ${iface}
- server_ip=$(get_ip ${iface})
- azure_set_variable "server_ip" "$server_ip"
- echo "Starting server on IP ${server_ip}"
-
- server_cmd="${workspace}/../test/apps/iodemo/io_demo"
- if ! "${server_cmd}" |& add_timestamp &>server.log & then
- cat server.log
- error "Failed to run server command ${server_cmd}"
- fi
-
- # wait for io_demo to start
- echo "Waiting for server to start.."
- sleep 10
-
- server_pid=$(pgrep -u "$USER" -f 'apps/iodemo')
- echo "Server pid is '${server_pid}'"
-
- num_pids=$(echo "${server_pid}" | wc -w)
- if [ ${num_pids} -ne 1 ]; then
- cat server.log
- ps -f -U "$USER" # show all runing processes
- error "Expected 1 running server, found ${num_pids}"
- fi
-
- echo "Server is running, PID='$server_pid'"
- azure_set_variable "server_pid" "$server_pid"
-
- # double check the process is running
- sleep 5
- if ! kill -0 "$server_pid"; then
- cat server.log
- error "Failed to start server"
- fi
-
- exit 0
-fi
-
-## run client
-
-timeout="$(( duration - 1 ))m"
-
-echo "Client connecting to server at IP $server_ip"
-echo "Timeout is $timeout"
-
-if ! "${workspace}/../test/apps/iodemo/io_demo" -l $timeout -i 10000000 "$server_ip"; then
- error "Failed to start client"
-fi
diff --git a/buildlib/az-network-corrupter.sh b/buildlib/az-network-corrupter.sh
index 16ebeef8b72..eb4ffc7054b 100755
--- a/buildlib/az-network-corrupter.sh
+++ b/buildlib/az-network-corrupter.sh
@@ -14,6 +14,7 @@ manager_script=/hpc/noarch/git_projects/swx_infrastructure/clusters/bin/manage_h
if [ "x$reset" = "xyes" ]; then
echo "Resetting interface on $(hostname)..."
${manager_script} "$(hostname)" "bond-up"
+ sleep "$uptime"
exit $?
fi
diff --git a/buildlib/azure-pipelines-int4.yml b/buildlib/azure-pipelines-int4.yml
new file mode 100644
index 00000000000..9cff3be2173
--- /dev/null
+++ b/buildlib/azure-pipelines-int4.yml
@@ -0,0 +1,22 @@
+# See https://aka.ms/yaml
+# This pipeline to be run on PRs
+
+trigger: none
+
+resources:
+ pipelines:
+ - pipeline: rebaseMainUcx
+ source: UCX snapshot
+
+ containers:
+ - container: centos7
+ image: rdmz-harbor.rdmz.labs.mlnx/ucx/centos7:2
+ options: -v /hpc/local:/hpc/local -v /auto/sw_tools:/auto/sw_tools
+
+stages:
+ - stage: Rebase
+ jobs:
+ - job: rebase_master
+ steps:
+ - bash: |
+ echo "Hello world"
\ No newline at end of file
diff --git a/buildlib/azure-pipelines-pr.yml b/buildlib/azure-pipelines-pr.yml
index 1a3c9121c7e..6c2ea30ef7f 100644
--- a/buildlib/azure-pipelines-pr.yml
+++ b/buildlib/azure-pipelines-pr.yml
@@ -3,17 +3,61 @@
trigger: none
pr:
- - master
- - v*.*.x
+ branches:
+ include:
+ - master
+ - v*.*.x
+ paths:
+ exclude:
+ - .gitignore
+ - docs/source
+ - docs/CodeStyle.md
+ - docs/LoggingStyle.md
+ - docs/OptimizationStyle.md
+ - README.md
+ - NEWS
resources:
containers:
- container: centos7
- image: ucfconsort.azurecr.io/ucx/centos7:1
- endpoint: ucfconsort_registry
+ image: rdmz-harbor.rdmz.labs.mlnx/ucx/centos7:2
+ options: -v /hpc/local:/hpc/local -v /auto/sw_tools:/auto/sw_tools
- container: fedora
- image: ucfconsort.azurecr.io/ucx/fedora:3
- endpoint: ucfconsort_registry
+ image: rdmz-harbor.rdmz.labs.mlnx/ucx/fedora33:1
+ options: --privileged
+ - container: fedora34
+ image: rdmz-harbor.rdmz.labs.mlnx/ucx/fedora34:2
+ options: --privileged -v /hpc/local:/hpc/local -v /auto/sw_tools:/auto/sw_tools
+ - container: coverity_rh7
+ image: rdmz-harbor.rdmz.labs.mlnx/ucx/coverity:mofed-5.1-2.3.8.0
+ options: -v /hpc/local:/hpc/local -v /auto/sw_tools:/auto/sw_tools
+ - container: rhel76
+ image: rdmz-harbor.rdmz.labs.mlnx/swx-infra/x86_64/rhel7.6/builder:mofed-5.0-1.0.0.0
+ options: -v /hpc/local:/hpc/local -v /auto/sw_tools:/auto/sw_tools
+ - container: rhel76_mofed47
+ image: rdmz-harbor.rdmz.labs.mlnx/swx-infra/x86_64/rhel7.6/builder:mofed-4.7-1.0.0.1
+ options: -v /hpc/local:/hpc/local -v /auto/sw_tools:/auto/sw_tools
+ - container: rhel74
+ image: rdmz-harbor.rdmz.labs.mlnx/swx-infra/x86_64/rhel7.4/builder:mofed-5.0-1.0.0.0
+ options: -v /hpc/local:/hpc/local -v /auto/sw_tools:/auto/sw_tools
+ - container: rhel72
+ image: rdmz-harbor.rdmz.labs.mlnx/swx-infra/x86_64/rhel7.2/builder:mofed-5.0-1.0.0.0
+ options: -v /hpc/local:/hpc/local -v /auto/sw_tools:/auto/sw_tools
+ - container: rhel82
+ image: rdmz-harbor.rdmz.labs.mlnx/swx-infra/x86_64/rhel8.2/builder:mofed-5.0-1.0.0.0
+ options: -v /hpc/local:/hpc/local -v /auto/sw_tools:/auto/sw_tools
+ - container: ubuntu2004
+ image: rdmz-harbor.rdmz.labs.mlnx/swx-infra/x86_64/ubuntu20.04/builder:mofed-5.0-1.0.0.0
+ options: -v /hpc/local:/hpc/local -v /auto/sw_tools:/auto/sw_tools
+ - container: ubuntu1804
+ image: rdmz-harbor.rdmz.labs.mlnx/swx-infra/x86_64/ubuntu18.04/builder:mofed-5.0-1.0.0.0
+ options: -v /hpc/local:/hpc/local -v /auto/sw_tools:/auto/sw_tools
+ - container: sles15sp2
+ image: rdmz-harbor.rdmz.labs.mlnx/swx-infra/x86_64/sles15sp2/builder:mofed-5.0-1.0.0.0
+ options: -v /hpc/local:/hpc/local -v /auto/sw_tools:/auto/sw_tools
+ - container: sles12sp5
+ image: rdmz-harbor.rdmz.labs.mlnx/swx-infra/x86_64/sles12sp5/builder:mofed-5.0-1.0.0.0
+ options: -v /hpc/local:/hpc/local -v /auto/sw_tools:/auto/sw_tools
stages:
- stage: Codestyle
@@ -24,10 +68,12 @@ stages:
steps:
- checkout: self
clean: true
+ fetchDepth: 100
- bash: |
set -eE
- range="remotes/origin/$(System.PullRequest.TargetBranch)..$(Build.SourceVersion)"
+ BASE_SOURCEVERSION=$(git rev-parse HEAD^)
+ range="$BASE_SOURCEVERSION..$(Build.SourceVersion)"
ok=1
for sha1 in `git log $range --format="%h"`
do
@@ -48,40 +94,71 @@ stages:
fi
condition: eq(variables['Build.Reason'], 'PullRequest')
- - stage: Build
+ # Check that the code is formatted according to the code style guidelines
+ - job: format
+ displayName: format code
+ pool:
+ name: MLNX
+ demands:
+ - ucx_docker -equals yes
+ container: fedora
+ steps:
+ - checkout: self
+ clean: true
+ fetchDepth: 100
+
+ - bash: |
+ source ./buildlib/az-helpers.sh
+ set -x
+ git log -1 HEAD
+ git log -1 HEAD^
+ BASE_SOURCEVERSION=$(git rev-parse HEAD^)
+ echo "Checking code format on diff ${BASE_SOURCEVERSION}..${BUILD_SOURCEVERSION}"
+ git-clang-format --diff ${BASE_SOURCEVERSION} ${BUILD_SOURCEVERSION} > format.patch
+ echo "Generated patch file:"
+ cat format.patch
+ if [ "`cat format.patch`" = "no modified files to format" ]; then
+ exit
+ fi
+ git apply format.patch
+ if ! git diff --quiet --exit-code
+ then
+ url="https://github.com/openucx/ucx/wiki/Code-style-checking"
+ azure_complete_with_issues "Code is not formatted according to the code style, see $url for more info."
+ fi
+ condition: eq(variables['Build.Reason'], 'PullRequest')
+
+ - stage: Static_check
+ dependsOn: [Codestyle]
jobs:
- job: static_checks
displayName: Static checks
+ pool:
+ name: MLNX
+ demands:
+ - ucx_docker_fast -equals yes
container: fedora
steps:
- checkout: self
clean: true
+ fetchDepth: 100
- bash: ./autogen.sh
displayName: Setup autotools
- bash: |
set -eE
- mkdir build && cd build
+ . buildlib/tools/common.sh
+ prepare_build
clang --version
gcc --version
cppcheck --version
- ../contrib/configure-release
- displayName: Configure
-
- - bash: |
- set -eE
-
- cd build
+ ${WORKSPACE}/contrib/configure-release
export PATH="`csclng --print-path-to-wrap`:`cscppc --print-path-to-wrap`:`cswrap --print-path-to-wrap`:$PATH"
+ set -o pipefail
make -j`nproc` 2>&1 | tee compile.log
- displayName: Build
-
- - bash: |
- set -eE
-
- cd build
+ set +o pipefail
cs_errors="cs.err"
cslinker --quiet compile.log \
@@ -99,10 +176,66 @@ stages:
echo "No errors reported by static checkers"
fi
displayName: cstools reports
+ env:
+ BUILD_ID: "$(Build.BuildId)-$(Build.BuildNumber)"
+
+ - stage: Build
+ dependsOn: [Static_check]
+ jobs:
+ - job: build_source
+ pool:
+ name: MLNX
+ demands:
+ - ucx_docker -equals yes
+ strategy:
+ matrix:
+ rhel72:
+ CONTAINER: rhel72
+ rhel74:
+ CONTAINER: rhel74
+ rhel76:
+ CONTAINER: rhel76
+ long_test: yes
+ rhel76_mofed47:
+ CONTAINER: rhel76_mofed47
+ long_test: yes
+ ubuntu2004:
+ CONTAINER: ubuntu2004
+ long_test: yes
+ ubuntu1804:
+ CONTAINER: ubuntu1804
+ sles15sp2:
+ CONTAINER: sles15sp2
+ rhel82:
+ CONTAINER: rhel82
+ fedora34:
+ CONTAINER: fedora34
+ long_test: yes
+ container: $[ variables['CONTAINER'] ]
+ timeoutInMinutes: 240
+
+ steps:
+ - checkout: self
+ clean: true
+ fetchDepth: 100
- # Perform test builds on relevant distributions
+ - bash: |
+ ./buildlib/tools/builds.sh
+ displayName: Build
+ env:
+ BUILD_ID: "$(Build.BuildId)-$(Build.BuildNumber)"
+ long_test: $(long_test)
+
+ - stage: Distro
+ dependsOn: [Static_check]
+ jobs:
+ # Perform test builds on relevant distributions.
- job: Distros
displayName: Build for
+ pool:
+ name: MLNX
+ demands:
+ - ucx_docker -equals yes
strategy:
matrix:
centos7:
@@ -112,50 +245,36 @@ stages:
steps:
- checkout: self
clean: true
-
- - bash: ./autogen.sh
- displayName: Setup autotools
-
- - bash: |
- set -eE
- mkdir build && cd build
- ../configure $(CONFIGURE_OPTS)
- displayName: Configure
+ fetchDepth: 100
- bash: |
set -eE
- cd build
+ . buildlib/tools/common.sh
+ prepare_build
+ ${WORKSPACE}/configure $(CONFIGURE_OPTS)
gcc -v
make -s -j `nproc`
- displayName: Build for $(CONTAINER)
-
- # Test RPM build
- - job: build_rpm
- displayName: Build tarball and source rpm
- container: fedora
- steps:
- - checkout: self
- clean: true
-
- - bash: ./autogen.sh
- displayName: Setup autotools
-
- - bash: |
- set -eE
- gcc --version
- ./contrib/configure-release
- stdbuf -e0 -o0 ./contrib/buildrpm.sh -s -t -b |& tee rpmbuild.log
- pattern='^warning: '
- if grep -q "$pattern" rpmbuild.log; then
- echo "rpm build generated warnings:"
- grep "$pattern" rpmbuild.log
- echo "##vso[task.logissue type=error]rpm build generated warnings"
- echo "##vso[task.complete result=Failed;]"
+ set +eE
+ set -x
+ ./src/tools/info/ucx_info -e -u t 2>&1 | tee info.txt
+ grep -i error info.txt
+ retVal=$?
+ if [ $retVal -eq 0 ]; then
+ exit 1;
fi
- displayName: Configure source and build RPM
+ exit 0;
+ displayName: Test ucx_info
+
+ - stage: Coverity
+ dependsOn: [Static_check]
+ jobs:
+ - template: coverity.yml
+ parameters:
+ demands: ucx_docker -equals yes
+ container: coverity_rh7
- stage: Tests
- dependsOn: [Codestyle]
+ dependsOn: [Static_check]
jobs:
- template: tests.yml
parameters:
@@ -177,4 +296,25 @@ stages:
name: hwi
demands: ucx_hwi -equals yes
test_perf: 0
- - template: io-demo.yml
+ - template: tests.yml
+ parameters:
+ name: sputnik
+ demands: ucx_sputnik -equals yes
+ test_perf: 0
+
+ - stage: io_demo
+ dependsOn: [Static_check]
+ jobs:
+ - template: io_demo/io-demo.yml
+
+ - stage: jucx
+ dependsOn: [Static_check]
+ jobs:
+ - template: jucx/jucx-test.yml
+ parameters:
+ name: new
+ demands: ucx_new -equals yes
+ - template: jucx/jucx-test.yml
+ parameters:
+ name: gpu
+ demands: ucx_gpu -equals yes
diff --git a/buildlib/azure-pipelines-release.yml b/buildlib/azure-pipelines-release.yml
index c83eefcaacb..25083f43f18 100644
--- a/buildlib/azure-pipelines-release.yml
+++ b/buildlib/azure-pipelines-release.yml
@@ -1,23 +1,28 @@
# See https://aka.ms/yaml
# This pipeline to be run on tags creation
-pr: none
trigger:
tags:
include:
- v*
+pr:
+ - master
+ - v*.*.x
resources:
containers:
- - container: centos7
- image: ucfconsort.azurecr.io/ucx/centos7:2
- endpoint: ucfconsort_registry
- container: centos7_cuda10_1
image: rdmz-harbor.rdmz.labs.mlnx/ucx/centos7-mofed5.0-cuda10.1:1
- container: centos7_cuda10_2
image: rdmz-harbor.rdmz.labs.mlnx/ucx/centos7-mofed5.0-cuda10.2:1
- container: centos7_cuda11_0
- image: rdmz-harbor.rdmz.labs.mlnx/ucx/centos7-mofed5.0-cuda11.0:1
+ image: rdmz-harbor.rdmz.labs.mlnx/ucx/centos7-mofed5.0-cuda11.0:2
+ - container: centos7_cuda11_2
+ image: rdmz-harbor.rdmz.labs.mlnx/ucx/centos7-mofed5.0-cuda11.2:2
+ - container: centos8_cuda11_0
+ image: rdmz-harbor.rdmz.labs.mlnx/ucx/centos8-mofed5.0-cuda11.0:2
+ - container: centos8_cuda11_2
+ image: rdmz-harbor.rdmz.labs.mlnx/ucx/centos8-mofed5.1-cuda11.2:2
- container: ubuntu16_cuda10_1
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu16.04-mofed5.0-cuda10.1:1
- container: ubuntu16_cuda10_2
@@ -26,18 +31,44 @@ resources:
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu18.04-mofed5.0-cuda10.1:1
- container: ubuntu18_cuda10_2
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu18.04-mofed5.0-cuda10.2:1
- - container: ubuntu18_cuda11
- image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu18.04-mofed5.0-cuda11.0:1
+ - container: ubuntu18_cuda11_0
+ image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu18.04-mofed5.0-cuda11.0:2
+ - container: ubuntu18_cuda11_2
+ image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu18.04-mofed5.0-cuda11.2:2
+ - container: ubuntu20_cuda11_0
+ image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu20.04-mofed5.0-cuda11.0:2
+ - container: ubuntu20_cuda11_2
+ image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu20.04-mofed5.0-cuda11.2:2
stages:
+ - stage: Check_Commit
+ jobs:
+ - job: Check
+ steps:
+ - checkout: self
+ clean: true
+
+ - bash: |
+ set -eE
+ source ./buildlib/az-helpers.sh
+ set -x
+ check_commit_message $(system.pullRequest.sourceCommitId) "AZP/RELEASE: " $(Build.Reason)
+ name: Commit
# Create an empty draft to avoid race condition in distro releases
- stage: GitHubDraft
+ dependsOn: Check_Commit
jobs:
- job: DraftRelease
- container: centos7
+ condition: eq(stageDependencies.Check_Commit.Check.outputs['Commit.Launch'], 'Yes')
+ container: centos7_cuda11_2
+ pool:
+ name: MLNX
+ demands:
+ - ucx_docker -equals yes
steps:
- checkout: self
clean: true
+ fetchDepth: 100
path: "we/need/to/go/deeper"
- bash: ./autogen.sh
@@ -46,11 +77,12 @@ stages:
- bash: |
set -eE
gcc --version
- ./contrib/configure-release
+ ./contrib/configure-release --with-java=no
./contrib/buildrpm.sh -s -t -b
displayName: Build tarball
- task: GithubRelease@0
+ condition: eq(variables['Build.Reason'], 'IndividualCI')
displayName: Create/edit GitHub Draft Release
inputs:
githubConnection: release
@@ -67,8 +99,14 @@ stages:
./rpm-dist/ucx-*.src.rpm
- stage: Release
+ dependsOn: Check_Commit
+ variables:
+ ${{ if eq(variables['Build.Reason'], 'IndividualCI') }}:
+ TARGET: publish-release
+ ${{ if eq(variables['Build.Reason'], 'PullRequest') }}:
+ TARGET: package
jobs:
- template: az-distro-release.yml
- - template: jucx-publish.yml
+ - template: jucx/jucx-publish.yml
parameters:
- target: publish-release
+ target: $(TARGET)
diff --git a/buildlib/azure-pipelines.yml b/buildlib/azure-pipelines.yml
index e0d833f24a3..280e1657581 100644
--- a/buildlib/azure-pipelines.yml
+++ b/buildlib/azure-pipelines.yml
@@ -8,13 +8,23 @@ trigger:
resources:
containers:
- - container: centos7
- image: ucfconsort.azurecr.io/ucx/centos7:1
- endpoint: ucfconsort_registry
+ - container: centos7_cuda10_1
+ image: rdmz-harbor.rdmz.labs.mlnx/ucx/centos7-mofed5.0-cuda10.1:1
stages:
+ - stage: Check_Commit
+ jobs:
+ - job: Check
+ steps:
+ - checkout: self
+ clean: true
+ - bash: |
+ echo "##vso[task.setvariable variable=Launch;isOutput=true]Yes"
+ name: Commit
- stage: Build
+ dependsOn: Check_Commit
jobs:
- - template: jucx-publish.yml
+ - template: jucx/jucx-publish.yml
parameters:
target: publish-snapshot
+
diff --git a/buildlib/coverity.yml b/buildlib/coverity.yml
new file mode 100644
index 00000000000..86a93236132
--- /dev/null
+++ b/buildlib/coverity.yml
@@ -0,0 +1,43 @@
+parameters:
+ demands: []
+ container: rhel76
+ modes: ["release", "devel"]
+
+jobs:
+ - ${{each mode in parameters.modes }}:
+ - job: coverity_${{ mode }}
+ workspace:
+ clean: all
+ pool:
+ name: MLNX
+ demands: ${{ parameters.demands }}
+ displayName: coverity ${{ mode }} on ${{ parameters.container }}
+ container: ${{ parameters.container }}
+ timeoutInMinutes: 30
+ steps:
+ - checkout: self
+ clean: true
+ fetchDepth: 100
+ - bash: |
+ ./buildlib/tools/coverity.sh ${{ mode }}
+ res=$?
+ reportExists=False
+ set -x
+ cov_error_folder=$(System.DefaultWorkingDirectory)/cov_build_${{ mode }}/output/errors
+ echo "##vso[task.setvariable variable=cov_error_folder]$cov_error_folder"
+ ls -la $cov_error_folder
+ test -f $cov_error_folder/index.html && reportExists=True
+ echo "##vso[task.setvariable variable=reportExists]$reportExists"
+ if [[ $res -eq 0 ]] ; then
+ echo "##vso[task.complete result=Succeeded;]Done"
+ else
+ echo "##vso[task.complete result=Failed;]Coverity have errors"
+ fi
+ displayName: ${{ mode }}
+ env:
+ BUILD_ID: "$(Build.BuildId)-$(Build.BuildNumber)"
+ - task: PublishPipelineArtifact@1
+ inputs:
+ targetPath: $(cov_error_folder)
+ artifactName: coverity_${{ mode }}
+ condition: eq(variables['reportExists'], 'True')
diff --git a/buildlib/docker-compose.yml b/buildlib/docker-compose.yml
deleted file mode 100644
index 312d5698781..00000000000
--- a/buildlib/docker-compose.yml
+++ /dev/null
@@ -1,84 +0,0 @@
-version: "3"
-
-services:
- centos7-mofed5.0-cuda10.1:
- image: centos7-mofed5.0-cuda10.1
- build:
- context: .
- network: host
- dockerfile: centos7-release.Dockerfile
- args:
- MOFED_VERSION: 5.0-1.0.0.0
- MOFED_OS: rhel7.6
- CUDA_VERSION: 10.1
- centos7-mofed5.0-cuda10.2:
- image: centos7-mofed5.0-cuda10.2
- build:
- context: .
- network: host
- dockerfile: centos7-release.Dockerfile
- args:
- MOFED_VERSION: 5.0-1.0.0.0
- MOFED_OS: rhel7.6
- CUDA_VERSION: 10.2
- centos7-mofed5.0-cuda11.0:
- image: centos7-mofed5.0-cuda11.0
- build:
- context: .
- network: host
- dockerfile: centos7-release.Dockerfile
- args:
- MOFED_VERSION: 5.0-1.0.0.0
- MOFED_OS: rhel7.6
- CUDA_VERSION: 11.0
- ubuntu16.04-mofed5.0-cuda10.1:
- image: ubuntu16.04-mofed5.0-cuda10.1
- build:
- context: .
- network: host
- dockerfile: ubuntu-release.Dockerfile
- args:
- MOFED_VERSION: 5.0-1.0.0.0
- UBUNTU_VERSION: 16.04
- CUDA_VERSION: 10.1
- ubuntu16.04-mofed5.0-cuda10.2:
- image: ubuntu16.04-mofed5.0-cuda10.2
- build:
- context: .
- network: host
- dockerfile: ubuntu-release.Dockerfile
- args:
- MOFED_VERSION: 5.0-1.0.0.0
- UBUNTU_VERSION: 16.04
- CUDA_VERSION: 10.2
- ubuntu18.04-mofed5.0-cuda10.1:
- image: ubuntu18.04-mofed5.0-cuda10.1
- build:
- context: .
- network: host
- dockerfile: ubuntu-release.Dockerfile
- args:
- MOFED_VERSION: 5.0-1.0.0.0
- UBUNTU_VERSION: 18.04
- CUDA_VERSION: 10.1
- ubuntu18.04-mofed5.0-cuda10.2:
- image: ubuntu18.04-mofed5.0-cuda10.2
- build:
- context: .
- network: host
- dockerfile: ubuntu-release.Dockerfile
- args:
- MOFED_VERSION: 5.0-1.0.0.0
- UBUNTU_VERSION: 18.04
- CUDA_VERSION: 10.2
- ubuntu18.04-mofed5.0-cuda11.0:
- image: ubuntu18.04-mofed5.0-cuda11.0
- build:
- context: .
- network: host
- dockerfile: ubuntu-release.Dockerfile
- args:
- MOFED_VERSION: 5.0-1.0.0.0
- UBUNTU_VERSION: 18.04
- CUDA_VERSION: 11.0
-
diff --git a/buildlib/dockers/centos-release.Dockerfile b/buildlib/dockers/centos-release.Dockerfile
new file mode 100644
index 00000000000..ba653433e47
--- /dev/null
+++ b/buildlib/dockers/centos-release.Dockerfile
@@ -0,0 +1,49 @@
+# CentOS release-build image on top of the nvidia/cuda devel image selected by
+# the CUDA_VERSION and OS_VERSION build arguments, with MLNX_OFED user space.
+ARG CUDA_VERSION
+ARG OS_VERSION
+FROM nvidia/cuda:${CUDA_VERSION}-devel-centos${OS_VERSION}
+
+# Build tools and libraries; wget is needed by the MOFED download below.
+RUN yum install -y \
+ autoconf \
+ automake \
+ doxygen \
+ file \
+ gcc-c++ \
+ git \
+ glibc-devel \
+ libtool \
+ make \
+ maven \
+ numactl-devel \
+ rdma-core-devel \
+ rpm-build \
+ tcl \
+ tcsh \
+ tk \
+ wget \
+ libusbx \
+ fuse-libs \
+ && yum clean all
+
+# MOFED
+# Downloads the MLNX_OFED tarball named from MOFED_VERSION/MOFED_OS, installs
+# it user-space only (no firmware update, no bundled ucx/hcoll/openmpi/sharp)
+# and removes the unpacked directory and tarball in the same layer.
+ARG MOFED_VERSION
+ARG MOFED_OS
+ENV MOFED_DIR MLNX_OFED_LINUX-${MOFED_VERSION}-${MOFED_OS}-x86_64
+ENV MOFED_SITE_PLACE MLNX_OFED-${MOFED_VERSION}
+ENV MOFED_IMAGE ${MOFED_DIR}.tgz
+RUN wget --no-verbose http://content.mellanox.com/ofed/${MOFED_SITE_PLACE}/${MOFED_IMAGE} && \
+ tar -xzf ${MOFED_IMAGE} && \
+ ${MOFED_DIR}/mlnxofedinstall --all -q \
+ --user-space-only \
+ --without-fw-update \
+ --skip-distro-check \
+ --without-ucx \
+ --without-hcoll \
+ --without-openmpi \
+ --without-sharp \
+ && rm -rf ${MOFED_DIR} && rm -rf *.tgz
+
+# Prepend the CUDA include, lib64 and compat directories to the search paths.
+ENV CPATH /usr/local/cuda/include:${CPATH}
+ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/compat:${LD_LIBRARY_PATH}
+ENV LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/compat:${LIBRARY_PATH}
+ENV PATH /usr/local/cuda/compat:${PATH}
+
diff --git a/buildlib/centos7-release.Dockerfile b/buildlib/dockers/centos7-release.Dockerfile
similarity index 92%
rename from buildlib/centos7-release.Dockerfile
rename to buildlib/dockers/centos7-release.Dockerfile
index 65ad6cfbac2..5e2609684cb 100644
--- a/buildlib/centos7-release.Dockerfile
+++ b/buildlib/dockers/centos7-release.Dockerfile
@@ -1,4 +1,4 @@
-ARG CUDA_VERSION=10.1
+ARG CUDA_VERSION
FROM nvidia/cuda:${CUDA_VERSION}-devel-centos7
RUN yum install -y \
@@ -10,6 +10,7 @@ RUN yum install -y \
git \
glibc-devel \
libtool \
+ librdmacm \
make \
maven \
numactl-devel \
@@ -19,11 +20,13 @@ RUN yum install -y \
tcsh \
tk \
wget \
+ libusbx \
+ fuse-libs \
&& yum clean all
# MOFED
-ARG MOFED_VERSION=5.0-1.0.0.0
-ARG MOFED_OS=rhel7.6
+ARG MOFED_VERSION
+ARG MOFED_OS
ENV MOFED_DIR MLNX_OFED_LINUX-${MOFED_VERSION}-${MOFED_OS}-x86_64
ENV MOFED_SITE_PLACE MLNX_OFED-${MOFED_VERSION}
ENV MOFED_IMAGE ${MOFED_DIR}.tgz
@@ -43,4 +46,3 @@ ENV CPATH /usr/local/cuda/include:${CPATH}
ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/compat:${LD_LIBRARY_PATH}
ENV LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/compat:${LIBRARY_PATH}
ENV PATH /usr/local/cuda/compat:${PATH}
-
diff --git a/buildlib/centos7.Dockerfile b/buildlib/dockers/centos7.Dockerfile
similarity index 95%
rename from buildlib/centos7.Dockerfile
rename to buildlib/dockers/centos7.Dockerfile
index 8f997b702ae..0d484ec4a61 100644
--- a/buildlib/centos7.Dockerfile
+++ b/buildlib/dockers/centos7.Dockerfile
@@ -10,6 +10,7 @@ RUN yum install -y \
git \
glibc-devel \
libtool \
+ librdmacm \
make \
maven \
numactl-devel \
diff --git a/buildlib/dockers/centos8-release.Dockerfile b/buildlib/dockers/centos8-release.Dockerfile
new file mode 100644
index 00000000000..cf7d7b6f8c3
--- /dev/null
+++ b/buildlib/dockers/centos8-release.Dockerfile
@@ -0,0 +1,49 @@
+# CentOS 8 variant of the release-build image: same layout as
+# centos-release.Dockerfile, but the package list has no doxygen and adds
+# python36.
+ARG CUDA_VERSION
+ARG OS_VERSION
+FROM nvidia/cuda:${CUDA_VERSION}-devel-centos${OS_VERSION}
+
+# Build tools and libraries; wget is needed by the MOFED download below.
+RUN yum install -y \
+ autoconf \
+ automake \
+ file \
+ gcc-c++ \
+ git \
+ glibc-devel \
+ libtool \
+ make \
+ maven \
+ numactl-devel \
+ rdma-core-devel \
+ rpm-build \
+ tcl \
+ tcsh \
+ tk \
+ wget \
+ libusbx \
+ fuse-libs \
+ python36 \
+ && yum clean all
+
+# MOFED
+# Downloads the MLNX_OFED tarball named from MOFED_VERSION/MOFED_OS, installs
+# it user-space only (no firmware update, no bundled ucx/hcoll/openmpi/sharp)
+# and removes the unpacked directory and tarball in the same layer.
+ARG MOFED_VERSION
+ARG MOFED_OS
+ENV MOFED_DIR MLNX_OFED_LINUX-${MOFED_VERSION}-${MOFED_OS}-x86_64
+ENV MOFED_SITE_PLACE MLNX_OFED-${MOFED_VERSION}
+ENV MOFED_IMAGE ${MOFED_DIR}.tgz
+RUN wget --no-verbose http://content.mellanox.com/ofed/${MOFED_SITE_PLACE}/${MOFED_IMAGE} && \
+ tar -xzf ${MOFED_IMAGE} && \
+ ${MOFED_DIR}/mlnxofedinstall --all -q \
+ --user-space-only \
+ --without-fw-update \
+ --skip-distro-check \
+ --without-ucx \
+ --without-hcoll \
+ --without-openmpi \
+ --without-sharp \
+ && rm -rf ${MOFED_DIR} && rm -rf *.tgz
+
+# Prepend the CUDA include, lib64 and compat directories to the search paths.
+ENV CPATH /usr/local/cuda/include:${CPATH}
+ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/compat:${LD_LIBRARY_PATH}
+ENV LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/compat:${LIBRARY_PATH}
+ENV PATH /usr/local/cuda/compat:${PATH}
+
diff --git a/buildlib/dockers/docker-compose.yml b/buildlib/dockers/docker-compose.yml
new file mode 100644
index 00000000000..82584563f2a
--- /dev/null
+++ b/buildlib/dockers/docker-compose.yml
@@ -0,0 +1,67 @@
+# Release build images. Each service builds one OS / MOFED / CUDA combination
+# from a Dockerfile in this directory; the service name doubles as the image
+# name and the `args` are passed to the Dockerfile as build arguments.
+version: "3"
+
+services:
+ centos7-mofed5.1-cuda11.1:
+ image: centos7-mofed5.1-cuda11.1
+ build:
+ context: .
+ network: host
+ dockerfile: centos-release.Dockerfile
+ args:
+ MOFED_VERSION: 5.1-2.5.8.0
+ MOFED_OS: rhel7.6
+ CUDA_VERSION: 11.1
+ OS_VERSION: 7
+ centos8-mofed5.1-cuda11.1:
+ image: centos8-mofed5.1-cuda11.1
+ build:
+ context: .
+ network: host
+ dockerfile: centos8-release.Dockerfile
+ args:
+ MOFED_VERSION: 5.1-2.5.8.0
+ MOFED_OS: rhel8.3
+ CUDA_VERSION: 11.1
+ OS_VERSION: 8
+ ubuntu18.04-mofed5.1-cuda11.1:
+ image: ubuntu18.04-mofed5.1-cuda11.1
+ build:
+ context: .
+ network: host
+ dockerfile: ubuntu-release.Dockerfile
+ args:
+ MOFED_VERSION: 5.1-2.5.8.0
+ UBUNTU_VERSION: 18.04
+ CUDA_VERSION: 11.1
+ ubuntu20.04-mofed5.1-cuda11.1:
+ image: ubuntu20.04-mofed5.1-cuda11.1
+ build:
+ context: .
+ network: host
+ dockerfile: ubuntu-release.Dockerfile
+ args:
+ MOFED_VERSION: 5.1-2.5.8.0
+ UBUNTU_VERSION: 20.04
+ CUDA_VERSION: 11.1
+ # Only Ubuntu service that overrides MOFED_OS; it pins the ubuntu20.04 MOFED
+ # package for the 20.10 base image.
+ ubuntu20.10-mofed5.1-cuda11.1:
+ image: ubuntu20.10-mofed5.1-cuda11.1
+ build:
+ context: .
+ network: host
+ dockerfile: ubuntu-release.Dockerfile
+ args:
+ MOFED_VERSION: 5.1-2.5.8.0
+ UBUNTU_VERSION: 20.10
+ CUDA_VERSION: 11.1
+ MOFED_OS: ubuntu20.04
+ # NOTE(review): fedora-release.Dockerfile declares no CUDA_VERSION ARG, so
+ # the CUDA_VERSION value below looks unused by that build - confirm.
+ fedora31-mofed5.1-cuda11.1:
+ image: fedora31-mofed5.1-cuda11.1
+ build:
+ context: .
+ network: host
+ dockerfile: fedora-release.Dockerfile
+ args:
+ MOFED_VERSION: 5.1-2.5.8.0
+ OS_VERSION: 31
+ CUDA_VERSION: 11.1
+ MOFED_OS: fc31
diff --git a/buildlib/dockers/fedora-release.Dockerfile b/buildlib/dockers/fedora-release.Dockerfile
new file mode 100644
index 00000000000..6e660dda934
--- /dev/null
+++ b/buildlib/dockers/fedora-release.Dockerfile
@@ -0,0 +1,51 @@
+# Fedora release-build image (base selected by OS_VERSION) with the
+# MLNX_OFED user-space stack installed.
+ARG OS_VERSION
+FROM fedora:${OS_VERSION}
+
+# Build toolchain and static-analysis tools. wget is installed explicitly
+# because the MOFED step below downloads the tarball with it, matching the
+# centos release Dockerfiles.
+RUN dnf install -y \
+    autoconf \
+    automake \
+    clang \
+    cppcheck \
+    csclng \
+    cscppc \
+    csmock-common \
+    doxygen \
+    file \
+    gcc-c++ \
+    git \
+    git-clang-format \
+    glibc-devel \
+    java-1.8.0-openjdk-devel \
+    libtool \
+    make \
+    maven \
+    numactl-devel \
+    rdma-core-devel \
+    rpm-build \
+    libusbx \
+    fuse-libs \
+    wget \
+    && dnf clean dbcache packages
+
+# MOFED
+# Downloads the MLNX_OFED tarball named from MOFED_VERSION/MOFED_OS, installs
+# it user-space only (no firmware update, no bundled ucx/hcoll/openmpi/sharp)
+# and removes the unpacked directory and tarball in the same layer.
+ARG MOFED_VERSION
+ARG MOFED_OS
+ENV MOFED_DIR MLNX_OFED_LINUX-${MOFED_VERSION}-${MOFED_OS}-x86_64
+ENV MOFED_SITE_PLACE MLNX_OFED-${MOFED_VERSION}
+ENV MOFED_IMAGE ${MOFED_DIR}.tgz
+RUN wget --no-verbose http://content.mellanox.com/ofed/${MOFED_SITE_PLACE}/${MOFED_IMAGE} && \
+    tar -xzf ${MOFED_IMAGE} && \
+    ${MOFED_DIR}/mlnxofedinstall --all -q \
+        --user-space-only \
+        --without-fw-update \
+        --skip-distro-check \
+        --without-ucx \
+        --without-hcoll \
+        --without-openmpi \
+        --without-sharp \
+    && rm -rf ${MOFED_DIR} && rm -rf *.tgz
+
+# NOTE(review): these CUDA paths are exported although this image is built
+# FROM fedora and nothing in this file installs CUDA - confirm they are needed.
+ENV CPATH /usr/local/cuda/include:${CPATH}
+ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/compat:${LD_LIBRARY_PATH}
+ENV LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/compat:${LIBRARY_PATH}
+ENV PATH /usr/local/cuda/compat:${PATH}
+
diff --git a/buildlib/dockers/fedora.Dockerfile b/buildlib/dockers/fedora.Dockerfile
new file mode 100644
index 00000000000..cb75a540f5b
--- /dev/null
+++ b/buildlib/dockers/fedora.Dockerfile
@@ -0,0 +1,32 @@
+# docker build -t ucfconsort.azurecr.io/ucx/fedora:5 -f buildlib/dockers/fedora.Dockerfile buildlib/dockers/
+FROM fedora:33
+
+# Build toolchain and static-analysis tools; cmake and python are used by the
+# LLVM/clang build below.
+RUN dnf install -y \
+ autoconf \
+ automake \
+ cmake \
+ cppcheck \
+ csclng \
+ cscppc \
+ csmock-common \
+ doxygen \
+ file \
+ gcc-c++ \
+ git \
+ git-clang-format \
+ glibc-devel \
+ java-1.8.0-openjdk-devel \
+ libtool \
+ make \
+ maven \
+ numactl-devel \
+ python \
+ rdma-core-devel \
+ rpm-build \
+ && dnf clean dbcache packages
+# Builds and installs clang from the `ucx-clang-format` branch of the openucx
+# llvm-project fork (shallow, single-branch clone), then deletes the build
+# tree in the same layer.
+RUN export BUILD_ROOT=/tmp/llvm-project && \
+ git clone https://github.com/openucx/llvm-project.git --depth=1 -b ucx-clang-format --single-branch ${BUILD_ROOT} && \
+ mkdir -p ${BUILD_ROOT}/build && cd ${BUILD_ROOT}/build && \
+ cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_PROJECTS=clang -G "Unix Makefiles" \
+ ../llvm && \
+ make -j$(nproc) && make install && rm -rf ${BUILD_ROOT}
diff --git a/buildlib/push-release-images.sh b/buildlib/dockers/push-release-images.sh
similarity index 100%
rename from buildlib/push-release-images.sh
rename to buildlib/dockers/push-release-images.sh
diff --git a/buildlib/dockers/sles-release.Dockerfile b/buildlib/dockers/sles-release.Dockerfile
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/buildlib/ubuntu-release.Dockerfile b/buildlib/dockers/ubuntu-release.Dockerfile
similarity index 93%
rename from buildlib/ubuntu-release.Dockerfile
rename to buildlib/dockers/ubuntu-release.Dockerfile
index 61140f2a82a..53c0b262901 100644
--- a/buildlib/ubuntu-release.Dockerfile
+++ b/buildlib/dockers/ubuntu-release.Dockerfile
@@ -3,6 +3,7 @@ ARG UBUNTU_VERSION=16.04
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
RUN apt-get update && \
+ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata && \
apt-get install -y \
automake \
default-jdk \
diff --git a/buildlib/fedora.Dockerfile b/buildlib/fedora.Dockerfile
deleted file mode 100644
index 785d84bd445..00000000000
--- a/buildlib/fedora.Dockerfile
+++ /dev/null
@@ -1,24 +0,0 @@
-# docker build -t ucfconsort.azurecr.io/ucx/fedora:1 -f buildlib/fedora.Dockerfile buildlib/
-FROM fedora:32
-
-RUN dnf install -y \
- autoconf \
- automake \
- clang \
- cppcheck \
- csclng \
- cscppc \
- csmock-common \
- doxygen \
- file \
- gcc-c++ \
- git \
- glibc-devel \
- java-1.8.0-openjdk-devel \
- libtool \
- make \
- maven \
- numactl-devel \
- rdma-core-devel \
- rpm-build \
- && dnf clean dbcache packages
diff --git a/buildlib/io-demo.yml b/buildlib/io-demo.yml
deleted file mode 100644
index eb51e4bbe15..00000000000
--- a/buildlib/io-demo.yml
+++ /dev/null
@@ -1,165 +0,0 @@
-parameters:
- - name: test_duration
- type: number
- default: 2
- - name: tests
- type: object
- default:
- base:
- initial_delay: 20
- cycles: 100
- downtime: 5
- uptime: 20
-
-jobs:
- - job: io_build
- displayName: Build io_demo
-
- pool:
- name: MLNX
- demands:
- - ucx_roce -equals yes
-
- steps:
- # address permissions issue when some files created as read-only
- - bash: chmod u+rwx ./ -R
-
- - checkout: self
- clean: true
- displayName: Checkout
-
- - bash: |
- set -eEx
-
- ./autogen.sh
- ./contrib/configure-release --prefix=$PWD/__install
- make -j`nproc`
-
- echo "##vso[task.setvariable variable=workspace;isOutput=true]$(Build.Repository.LocalPath)/buildlib"
- displayName: Build
- name: build
-
- - job: test
- dependsOn: io_build
-
- pool:
- name: MLNX
- demands:
- - ucx_roce -equals yes
-
- strategy:
- matrix:
- ${{ each test in parameters.tests }}:
- ${{ test.Key }}:
- test_name: ${{ test.Key }}
- initial_delay: ${{ test.Value.initial_delay }}
- cycles: ${{ test.Value.cycles }}
- downtime: ${{ test.Value.downtime }}
- uptime: ${{ test.Value.uptime }}
- maxParallel: 1
-
- variables:
- workspace: $[ dependencies.io_build.outputs['build.workspace'] ]
-
- displayName: "io_demo: "
- steps:
- - checkout: none
-
- - bash: |
- set -eEx
- # set UCX environment variables
- export UCX_SOCKADDR_CM_ENABLE=y
- # TODO get hostname of 'ucx-roce-client' SSH service endpoint, and run on it
- $(workspace)/../test/apps/iodemo/run_io_demo.sh \
- -H swx-rdmz-ucx-roce-01,swx-rdmz-ucx-roce-02 \
- --tasks-per-node 1 \
- --duration 60 \
- -v \
- --num-clients 1 \
- --num-servers 1 \
- --map-by slot \
- $(workspace)/../test/apps/iodemo/io_demo \
- -d 512:524288 \
- -o read,write \
- -i 0 \
- -w 16 \
- -t 10
- displayName: Launch with run_io_demo.sh
- timeoutInMinutes: 2
-
- - bash: |
- set -eEx
- source ./buildlib/az-helpers.sh
- ./buildlib/az-network-corrupter.sh \
- initial_delay=$(initial_delay) \
- cycles=$(cycles) \
- downtime=$(downtime) \
- uptime=$(uptime) \
- |& add_timestamp &>corrupter.log &
-
- pgrep -u "$USER" -f 'network-corrupter'
- corrupter_pid=$(pgrep -u "$USER" -f 'network-corrupter')
- echo "corrupter_pid=$corrupter_pid"
- azure_set_variable "corrupter_pid" "$corrupter_pid"
- displayName: Start network corrupter
- timeoutInMinutes: 5
-
- - bash: |
- set -eEx
- source ./buildlib/az-helpers.sh
- ./buildlib/az-io_demo.sh workspace=$(workspace)
- displayName: Start server
- name: server
- timeoutInMinutes: 5
-
- - task: SSH@0
- inputs:
- sshEndpoint: ucx-roce-client
- runOptions: inline
- inline: |
- set -eEx
- $(workspace)/az-io_demo.sh \
- workspace=$(workspace) \
- server_ip=$(server_ip)
- duration=${{ parameters.test_duration }}
- failOnStdErr: false
- displayName: Test
- timeoutInMinutes: ${{ parameters.test_duration }}
-
- - bash: |
- set -eEx
- cd $(workspace)
- pid=$(server_pid)
- echo "Stopping the server, PID=${pid}"
- if ! kill ${pid}; then
- echo "##vso[task.logissue type=error]Can't stop server: process doesn't exist"
- echo "##vso[task.complete result=Failed;]"
- else
- echo "Server stopped successfully"
- fi
- cat $(workspace)/server.log
- displayName: Kill the server
- condition: always()
- timeoutInMinutes: 5
-
- - bash: |
- set -eEx
- pid=$(corrupter_pid)
- echo "Stopping corrupter, PID=${pid}"
- if ! kill ${pid}; then
- echo "##vso[task.logissue type=warning]Can't stop corrupter: process doesn't exist"
- echo "##vso[task.complete result=Failed;]"
- else
- echo "Corrupter stopped successfully"
- fi
- cat corrupter.log
- displayName: Kill corrupter
- condition: always()
- timeoutInMinutes: 10
-
- - bash: |
- set -eEx
- ./buildlib/az-network-corrupter.sh reset=yes
- displayName: Restore port state
- condition: always()
- timeoutInMinutes: 1
diff --git a/buildlib/io_demo/az-stage-io-demo.yaml b/buildlib/io_demo/az-stage-io-demo.yaml
new file mode 100644
index 00000000000..f251f02c30c
--- /dev/null
+++ b/buildlib/io_demo/az-stage-io-demo.yaml
@@ -0,0 +1,86 @@
+# Step template for one io_demo run under network disruption: start the
+# network corrupter, run io_demo through run_io_demo.sh, analyze the logs,
+# then always stop the corrupter and restore the port state.
+# Reads the pipeline variables workspace, initial_delay, cycles, downtime,
+# uptime, agent_hosts, roce_iface and io_demo_exe.
+parameters:
+- name: name # defaults for any parameters that aren't specified
+ default: 'test'
+- name: iodemo_args
+ default: ''
+- name: iodemo_tls
+ default: 'rc_x'
+- name: duration
+ default: 60
+
+steps:
+# Launches the corrupter in the background with timestamped output in
+# corrupter.log, waits until pgrep can see it, and stores its PID in the
+# `corrupter_pid` variable for the "Kill corrupter" step.
+- bash: |
+ set -eEx
+ source $(workspace)/buildlib/az-helpers.sh
+ $(workspace)/buildlib/az-network-corrupter.sh \
+ initial_delay=$(initial_delay) \
+ cycles=$(cycles) \
+ downtime=$(downtime) \
+ uptime=$(uptime) \
+ |& add_timestamp &>corrupter.log &
+ while ! pgrep -u "$USER" -f 'network-corrupter'
+ do
+ sleep 1
+ done
+ pgrep -u "$USER" -f 'network-corrupter'
+ corrupter_pid=$(pgrep -u "$USER" -f 'network-corrupter')
+ echo "corrupter_pid=$corrupter_pid"
+ azure_set_variable "corrupter_pid" "$corrupter_pid"
+ displayName: Start network corrupter
+ timeoutInMinutes: 2
+
+# Runs one client and one server, writing logs under $(workspace)/<name>.
+# NOTE(review): UCX_NET_DEVICES is derived from the shell variable
+# ${roce_iface}, while the launcher receives the pipeline macro $(roce_iface)
+# - confirm both resolve to the same interface on the agent.
+- bash: |
+ set -eEx
+ sudo /hpc/local/bin/lshca
+ mkdir -p $(workspace)/${{ parameters.name }}
+ # set UCX environment variables
+ export UCX_NET_DEVICES=$(ibdev2netdev | sed -ne 's/\(\w*\) port \([0-9]\) ==> '${roce_iface}' .*/\1:\2/p')
+ export UCX_TLS=${{ parameters.iodemo_tls }}
+ export LD_LIBRARY_PATH=$(workspace)/install/lib:$LD_LIBRARY_PATH
+ $(workspace)/test/apps/iodemo/run_io_demo.sh \
+ -H $(agent_hosts) \
+ --tasks-per-node 1 \
+ --duration ${{ parameters.duration }} \
+ -v \
+ --num-clients 1 \
+ --num-servers 1 \
+ --map-by slot \
+ --log-dir $(workspace)/${{ parameters.name }} \
+ -i $(roce_iface) \
+ $(io_demo_exe) \
+ -d 512:524288 \
+ -P 2 \
+ -o read,write \
+ -i 0 \
+ -w 16 \
+ -t 10 \
+ ${{ parameters.iodemo_args }}
+ displayName: Launch with run_io_demo.sh ( ${{ parameters.name }} )
+ timeoutInMinutes: 10
+
+# Checks the logs written by the previous step with iodemo_analyzer.py.
+- bash: |
+ python $(workspace)/buildlib/io_demo/iodemo_analyzer.py -d $(workspace)/${{ parameters.name }} --duration ${{ parameters.duration }}
+ displayName: Analyze for ${{ parameters.name }}
+ timeoutInMinutes: 1
+
+# Runs even after earlier failures; a corrupter that is already gone is
+# logged as a warning but still completes the task as Failed.
+- bash: |
+ set -eEx
+ pid=$(corrupter_pid)
+ echo "Stopping corrupter, PID=${pid}"
+ if ! kill ${pid}; then
+ echo "##vso[task.logissue type=warning]Can't stop corrupter: process doesn't exist"
+ echo "##vso[task.complete result=Failed;]"
+ else
+ echo "Corrupter stopped successfully"
+ fi
+ cat corrupter.log
+ displayName: Kill corrupter
+ condition: always()
+ timeoutInMinutes: 10
+
+# Always invokes the corrupter script with reset=yes after the run.
+- bash: |
+ set -eEx
+ $(workspace)/buildlib/az-network-corrupter.sh reset=yes
+ displayName: Restore port state
+ condition: always()
+ timeoutInMinutes: 2
diff --git a/buildlib/io_demo/io-demo.yml b/buildlib/io_demo/io-demo.yml
new file mode 100644
index 00000000000..51c5141ea58
--- /dev/null
+++ b/buildlib/io_demo/io-demo.yml
@@ -0,0 +1,104 @@
+# io_demo pipeline jobs: `io_build` builds and installs UCX and publishes the
+# pieces the test needs as a build artifact; `test` downloads that artifact
+# and runs az-stage-io-demo.yaml once per entry in `tests`.
+parameters:
+ - name: demands
+ type: string
+ default: "ucx_iodemo -equals yes"
+ # initial_delay, cycles, downtime and uptime are exposed as job variables
+ # below and consumed by the network-corrupter step of the stage template.
+ - name: initial_delay
+ type: number
+ default: 20
+ - name: cycles
+ type: number
+ default: 100
+ - name: downtime
+ type: number
+ default: 5
+ - name: uptime
+ type: number
+ default: 40
+ # Each key becomes one matrix leg: `args` -> test_args, `duration` -> test_time.
+ - name: tests
+ type: object
+ default:
+ tag:
+ args: ""
+ duration: 480
+ active:
+ args: "-q -A"
+ duration: 480
+
+jobs:
+ - job: io_build
+ displayName: Build io_demo
+
+ pool:
+ name: MLNX
+ demands: ${{ parameters.demands }}
+
+ steps:
+ # address permissions issue when some files created as read-only
+ - bash: chmod u+rwx ./ -R
+
+ - checkout: self
+ clean: true
+ fetchDepth: 100
+ displayName: Checkout
+ - bash: |
+ set -eEx
+ ./autogen.sh
+ ./contrib/configure-release --prefix=$(Build.Repository.LocalPath)/install
+ make -j`nproc`
+ make install
+ displayName: Build
+ name: build
+ # Stage only the helper scripts, the analyzer, the install tree and the
+ # launcher, then publish them as drop_<BuildId>.
+ - task: CopyFiles@2
+ inputs:
+ sourceFolder: '$(Build.Repository.LocalPath)'
+ contents: |
+ buildlib/az-helpers.sh
+ buildlib/az-network-corrupter.sh
+ buildlib/io_demo/iodemo_analyzer.py
+ install/**
+ test/apps/iodemo/run_io_demo.sh
+ targetFolder: '$(Build.ArtifactStagingDirectory)'
+ - task: PublishBuildArtifacts@1
+ inputs:
+ pathToPublish: '$(Build.ArtifactStagingDirectory)'
+ artifactName: drop_$(Build.BuildId)
+
+ - job: test
+ dependsOn: io_build
+
+ pool:
+ name: MLNX
+ demands: ${{ parameters.demands }}
+
+ # One matrix leg per test, limited to one leg at a time.
+ strategy:
+ matrix:
+ ${{ each test in parameters.tests }}:
+ ${{ test.Key }}:
+ test_name: ${{ test.Key }}
+ test_args: ${{ test.Value.args }}
+ test_time: ${{ test.Value.duration }}
+ maxParallel: 1
+
+ # `workspace` is the relative name of the downloaded artifact folder.
+ variables:
+ workspace: drop_$(Build.BuildId)
+ io_demo_exe: drop_$(Build.BuildId)/install/bin/io_demo
+ initial_delay: ${{ parameters.initial_delay }}
+ cycles: ${{ parameters.cycles }}
+ downtime: ${{ parameters.downtime }}
+ uptime: ${{ parameters.uptime }}
+
+
+ displayName: "io_demo: "
+ steps:
+ - checkout: none
+ - task: DownloadBuildArtifacts@0
+ displayName: 'Download Build Artifacts'
+ inputs:
+ artifactName: drop_$(Build.BuildId)
+ downloadPath: $(System.DefaultWorkingDirectory)
+ # Make the downloaded tree readable, writable and executable for the user.
+ - bash: chmod u+rwx $(workspace) -R
+ - template: az-stage-io-demo.yaml
+ parameters:
+ name: $(test_name)
+ iodemo_args: $(test_args)
+ duration: $(test_time)
diff --git a/buildlib/io_demo/iodemo_analyzer.py b/buildlib/io_demo/iodemo_analyzer.py
new file mode 100644
index 00000000000..f898163c499
--- /dev/null
+++ b/buildlib/io_demo/iodemo_analyzer.py
@@ -0,0 +1,246 @@
+import subprocess
+import os
+import argparse
+import re
+import datetime,time
+import traceback,sys
+
+# Regular-expression fragments for error/warning lines that are tolerated in
+# the logs. They are OR-ed together into re_allow_list and searched
+# case-insensitively. Raw strings keep escapes such as \( intact without
+# relying on Python passing unknown string escapes through.
+allow_error_list = [
+    r'Connection reset by remote peer',
+    r'UCX-connection.*detected error:',
+    r'ERROR Remote QP on mlx',
+    r'UCX ERROR RC QP',
+    r'ERROR IB Async event on',
+    r'setting error flag on connection',
+    r'Operation rejected by remote peer',
+    r'got error event RDMA_CM_EVENT_ADDR_ERROR',
+    r'rdma_accept',
+    r'UCX ERROR Remote access on',
+    r'UCX ERROR Transport retry count exceeded on',
+    r'UCX WARN failed to disconnect CM lane',
+    r'ucp_ep_create\(\) failed: Input/output error',
+    r'terminate connection.*due to Input/output error',
+    r'UCX ERROR Local QP operation on',
+    r'conn_id send request.*failed: Input/output error',
+    r'deleting connection with status Input/output error',
+    r'UCX WARN failed to disconnect CM lane .* Operation rejected by remote peer',
+    r'ucp_ep_query\(\) failed: Endpoint timeout',
+    r'UCX ERROR rdma_reject.*failed with error: Invalid argument',
+    r'UCX ERROR rdma_init_qp_attr.*failed: Invalid argument',
+    r'UCX ERROR rdma_establish on ep.*failed: Invalid argument',
+    r'UCX ERROR .*client.*failed to process a connect response'
+    ]
+
+
+# Case-insensitive alternation of every pattern in allow_error_list.
+re_allow_list = re.compile("|".join(allow_error_list), re.I)
+# Captures the leading "[<digits>.<digits>]" timestamp of a log line.
+re_timestamp = re.compile(r"\[(\d+\.\d+)\].*")
+# Combined read+write traffic line; not referenced elsewhere in the visible
+# part of this file.
+re_traffic = re.compile(r"\[(\d+\.\d+)\].*read (\d+.\d+).*min:(\d+).*write (\d+.\d+).*min:(\d+).*")
+# Groups: (1) timestamp, (2) rate in front of "MB/s", (3) integer after "min:".
+# NOTE(review): the "." inside (\d+.\d+) is unescaped, so it matches any
+# character, not only a decimal point - confirm that is intended.
+re_traffic_read = re.compile(r"\[(\d+\.\d+)\].*read (\d+.\d+) MB\/s min:(\d+).*")
+re_traffic_write = re.compile(r"\[(\d+\.\d+)\].*write (\d+.\d+) MB\/s min:(\d+).*")
+# Any line containing error/assert/backtrace/segmentation, case-insensitive.
+re_error = re.compile(r".*(error|assert|backtrace|segmentation).*", re.I)
+# Any line containing "warn", case-insensitive.
+re_warning = re.compile(r".*warn.*", re.I)
+
+
+def in_allow_list(line, is_allow_list):
+    """Tell whether `line` is a tolerated error.
+
+    Returns True only when allow-listing is enabled (`is_allow_list` is
+    truthy) and `line` matches one of the patterns in re_allow_list;
+    otherwise returns False.
+    """
+    if not is_allow_list:
+        return False
+    return re_allow_list.search(line) is not None
+
+
+def process_seek(seek_file):
+    """Load and then clear the per-log resume state stored in `seek_file`.
+
+    Each non-empty line holds five tab-separated fields:
+    [log name] [position] [last traffic timestamp] [zero-rx timestamp]
+    [zero-tx timestamp]. Blank lines are skipped.
+
+    Returns a dict keyed by log name with 'pos' (int), 'timestamp'
+    (datetime), 'timestamp_rx' and 'timestamp_tx' (float). Returns an empty
+    dict when `seek_file` is empty or does not exist. The file is truncated
+    after it has been read.
+    """
+    data = {}
+    if not seek_file or not os.path.exists(seek_file):
+        return data
+
+    with open(seek_file) as f:
+        for line in f:
+            line = line.rstrip('\n')
+            # A blank line carries no record; splitting it would fail on d[1]
+            if not line:
+                continue
+            d = line.split('\t')
+            ts = datetime.datetime.fromtimestamp(float(d[2]))
+            rx_ts = float(d[3])
+            tx_ts = float(d[4])
+            data[d[0]] = {
+                'pos': int(d[1]),
+                'timestamp': ts,
+                'timestamp_rx': rx_ts,
+                'timestamp_tx': tx_ts,
+            }
+    # Burn After Reading
+    open(seek_file, 'w').close()
+    return data
+
+
+def get_logs(directory):
+    """Split the regular files of `directory` into client and server logs.
+
+    A file counts as a client log when its name contains "_client_" and as
+    a server log when it contains "_server_". Returns (clients, servers)
+    as two lists of paths joined with `directory`.
+    """
+    clients, servers = [], []
+    for entry in os.listdir(directory):
+        path = os.path.join(directory, entry)
+        if not os.path.isfile(path):
+            continue
+        if "_client_" in entry:
+            clients.append(path)
+        if "_server_" in entry:
+            servers.append(path)
+    return clients, servers
+
+
+def process_server(files, is_allow_list):
+    """Fail on the first error or warning line found in the server logs.
+
+    files         -- paths of the server log files to scan.
+    is_allow_list -- when truthy, lines matching the allow list are ignored.
+
+    Raises Exception naming the offending line and log file.
+    """
+    for log in files:
+        with open(log) as f:
+            for line in f:
+                # The patterns are compiled with re.I already. The second
+                # positional argument of Pattern.match() is the start
+                # position, so passing a flag there skipped the first
+                # characters of every line.
+                m = re_error.match(line) or re_warning.match(line)
+                if m and not in_allow_list(line, is_allow_list):
+                    raise Exception("Contains error: {}\nLog {}:\nLine {}".format(line, log, line))
+
+
+def process_client(files, threshold, seek_file, is_allow_list, duration):
+ """Validate client logs: traffic must keep flowing and errors must be allow-listed.
+
+ files -- paths of the client log files.
+ threshold -- limit in minutes; it is compared as `threshold * 60` seconds.
+ seek_file -- optional state file; when set, each log is read from the
+ position stored there and the new position is appended at EOF.
+ is_allow_list -- forwarded to in_allow_list() for error lines.
+ duration -- expected run time in seconds; 0 disables the final check.
+
+ Raises Exception on a zero read/write rate, on "min:0" lasting longer
+ than the threshold, on an error line that is not allow-listed, and on
+ missing traffic.
+ """
+ seek_data = process_seek(seek_file)
+ for log in files:
+ with open(log) as f:
+ curr_ts = 0
+ curr_traffic_ts = 0
+ start_traffic_ts = 0
+ cur_traffic_date = ""
+ # State carried over from a previous run; all default to 0 without a seek file
+ prev_traffic_ts = seek_data.get(log, {}).get('timestamp', 0)
+ zero_rx_ts = seek_data.get(log, {}).get('timestamp_rx', 0)
+ zero_tx_ts = seek_data.get(log, {}).get('timestamp_tx', 0)
+ pos_prev = seek_data.get(log, {}).get('pos', 0)
+ f.seek(pos_prev)
+ i = 0
+ while True:
+ line = f.readline()
+ if not line:
+ # EOF: persist the position only if a traffic line was seen in this pass
+ if seek_file and cur_traffic_date:
+ pos = f.tell()
+ with open(seek_file, 'a+') as s:
+ s.write("{}\t{}\t{}\t{}\t{}\n".format(
+ log, pos, cur_traffic_date, zero_rx_ts, zero_tx_ts))
+ break
+
+ timestamp_match = re_timestamp.match(line)
+ if timestamp_match:
+ date = float(timestamp_match.group(1))
+ curr_ts = datetime.datetime.fromtimestamp(date)
+ if not prev_traffic_ts:
+ prev_traffic_ts = curr_ts
+ if not start_traffic_ts:
+ start_traffic_ts = curr_ts
+
+ i += 1
+ read_match = re_traffic_read.match(line)
+ write_match = re_traffic_write.match(line)
+
+ current_match = None
+
+ # Read statistics: group 2 is the rate, group 3 the value after "min:"
+ if read_match:
+ current_match = read_match
+ cur_traffic_date = current_match.group(1)
+ date_traffic = float(cur_traffic_date)
+ curr_traffic_ts = datetime.datetime.fromtimestamp(date_traffic)
+ rx = float(current_match.group(2))
+ min_server_rx = int(current_match.group(3))
+
+ if min_server_rx == 0 and zero_rx_ts:
+ delta = curr_traffic_ts - datetime.datetime.fromtimestamp(zero_rx_ts)
+ if delta.total_seconds() > threshold * 60:
+ raise Exception("Have read min:0 servers {} minutes \
+ (more threshold:{})\nlog {}:\nLine {}".format(
+ delta.total_seconds()/60.0, threshold, log, line))
+ else:
+ zero_rx_ts = date_traffic
+
+ if not rx:
+ raise Exception("Have read zero speed:\nLog {}:\nLine {}".format(log, line))
+ prev_traffic_ts = curr_traffic_ts
+
+ # Write statistics: same checks as for reads, tracked in zero_tx_ts
+ if write_match:
+ current_match = write_match
+ cur_traffic_date = current_match.group(1)
+ date_traffic = float(cur_traffic_date)
+ curr_traffic_ts = datetime.datetime.fromtimestamp(date_traffic)
+ tx = float(current_match.group(2))
+ min_server_tx=int(current_match.group(3))
+
+ if min_server_tx == 0 and zero_tx_ts:
+ delta = curr_traffic_ts - datetime.datetime.fromtimestamp(zero_tx_ts)
+ if delta.total_seconds() > threshold * 60:
+ raise Exception("Have write min:0 servers {} minutes \
+ (more threshold:{})\nLog {}:\nLine {}".format(
+ delta.total_seconds()/60.0, threshold, log, line))
+ else:
+ zero_tx_ts = date_traffic
+
+ if not tx:
+ raise Exception("Have write zero speed:\nLog {}:\nLine {}".format(log, line))
+
+ prev_traffic_ts = curr_traffic_ts
+
+
+ # NOTE(review): prev_traffic_ts was just set to curr_traffic_ts in both
+ # branches above, so this delta looks like it is always zero - confirm.
+ if current_match and prev_traffic_ts:
+ delta = curr_traffic_ts - prev_traffic_ts
+ if delta.total_seconds() > threshold * 60:
+ raise Exception("Have delta {} more {} minutes\nLog {}:\nLine {}".format(
+ delta.total_seconds()/60.0, threshold, log, line))
+
+ # NOTE(review): the second positional argument of a compiled pattern's
+ # match() is the start position, not flags; re.IGNORECASE here makes
+ # matching begin after the first characters of the line - confirm.
+ if not current_match:
+ current_match = re_error.match(line, re.IGNORECASE)
+ if current_match:
+ if not in_allow_list(line, is_allow_list):
+ raise Exception("contains error: {}\nLog {}:\nLine {}".format(line, log, line))
+ else:
+ current_match = re_warning.match(line, re.IGNORECASE)
+ if current_match:
+ print("log {} [{}] contains warning: {}".format(log, i, line))
+
+ if curr_ts and (curr_ts - prev_traffic_ts).total_seconds() > threshold * 60:
+ raise Exception("No traffic\n{}\nLog {}".format(line, log))
+ # Traffic must span the requested duration, within the threshold
+ if duration and curr_traffic_ts and start_traffic_ts:
+ traffic_duration = curr_traffic_ts - start_traffic_ts
+ delta = duration - traffic_duration.total_seconds()
+ if delta > threshold * 60:
+ raise Exception("No traffic for more than {} minutes at the end of the test".format(
+ delta/60.0))
+
+
+
+# Command-line entry point: analyzes either one log file (-f, with -r giving
+# its role) or every client/server log in a directory (-d). When both are
+# given, the directory listing replaces the single file. Exits with status 1
+# after printing the error and traceback if any check fails.
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-f', '--filename', type=str,
+                        help='Log filename')
+    parser.add_argument('-d', '--directory', type=str,
+                        help='Directory name with Logs')
+    # Despite the option name, the value is used as minutes (threshold * 60)
+    parser.add_argument('-t', '--no_traffic_in_sec', type=int, default=1,
+                        help='No traffic Threshold in min')
+    parser.add_argument('-s', '--seek', type=str, default="",
+                        help='path to seek file')
+    parser.add_argument('--duration', type=int, default=0,
+                        help='set io_demo duration time in sec')
+    parser.add_argument('-r', '--role', type=str, default="client", choices=['client', 'server'],
+                        help='choice role if you set filename')
+    parser.add_argument('--no-allow-list', dest='allow_list', action='store_false')
+
+    args = parser.parse_args()
+
+    clients = []
+    servers = []
+    if args.filename:
+        if args.role == "client":
+            clients.append(args.filename)
+        elif args.role == "server":
+            servers.append(args.filename)
+
+    if args.directory:
+        clients, servers = get_logs(args.directory)
+
+    try:
+        process_client(clients, args.no_traffic_in_sec, args.seek, args.allow_list, args.duration)
+        process_server(servers, args.allow_list)
+    except Exception as e:
+        print("Error iodemo analyzer: {}\n".format(e))
+        traceback.print_exc(file=sys.stdout)
+        # sys.exit is always available; the bare exit() helper is only
+        # installed by the site module
+        sys.exit(1)
diff --git a/buildlib/jucx-publish.yml b/buildlib/jucx/jucx-publish.yml
similarity index 89%
rename from buildlib/jucx-publish.yml
rename to buildlib/jucx/jucx-publish.yml
index de4bef44960..9dbdff32821 100644
--- a/buildlib/jucx-publish.yml
+++ b/buildlib/jucx/jucx-publish.yml
@@ -5,12 +5,18 @@ parameters:
jobs:
- job: jucx_release
-
- container: centos7
+ condition: eq(stageDependencies.Check_Commit.Check.outputs['Commit.Launch'], 'Yes')
+ # we need to use lowest version for compatible
+ container: centos7_cuda10_1
+ pool:
+ name: MLNX
+ demands:
+ - ucx_docker -equals yes
steps:
- checkout: self
clean: true
+ fetchDepth: 100
- bash: |
set -eE
diff --git a/buildlib/jucx/jucx-test.yml b/buildlib/jucx/jucx-test.yml
new file mode 100755
index 00000000000..41709a5bf3f
--- /dev/null
+++ b/buildlib/jucx/jucx-test.yml
@@ -0,0 +1,91 @@
+# `name` is used verbatim as the job identifier, so it may only contain
+# letters, digits and underscores; `demands` selects the agents in the pool.
+parameters:
+  name: java_test
+  demands: []
+
+# Builds UCX with the Java bindings and runs the JUCX tests plus a standalone
+# read-bandwidth benchmark, once per JAVA_VERSION in the matrix.
+jobs:
+ - job: ${{ parameters.name }}
+
+ pool:
+ name: MLNX
+ demands: ${{ parameters.demands }}
+
+ strategy:
+ matrix:
+ java8:
+ JAVA_VERSION: 1.8
+ java11:
+ JAVA_VERSION: 1.11
+
+ steps:
+ - checkout: self
+ fetchDepth: 100
+ clean: true
+ displayName: Checkout
+ # Exits successfully without building when the mvn or jdk module cannot be
+ # loaded; otherwise configures a devel build with Java and installs it.
+ - bash: |
+ set -x
+ source buildlib/az-helpers.sh
+ az_init_modules
+ res=0
+ az_module_load dev/mvn
+ res=$(($res+$?))
+ az_module_load dev/jdk-${JAVA_VERSION}
+ res=$(($res+$?))
+ if [ $res -ne 0 ]; then
+ exit 0;
+ fi
+ try_load_cuda_env
+ set -eE
+ ./autogen.sh
+ ./contrib/configure-devel --prefix=$(Build.Repository.LocalPath)/install \
+ --with-java --enable-gtest=no --with-cuda=$have_cuda
+ make -j`nproc`
+ make install
+ displayName: Build UCX
+ # Same module check as above, and also exits successfully when no RDMA
+ # interface is reported. Runs the native test and package targets on a
+ # random port in 20000-29999, then for every interface with an IP starts the
+ # benchmark receiver in the background, waits 10 seconds and runs the sender.
+ - bash: |
+ set -x
+ source buildlib/az-helpers.sh
+ az_init_modules
+ try_load_cuda_env
+ res=0
+ az_module_load dev/mvn
+ res=$(($res+$?))
+ az_module_load dev/jdk-${JAVA_VERSION}
+ res=$(($res+$?))
+ if [ $res -ne 0 ]; then
+ exit 0;
+ fi
+ set -eE
+ ifaces=`get_rdma_interfaces`
+ if [ -z "$ifaces" ]; then
+ azure_log_warning "No active RDMA interfaces on machine"
+ exit 0;
+ fi
+ jucx_port=$((20000 + $RANDOM % 10000))
+ export JUCX_TEST_PORT=$jucx_port
+ make -C bindings/java/src/main/native test
+ make -C bindings/java/src/main/native package
+ ipv4_found=0
+ for iface in $ifaces
+ do
+ server_ip=$(get_ip ${iface})
+ if [ -z "$server_ip" ]; then
+ continue
+ fi
+ echo "Running standalone benchamrk on $iface:$jucx_port"
+ java_cmd='java -XX:ErrorFile=$(Build.ArtifactStagingDirectory)/hs_err_$(Build.BuildId)_%p.log \
+ -XX:OnError="cat $(Build.ArtifactStagingDirectory)/hs_err_$(Build.BuildId)_%p.log" \
+ -cp "bindings/java/resources/:bindings/java/src/main/native/build-java/*" \
+ org.openucx.jucx.examples.$bench_class s=$server_ip p=$jucx_port t=1000000'
+ bench_class=UcxReadBWBenchmarkReceiver
+ eval "$java_cmd &"
+ java_pid=$!
+ sleep 10
+ bench_class=UcxReadBWBenchmarkSender
+ eval "$java_cmd"
+ wait $java_pid
+ ipv4_found=1
+ done
+ if [[ $ipv4_found -eq 0 ]]; then
+ azure_log_warning "No IPv4 address on any of $ifaces"
+ fi
+ displayName: Run jucx tests
diff --git a/buildlib/tests.yml b/buildlib/tests.yml
index 040d7024e76..bef13f02548 100644
--- a/buildlib/tests.yml
+++ b/buildlib/tests.yml
@@ -10,7 +10,7 @@ jobs:
name: MLNX
demands: ${{ parameters.demands }}
displayName: ${{ parameters.name }} on worker
- timeoutInMinutes: 360
+ timeoutInMinutes: 300
strategy:
matrix:
${{ each wid in parameters.worker_ids }}:
@@ -22,8 +22,10 @@ jobs:
- checkout: self
clean: true
+ fetchDepth: 100
- bash: |
+ source ./buildlib/az-helpers.sh
./contrib/test_jenkins.sh
displayName: Run ./contrib/test_jenkins.sh
env:
@@ -37,4 +39,5 @@ jobs:
EXECUTOR_NUMBER: $(AZP_AGENT_ID)
RUN_TESTS: yes
JENKINS_TEST_PERF: ${{ parameters.test_perf }}
-
+ JENKINS_NO_VALGRIND: ${{ parameters.valgrind_disable }}
+ RUNNING_IN_AZURE: yes
diff --git a/buildlib/tools/builds.sh b/buildlib/tools/builds.sh
new file mode 100755
index 00000000000..cc4e4eba6c3
--- /dev/null
+++ b/buildlib/tools/builds.sh
@@ -0,0 +1,356 @@
+#!/bin/bash -eExl
+
+realdir=$(realpath $(dirname $0))
+source ${realdir}/common.sh
+source ${realdir}/../az-helpers.sh
+long_test=${long_test:-no}
+
+#
+# Build documentation
+#
+build_docs() {
+    # The docs build is skipped on Fedora 34
+    local fedora34_hits
+    fedora34_hits=$(cat /etc/system-release | grep -i "fedora release 34" | wc -l)
+    if [ $fedora34_hits -gt 0 ]; then
+        azure_log_warning "Skip build docs on Fedora 34"
+        return 0
+    fi
+
+    doxy_ready=0
+    doxy_target_version="1.8.11"
+    doxy_version="$(doxygen --version)" || true
+
+    # The native doxygen is used when it is at least the target version;
+    # otherwise try to load doxygen 1.8.11 through az_module_load
+    if (echo $doxy_target_version; echo $doxy_version) | sort -CV; then
+        doxy_ready=1
+    elif az_module_load tools/doxygen-1.8.11; then
+        doxy_ready=1
+    fi
+
+    # Nothing is built when no suitable doxygen was found
+    [ $doxy_ready -eq 1 ] || return 0
+    echo " ==== Build docs only ===="
+    ${WORKSPACE}/contrib/configure-release --prefix=$ucx_inst --with-docs-only
+    $MAKE docs
+}
+
+#
+# Configure a release build with --without-verbs and compile it
+#
+build_no_verbs() {
+ echo "==== Build without IB verbs ===="
+ ${WORKSPACE}/contrib/configure-release --prefix=$ucx_inst --without-verbs
+ $MAKEP # parallel make command, defined in common.sh
+}
+
+#
+# Build with --disable-numa and verify that HAVE_NUMA is left undefined
+#
+build_disable_numa() {
+ echo "==== Check --disable-numa compilation option ===="
+ ${WORKSPACE}/contrib/configure-release --prefix=$ucx_inst --disable-numa
+ $MAKEP
+ # Make sure the config.h file undefines the HAVE_NUMA preprocessor macro
+ grep 'undef HAVE_NUMA' config.h || exit 1
+}
+
+#
+# Build a package in release mode
+#
+build_release_pkg() {
+    echo "==== Build release ===="
+    ${WORKSPACE}/contrib/configure-release
+    $MAKEP distcheck
+    # Decide whether to build an RPM or a Debian package
+    if [ -f /etc/redhat-release -o -f /etc/fedora-release ]; then
+        rpm_based=yes
+    elif [ `cat /etc/os-release | grep -i "ubuntu\|mint"|wc -l` -gt 0 ]; then
+        rpm_based=no
+    else
+        # try rpm tool to detect distro
+        set +e
+        out=$(rpm -q rpm 2>/dev/null)
+        rc=$?
+        set -e
+        rpm_based=yes
+        if [[ $rc != 0 || "$out" == *"not installed"* ]]; then
+            rpm_based=no
+        fi
+    fi
+
+    if [[ "$rpm_based" == "no" && -x /usr/bin/dpkg-buildpackage ]]; then
+        echo "==== Build debian package ===="
+        dpkg-buildpackage -us -uc
+    else
+        echo "==== Build RPM ===="
+        echo "$PWD"
+        ${WORKSPACE}/contrib/buildrpm.sh -s -b --nodeps --define "_topdir $PWD"
+    fi
+
+    # check that UCX version is present in spec file (literal match; an empty version is an error)
+    cd ${WORKSPACE}
+    # extract version from configure.ac and convert to MAJOR.MINOR.PATCH representation
+    version=$(grep -P "define\S+ucx_ver" configure.ac | awk '{print $2}' | sed 's,),,' | xargs echo | tr ' ' '.')
+    if [ -z "$version" ] || ! grep -qF -- "$version" ucx.spec.in; then
+        azure_log_error "Current UCX version ($version) is not present in ucx.spec.in changelog"
+        exit 1
+    fi
+    cd -
+}
+
+#
+# Build with Intel compiler
+#
+build_icc() {
+ if az_module_load $INTEL_MODULE && icc -v
+ then
+ echo "==== Build with Intel compiler ===="
+ ${WORKSPACE}/contrib/configure-devel --prefix=$ucx_inst CC=icc CXX=icpc
+ $MAKEP
+ make_clean distclean
+
+ echo "==== Build with Intel compiler (clang) ===="
+ ${WORKSPACE}/contrib/configure-devel --prefix=$ucx_inst CC=clang CXX=clang++
+ $MAKEP
+ make_clean distclean
+ else
+ azure_log_warning "Not building with Intel compiler"
+ fi
+ az_module_unload $INTEL_MODULE
+}
+
+#
+# Build with PGI compiler
+#
+build_pgi() {
+ if az_module_load $PGI_MODULE
+ then
+ # add_network_host utility from $PGI_MODULE it create config file for machine
+ # Doc: https://docs.nvidia.com/hpc-sdk/hpc-sdk-install-guide/index.html
+ add_network_host
+ echo "==== Build with PGI compiler ===="
+ # PGI failed to build valgrind headers, disable it for now
+ # TODO: Using non-default PGI compiler - pgcc18 which is going to be default
+ # in next versions.
+ # Switch to default CC compiler after pgcc18 is default for pgi module
+ ${WORKSPACE}/contrib/configure-devel --prefix=$ucx_inst --without-valgrind
+ $MAKEP
+ # TODO: Check why "make distclean" is needed to cleanup after PGI compiler
+ make_clean distclean
+ else
+ azure_log_warning "Not building with PGI compiler"
+ fi
+ az_module_unload $PGI_MODULE
+}
+
+#
+# Build the devel configuration with --enable-debug and --enable-examples,
+# then run ucx_info from the build tree
+build_debug() {
+ echo "==== Build with --enable-debug option ===="
+ ${WORKSPACE}/contrib/configure-devel --prefix=$ucx_inst --enable-debug --enable-examples
+ $MAKEP
+
+ # Show UCX info
+ ./src/tools/info/ucx_info -s -f -c -v -y -d -b -p -w -e -uart -m 20M
+}
+
+#
+# Configure with contrib/configure-prof and compile
+#
+build_prof() {
+ echo "==== Build configure-prof ===="
+ ${WORKSPACE}/contrib/configure-prof --prefix=$ucx_inst
+ $MAKEP # parallel make command, defined in common.sh
+}
+
+#
+# Build with --with-ugni against the mock cray-ugni pkg-config files, check
+# that config.h defines HAVE_TL_UGNI, then run distcheck
+build_ugni() {
+ echo "==== Build with cray-ugni ===="
+ #
+ # Point pkg-config to contrib/cray-ugni-mock, and replace
+ # PKG_CONFIG_TOP_BUILD_DIR with source dir, since the mock .pc files contain
+ # relative paths.
+ #
+ ${WORKSPACE}/contrib/configure-devel --prefix=$ucx_inst --with-ugni \
+ PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/${WORKSPACE}/contrib/cray-ugni-mock \
+ PKG_CONFIG_TOP_BUILD_DIR=${WORKSPACE}
+ $MAKEP
+
+ # make sure UGNI transport is enabled; a failed grep stops the script when errexit (-e) is active
+ grep '#define HAVE_TL_UGNI 1' config.h
+
+ $MAKEP distcheck
+}
+
+#
+# Build CUDA
+#
+build_cuda() {
+ if az_module_load $CUDA_MODULE
+ then
+ if az_module_load $GDRCOPY_MODULE
+ then
+ echo "==== Build with enable cuda, gdr_copy ===="
+ ${WORKSPACE}/contrib/configure-devel --prefix=$ucx_inst --with-cuda --with-gdrcopy
+ $MAKEP
+ make_clean distclean
+
+ ${WORKSPACE}/contrib/configure-release --prefix=$ucx_inst --with-cuda --with-gdrcopy
+ $MAKEP
+ make_clean distclean
+ az_module_unload $GDRCOPY_MODULE
+ fi
+
+ echo "==== Build with enable cuda, w/o gdr_copy ===="
+ ${WORKSPACE}/contrib/configure-devel --prefix=$ucx_inst --with-cuda --without-gdrcopy
+ $MAKEP
+
+ az_module_unload $CUDA_MODULE
+
+ echo "==== Running test_link_map with cuda build but no cuda module ===="
+ env UCX_HANDLE_ERRORS=bt ./test/apps/test_link_map
+ else
+ echo "==== Not building with cuda flags ===="
+ fi
+}
+
+#
+# Build with clang compiler
+#
+build_clang() {
+ if which clang > /dev/null 2>&1
+ then
+ echo "==== Build with clang compiler ===="
+ ${WORKSPACE}/contrib/configure-devel --prefix=$ucx_inst CC=clang CXX=clang++
+ $MAKEP
+ $MAKEP install
+ else
+ echo "==== Not building with clang compiler ===="
+ fi
+}
+
+#
+# Build with gcc-latest module
+#
+build_gcc() {
+    # Skipped on hosts whose glibc is older than 2.14. The glibc version is taken
+    # from "ldd --version", since ldd comes with glibc; see
+    # https://www.linuxquestions.org/questions/linux-software-2/how-to-check-glibc-version-263103/
+    # https://benohead.com/linux-check-glibc-version/
+    # https://stackoverflow.com/questions/9705660/check-glibc-version-for-a-particular-gcc-compiler
+    if [ $(cat /etc/os-release | grep -i "ubuntu\|mint"|wc -l) -gt 0 ]; then
+        azure_log_warning "Not building with latest gcc compiler on Ubuntu"
+        return 0
+    fi
+
+    ldd_ver="$(ldd --version | awk '/ldd/{print $NF}')"
+    if ! (echo "2.14"; echo $ldd_ver) | sort -CV; then
+        azure_log_warning "Not building with gcc compiler, glibc version is too old ($ldd_ver)"
+        return
+    fi
+
+    # Nothing is built when the gcc module cannot be loaded
+    az_module_load $GCC_MODULE || return 0
+
+    echo "==== Build with GCC compiler ($(gcc --version|head -1)) ===="
+    ${WORKSPACE}/contrib/configure-devel --prefix=$ucx_inst
+    $MAKEP
+    $MAKEP install
+    az_module_unload $GCC_MODULE
+}
+
+#
+# Build with armclang compiler
+#
+build_armclang() {
+ arch=$(uname -m)
+ if [ "${arch}" != "aarch64" ]
+ then
+ echo "==== Not building with armclang compiler on ${arch} ===="
+ return 0
+ fi
+
+ armclang_test_file=$(mktemp ./XXXXXX).c
+ echo "int main() {return 0;}" > ${armclang_test_file}
+ if az_module_load $ARM_MODULE && armclang --version && armclang ${armclang_test_file} -o ${armclang_test_file}.out
+ then
+ echo "==== Build with armclang compiler ===="
+ ${WORKSPACE}/contrib/configure-devel --prefix=$ucx_inst CC=armclang CXX=armclang++
+ $MAKEP
+ $MAKEP install
+ fi
+
+ rm -rf ${armclang_test_file} ${armclang_test_file}.out
+ az_module_unload $ARM_MODULE
+}
+
+check_inst_headers() {
+ echo "==== Testing installed headers ===="
+ # Install a release build, then run the header check on the installed include dir
+ ${WORKSPACE}/contrib/configure-release --prefix=${ucx_inst}
+ $MAKEP install
+ ${WORKSPACE}/contrib/check_inst_headers.sh ${ucx_inst}/include
+}
+
+check_config_h() {
+ srcdir=${WORKSPACE}/src
+
+ # Check if all .c files include config.h
+ echo "==== Checking for config.h files in directory $srcdir ===="
+
+ missing=`find $srcdir \( -name "*.c" -o -name "*.cc" \) -type f -exec grep -LP '\#\s*include\s+"config.h"' {} \;`
+
+ if [ `echo $missing | wc -w` -eq 0 ]
+ then
+ echo "Check successful "
+ else
+ azure_log_error "Missing include config.h in files: $missing"
+ exit 1
+ fi
+}
+
+#
+# Do a given task and update progress indicator
+#
+do_task() {
+    # $1 - amount added to PROGRESS after the task; remaining arguments - the task command
+    amount=$1
+    shift
+    # cleanup build dir before the task; the glob stays outside the quotes so that it expands
+    [ -n "${ucx_build_dir}" ] && rm -rf "${ucx_build_dir}"/*
+    "$@"
+
+    echo "##vso[task.setprogress value=$PROGRESS;]Progress Indicator"
+    PROGRESS=$((PROGRESS+amount))
+}
+
+
+az_init_modules
+prepare_build
+# Progress amount per common task: 5 in a long test, 12 otherwise
+[ "${long_test}" = "yes" ] && prog=5 || prog=12
+
+do_task "${prog}" build_docs
+do_task "${prog}" build_debug
+do_task "${prog}" build_prof
+do_task "${prog}" build_ugni
+do_task "${prog}" build_disable_numa
+do_task "${prog}" build_cuda
+do_task "${prog}" build_no_verbs
+do_task "${prog}" build_release_pkg
+# The checks and compiler-specific builds below run only in a long test
+if [ "${long_test}" = "yes" ]
+then
+ do_task 5 check_config_h
+ do_task 5 check_inst_headers
+ do_task 10 build_icc
+ do_task 10 build_pgi
+ do_task 10 build_gcc
+ do_task 10 build_clang
+ do_task 10 build_armclang
+fi
diff --git a/buildlib/tools/common.sh b/buildlib/tools/common.sh
new file mode 100644
index 00000000000..121165e8f82
--- /dev/null
+++ b/buildlib/tools/common.sh
@@ -0,0 +1,56 @@
+#!/bin/bash -eExl
+
+WORKSPACE=${WORKSPACE:=$PWD} # defaults to the current directory when unset or empty
+# build in local directory which goes away when docker exits
+ucx_build_dir=$HOME/${BUILD_ID}/build
+ucx_inst=$ucx_build_dir/install
+CUDA_MODULE="dev/cuda11.1.1"
+GDRCOPY_MODULE="dev/gdrcopy2.1_cuda11.1.1"
+JDK_MODULE="dev/jdk"
+MVN_MODULE="dev/mvn"
+XPMEM_MODULE="dev/xpmem-90a95a4"
+PGI_MODULE="hpc-sdk/nvhpc/21.2"
+GCC_MODULE="dev/gcc-10.1.0"
+ARM_MODULE="arm-compiler/armcc-19.0"
+INTEL_MODULE="intel/ics-19.1.1"
+
+#
+# Parallel build command runs with 4 tasks, the number of cores on the system,
+# or the value reported by nproc, whichever is lowest
+#
+num_cpus=$(lscpu -p | grep -v '^#' | wc -l)
+[ -z $num_cpus ] && num_cpus=1
+parallel_jobs=4
+[ $parallel_jobs -gt $num_cpus ] && parallel_jobs=$num_cpus
+num_pinned_threads=$(nproc)
+[ $parallel_jobs -gt $num_pinned_threads ] && parallel_jobs=$num_pinned_threads
+
+MAKE="make V=1"
+MAKEP="make V=1 -j${parallel_jobs}"
+export AUTOMAKE_JOBS=$parallel_jobs
+
+#
+# Remove the install dir and run make target $1 (default: clean)
+#
+make_clean() {
+ rm -rf ${ucx_inst}
+ $MAKEP ${1:-clean}
+}
+
+#
+# Prepare build environment
+#
+prepare_build() {
+ echo " ==== Prepare ===="
+ env
+ cd ${WORKSPACE}
+ if [ -d ${ucx_build_dir} ]
+ then
+ chmod u+rwx ${ucx_build_dir} -R
+ rm -rf ${ucx_build_dir}
+ fi
+ ./autogen.sh
+ mkdir -p ${ucx_build_dir}
+ cd ${ucx_build_dir}
+ export PROGRESS=0
+}
diff --git a/buildlib/tools/coverity.sh b/buildlib/tools/coverity.sh
new file mode 100755
index 00000000000..fe3acc01546
--- /dev/null
+++ b/buildlib/tools/coverity.sh
@@ -0,0 +1,80 @@
+#!/bin/bash -eExl
+
+realdir=$(realpath $(dirname $0))
+source ${realdir}/common.sh
+source ${realdir}/../az-helpers.sh
+
+COV_MODULE="tools/cov"
+
+#
+# Run Coverity and report errors
+# The argument is a UCX build type: devel or release
+#
+modules_for_coverity() {
+    # Load every module used by the Coverity build, in a fixed order.
+    # The return value is the sum of the az_module_load exit statuses.
+    local mod_name
+    res=0
+
+    for mod_name in $COV_MODULE $CUDA_MODULE $GDRCOPY_MODULE \
+            $JDK_MODULE $MVN_MODULE $XPMEM_MODULE
+    do
+        az_module_load $mod_name
+        res=$(($res+$?))
+    done
+
+    # res stays 0 only when every load succeeded
+    return $res
+}
+
+modules_for_coverity_unload() {
+    # Unload the modules used by the Coverity build, in the order they were loaded.
+    # The return value is the sum of the az_module_unload exit statuses.
+    local mod_name
+    res=0
+
+    for mod_name in $COV_MODULE $CUDA_MODULE $GDRCOPY_MODULE \
+            $JDK_MODULE $MVN_MODULE $XPMEM_MODULE
+    do
+        az_module_unload $mod_name
+        res=$(($res+$?))
+    done
+
+    # res stays 0 only when every unload succeeded
+    return $res
+}
+
+run_coverity() {
+    # Build UCX under cov-build and analyze it; $1 is the build type (devel or release)
+    az_init_modules
+    modules_for_coverity
+    ucx_build_type=$1
+    # xpmem prefix: the CPATH entry printed by "module show", minus "/include"
+    xpmem_root=$(module show $XPMEM_MODULE 2>&1 | awk '/CPATH/ {print $3}' | sed -e 's,/include,,')
+    with_xpmem="--with-xpmem=$xpmem_root"
+    ${WORKSPACE}/contrib/configure-$ucx_build_type --prefix=$ucx_inst --with-cuda --with-gdrcopy --with-java $with_xpmem
+    cov_build_id="cov_build_${ucx_build_type}"
+    cov_build="$ucx_build_dir/$cov_build_id"
+    rm -rf $cov_build
+    mkdir -p $cov_build
+    cov-build --dir $cov_build $MAKEP all
+    cov-analyze --jobs $parallel_jobs $COV_OPT --security --concurrency --dir $cov_build
+    nerrors=$(cov-format-errors --dir $cov_build | awk '/Processing [0-9]+ errors?/ { print $2 }')
+    [[ "$nerrors" =~ ^[0-9]+$ ]] || { echo "not ok 1 Coverity error count could not be parsed"; return 1; }
+    rc=$((${rc:-0}+$nerrors))
+    # A count above 255 would wrap around in the exit status, so rc is capped before returning
+    if [ $nerrors -gt 0 ]; then
+        cov-format-errors --dir $cov_build --emacs-style
+        cp -ar $cov_build $WORKSPACE/$cov_build_id
+        echo "not ok 1 Coverity Detected $nerrors failures"
+    else
+        echo "ok 1 Coverity found no issues"
+        rm -rf $cov_build
+    fi
+    modules_for_coverity_unload
+    if [ $rc -gt 255 ]; then rc=255; fi
+    return $rc
+}
+
+prepare_build # defined in common.sh
+run_coverity "$@" # script arguments are forwarded; the first one is the UCX build type
diff --git a/config/m4/compiler.m4 b/config/m4/compiler.m4
index 8e40b335d40..4d27c0053a9 100644
--- a/config/m4/compiler.m4
+++ b/config/m4/compiler.m4
@@ -205,7 +205,7 @@ AC_DEFUN([DETECT_UARCH],
# CHECK_COMPILER_FLAG
# Usage: CHECK_COMPILER_FLAG([name], [flag], [program], [if-true], [if-false])
#
-# The macro checks if program may be compiled using specified flag
+# The macro checks if program may be compiled and linked using specified flag
#
AC_DEFUN([CHECK_COMPILER_FLAG],
[
@@ -214,15 +214,15 @@ AC_DEFUN([CHECK_COMPILER_FLAG],
SAVE_CXXFLAGS="$CFLAGS"
CFLAGS="$BASE_CFLAGS $CFLAGS $2"
CXXFLAGS="$BASE_CXXFLAGS $CXXFLAGS $2"
- AC_COMPILE_IFELSE([$3],
- [AC_MSG_RESULT([yes])
- CFLAGS="$SAVE_CFLAGS"
- CXXFLAGS="$SAVE_CXXFLAGS"
- $4],
- [AC_MSG_RESULT([no])
- CFLAGS="$SAVE_CFLAGS"
- CXXFLAGS="$SAVE_CXXFLAGS"
- $5])
+ AC_LINK_IFELSE([$3],
+ [AC_MSG_RESULT([yes])
+ CFLAGS="$SAVE_CFLAGS"
+ CXXFLAGS="$SAVE_CXXFLAGS"
+ $4],
+ [AC_MSG_RESULT([no])
+ CFLAGS="$SAVE_CFLAGS"
+ CXXFLAGS="$SAVE_CXXFLAGS"
+ $5])
])
@@ -314,7 +314,7 @@ ADD_COMPILER_FLAG_IF_SUPPORTED([-diag-disable 269],
# Set default datatype alignment to 16 bytes.
# Some compilers (LLVM based, clang) expects allocation of datatypes by 32 bytes
# to optimize operations memset/memcpy/etc using vectorized processor instructions
-# which requires aligment of memory buffer by 32 or higer bytes. Default malloc method
+# which requires alignment of memory buffer by 32 or higher bytes. Default malloc method
# guarantee alignment for 16 bytes only. Force using compiler 16-bytes alignment
# by default if option is supported.
#
@@ -322,7 +322,7 @@ UCX_ALLOC_ALIGN=16
ADD_COMPILER_FLAG_IF_SUPPORTED([-fmax-type-align=$UCX_ALLOC_ALIGN],
[-fmax-type-align=$UCX_ALLOC_ALIGN],
[AC_LANG_SOURCE([[int main(int argc, char** argv){return 0;}]])],
- [AC_DEFINE_UNQUOTED([UCX_ALLOC_ALIGN], $UCX_ALLOC_ALIGN, [Set aligment assumption for compiler])],
+ [AC_DEFINE_UNQUOTED([UCX_ALLOC_ALIGN], $UCX_ALLOC_ALIGN, [Set alignment assumption for compiler])],
[])
@@ -467,13 +467,23 @@ AC_LANG_POP
#
# PGI specific switches
#
+# --diag_suppress 1 - Suppress last line ends without a newline
+# --diag_suppress 68 - Suppress integer conversion resulted in a change of sign
+# --diag_suppress 111 - Suppress statement is unreachable
+# --diag_suppress 167 - Suppress int* incompatible with unsigned int*
# --diag_suppress 181 - Suppress incorrect printf format for PGI18 compiler. TODO: remove it after compiler fix
+# --diag_suppress 188 - Suppress enumerated type mixed with another type
# --diag_suppress 381 - Suppress extra ";" ignored
# --diag_suppress 1215 - Suppress deprecated API warning for PGI18 compiler
# --diag_suppress 1901 - Use of a const variable in a constant expression is nonstandard in C
# --diag_suppress 1902 - Use of a const variable in a constant expression is nonstandard in C (same as 1901)
ADD_COMPILER_FLAGS_IF_SUPPORTED([[--display_error_number],
+ [--diag_suppress 1],
+ [--diag_suppress 68],
+ [--diag_suppress 111],
+ [--diag_suppress 167],
[--diag_suppress 181],
+ [--diag_suppress 188],
[--diag_suppress 381],
[--diag_suppress 1215],
[--diag_suppress 1901],
@@ -519,7 +529,8 @@ ADD_COMPILER_FLAGS_IF_SUPPORTED([[-Wno-pointer-sign],
[-Werror-implicit-function-declaration],
[-Wno-format-zero-length],
[-Wnested-externs],
- [-Wshadow]],
+ [-Wshadow],
+ [-Werror=declaration-after-statement]],
[AC_LANG_SOURCE([[int main(int argc, char **argv){return 0;}]])])
diff --git a/config/m4/cuda.m4 b/config/m4/cuda.m4
index 1862eb6148c..74bf0e56435 100644
--- a/config/m4/cuda.m4
+++ b/config/m4/cuda.m4
@@ -12,13 +12,19 @@ AS_IF([test "x$cuda_checked" != "xyes"],
[], [with_cuda=guess])
AS_IF([test "x$with_cuda" = "xno"],
- [cuda_happy=no],
+ [
+ cuda_happy=no
+ have_cuda_static=no
+ ],
[
save_CPPFLAGS="$CPPFLAGS"
save_LDFLAGS="$LDFLAGS"
+ save_LIBS="$LIBS"
CUDA_CPPFLAGS=""
CUDA_LDFLAGS=""
+ CUDA_LIBS=""
+ CUDA_STATIC_LIBS=""
AS_IF([test ! -z "$with_cuda" -a "x$with_cuda" != "xyes" -a "x$with_cuda" != "xguess"],
[ucx_check_cuda_dir="$with_cuda"
@@ -40,18 +46,31 @@ AS_IF([test "x$cuda_checked" != "xyes"],
# Check cuda libraries
AS_IF([test "x$cuda_happy" = "xyes"],
- [AC_CHECK_LIB([cuda], [cuDeviceGetUuid],
- [CUDA_LDFLAGS="$CUDA_LDFLAGS -lcuda"], [cuda_happy="no"])])
+ [AC_CHECK_LIB([cuda], [cuDeviceGetUuid],
+ [CUDA_LIBS="$CUDA_LIBS -lcuda"], [cuda_happy="no"])])
+ AS_IF([test "x$cuda_happy" = "xyes"],
+ [AC_CHECK_LIB([cudart], [cudaGetDeviceCount],
+ [CUDA_LIBS="$CUDA_LIBS -lcudart"], [cuda_happy="no"])])
+
+ LDFLAGS="$save_LDFLAGS"
+
+ # Check for cuda static library
+ have_cuda_static="no"
AS_IF([test "x$cuda_happy" = "xyes"],
- [AC_CHECK_LIB([cudart], [cudaGetDeviceCount],
- [CUDA_LDFLAGS="$CUDA_LDFLAGS -lcudart"], [cuda_happy="no"])])
+ [AC_CHECK_LIB([cudart_static], [cudaGetDeviceCount],
+ [CUDA_STATIC_LIBS="$CUDA_STATIC_LIBS -lcudart_static"
+ have_cuda_static="yes"],
+ [], [-ldl -lrt -lpthread])])
CPPFLAGS="$save_CPPFLAGS"
LDFLAGS="$save_LDFLAGS"
+ LIBS="$save_LIBS"
AS_IF([test "x$cuda_happy" = "xyes"],
[AC_SUBST([CUDA_CPPFLAGS], ["$CUDA_CPPFLAGS"])
AC_SUBST([CUDA_LDFLAGS], ["$CUDA_LDFLAGS"])
+ AC_SUBST([CUDA_LIBS], ["$CUDA_LIBS"])
+ AC_SUBST([CUDA_STATIC_LIBS], ["$CUDA_STATIC_LIBS"])
AC_DEFINE([HAVE_CUDA], 1, [Enable CUDA support])],
[AS_IF([test "x$with_cuda" != "xguess"],
[AC_MSG_ERROR([CUDA support is requested but cuda packages cannot be found])],
@@ -61,6 +80,7 @@ AS_IF([test "x$cuda_checked" != "xyes"],
cuda_checked=yes
AM_CONDITIONAL([HAVE_CUDA], [test "x$cuda_happy" != xno])
+ AM_CONDITIONAL([HAVE_CUDA_STATIC], [test "X$have_cuda_static" = "Xyes"])
]) # "x$cuda_checked" != "xyes"
diff --git a/config/m4/fuse3.m4 b/config/m4/fuse3.m4
new file mode 100644
index 00000000000..c79b31a5019
--- /dev/null
+++ b/config/m4/fuse3.m4
@@ -0,0 +1,51 @@
+#
+# Copyright (C) Mellanox Technologies Ltd. 2020. ALL RIGHTS RESERVED.
+# See file LICENSE for terms.
+#
+
+fuse3_happy="no"
+# fuse3_happy ends up "yes" only when all declarations and functions below are found
+AC_ARG_WITH([fuse3],
+            [AS_HELP_STRING([--with-fuse3=(DIR)],
+                            [Enable the use of FUSEv3 (default is guess).])],
+            [], [with_fuse3=guess])
+
+AS_IF([test "x$with_fuse3" != xno],
+    [
+     AS_IF([test "x$with_fuse3" = "xguess" \
+              -o "x$with_fuse3" = "xyes" \
+              -o "x$with_fuse3" = "x"],
+           [FUSE3_CPPFLAGS=$(pkg-config --cflags fuse3)
+            FUSE3_LIBS=$(pkg-config --libs fuse3)],
+           [FUSE3_CPPFLAGS="-I${with_fuse3}/include/fuse3"
+            FUSE3_LIBS="-L${with_fuse3}/lib -L${with_fuse3}/lib64 -lfuse3"])
+     # Libraries are passed through LIBS so that they follow the test program on the link line
+     save_CPPFLAGS="$CPPFLAGS"
+     save_LIBS="$LIBS"
+
+     CPPFLAGS="$FUSE3_CPPFLAGS $CPPFLAGS"
+     LIBS="$FUSE3_LIBS $LIBS"
+
+     fuse3_happy="yes"
+     AC_CHECK_DECLS([fuse_open_channel, fuse_mount, fuse_unmount],
+                    [AC_SUBST([FUSE3_CPPFLAGS], [$FUSE3_CPPFLAGS])
+                     AC_DEFINE([FUSE_USE_VERSION], 30, [Fuse API version])],
+                    [fuse3_happy="no"],
+                    [[#define FUSE_USE_VERSION 30
+                      #include <fuse.h>]])
+
+     AC_CHECK_FUNCS([fuse_open_channel fuse_mount fuse_unmount],
+                    [AC_SUBST([FUSE3_LIBS], [$FUSE3_LIBS])],
+                    [fuse3_happy="no"])
+
+     AS_IF([test "x$fuse3_happy" != "xyes" -a "x$with_fuse3" != "xguess"],
+           [AC_MSG_ERROR([FUSEv3 requested but could not be found])])
+
+     CPPFLAGS="$save_CPPFLAGS"
+     LIBS="$save_LIBS"
+    ],
+    [AC_MSG_WARN([FUSEv3 was explicitly disabled])]
+)
+
+AM_CONDITIONAL([HAVE_FUSE3], [test "x$fuse3_happy" != xno])
+vfs_enable=$fuse3_happy
diff --git a/config/m4/sysdep.m4 b/config/m4/sysdep.m4
index 9a8d5d8f4c6..cec8cc585ac 100644
--- a/config/m4/sysdep.m4
+++ b/config/m4/sysdep.m4
@@ -134,21 +134,42 @@ AS_IF([test "x$with_valgrind" = xno],
#
AC_ARG_ENABLE([numa],
AC_HELP_STRING([--disable-numa], [Disable NUMA support]),
+ [],
+ [enable_numa=guess])
+AS_IF([test "x$enable_numa" = xno],
[
- AC_MSG_NOTICE([NUMA support is disabled])
+ AC_MSG_NOTICE([NUMA support is explicitly disabled])
+ numa_enable=disabled
],
[
- AC_DEFUN([NUMA_W1], [not found. Please reconfigure with --disable-numa. ])
- AC_DEFUN([NUMA_W2], [Warning: this may have negative impact on library performance. It is better to install])
- AC_CHECK_HEADERS([numa.h numaif.h], [],
- [AC_MSG_ERROR([NUMA headers NUMA_W1 NUMA_W2 libnuma-devel package])])
- AC_CHECK_LIB(numa, mbind,
- [AC_SUBST(NUMA_LIBS, [-lnuma])],
- [AC_MSG_ERROR([NUMA library NUMA_W1 NUMA_W2 libnuma package])])
- AC_DEFINE([HAVE_NUMA], 1, [Define to 1 to enable NUMA support])
- AC_CHECK_TYPES([struct bitmask], [], [], [[#include <numa.h>]])
- ]
-)
+ save_LDFLAGS="$LDFLAGS"
+
+ numa_happy=yes # set to no by any failed header, library or type check below
+ AC_CHECK_HEADERS([numa.h numaif.h], [], [numa_happy=no])
+ AC_CHECK_LIB(numa, mbind,
+ [AC_SUBST(NUMA_LIBS, [-lnuma])],
+ [numa_happy=no])
+ AC_CHECK_TYPES([struct bitmask], [], [numa_happy=no], [[#include <numa.h>]])
+
+ LDFLAGS="$save_LDFLAGS"
+
+ AS_IF([test "x$numa_happy" = xyes],
+ [
+ AC_DEFINE([HAVE_NUMA], 1, [Define to 1 to enable NUMA support])
+ numa_enable=enabled
+ ],
+ [
+ AC_DEFUN([NUMA_W1], [NUMA support not found])
+ AC_DEFUN([NUMA_W2], [Please consider installing libnuma-devel package.])
+ AS_IF([test "x$enable_numa" = xyes],
+ [AC_MSG_ERROR([NUMA_W1. NUMA_W2])],
+ [
+ AC_MSG_WARN([NUMA_W1, this may impact library performance.])
+ AC_MSG_WARN([NUMA_W2])
+ ])
+ numa_enable=disabled
+ ])
+ ])
#
diff --git a/config/m4/ucm.m4 b/config/m4/ucm.m4
index 9c7c820d9ff..1e229edc51f 100644
--- a/config/m4/ucm.m4
+++ b/config/m4/ucm.m4
@@ -5,19 +5,6 @@
#
-#
-# Enable overriding library symbols
-#
-AC_ARG_ENABLE([symbol-override],
- AS_HELP_STRING([--disable-symbol-override], [Disable overriding library symbols, default: NO]),
- [],
- [enable_symbol_override=yes])
-
-AS_IF([test "x$enable_symbol_override" = xyes],
- [AC_DEFINE([ENABLE_SYMBOL_OVERRIDE], [1], [Enable symbol override])]
- [:]
-)
-
#
# Memory allocator selection
#
diff --git a/configure.ac b/configure.ac
index 3c4483cae2f..6621985a8db 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,5 +1,5 @@
#
-# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
+# Copyright (C) Mellanox Technologies Ltd. 2001-2021. ALL RIGHTS RESERVED.
# Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED.
# Copyright (C) The University of Tennessee and The University
# of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED.
@@ -9,7 +9,7 @@
AC_PREREQ([2.63])
define([ucx_ver_major], 1)
-define([ucx_ver_minor], 10)
+define([ucx_ver_minor], 11)
define([ucx_ver_patch], 0)
define([ts], esyscmd([sh -c "date +%Y%m%d%H%M%S"]))
@@ -109,6 +109,10 @@ AS_IF([${LN_S} --relative symlinktest 2>/dev/null],
#
AC_DEFINE_UNQUOTED([UCX_CONFIGURE_FLAGS], ["$config_flags"], [UCX configure flags])
+#
+# Define path of ucx.conf configuration file
+#
+AC_SUBST([ucx_conf_dir], [${sysconfdir}/ucx])
#
# Provide the functionality of AS_VAR_APPEND if Autoconf does not have it.
@@ -182,9 +186,9 @@ AS_IF([test "x$with_docs_only" = xyes],
AM_CONDITIONAL([HAVE_DC_DV], [false])
AM_CONDITIONAL([HAVE_DC_EXP], [false])
AM_CONDITIONAL([HAVE_TL_UD], [false])
- AM_CONDITIONAL([HAVE_TL_CM], [false])
AM_CONDITIONAL([HAVE_CRAY_UGNI], [false])
AM_CONDITIONAL([HAVE_CUDA], [false])
+ AM_CONDITIONAL([HAVE_CUDA_STATIC], [false])
AM_CONDITIONAL([HAVE_GDR_COPY], [false])
AM_CONDITIONAL([HAVE_ROCM], [false])
AM_CONDITIONAL([HAVE_HIP], [false])
@@ -192,7 +196,6 @@ AS_IF([test "x$with_docs_only" = xyes],
AM_CONDITIONAL([HAVE_CMA], [false])
AM_CONDITIONAL([HAVE_KNEM], [false])
AM_CONDITIONAL([HAVE_RDMACM], [false])
- AM_CONDITIONAL([HAVE_RDMACM_QP_LESS], [false])
AM_CONDITIONAL([HAVE_MPI], [false])
AM_CONDITIONAL([HAVE_MPIRUN], [false])
AM_CONDITIONAL([HAVE_MPICC], [false])
@@ -209,20 +212,22 @@ AS_IF([test "x$with_docs_only" = xyes],
AM_CONDITIONAL([HAVE_AARCH64_THUNDERX2], [false])
AM_CONDITIONAL([HAVE_AARCH64_THUNDERX1], [false])
AM_CONDITIONAL([HAVE_AARCH64_HI1620], [false])
+ AM_CONDITIONAL([HAVE_FUSE3], [false])
],
[
AM_CONDITIONAL([DOCS_ONLY], [false])
m4_include([config/m4/compiler.m4])
m4_include([config/m4/sysdep.m4])
- m4_include([config/m4/ucs.m4])
m4_include([config/m4/ucm.m4])
m4_include([config/m4/mpi.m4])
m4_include([config/m4/rte.m4])
+ m4_include([config/m4/fuse3.m4])
m4_include([config/m4/java.m4])
m4_include([config/m4/cuda.m4])
m4_include([config/m4/rocm.m4])
m4_include([config/m4/gdrcopy.m4])
m4_include([src/ucm/configure.m4])
+ m4_include([src/ucs/configure.m4])
m4_include([src/uct/configure.m4])
m4_include([src/tools/perf/configure.m4])
m4_include([test/gtest/configure.m4])
@@ -328,6 +333,7 @@ AS_IF([test "x$with_docs_only" = xyes],
# Print which transports are built
#
build_modules="${uct_modules}"
+build_modules="${build_modules}${ucs_modules}"
build_modules="${build_modules}${uct_ib_modules}"
build_modules="${build_modules}${uct_cuda_modules}"
build_modules="${build_modules}${ucm_modules}"
@@ -357,10 +363,10 @@ AC_CONFIG_FILES([
debian/rules
debian/control
debian/changelog
- src/ucs/Makefile
src/ucp/Makefile
src/ucp/api/ucp_version.h
src/ucp/core/ucp_version.c
+ src/tools/vfs/Makefile
src/tools/info/Makefile
src/tools/profile/Makefile
test/apps/Makefile
@@ -392,13 +398,17 @@ AC_MSG_NOTICE([Building documents only])
[
AC_MSG_NOTICE([UCX build configuration:])
AC_MSG_NOTICE([ Build prefix: ${prefix}])
+AC_MSG_NOTICE([ Configuration dir: ${ucx_conf_dir}])
AC_MSG_NOTICE([Preprocessor flags: ${BASE_CPPFLAGS}])
AC_MSG_NOTICE([ C compiler: ${CC} ${BASE_CFLAGS}])
AC_MSG_NOTICE([ C++ compiler: ${CXX} ${BASE_CXXFLAGS}])
AC_MSG_NOTICE([ Multi-thread: ${mt_enable}])
+AC_MSG_NOTICE([ NUMA support: ${numa_enable}])
AC_MSG_NOTICE([ MPI tests: ${mpi_enable}])
+AC_MSG_NOTICE([ VFS support: ${vfs_enable}])
AC_MSG_NOTICE([ Devel headers: ${enable_devel_headers}])
AC_MSG_NOTICE([ Bindings: <$(echo ${build_bindings}|tr ':' ' ') >])
+AC_MSG_NOTICE([ UCS modules: <$(echo ${ucs_modules}|tr ':' ' ') >])
AC_MSG_NOTICE([ UCT modules: <$(echo ${uct_modules}|tr ':' ' ') >])
AC_MSG_NOTICE([ CUDA modules: <$(echo ${uct_cuda_modules}|tr ':' ' ') >])
AC_MSG_NOTICE([ ROCM modules: <$(echo ${uct_rocm_modules}|tr ':' ' ') >])
diff --git a/contrib/buildrpm.sh b/contrib/buildrpm.sh
index 46d4187b8fb..6f45f63af7f 100755
--- a/contrib/buildrpm.sh
+++ b/contrib/buildrpm.sh
@@ -100,12 +100,12 @@ if [ $opt_binrpm -eq 1 ]; then
with_args+=" $(with_arg cuda)"
with_args+=" $(with_arg gdrcopy)"
with_args+=" $(with_arg ib)"
- with_args+=" $(with_arg cm ib_cm)"
with_args+=" $(with_arg knem)"
with_args+=" $(with_arg rdmacm)"
with_args+=" $(with_arg rocm)"
with_args+=" $(with_arg ugni)"
with_args+=" $(with_arg xpmem)"
+ with_args+=" $(with_arg vfs)"
with_args+=" $(with_arg java)"
echo rpmbuild -bb $rpmmacros $rpmopts $rpmspec $defines $with_args | bash -eEx
diff --git a/contrib/check_inst_headers.sh b/contrib/check_inst_headers.sh
index 865fd37a075..fcf4f90a795 100755
--- a/contrib/check_inst_headers.sh
+++ b/contrib/check_inst_headers.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/sh -eE
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED.
#
@@ -12,6 +12,7 @@
#
CC=${CC:-gcc}
+CXX=${CXX:-g++}
cd ${1:-.}
@@ -27,9 +28,12 @@ do
fi
# try to compile a test program (from stdin) which includes hfile
- ${CC} -I. -x c -c - -o /dev/null -DHAVE_CONFIG_H=1 <&/dev/null
+ then
+ return
+ fi
+
+ # get the interface of the ip address that is the default gateway (pure Ethernet IPv4 address).
+ eth_iface=$(ip route show| sed -n 's/default via \(\S*\) dev \(\S*\).*/\2/p')
+
+ # the pure Ethernet interface should not appear in the ibdev2netdev output. it should not be an IPoIB or
+ # RoCE interface.
+ if ibdev2netdev|grep -qw "${eth_iface}"
+ then
+ echo "Failed to retrieve an IP of a non IPoIB/RoCE interface"
+ exit 1
+ fi
+
+ get_ifaddr ${eth_iface}
+}
+
#
# Prepare build environment
#
@@ -285,387 +343,6 @@ prepare() {
cd build-test
}
-#
-# Build documentation
-#
-build_docs() {
- doxy_ready=0
- doxy_target_version="1.8.11"
- doxy_version="$(doxygen --version)" || true
-
- # Try load newer doxygen if native is older than 1.8.11
- if ! (echo $doxy_target_version; echo $doxy_version) | sort -CV
- then
- if module_load tools/doxygen-1.8.11
- then
- doxy_ready=1
- else
- echo " doxygen was not found"
- fi
- else
- doxy_ready=1
- fi
-
- if [ $doxy_ready -eq 1 ]
- then
- echo " ==== Build docs only ===="
- ../configure --prefix=$ucx_inst --with-docs-only
- make_clean
- $MAKE docs
- make_clean # FIXME distclean does not work with docs-only
- fi
-}
-
-#
-# Building java docs
-#
-build_java_docs() {
- echo " ==== Building java docs ===="
- if module_load dev/jdk && module_load dev/mvn
- then
- ../configure --prefix=$ucx_inst --with-java
- $MAKE -C ../build-test/bindings/java/src/main/native docs
- module unload dev/jdk
- module unload dev/mvn
- else
- echo "No jdk and mvn module, failed to build docs".
- fi
-}
-
-#
-# Build without verbs
-#
-build_no_verbs() {
- echo "==== Build without IB verbs ===="
- ../contrib/configure-release --prefix=$ucx_inst --without-verbs
- make_clean
- $MAKEP
- make_clean distclean
-}
-
-#
-# Build without numa support check
-#
-build_disable_numa() {
- echo "==== Check --disable-numa compilation option ===="
- ../contrib/configure-release --prefix=$ucx_inst --disable-numa
- make_clean
- $MAKEP
- make_clean distclean
-}
-
-#
-# Build a package in release mode
-#
-build_release_pkg() {
- echo "==== Build release ===="
- ../contrib/configure-release
- make_clean
- $MAKEP
- $MAKEP distcheck
-
- # Show UCX info
- ./src/tools/info/ucx_info -s -f -c -v -y -d -b -p -w -e -uart -m 20M
-
- if [ -f /etc/redhat-release -o -f /etc/fedora-release ]; then
- rpm_based=yes
- elif [ `cat /etc/os-release | grep -i "ubuntu\|mint"|wc -l` -gt 0 ]; then
- rpm_based=no
- else
- # try rpm tool to detect distro
- set +e
- out=$(rpm -q rpm 2>/dev/null)
- rc=$?
- set -e
- rpm_based=yes
- if [[ $rc != 0 || "$out" == *"not installed"* ]]; then
- rpm_based=no
- fi
- fi
-
- if [[ "$rpm_based" == "no" && -x /usr/bin/dpkg-buildpackage ]]; then
- echo "==== Build debian package ===="
- dpkg-buildpackage -us -uc
- else
- echo "==== Build RPM ===="
- ../contrib/buildrpm.sh -s -b --nodeps --define "_topdir $PWD"
- fi
-
- # check that UCX version is present in spec file
- cd ${WORKSPACE}
- # extract version from configure.ac and convert to MAJOR.MINOR.PATCH representation
- version=$(grep -P "define\S+ucx_ver" configure.ac | awk '{print $2}' | sed 's,),,' | xargs echo | tr ' ' '.')
- if ! grep -q "$version" ucx.spec.in; then
- echo "Current UCX version ($version) is not present in ucx.spec.in changelog"
- exit 1
- fi
- cd -
-
- make_clean distclean
-}
-
-#
-# Build with Intel compiler
-#
-build_icc() {
- echo 1..1 > build_icc.tap
- if module_load intel/ics && icc -v
- then
- echo "==== Build with Intel compiler ===="
- ../contrib/configure-devel --prefix=$ucx_inst CC=icc CXX=icpc
- make_clean
- $MAKEP
- make_clean distclean
- echo "==== Build with Intel compiler (clang) ===="
- ../contrib/configure-devel --prefix=$ucx_inst CC=clang CXX=clang++
- make_clean
- $MAKEP
- make_clean distclean
- echo "ok 1 - build successful " >> build_icc.tap
- else
- echo "==== Not building with Intel compiler ===="
- echo "ok 1 - # SKIP because Intel compiler not installed" >> build_icc.tap
- fi
- module_unload intel/ics
-}
-
-#
-# Build with PGI compiler
-#
-build_pgi() {
- echo 1..1 > build_pgi.tap
- pgi_test_file=$(mktemp ./XXXXXX).c
- echo "int main() {return 0;}" > ${pgi_test_file}
-
- if module_load pgi/latest && pgcc18 --version && pgcc18 ${pgi_test_file} -o ${pgi_test_file}.out
- then
- echo "==== Build with PGI compiler ===="
- # PGI failed to build valgrind headers, disable it for now
- # TODO: Using non-default PGI compiler - pgcc18 which is going to be default
- # in next versions.
- # Switch to default CC compiler after pgcc18 is default for pgi module
- ../contrib/configure-devel --prefix=$ucx_inst CC=pgcc18 --without-valgrind
- make_clean
- $MAKEP
- make_clean distclean
- echo "ok 1 - build successful " >> build_pgi.tap
- else
- echo "==== Not building with PGI compiler ===="
- echo "ok 1 - # SKIP because PGI compiler not installed" >> build_pgi.tap
- fi
-
- rm -rf ${pgi_test_file} ${pgi_test_file}.out
- module_unload pgi/latest
-}
-
-#
-# Build debug version
-#
-build_debug() {
- echo "==== Build with --enable-debug option ===="
- ../contrib/configure-devel --prefix=$ucx_inst --enable-debug --enable-examples
- make_clean
- $MAKEP
- make_clean distclean
-}
-
-#
-# Build prof
-#
-build_prof() {
- echo "==== Build configure-prof ===="
- ../contrib/configure-prof --prefix=$ucx_inst
- make_clean
- $MAKEP
- make_clean distclean
-}
-
-#
-# Build UGNI
-#
-build_ugni() {
- echo 1..1 > build_ugni.tap
-
- echo "==== Build with cray-ugni ===="
- #
- # Point pkg-config to contrib/cray-ugni-mock, and replace
- # PKG_CONFIG_TOP_BUILD_DIR with source dir, since the mock .pc files contain
- # relative paths.
- #
- ../contrib/configure-devel --prefix=$ucx_inst --with-ugni \
- PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$PWD/../contrib/cray-ugni-mock \
- PKG_CONFIG_TOP_BUILD_DIR=$PWD/..
- make_clean
- $MAKEP
-
- # make sure UGNI transport is enabled
- grep '#define HAVE_TL_UGNI 1' config.h
-
- $MAKE distcheck
- make_clean distclean
-
- module_unload dev/cray-ugni
- echo "ok 1 - build successful " >> build_ugni.tap
-}
-
-#
-# Build CUDA
-#
-build_cuda() {
- echo 1..1 > build_cuda.tap
- if module_load $CUDA_MODULE
- then
- if module_load $GDRCOPY_MODULE
- then
- echo "==== Build with enable cuda, gdr_copy ===="
- ../contrib/configure-devel --prefix=$ucx_inst --with-cuda --with-gdrcopy
- make_clean
- $MAKEP
- make_clean distclean
-
- ../contrib/configure-release --prefix=$ucx_inst --with-cuda --with-gdrcopy
- make_clean
- $MAKEP
- make_clean distclean
- module unload $GDRCOPY_MODULE
- fi
-
- echo "==== Build with enable cuda, w/o gdr_copy ===="
- ../contrib/configure-devel --prefix=$ucx_inst --with-cuda --without-gdrcopy
- make_clean
- $MAKEP
-
- module unload $CUDA_MODULE
-
- echo "==== Running test_link_map with cuda build but no cuda module ===="
- env UCX_HANDLE_ERRORS=bt ./test/apps/test_link_map
-
- make_clean distclean
- echo "ok 1 - build successful " >> build_cuda.tap
- else
- echo "==== Not building with cuda flags ===="
- echo "ok 1 - # SKIP because cuda not installed" >> build_cuda.tap
- fi
- unload_cuda_env
-}
-
-#
-# Build with clang compiler
-#
-build_clang() {
- echo 1..1 > build_clang.tap
- if which clang > /dev/null 2>&1
- then
- echo "==== Build with clang compiler ===="
- ../contrib/configure-devel --prefix=$ucx_inst CC=clang CXX=clang++
- make_clean
- $MAKEP
- $MAKEP install
- UCX_HANDLE_ERRORS=bt,freeze UCX_LOG_LEVEL_TRIGGER=ERROR $ucx_inst/bin/ucx_info -d
- make_clean distclean
- echo "ok 1 - build successful " >> build_clang.tap
- else
- echo "==== Not building with clang compiler ===="
- echo "ok 1 - # SKIP because clang not installed" >> build_clang.tap
- fi
-}
-
-#
-# Build with gcc-latest module
-#
-build_gcc_latest() {
- echo 1..1 > build_gcc_latest.tap
- #If the glibc version on the host is older than 2.14, don't run
- #check the glibc version with the ldd version since it comes with glibc
- #see https://www.linuxquestions.org/questions/linux-software-2/how-to-check-glibc-version-263103/
- #see https://benohead.com/linux-check-glibc-version/
- #see https://stackoverflow.com/questions/9705660/check-glibc-version-for-a-particular-gcc-compiler
- ldd_ver="$(ldd --version | awk '/ldd/{print $NF}')"
- if (echo "2.14"; echo $ldd_ver) | sort -CV
- then
- if module_load dev/gcc-latest
- then
- echo "==== Build with GCC compiler ($(gcc --version|head -1)) ===="
- ../contrib/configure-devel --prefix=$ucx_inst
- make_clean
- $MAKEP
- $MAKEP install
- UCX_HANDLE_ERRORS=bt,freeze UCX_LOG_LEVEL_TRIGGER=ERROR $ucx_inst/bin/ucx_info -d
- make_clean distclean
- echo "ok 1 - build successful " >> build_gcc_latest.tap
- module unload dev/gcc-latest
- else
- echo "==== Not building with latest gcc compiler ===="
- echo "ok 1 - # SKIP because dev/gcc-latest module is not available" >> build_gcc_latest.tap
- fi
- else
- echo "==== Not building with gcc compiler ===="
- echo "Required glibc version is too old ($ldd_ver)"
- echo "ok 1 - # SKIP because glibc version is older than 2.14" >> build_gcc_latest.tap
- fi
-}
-
-#
-# Builds jucx
-#
-build_jucx() {
- echo 1..1 > build_jucx.tap
- if module_load dev/jdk && module_load dev/mvn
- then
- echo "==== Building JUCX bindings (java api for ucx) ===="
- ../contrib/configure-release --prefix=$ucx_inst --with-java
- make_clean
- $MAKEP
- $MAKEP install
- make_clean distclean
- echo "ok 1 - build successful " >> build_jucx.tap
- module unload dev/jdk
- module unload dev/mvn
- else
- echo "==== No jdk and mvn modules ==== "
- echo "ok 1 - # SKIP because dev/jdk and dev/mvn modules are not available" >> build_jucx.tap
- fi
-}
-
-#
-# Build with armclang compiler
-#
-build_armclang() {
- echo 1..1 > build_armclang.tap
- armclang_test_file=$(mktemp ./XXXXXX).c
- echo "int main() {return 0;}" > ${armclang_test_file}
- if module_load arm-compiler/latest && armclang --version && armclang ${armclang_test_file} -o ${armclang_test_file}.out
- then
- echo "==== Build with armclang compiler ===="
- ../contrib/configure-devel --prefix=$ucx_inst CC=armclang CXX=armclang++
- make_clean
- $MAKEP
- $MAKEP install
- UCX_HANDLE_ERRORS=bt,freeze UCX_LOG_LEVEL_TRIGGER=ERROR $ucx_inst/bin/ucx_info -d
- make_clean distclean
- echo "ok 1 - build successful " >> build_armclang.tap
- else
- echo "==== Not building with armclang compiler ===="
- echo "ok 1 - # SKIP because armclang not installed" >> build_armclang.tap
- fi
-
- rm -rf ${armclang_test_file} ${armclang_test_file}.out
- module_unload arm-compiler/latest
-}
-
-check_inst_headers() {
- echo 1..1 > inst_headers.tap
- echo "==== Testing installed headers ===="
-
- ../contrib/configure-release --prefix=$PWD/install
- make_clean
- $MAKEP install
- ../contrib/check_inst_headers.sh $PWD/install/include
- make_clean distclean
-
- echo "ok 1 - build successful " >> inst_headers.tap
-}
-
check_make_distcheck() {
echo 1..1 > make_distcheck.tap
@@ -679,26 +356,7 @@ check_make_distcheck() {
../contrib/configure-release --prefix=$PWD/install
$MAKEP DISTCHECK_CONFIGURE_FLAGS="--enable-gtest" distcheck
else
- echo "Not testing make distcheck: GCC version is too old ($(gcc --version|head -1))"
- fi
-}
-
-check_config_h() {
- echo 1..1 > check_config_h.tap
-
- srcdir=$PWD/../src
-
- # Check if all .c files include config.h
- echo "==== Checking for config.h files in directory $srcdir ===="
-
- missing=`find $srcdir \( -name "*.c" -o -name "*.cc" \) -type f -exec grep -LP '\#\s*include\s+"config.h"' {} \;`
-
- if [ `echo $missing | wc -w` -eq 0 ]
- then
- echo "ok 1 - check successful " >> check_config_h.tap
- else
- echo "Error: missing include config.h in files: $missing"
- exit 1
+ log_warning "Not testing make distcheck: GCC version is too old ($(gcc --version|head -1))"
fi
}
@@ -751,7 +409,7 @@ rename_files() {
}
run_client_server_app() {
- test_name=$1
+ test_exe=$1
test_args=$2
server_addr_arg=$3
kill_server=$4
@@ -763,7 +421,7 @@ run_client_server_app() {
affinity_server=$(slice_affinity 0)
affinity_client=$(slice_affinity 1)
- taskset -c $affinity_server ${test_name} ${test_args} ${server_port_arg} &
+ taskset -c $affinity_server ${test_exe} ${test_args} ${server_port_arg} &
server_pid=$!
sleep 15
@@ -773,7 +431,7 @@ run_client_server_app() {
set +Ee
fi
- taskset -c $affinity_client ${test_name} ${test_args} ${server_addr_arg} ${server_port_arg} &
+ taskset -c $affinity_client ${test_exe} ${test_args} ${server_addr_arg} ${server_port_arg} &
client_pid=$!
wait ${client_pid}
@@ -802,21 +460,21 @@ run_hello() {
fi
# set smaller timeouts so the test will complete faster
- if [[ ${test_args} == *"-e"* ]]
+ if [[ ${test_args} =~ "-e" ]]
then
export UCX_UD_TIMEOUT=15s
export UCX_RC_TIMEOUT=1ms
export UCX_RC_RETRY_COUNT=4
fi
- if [[ ${test_args} == *"-e"* ]]
+ if [[ ${test_args} =~ "-e" ]]
then
error_emulation=1
else
error_emulation=0
fi
- run_client_server_app "./examples/${test_name}" "${test_args}" "-n $(hostname)" 0 $error_emulation
+ run_client_server_app "./examples/${test_name}" "${test_args}" "-n $(hostname)" 0 ${error_emulation}
if [[ ${test_args} == *"-e"* ]]
then
@@ -842,7 +500,10 @@ run_ucp_hello() {
mem_types_list+="cuda cuda-managed "
fi
- for test_mode in -w -f -b -e
+ export UCX_KEEPALIVE_INTERVAL=1s
+ export UCX_KEEPALIVE_NUM_EPS=10
+
+ for test_mode in -w -f -b -erecv -esend -ekeepalive
do
for mem_type in $mem_types_list
do
@@ -851,6 +512,9 @@ run_ucp_hello() {
done
done
rm -f ./ucp_hello_world
+
+ unset UCX_KEEPALIVE_INTERVAL
+ unset UCX_KEEPALIVE_NUM_EPS
}
#
@@ -891,50 +555,76 @@ run_uct_hello() {
run_client_server() {
test_name=ucp_client_server
+ mem_types_list="host"
+
+ if [ "X$have_cuda" == "Xyes" ]
+ then
+ mem_types_list+=" cuda cuda-managed "
+ fi
+
if [ ! -x ${test_name} ]
then
- gcc -o ${test_name} ${ucx_inst}/share/ucx/examples/${test_name}.c \
- -lucp -lucs -I${ucx_inst}/include -L${ucx_inst}/lib \
- -Wl,-rpath=${ucx_inst}/lib
+ $MAKEP -C examples ${test_name}
fi
- server_ip=$(get_rdma_device_ip_addr)
+ server_ip=$1
if [ "$server_ip" == "" ]
then
return
fi
- run_client_server_app "./${test_name}" "" "-a ${server_ip}" 1 0
+ for mem_type in ${mem_types_list}
+ do
+ echo "==== Running UCP client-server with \"${mem_type}\" memory type ===="
+ run_client_server_app "./examples/${test_name}" "-m ${mem_type}" "-a ${server_ip}" 1 0
+ done
}
run_ucp_client_server() {
echo "==== Running UCP client-server ===="
- run_client_server
-
- rm -f ./ucp_client_server
+ run_client_server $(get_rdma_device_ip_addr)
+ run_client_server $(get_non_rdma_ip_addr)
+ run_client_server "127.0.0.1"
}
run_io_demo() {
- server_ip=$(get_rdma_device_ip_addr)
- if [ "$server_ip" == "" ]
+ server_rdma_addr=$(get_rdma_device_ip_addr)
+ server_nonrdma_addr=$(get_non_rdma_ip_addr)
+ server_loopback_addr="127.0.0.1"
+ mem_types_list="host "
+
+ if [ "X$have_cuda" == "Xyes" ]
+ then
+ mem_types_list+="cuda cuda-managed "
+ fi
+
+ if [ -z "$server_rdma_addr" ] && [ -z "$server_nonrdma_addr" ]
then
return
fi
- echo "==== Running UCP IO demo ===="
+ for mem_type in $mem_types_list
+ do
+ echo "==== Running UCP IO demo with \"${mem_type}\" memory type ===="
- test_args="$@ -o write,read -d 128:4194304 -i 10000 -w 10"
- test_name=io_demo
+ test_args="$@ -o write,read -d 128:4194304 -P 2 -i 10000 -w 10 -m ${mem_type}"
+ test_name=io_demo
- if [ ! -x ${test_name} ]
- then
- $MAKEP -C test/apps/iodemo ${test_name}
- fi
+ if [ ! -x ${test_name} ]
+ then
+ $MAKEP -C test/apps/iodemo ${test_name}
+ fi
- export UCX_SOCKADDR_CM_ENABLE=y
- run_client_server_app "./test/apps/iodemo/${test_name}" "${test_args}" "${server_ip}" 1 0
+ for server_ip in $server_rdma_addr $server_nonrdma_addr $server_loopback_addr
+ do
+ run_client_server_app "./test/apps/iodemo/${test_name}" "${test_args}" "${server_ip}" 1 0
+ for server_ip in $server_rdma_addr $server_nonrdma_addr
+ do
+ run_client_server_app "./test/apps/iodemo/${test_name}" "${test_args}" "${server_ip}" 1 0
+ done
+ done
+ done
- unset UCX_SOCKADDR_CM_ENABLE
make_clean
}
@@ -1096,16 +786,21 @@ run_ucx_perftest() {
# Test malloc hooks with mpi
#
test_malloc_hooks_mpi() {
- for tname in malloc_hooks malloc_hooks_unmapped external_events flag_no_install
+ for mode in reloc bistro
do
- echo "==== Running memory hook (${tname}) on MPI ===="
- $MPIRUN -np 1 $AFFINITY ./test/mpi/test_memhooks -t $tname
- done
+ for tname in malloc_hooks malloc_hooks_unmapped external_events flag_no_install
+ do
+ echo "==== Running memory hook (${tname} mode ${mode}) on MPI ===="
+ $MPIRUN -np 1 $AFFINITY \
+ ./test/mpi/test_memhooks -t $tname -m ${mode}
+ done
- echo "==== Running memory hook (malloc_hooks) on MPI with LD_PRELOAD ===="
- ucm_lib=$PWD/src/ucm/.libs/libucm.so
- ls -l $ucm_lib
- $MPIRUN -np 1 -x LD_PRELOAD=$ucm_lib $AFFINITY ./test/mpi/test_memhooks -t malloc_hooks
+ echo "==== Running memory hook (malloc_hooks mode ${mode}) on MPI with LD_PRELOAD ===="
+ ucm_lib=$PWD/src/ucm/.libs/libucm.so
+ ls -l $ucm_lib
+ $MPIRUN -np 1 -x LD_PRELOAD=$ucm_lib $AFFINITY \
+ ./test/mpi/test_memhooks -t malloc_hooks -m ${mode}
+ done
}
#
@@ -1222,6 +917,15 @@ test_ucp_dlopen() {
fi
}
+test_init_mt() {
+ echo "==== Running multi-thread init ===="
+ $MAKEP
+ for ((i=0;i<50;++i))
+ do
+ $AFFINITY timeout 1m ./test/apps/test_init_mt
+ done
+}
+
test_memtrack() {
../contrib/configure-devel --prefix=$ucx_inst
make_clean
@@ -1234,7 +938,7 @@ test_memtrack() {
test_unused_env_var() {
# We must create a UCP worker to get the warning about unused variables
echo "==== Running ucx_info env vars test ===="
- UCX_SOCKADDR_CM_ENABLE=y UCX_IB_PORTS=mlx5_0:1 ./src/tools/info/ucx_info -epw -u t | grep "unused" | grep -q -E "UCX_IB_PORTS"
+ UCX_IB_PORTS=mlx5_0:1 ./src/tools/info/ucx_info -epw -u t | grep "unused" | grep -q -E "UCX_IB_PORTS"
}
test_env_var_aliases() {
@@ -1280,113 +984,42 @@ test_malloc_hook() {
then
./test/apps/test_tcmalloc
fi
-}
-test_jucx() {
- echo "==== Running jucx test ===="
- echo "1..2" > jucx_tests.tap
- iface=`ibdev2netdev | grep Up | awk '{print $5}' | head -1`
- if [ -z "$iface" ]
- then
- echo "Failed to find active ib devices." >> jucx_tests.tap
- return
- elif module_load dev/jdk && module_load dev/mvn
+ if [ "X$have_cuda" == "Xyes" ]
then
- jucx_port=$((20000 + EXECUTOR_NUMBER))
- export JUCX_TEST_PORT=$jucx_port
- export UCX_MEM_EVENTS=no
- $MAKE -C bindings/java/src/main/native test
- ifaces=`ibdev2netdev | grep Up | awk '{print $5}'`
- if [ -n "$ifaces" ]
- then
- $MAKE -C bindings/java/src/main/native package
- fi
- for iface in $ifaces
+ cuda_dynamic_exe=./test/apps/test_cuda_hook_dynamic
+ cuda_static_exe=./test/apps/test_cuda_hook_static
+
+ for mode in reloc bistro
do
- if [ -n "$iface" ]
- then
- server_ip=$(get_ifaddr ${iface})
- fi
+ export UCX_MEM_CUDA_HOOK_MODE=${mode}
+
+ # Run cuda memory hooks with dynamic link
+ ${cuda_dynamic_exe}
- if [ -z "$server_ip" ]
+ # Run cuda memory hooks with static link, if exists. If the static
+ # library 'libcudart_static.a' is not present, static test will not
+ # be built.
+ if [ -x ${cuda_static_exe} ]
then
- echo "Interface $iface has no IPv4"
- continue
+ ${cuda_static_exe} && status="pass" || status="fail"
+ [ ${mode} == "bistro" ] && exp_status="pass" || exp_status="fail"
+ if [ ${status} == ${exp_status} ]
+ then
+ echo "Static link with cuda ${status}, as expected"
+ else
+ echo "Static link with cuda is expected to ${exp_status}, actual: ${status}"
+ exit 1
+ fi
fi
- echo "Running standalone benchamrk on $iface"
-
- java -XX:ErrorFile=$WORKSPACE/hs_err_${BUILD_NUMBER}_%p.log \
- -XX:OnError="cat $WORKSPACE/hs_err_${BUILD_NUMBER}_%p.log" \
- -cp "bindings/java/resources/:bindings/java/src/main/native/build-java/*" \
- org.openucx.jucx.examples.UcxReadBWBenchmarkReceiver \
- s=$server_ip p=$JUCX_TEST_PORT &
- java_pid=$!
-
- sleep 10
+ # Test that driver API hooks work in both reloc and bistro modes,
+ # since we call them directly from the test
+ ${cuda_dynamic_exe} -d
+ [ -x ${cuda_static_exe} ] && ${cuda_static_exe} -d
- java -XX:ErrorFile=$WORKSPACE/hs_err_${BUILD_NUMBER}_%p.log \
- -XX:OnError="cat $WORKSPACE/hs_err_${BUILD_NUMBER}_%p.log" \
- -cp "bindings/java/resources/:bindings/java/src/main/native/build-java/*" \
- org.openucx.jucx.examples.UcxReadBWBenchmarkSender \
- s=$server_ip p=$JUCX_TEST_PORT t=10000000
- wait $java_pid
+ unset UCX_MEM_CUDA_HOOK_MODE
done
-
- unset JUCX_TEST_PORT
- unset UCX_MEM_EVENTS
- module unload dev/jdk
- module unload dev/mvn
- echo "ok 1 - jucx test" >> jucx_tests.tap
- else
- echo "Failed to load dev/jdk and dev/mvn modules." >> jucx_tests.tap
- fi
-}
-
-#
-# Run Coverity and report errors
-# The argument is a UCX build type: devel or release
-#
-run_coverity() {
- echo 1..1 > coverity.tap
- if module_load tools/cov
- then
- ucx_build_type=$1
-
- echo "==== Running coverity ===="
- ../contrib/configure-$ucx_build_type --prefix=$ucx_inst
- make_clean
- cov_build_id="cov_build_${ucx_build_type}_${BUILD_NUMBER}"
- cov_build="$WORKSPACE/$cov_build_id"
- rm -rf $cov_build
- cov-build --dir $cov_build $MAKEP all
- cov-analyze --jobs $parallel_jobs $COV_OPT --security --concurrency --dir $cov_build
- nerrors=$(cov-format-errors --dir $cov_build | awk '/Processing [0-9]+ errors?/ { print $2 }')
- rc=$(($rc+$nerrors))
-
- index_html=$(cd $cov_build && find . -name index.html | cut -c 3-)
- if [ -z "$BUILD_URL" ]; then
- cov_url="${WS_URL}/${cov_build_id}/${index_html}"
- else
- cov_url="${BUILD_URL}/artifact/${cov_build_id}/${index_html}"
- fi
- rm -f jenkins_sidelinks.txt
- if [ $nerrors -gt 0 ]; then
- cov-format-errors --dir $cov_build --emacs-style
- echo "not ok 1 Coverity Detected $nerrors failures # $cov_url" >> coverity.tap
- else
- echo "ok 1 Coverity found no issues" >> coverity.tap
- rm -rf $cov_build
- fi
-
- echo Coverity report: $cov_url
- printf "%s\t%s\n" Coverity $cov_url >> jenkins_sidelinks.txt
- module unload tools/cov
-
- return $rc
- else
- echo "==== Not running Coverity ===="
- echo "ok 1 - # SKIP because Coverity not installed" >> coverity.tap
fi
}
@@ -1504,12 +1137,12 @@ run_gtest() {
# Load newer valgrind if naative is older than 3.10
if ! (echo "valgrind-3.10.0"; valgrind --version) | sort -CV
then
- module load tools/valgrind-latest
+ module load tools/valgrind-3.12.0
fi
$AFFINITY $TIMEOUT_VALGRIND make -C test/gtest test_valgrind
(cd test/gtest && rename_files .tap _vg.tap *.tap && mv *.tap $GTEST_REPORT_DIR)
- module unload tools/valgrind-latest
+ module unload tools/valgrind-3.12.0
else
echo "==== Not running valgrind tests with $compiler_name compiler ===="
echo "1..1" > vg_skipped.tap
@@ -1586,8 +1219,15 @@ run_ucx_tl_check() {
echo "1..1" > ucx_tl_check.tap
+ # Test transport selection
../test/apps/test_ucx_tls.py -p $ucx_inst
+ # Test setting many lanes
+ UCX_IB_NUM_PATHS=8 \
+ UCX_MAX_EAGER_LANES=4 \
+ UCX_MAX_RNDV_LANES=4 \
+ ./src/tools/info/ucx_info -u t -e
+
if [ $? -ne 0 ]; then
echo "not ok 1" >> ucx_tl_check.tap
else
@@ -1604,24 +1244,11 @@ run_tests() {
export UCX_ERROR_MAIL_TO=$ghprbActualCommitAuthorEmail
export UCX_ERROR_MAIL_FOOTER=$JOB_URL/$BUILD_NUMBER/console
export UCX_TCP_PORT_RANGE="$((33000 + EXECUTOR_NUMBER * 100))"-"$((34000 + EXECUTOR_NUMBER * 100))"
- export UCX_TCP_CM_ALLOW_ADDR_INUSE=y
-
- # test cuda build if cuda modules available
- do_distributed_task 2 4 build_cuda
+ export UCX_TCP_CM_REUSEADDR=y
# load cuda env only if GPU available for remaining tests
try_load_cuda_env
- do_distributed_task 0 4 build_icc
- do_distributed_task 0 4 build_pgi
- do_distributed_task 1 4 build_debug
- do_distributed_task 1 4 build_prof
- do_distributed_task 1 4 build_ugni
- do_distributed_task 3 4 build_clang
- do_distributed_task 0 4 build_armclang
- do_distributed_task 1 4 build_gcc_latest
- do_distributed_task 0 4 build_jucx
-
# all are running mpi tests
run_mpi_tests
@@ -1634,43 +1261,33 @@ run_tests() {
$MAKEP
$MAKEP install
- run_ucx_tl_check
-
+ do_distributed_task 2 4 run_ucx_tl_check
do_distributed_task 1 4 run_ucp_hello
do_distributed_task 2 4 run_uct_hello
do_distributed_task 1 4 run_ucp_client_server
do_distributed_task 2 4 run_ucx_perftest
do_distributed_task 1 4 run_io_demo
do_distributed_task 3 4 test_profiling
- do_distributed_task 0 3 test_jucx
do_distributed_task 1 4 test_ucs_dlopen
do_distributed_task 3 4 test_ucs_load
do_distributed_task 3 4 test_memtrack
do_distributed_task 0 4 test_unused_env_var
do_distributed_task 2 4 test_env_var_aliases
- do_distributed_task 1 3 test_malloc_hook
+ do_distributed_task 1 4 test_malloc_hook
do_distributed_task 0 4 test_ucp_dlopen
+ do_distributed_task 1 4 test_init_mt
# all are running gtest
run_gtest_default
run_gtest_armclang
- do_distributed_task 3 4 run_coverity release
- do_distributed_task 0 4 run_coverity devel
do_distributed_task 1 4 run_gtest_release
}
prepare
try_load_cuda_env
-do_distributed_task 0 4 build_docs
-do_distributed_task 0 4 build_java_docs
-do_distributed_task 0 4 build_disable_numa
-do_distributed_task 1 4 build_no_verbs
-do_distributed_task 2 4 build_release_pkg
-do_distributed_task 3 4 check_inst_headers
-do_distributed_task 1 4 check_make_distcheck
-do_distributed_task 2 4 check_config_h
if [ -n "$JENKINS_RUN_TESTS" ] || [ -n "$RUN_TESTS" ]
then
+ check_machine
run_tests
fi
diff --git a/contrib/ucx_perftest_config/test_types_ucp b/contrib/ucx_perftest_config/test_types_ucp
index 2a9ecfa44d6..2c28996fe65 100644
--- a/contrib/ucx_perftest_config/test_types_ucp
+++ b/contrib/ucx_perftest_config/test_types_ucp
@@ -1,36 +1,48 @@
-# UCP
-ucp_iov_contig_tag_lat -t tag_lat -D iov,contig
-ucp_iov_iov_tag_lat -t tag_lat -D iov,iov
-ucp_contig_contig_tag_lat -t tag_lat -D contig,contig
+#
+# UCP basic
+#
+ucp_iov_contig_tag_lat -t tag_lat -D iov,contig
+ucp_iov_iov_tag_lat -t tag_lat -D iov,iov
+ucp_contig_tag_lat -t tag_lat -D contig,contig
#IOV with RNDV is not yet supported
-#ucp_contig_iov_tag_lat -t tag_lat -D contig,iov
-ucp_iov_contig_tag_bw -t tag_bw -D iov,contig
-ucp_iov_iov_tag_bw -t tag_bw -D iov,iov
-ucp_contig_contig_tag_bw -t tag_bw -D contig,contig
+#ucp_contig_iov_tag_lat -t tag_lat -D contig,iov
+ucp_iov_contig_tag_bw -t tag_bw -D iov,contig
+ucp_iov_iov_tag_bw -t tag_bw -D iov,iov
+ucp_contig_tag_bw -t tag_bw -D contig,contig
#IOV with RNDV is not yet supported
-#ucp_contig_iov_tag_bw -t tag_bw -D contig,iov
-ucp_sync_tag_lat -t tag_sync_lat
-ucp_unexp_tag_lat -t tag_lat -U
-ucp_wild_tag_lat -t tag_lat -C
-ucp_contig_stream_bw -t stream_bw -r recv_data
-ucp_contig_stream_lat -t stream_lat -r recv_data
-ucp_contig_stream_bw -t stream_bw -r recv
-ucp_contig_stream_lat -t stream_lat -r recv
-#CUDA
-ucp_contig_contig_cuda_tag_lat -t tag_lat -D contig,contig -m cuda,cuda
-ucp_contig_contig_cuda_tag_lat -t tag_lat -D contig,contig -m cuda,host
-ucp_contig_contig_cuda_tag_lat -t tag_lat -D contig,contig -m host,cuda
-ucp_contig_contig_cuda_tag_bw -t tag_bw -D contig,contig -m cuda,cuda
-ucp_contig_contig_cuda_tag_bw -t tag_bw -D contig,contig -m cuda,host
-ucp_contig_contig_cuda_tag_bw -t tag_bw -D contig,contig -m host,cuda
-ucp_contig_cuda_stream_bw -t stream_bw -r recv_data -m cuda
-ucp_contig_cuda_stream_lat -t stream_lat -r recv_data -m cuda
-ucp_contig_cuda_stream_bw -t stream_bw -r recv -m cuda
-ucp_contig_cuda_stream_lat -t stream_lat -r recv -m cuda
-ucp_contig_contig_cuda_mng_tag_lat -t tag_lat -D contig,contig -m cuda-managed
-ucp_contig_contig_cuda_mng_tag_bw -t tag_bw -D contig,contig -m cuda-managed
-ucp_contig_cuda_mng_stream_bw -t stream_bw -r recv_data -m cuda-managed
-ucp_contig_cuda_mng_stream_lat -t stream_lat -r recv_data -m cuda-managed
+#ucp_contig_iov_tag_bw -t tag_bw -D contig,iov
+ucp_sync_tag_lat -t tag_sync_lat
+ucp_unexp_tag_lat -t tag_lat -U
+ucp_wild_tag_lat -t tag_lat -C
+ucp_contig_stream_data_bw -t stream_bw -r recv_data
+ucp_contig_stream_data_lat -t stream_lat -r recv_data
+ucp_contig_stream_bw -t stream_bw -r recv
+ucp_contig_stream_lat -t stream_lat -r recv
+#
+# CUDA
+#
+ucp_contig_cuda_tag_lat -t tag_lat -D contig,contig -m cuda,cuda
+ucp_contig_cuda_host_tag_lat -t tag_lat -D contig,contig -m cuda,host
+ucp_contig_host_cuda_tag_lat -t tag_lat -D contig,contig -m host,cuda
+ucp_contig_cuda_tag_bw -t tag_bw -D contig,contig -m cuda,cuda
+ucp_contig_cuda_host_tag_bw -t tag_bw -D contig,contig -m cuda,host
+ucp_contig_host_cuda_tag_bw -t tag_bw -D contig,contig -m host,cuda
+ucp_contig_cuda_stream_bw -t stream_bw -r recv -m cuda
+ucp_contig_cuda_stream_lat -t stream_lat -r recv -m cuda
+ucp_contig_cuda_stream_data_bw -t stream_bw -r recv_data -m cuda
+ucp_contig_cuda_stream_data_lat -t stream_lat -r recv_data -m cuda
+ucp_contig_cuda_mng_tag_lat -t tag_lat -D contig,contig -m cuda-managed
+ucp_contig_cuda_mng_tag_bw -t tag_bw -D contig,contig -m cuda-managed
+ucp_contig_cuda_mng_stream_data_bw -t stream_bw -r recv_data -m cuda-managed
+ucp_contig_cuda_mng_stream_data_lat -t stream_lat -r recv_data -m cuda-managed
ucp_contig_cuda_mng_stream_bw -t stream_bw -r recv -m cuda-managed
ucp_contig_cuda_mng_stream_lat -t stream_lat -r recv -m cuda-managed
-
+#
+# CUDA wakeup mode
+#
+ucp_contig_cuda_tag_lat_sleep -I -E sleep -t tag_lat -D contig,contig -m cuda,cuda
+ucp_contig_cuda_host_tag_lat_sleep -I -E sleep -t tag_lat -D contig,contig -m cuda,host
+ucp_contig_host_cuda_tag_lat_sleep -I -E sleep -t tag_lat -D contig,contig -m host,cuda
+ucp_contig_cuda_tag_bw_sleep -I -E sleep -t tag_bw -D contig,contig -m cuda,cuda
+ucp_contig_cuda_host_tag_bw_sleep -I -E sleep -t tag_bw -D contig,contig -m cuda,host
+ucp_contig_host_cuda_tag_bw_sleep -I -E sleep -t tag_bw -D contig,contig -m host,cuda
diff --git a/contrib/ucx_perftest_config/test_types_uct b/contrib/ucx_perftest_config/test_types_uct
index 2769ee481de..40edda3cb20 100644
--- a/contrib/ucx_perftest_config/test_types_uct
+++ b/contrib/ucx_perftest_config/test_types_uct
@@ -6,12 +6,14 @@ put_short_lat -t put_lat -D short
put_bcopy_lat -t put_lat -D bcopy
put_zcopy_lat -t put_lat -D zcopy
# AM
-am_short_lat -t am_lat -D short
-am_bcopy_lat -t am_lat -D bcopy
-am_zcopy_lat -t am_lat -D zcopy
-am_short_bw -t am_bw -D short
-am_bcopy_bw -t am_bw -D bcopy
-am_zcopy_bw -t am_bw -D zcopy
+am_short_lat -t am_lat -D short
+am_short_iov_lat -t am_lat -D shortiov
+am_bcopy_lat -t am_lat -D bcopy
+am_zcopy_lat -t am_lat -D zcopy
+am_short_bw -t am_bw -D short
+am_short_iov_bw -t am_bw -D shortiov
+am_bcopy_bw -t am_bw -D bcopy
+am_zcopy_bw -t am_bw -D zcopy
# GET
get_bcopy -t get -D bcopy
get_zcopy -t get -D zcopy
diff --git a/contrib/valgrind.supp b/contrib/valgrind.supp
index a6ad76fc0a3..37718a5d7a3 100644
--- a/contrib/valgrind.supp
+++ b/contrib/valgrind.supp
@@ -287,3 +287,20 @@
...
fun:cudaGetDeviceCount
}
+{
+ rdmacm_event_channel
+ Memcheck:Leak
+ ...
+ fun:rdma_create_event_channel
+}
+{
+ rdmacm_bind_addr
+ Memcheck:Leak
+ ...
+ fun:rdma_bind_addr
+}
+{
+ xpmem_get
+ Memcheck:Cond
+ fun:xpmem_get
+}
diff --git a/debian/control.in b/debian/control.in
index 767f02fe054..8575b7d0ef7 100644
--- a/debian/control.in
+++ b/debian/control.in
@@ -14,6 +14,7 @@ Homepage: http://www.openucx.org
Package: @PACKAGE@
Section: libs
+Depends: libc6, libgomp1, libnuma1
Architecture: any
Description: Unified Communication X
UCX is a communication library implementing high-performance messaging.
diff --git a/debian/rules.in b/debian/rules.in
index a2e812d695b..9f85dfecb7d 100755
--- a/debian/rules.in
+++ b/debian/rules.in
@@ -13,7 +13,7 @@
dh $@
override_dh_auto_configure:
- @top_top_srcdir@/contrib/configure-release --prefix=/usr --enable-examples
+ @top_top_srcdir@/contrib/configure-release --prefix=/usr --enable-examples --with-java=no
chmod +x debian/rules
override_dh_shlibdeps:
diff --git a/docs/doxygen/design.md b/docs/doxygen/design.md
index dc88dccbe3f..94d008db512 100644
--- a/docs/doxygen/design.md
+++ b/docs/doxygen/design.md
@@ -22,8 +22,8 @@ the differences across various hardware architectures and provides a
low-level API that enables the implementation of communication protocols.
The primary goal of the layer is to provide direct and efficient access to
hardware network functionality. For this purpose,
-UCT relies on vendor provided low-level drivers such as InfiniBand
-Verbs, Cray's uGNI, libfabrics, etc. In addition, the layer provides
+UCT relies on vendor-provided low-level drivers such as uGNI, Verbs,
+shared memory, ROCM, and CUDA. In addition, the layer provides
constructs for communication context management (thread-based and application level), and
allocation and management of device-specific memories including those found
in accelerators. In terms of communication APIs, UCT defines interfaces for
diff --git a/docs/doxygen/intro.md b/docs/doxygen/intro.md
index 35404bbfd67..178849929d2 100644
--- a/docs/doxygen/intro.md
+++ b/docs/doxygen/intro.md
@@ -42,8 +42,8 @@ communications (one-sided and two-sided), collective communication,
and remote atomic operations required for popular parallel programming models.
Also, the initial UCX reference implementation
is targeted to support current network technologies such as:
-+ Open Fabrics - InfiniBand (Mellanox, Qlogic, IBM), libfabrics, iWARP, RoCE
-+ Cray GEMINI \& ARIES
++ Open Fabrics - InfiniBand (Mellanox, Qlogic, IBM), iWARP, RoCE
++ Cray uGNI - GEMINI and ARIES interconnects
+ Shared memory (MMAP, Posix, CMA, KNEM, XPMEM, etc.)
+ Ethernet (TCP/UDP)
diff --git a/docs/source/faq.md b/docs/source/faq.md
index 9a0d2a70b17..a5fc4fef490 100644
--- a/docs/source/faq.md
+++ b/docs/source/faq.md
@@ -10,12 +10,12 @@ and relatively easy way to construct widely used HPC protocols: MPI tag matching
RMA operations, rendezvous protocols, stream, fragmentation, remote atomic operations, etc.
#### What is UCP, UCT, UCS?
-* **UCT** is a transport layer that abstracts the differences across various hardware architectures and provides a low-level API that enables the implementation of communication protocols. The primary goal of the layer is to provide direct and efficient access to hardware network resources with minimal software overhead. For this purpose UCT relies on low-level drivers provided by vendors such as InfiniBand Verbs, Cray's uGNI, libfabrics, etc. In addition, the layer provides constructs for communication context management (thread-based and ap- plication level), and allocation and management of device- specific memories including those found in accelerators. In terms of communication APIs, UCT defines interfaces for immediate (short), buffered copy-and-send (bcopy), and zero- copy (zcopy) communication operations. The short operations are optimized for small messages that can be posted and completed in place. The bcopy operations are optimized for medium size messages that are typically sent through a so- called bouncing-buffer. Finally, the zcopy operations expose zero-copy memory-to-memory communication semantics.
+* **UCT** is a transport layer that abstracts the differences across various hardware architectures and provides a low-level API that enables the implementation of communication protocols. The primary goal of the layer is to provide direct and efficient access to hardware network resources with minimal software overhead. For this purpose, UCT relies on low-level drivers such as uGNI, Verbs, shared memory, ROCM, CUDA. In addition, the layer provides constructs for communication context management (thread-based and application level), and allocation and management of device-specific memories including those found in accelerators. In terms of communication APIs, UCT defines interfaces for immediate (short), buffered copy-and-send (bcopy), and zero-copy (zcopy) communication operations. The short operations are optimized for small messages that can be posted and completed in place. The bcopy operations are optimized for medium size messages that are typically sent through a so-called bouncing-buffer. Finally, the zcopy operations expose zero-copy memory-to-memory communication semantics.
* **UCP** implements higher-level protocols that are typically used by message passing (MPI) and PGAS programming models by using lower-level capabilities exposed through the UCT layer.
UCP is responsible for the following functionality: initialization of the library, selection of transports for communication, message fragmentation, and multi-rail communication. Currently, the API has the following classes of interfaces: Initialization, Remote Memory Access (RMA) communication, Atomic Memory Operations (AMO), Active Message, Tag-Matching, and Collectives.
-* **UCS** is a service layer that provides the necessary func- tionality for implementing portable and efficient utilities.
+* **UCS** is a service layer that provides the necessary functionality for implementing portable and efficient utilities.
#### How can I contribute?
1. Fork
@@ -35,20 +35,20 @@ submit issues on github: https://github.com/openucx/ucx/issues
The UCX framework is maintained and supported by hardware vendors in addition to the open source community. Every pull-request is tested and multiple hardware platforms supported by vendors community.
* **Performance, performance, performance!**
-The framework design, data structures, and components are design to provide highly optimized access to the network hardware.
+The framework architecture, data structures, and components are designed to provide optimized access to the network hardware.
* **High level API for a broad range HPC programming models.**
-UCX provides a high level API implemented in software 'UCP' to fill in the gaps across interconnects. This allows to use a single set of APIs in a library to implement multiple interconnects. This reduces the level of complexities when implementing libraries such as Open MPI or OpenSHMEM. Because of this, UCX performance portable because a single implementation (in Open MPI or OpenSHMEM) will work efficiently on multiple interconnects. (e.g. uGNI, Verbs, libfabrics, etc).
+UCX provides a high-level and performance-portable network API. The API targets a variety of programming models ranging from high-performance MPI implementations to Apache Spark. The UCP API abstracts differences and fills in the gaps across interconnects implemented in the UCT layer. As a result, the implementation of programming models and libraries (MPI, OpenSHMEM, Apache Spark, RAPIDS, etc.) is simplified while providing efficient support for multiple interconnects (uGNI, Verbs, TCP, shared memory, ROCM, CUDA, etc.).
* **Support for interaction between multiple transports (or providers) to deliver messages.**
-For example, UCX has the logic (in UCP) to make 'GPUDirect', IB' and share memory work together efficiently to deliver the data where is needed without the user dealing with this.
+For example, UCX has the logic (in UCP) to make 'GPUDirect', 'IB' and shared memory work together efficiently to deliver the data where it is needed without the user dealing with this.
* **Cross-transport multi-rail capabilities.** UCX protocol layer can utilize multiple transports,
event on different types of hardware, to deliver messages faster, without the need for
any special tuning.
* **Utilizing hardware offloads for optimized performance**, such as RDMA, Hardware tag-matching
- hardware atomic operations, etc.
+ hardware atomic operations, etc.
#### What protocols are supported by UCX?
UCP implements RMA put/get, send/receive with tag matching, Active messages, atomic operations. In near future we plan to add support for commonly used collective operations.
@@ -61,10 +61,10 @@ Instead, GASNET can leverage UCX framework for fast end efficient implementation
UCX framework does not provide drivers, instead it relies on the drivers provided by vendors. Currently we use: OFA VERBs, Cray's UGNI, NVIDIA CUDA.
#### What is the relation between UCX and OFA Verbs or Libfabrics?
-UCX, is a middleware communication layer that relies on vendors provided user level drivers including OFA Verbs or libfabrics (or any other drivers provided by another communities or vendors) to implement high-level protocols which can be used to close functionality gaps between various vendors drivers including various libfabrics providers: coordination across various drivers, multi-rail capabilities, software based RMA, AMOs, tag-matching for transports and drivers that do not support such capabilities natively.
+UCX is a middleware communication framework that relies on device drivers, e.g. RDMA, CUDA, ROCM. RDMA and OS-bypass network devices typically implement device drivers using the RDMA-core Linux subsystem that is supported by UCX. Support for other network abstractions can be added based on requests and contributions from the community.
-#### Is UCX a user level driver?
-No. Typically, Drivers aim to expose fine-grain access to the network architecture specific features.
+#### Is UCX a user-level driver?
+UCX is not a user-level driver. Typically, drivers aim to expose fine-grained access to the network architecture-specific features.
UCX abstracts the differences across various drivers and fill-in the gaps using software protocols for some of the architectures that don't provide hardware level support for all the operations.
@@ -99,15 +99,21 @@ UCX does not depend on an external runtime environment.
UCX takes parameters from specific **environment variables**, which start with the
prefix `UCX_`.
-> **IMPORTANT NOTE:** Changing the values of UCX environment variables to non-default
-may lead to undefined behavior. The environment variables are mostly indented for
- dvanced users, or for specific tunings or workarounds recommended by UCX community.
+> **IMPORTANT NOTE:** Setting UCX environment variables to non-default values
+may lead to undefined behavior. The environment variables are mostly intended for
+advanced users, or for specific tunings or workarounds recommended by the UCX community.
-#### 2. Where can I see all UCX environment variables?
+#### Where can I see all UCX environment variables?
* Running `ucx_info -c` prints all environment variables and their default values.
* Running `ucx_info -cf` prints the documentation for all environment variables.
+#### UCX configuration file
+
+Upon installing the UCX package (RPM/DEB), a `/etc/ucx/ucx.conf` file is created.
+It allows customization of the various parameters. An environment variable
+has precedence over the value defined in `ucx.conf`.
+
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 8712fdb5f29..58c8070e373 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -1,4 +1,4 @@
-..
+..
.. Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
..
.. See file LICENSE for terms.
@@ -9,12 +9,12 @@ OpenUCX
*******
Unified Communication X (UCX) is an `award winning `_,
-optimized production proven communication framework for modern, high-bandwidth
+optimized production-proven communication framework for modern, high-bandwidth
and low-latency networks.
-UCX exposes a set of abstract communication primitives which utilize the best of
+UCX exposes a set of abstract communication primitives that utilize the best of
available hardware resources and offloads. These include RDMA (InfiniBand and RoCE),
-TCP, GPUs, shared Memory, and network atomic operations.
+TCP, GPUs, shared memory, and network atomic operations.
UCX facilitates rapid development by providing a high-level API, masking the
low-level details, while maintaining high-performance and scalability.
@@ -73,7 +73,7 @@ Documentation
*************
* API doc: `HTML `_ `PDF `_
-* `Examples `_
+* `Examples `_
Projects using UCX
diff --git a/docs/source/running.md b/docs/source/running.md
index 76e773ed900..006b9facd05 100644
--- a/docs/source/running.md
+++ b/docs/source/running.md
@@ -75,7 +75,7 @@ improvements.
```
$ mkdir build-ucx
$ cd build-ucx
- $ ../configure --prefix= --with-ucx=
+ $ ../configure --prefix= --with-ucx=
```
> **NOTE**: With OpenMPI 4.0 and above, there could be compilation errors from "btl_uct" component.
> This component is not critical for using UCX; so it could be disabled this way:
diff --git a/docs/source/ucx_features.rst b/docs/source/ucx_features.rst
index a7bfdbd52a4..681e06adb29 100644
--- a/docs/source/ucx_features.rst
+++ b/docs/source/ucx_features.rst
@@ -57,6 +57,6 @@ Protocols, Optimizations and Advanced Features
- Pipeline protocols for GPU memory
- QoS and traffic isolation for RDMA transports
- Platform (micro-architecture) specific optimizations (such as memcpy, memory barriers, etc.)
-- Multi-rail support
+- Multi-rail and RoCE link aggregation group support
- Bare-metal, containers and cloud environments support
- Advanced protocols for transfer messages of different sizes
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 05cde2765dd..e9ed2d49652 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -13,18 +13,20 @@ dist_examples_DATA = \
ucp_client_server.c
if HAVE_CUDA
-EXAMPLE_CUDA_LDFLAGS = $(CUDA_LDFLAGS)
+EXAMPLE_CUDA_LD_FLAGS = $(CUDA_LDFLAGS)
+EXAMPLE_CUDA_LIBS = $(CUDA_LIBS)
# cuda.h couldn't be compiled with -pedantic flag
EXAMPLE_CUDA_CFLAGS =
EXAMPLE_CUDA_CPPFLAGS = $(CUDA_CPPFLAGS) -DHAVE_CUDA
else
-EXAMPLE_CUDA_LDFLAGS =
+EXAMPLE_CUDA_LD_FLAGS =
+EXAMPLE_CUDA_LIBS =
EXAMPLE_CUDA_CFLAGS = $(CFLAGS_PEDANTIC)
EXAMPLE_CUDA_CPPFLAGS =
endif
EXAMPLE_CCLD_FLAGS = -lucs -I$(includedir) -L$(libdir) -Wall -Werror -Wl,-rpath,$(libdir) \
- $(EXAMPLE_CUDA_LDFLAGS) $(EXAMPLE_CUDA_CPPFLAGS)
+ $(EXAMPLE_CUDA_LD_FLAGS) $(EXAMPLE_CUDA_LIBS) $(EXAMPLE_CUDA_CPPFLAGS)
installcheck-local:
@echo "INSTALLCHECK: Compiling examples with installed library"
@@ -35,7 +37,7 @@ installcheck-local:
if HAVE_EXAMPLES
-bin_PROGRAMS = \
+noinst_PROGRAMS = \
ucp_hello_world \
uct_hello_world \
ucp_client_server
@@ -43,21 +45,25 @@ bin_PROGRAMS = \
ucp_hello_world_SOURCES = ucp_hello_world.c
ucp_hello_world_CFLAGS = $(BASE_CFLAGS) $(EXAMPLE_CUDA_CFLAGS)
ucp_hello_world_CPPFLAGS = $(BASE_CPPFLAGS) $(EXAMPLE_CUDA_CPPFLAGS)
+ucp_hello_world_LDFLAGS = $(EXAMPLE_CUDA_LD_FLAGS)
ucp_hello_world_LDADD = $(top_builddir)/src/ucs/libucs.la \
$(top_builddir)/src/ucp/libucp.la \
- $(EXAMPLE_CUDA_LDFLAGS)
+ $(EXAMPLE_CUDA_LIBS)
uct_hello_world_SOURCES = uct_hello_world.c
uct_hello_world_CFLAGS = $(BASE_CFLAGS) $(EXAMPLE_CUDA_CFLAGS)
uct_hello_world_CPPFLAGS = $(BASE_CPPFLAGS) $(EXAMPLE_CUDA_CPPFLAGS)
+uct_hello_world_LDFLAGS = $(EXAMPLE_CUDA_LD_FLAGS)
uct_hello_world_LDADD = $(top_builddir)/src/ucs/libucs.la \
$(top_builddir)/src/uct/libuct.la \
- $(EXAMPLE_CUDA_LDFLAGS)
+ $(EXAMPLE_CUDA_LIBS)
ucp_client_server_SOURCES = ucp_client_server.c
-ucp_client_server_CFLAGS = $(BASE_CFLAGS) $(CFLAGS_PEDANTIC)
-ucp_client_server_CPPFLAGS = $(BASE_CPPFLAGS)
+ucp_client_server_CFLAGS = $(BASE_CFLAGS) $(EXAMPLE_CUDA_CFLAGS)
+ucp_client_server_CPPFLAGS = $(BASE_CPPFLAGS) $(EXAMPLE_CUDA_CPPFLAGS)
+ucp_client_server_LDFLAGS = $(EXAMPLE_CUDA_LD_FLAGS)
ucp_client_server_LDADD = $(top_builddir)/src/ucs/libucs.la \
- $(top_builddir)/src/ucp/libucp.la
+ $(top_builddir)/src/ucp/libucp.la \
+ $(EXAMPLE_CUDA_LIBS)
endif
diff --git a/examples/hello_world_util.h b/examples/hello_world_util.h
index c51134e1683..51cff49c830 100644
--- a/examples/hello_world_util.h
+++ b/examples/hello_world_util.h
@@ -181,18 +181,15 @@ ucs_memory_type_t parse_mem_type(const char *opt_arg)
void print_common_help()
{
- fprintf(stderr, " -n name Set node name or IP address "
- "of the server (required for client and should be ignored "
- "for server)\n");
- fprintf(stderr, " -p port Set alternative server port (default:13337)\n");
- fprintf(stderr, " -s size Set test string length (default:16)\n");
- fprintf(stderr, " -m memory type of messages\n");
- fprintf(stderr, " host - system memory (default)\n");
+ fprintf(stderr, " -p Set alternative server port (default:13337)\n");
+ fprintf(stderr, " -s Set test string length (default:16)\n");
+ fprintf(stderr, " -m Memory type of messages\n");
+ fprintf(stderr, " host - system memory (default)\n");
if (check_mem_type_support(UCS_MEMORY_TYPE_CUDA)) {
- fprintf(stderr, " cuda - NVIDIA GPU memory\n");
+ fprintf(stderr, " cuda - NVIDIA GPU memory\n");
}
if (check_mem_type_support(UCS_MEMORY_TYPE_CUDA_MANAGED)) {
- fprintf(stderr, " cuda-managed - NVIDIA GPU managed/unified memory\n");
+ fprintf(stderr, " cuda-managed - NVIDIA GPU managed/unified memory\n");
}
}
@@ -268,7 +265,7 @@ int client_connect(const char *server, uint16_t server_port)
return -1;
}
-static int barrier(int oob_sock)
+static inline int barrier(int oob_sock)
{
int dummy = 0;
ssize_t res;
@@ -284,7 +281,7 @@ static int barrier(int oob_sock)
return !(res == sizeof(dummy));
}
-static int generate_test_string(char *str, int size)
+static inline int generate_test_string(char *str, int size)
{
char *tmp_str;
int i;
diff --git a/examples/ucp_client_server.c b/examples/ucp_client_server.c
index 1ed6f46a1b2..670d5ac389c 100644
--- a/examples/ucp_client_server.c
+++ b/examples/ucp_client_server.c
@@ -27,6 +27,8 @@
* 13337.
*/
+#include "hello_world_util.h"
+
#include
#include /* memset */
@@ -34,7 +36,6 @@
#include /* getopt */
#include /* atoi */
-#define TEST_STRING_LEN sizeof(test_message)
#define DEFAULT_PORT 13337
#define IP_STRING_LEN 50
#define PORT_STRING_LEN 8
@@ -44,9 +45,10 @@
#define DEFAULT_NUM_ITERATIONS 1
#define TEST_AM_ID 0
-const char test_message[] = "UCX Client-Server Hello World";
-static uint16_t server_port = DEFAULT_PORT;
-static int num_iterations = DEFAULT_NUM_ITERATIONS;
+
+static long test_string_length = 16;
+static uint16_t server_port = DEFAULT_PORT;
+static int num_iterations = DEFAULT_NUM_ITERATIONS;
typedef enum {
@@ -219,19 +221,20 @@ static ucs_status_t start_client(ucp_worker_h ucp_worker, const char *ip,
* Print the received message on the server side or the sent data on the client
* side.
*/
-static void print_result(int is_server, char *recv_message, int current_iter)
+static void print_result(int is_server, char *msg_str, int current_iter)
{
if (is_server) {
printf("Server: iteration #%d\n", (current_iter + 1));
printf("UCX data message was received\n");
printf("\n\n----- UCP TEST SUCCESS -------\n\n");
- printf("%s", recv_message);
+ printf("%s", msg_str);
printf("\n\n------------------------------\n\n");
} else {
printf("Client: iteration #%d\n", (current_iter + 1));
printf("\n\n-----------------------------------------\n\n");
printf("Client sent message: \n%s.\nlength: %ld\n",
- test_message, TEST_STRING_LEN);
+ (test_string_length != 0) ? msg_str : "",
+ test_string_length);
printf("\n-----------------------------------------\n\n");
}
}
@@ -264,11 +267,11 @@ static ucs_status_t request_wait(ucp_worker_h ucp_worker, void *request,
}
static int request_finalize(ucp_worker_h ucp_worker, test_req_t *request,
- test_req_t *ctx, int is_server,
- char *recv_message, int current_iter)
+ test_req_t *ctx, int is_server, void *msg,
+ int current_iter)
{
ucs_status_t status;
- int ret = 0;
+ char *msg_str;
status = request_wait(ucp_worker, request, ctx);
if (status != UCS_OK) {
@@ -280,10 +283,18 @@ static int request_finalize(ucp_worker_h ucp_worker, test_req_t *request,
/* Print the output of the first, last and every PRINT_INTERVAL iteration */
if ((current_iter == 0) || (current_iter == (num_iterations - 1)) ||
!((current_iter + 1) % (PRINT_INTERVAL))) {
- print_result(is_server, recv_message, current_iter);
+ msg_str = calloc(1, test_string_length + 1);
+ if (msg_str == NULL) {
+ fprintf(stderr, "memory allocation failed\n");
+ return -1;
+ }
+
+ mem_type_memcpy(msg_str, msg, test_string_length);
+ print_result(is_server, msg_str, current_iter);
+ free(msg_str);
}
- return ret;
+ return 0;
}
/**
@@ -294,33 +305,41 @@ static int request_finalize(ucp_worker_h ucp_worker, test_req_t *request,
static int send_recv_stream(ucp_worker_h ucp_worker, ucp_ep_h ep, int is_server,
int current_iter)
{
- char recv_message[TEST_STRING_LEN]= "";
ucp_request_param_t param;
test_req_t *request;
- size_t length;
+ size_t msg_length;
+ void *msg;
test_req_t ctx;
+ int ret;
+
+ msg_length = test_string_length;
+ msg = mem_type_malloc(msg_length);
+ CHKERR_ACTION(msg == NULL, "allocate memory\n", return -1;);
+ mem_type_memset(msg, 0, msg_length);
- ctx.complete = 0;
+ ctx.complete = 0;
param.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK |
UCP_OP_ATTR_FIELD_USER_DATA;
param.user_data = &ctx;
+
if (!is_server) {
+ ret = generate_test_string(msg, msg_length);
+ CHKERR_ACTION(ret < 0, "generate test string", return -1;);
+
/* Client sends a message to the server using the stream API */
param.cb.send = send_cb;
- request = ucp_stream_send_nbx(ep, test_message, TEST_STRING_LEN,
- &param);
+ request = ucp_stream_send_nbx(ep, msg, msg_length, &param);
} else {
/* Server receives a message from the client using the stream API */
param.op_attr_mask |= UCP_OP_ATTR_FIELD_FLAGS;
param.flags = UCP_STREAM_RECV_FLAG_WAITALL;
param.cb.recv_stream = stream_recv_cb;
- request = ucp_stream_recv_nbx(ep, &recv_message,
- TEST_STRING_LEN,
- &length, &param);
+ request = ucp_stream_recv_nbx(ep, msg, msg_length,
+ &msg_length, &param);
}
- return request_finalize(ucp_worker, request, &ctx, is_server,
- recv_message, current_iter);
+ return request_finalize(ucp_worker, request, &ctx, is_server, msg,
+ current_iter);
}
/**
@@ -331,28 +350,37 @@ static int send_recv_stream(ucp_worker_h ucp_worker, ucp_ep_h ep, int is_server,
static int send_recv_tag(ucp_worker_h ucp_worker, ucp_ep_h ep, int is_server,
int current_iter)
{
- char recv_message[TEST_STRING_LEN]= "";
ucp_request_param_t param;
void *request;
+ size_t msg_length;
+ void *msg;
test_req_t ctx;
+ int ret;
- ctx.complete = 0;
+ msg_length = test_string_length;
+ msg = mem_type_malloc(msg_length);
+ CHKERR_ACTION(msg == NULL, "allocate memory\n", return -1;);
+ mem_type_memset(msg, 0, msg_length);
+
+ ctx.complete = 0;
param.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK |
UCP_OP_ATTR_FIELD_USER_DATA;
param.user_data = &ctx;
if (!is_server) {
+ ret = generate_test_string(msg, msg_length);
+ CHKERR_ACTION(ret < 0, "generate test string", return -1;);
+
/* Client sends a message to the server using the Tag-Matching API */
param.cb.send = send_cb;
- request = ucp_tag_send_nbx(ep, test_message, TEST_STRING_LEN,
- TAG, &param);
+ request = ucp_tag_send_nbx(ep, msg, msg_length, TAG, &param);
} else {
/* Server receives a message from the client using the Tag-Matching API */
param.cb.recv = tag_recv_cb;
- request = ucp_tag_recv_nbx(ucp_worker, &recv_message,
- TEST_STRING_LEN, TAG, 0, &param);
+ request = ucp_tag_recv_nbx(ucp_worker, msg, msg_length, TAG, 0,
+ &param);
}
- return request_finalize(ucp_worker, request, &ctx, is_server, recv_message,
+ return request_finalize(ucp_worker, request, &ctx, is_server, msg,
current_iter);
}
@@ -360,16 +388,18 @@ ucs_status_t ucp_am_data_cb(void *arg, const void *header, size_t header_length,
void *data, size_t length,
const ucp_am_recv_param_t *param)
{
- if (length != TEST_STRING_LEN) {
+ if (length != test_string_length) {
fprintf(stderr, "received wrong data length %ld (expected %ld)",
- length, TEST_STRING_LEN);
- goto out;
+ length, test_string_length);
+ return UCS_OK;
}
if ((header != NULL) || (header_length != 0)) {
fprintf(stderr, "received unexpected header, length %ld", header_length);
}
+ am_data_desc.complete = 1;
+
if (param->recv_attr & UCP_AM_RECV_ATTR_FLAG_RNDV) {
/* Rendezvous request arrived, data contains an internal UCX descriptor,
* which has to be passed to ucp_am_recv_data_nbx function to confirm
@@ -384,10 +414,8 @@ ucs_status_t ucp_am_data_cb(void *arg, const void *header, size_t header_length,
* immediately
*/
am_data_desc.is_rndv = 0;
- memcpy(am_data_desc.recv_buf, data, length);
+ mem_type_memcpy(am_data_desc.recv_buf, data, length);
-out:
- am_data_desc.complete = 1;
return UCS_OK;
}
@@ -400,21 +428,31 @@ ucs_status_t ucp_am_data_cb(void *arg, const void *header, size_t header_length,
static int send_recv_am(ucp_worker_h ucp_worker, ucp_ep_h ep, int is_server,
int current_iter)
{
- char recv_message[TEST_STRING_LEN] = "";
test_req_t *request;
ucp_request_param_t params;
+ size_t msg_length;
+ void *msg;
test_req_t ctx;
+ int ret;
+
+ msg_length = test_string_length;
+ msg = mem_type_malloc(msg_length);
+ CHKERR_ACTION(msg == NULL, "allocate memory\n", return -1;);
+ mem_type_memset(msg, 0, msg_length);
- am_data_desc.recv_buf = recv_message;
- ctx.complete = 0;
- params.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK |
- UCP_OP_ATTR_FIELD_USER_DATA;
- params.user_data = &ctx;
+ ctx.complete = 0;
+ params.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK |
+ UCP_OP_ATTR_FIELD_USER_DATA;
+ params.user_data = &ctx;
if (is_server) {
+ am_data_desc.recv_buf = msg;
+
+ /* wait until the AM callback has been called */
while (!am_data_desc.complete) {
ucp_worker_progress(ucp_worker);
}
+
am_data_desc.complete = 0;
if (am_data_desc.is_rndv) {
@@ -425,8 +463,7 @@ static int send_recv_am(ucp_worker_h ucp_worker, ucp_ep_h ep, int is_server,
params.cb.recv_am = am_recv_cb,
request = ucp_am_recv_data_nbx(ucp_worker,
am_data_desc.desc,
- &recv_message,
- TEST_STRING_LEN,
+ msg, msg_length,
&params);
} else {
/* Data has arrived eagerly and is ready for use, no need to
@@ -434,14 +471,16 @@ static int send_recv_am(ucp_worker_h ucp_worker, ucp_ep_h ep, int is_server,
request = NULL;
}
} else {
+ ret = generate_test_string(msg, msg_length);
+ CHKERR_ACTION(ret < 0, "generate test string", return -1;);
+
/* Client sends a message to the server using the AM API */
params.cb.send = (ucp_send_nbx_callback_t)send_cb,
- request = ucp_am_send_nbx(ep, TEST_AM_ID, NULL, 0ul,
- test_message, TEST_STRING_LEN,
- &params);
+ request = ucp_am_send_nbx(ep, TEST_AM_ID, NULL, 0ul, msg,
+ msg_length, &params);
}
- return request_finalize(ucp_worker, request, &ctx, is_server, recv_message,
+ return request_finalize(ucp_worker, request, &ctx, is_server, msg,
current_iter);
}
@@ -480,24 +519,25 @@ static void usage()
fprintf(stderr, "Usage: ucp_client_server [parameters]\n");
fprintf(stderr, "UCP client-server example utility\n");
fprintf(stderr, "\nParameters are:\n");
- fprintf(stderr, " -a Set IP address of the server "
+ fprintf(stderr, " -a Set IP address of the server "
"(required for client and should not be specified "
"for the server)\n");
- fprintf(stderr, " -l Set IP address where server listens "
+ fprintf(stderr, " -l Set IP address where server listens "
"(If not specified, server uses INADDR_ANY; "
"Irrelevant at client)\n");
- fprintf(stderr, " -p Port number to listen/connect to (default = %d). "
+ fprintf(stderr, " -p Port number to listen/connect to (default = %d). "
"0 on the server side means select a random port and print it\n",
DEFAULT_PORT);
- fprintf(stderr, " -c Communication type for the client and server. "
- " Valid values are:\n"
- " 'stream' : Stream API\n"
- " 'tag' : Tag API\n"
- " 'am' : AM API\n"
- " If not specified, %s API will be used.\n", COMM_TYPE_DEFAULT);
- fprintf(stderr, " -i Number of iterations to run. Client and server must "
+ fprintf(stderr, " -c Communication type for the client and server. "
+ " Valid values are:\n"
+ " 'stream' : Stream API\n"
+ " 'tag' : Tag API\n"
+ " 'am' : AM API\n"
+ " If not specified, %s API will be used.\n", COMM_TYPE_DEFAULT);
+ fprintf(stderr, " -i Number of iterations to run. Client and server must "
"have the same value. (default = %d).\n",
num_iterations);
+ print_common_help();
fprintf(stderr, "\n");
}
@@ -510,9 +550,7 @@ static int parse_cmd(int argc, char *const argv[], char **server_addr,
int c = 0;
int port;
- opterr = 0;
-
- while ((c = getopt(argc, argv, "a:l:p:c:i:")) != -1) {
+ while ((c = getopt(argc, argv, "a:l:p:c:i:s:m:h")) != -1) {
switch (c) {
case 'a':
*server_addr = optarg;
@@ -523,10 +561,7 @@ static int parse_cmd(int argc, char *const argv[], char **server_addr,
} else if (!strcasecmp(optarg, "tag")) {
*send_recv_type = CLIENT_SERVER_SEND_RECV_TAG;
} else if (!strcasecmp(optarg, "am")) {
- /* TODO: uncomment below when AM API is fully supported.
- * *send_recv_type = CLIENT_SERVER_SEND_RECV_AM; */
- fprintf(stderr, "AM API is not fully supported yet\n");
- return -1;
+ *send_recv_type = CLIENT_SERVER_SEND_RECV_AM;
} else {
fprintf(stderr, "Wrong communication type %s. "
"Using %s as default\n", optarg, COMM_TYPE_DEFAULT);
@@ -547,6 +582,20 @@ static int parse_cmd(int argc, char *const argv[], char **server_addr,
case 'i':
num_iterations = atoi(optarg);
break;
+ case 's':
+ test_string_length = atol(optarg);
+ if (test_string_length < 0) {
+ fprintf(stderr, "Wrong string size %ld\n", test_string_length);
+ return UCS_ERR_UNSUPPORTED;
+ }
+ break;
+ case 'm':
+ test_mem_type = parse_mem_type(optarg);
+ if (test_mem_type == UCS_MEMORY_TYPE_LAST) {
+ return UCS_ERR_UNSUPPORTED;
+ }
+ break;
+ case 'h':
default:
usage();
return -1;
diff --git a/examples/ucp_hello_world.c b/examples/ucp_hello_world.c
index 897de0c90a3..39b308f125a 100644
--- a/examples/ucp_hello_world.c
+++ b/examples/ucp_hello_world.c
@@ -47,7 +47,6 @@
#include
#include
#include /* getopt */
-#include /* isprint */
#include /* pthread_self */
#include /* errno */
#include
@@ -67,18 +66,25 @@ enum ucp_test_mode_t {
TEST_MODE_EVENTFD
} ucp_test_mode = TEST_MODE_PROBE;
+typedef enum {
+ FAILURE_MODE_NONE,
+ FAILURE_MODE_SEND, /* fail send operation on server */
+ FAILURE_MODE_RECV, /* fail receive operation on client */
+ FAILURE_MODE_KEEPALIVE /* fail without communication on client */
+} failure_mode_t;
+
static struct err_handling {
ucp_err_handling_mode_t ucp_err_mode;
- int failure;
+ failure_mode_t failure_mode;
} err_handling_opt;
-static ucs_status_t client_status = UCS_OK;
-static uint16_t server_port = 13337;
-static long test_string_length = 16;
-static const ucp_tag_t tag = 0x1337a880u;
-static const ucp_tag_t tag_mask = UINT64_MAX;
-static const char *addr_msg_str = "UCX address message";
-static const char *data_msg_str = "UCX data message";
+static ucs_status_t ep_status = UCS_OK;
+static uint16_t server_port = 13337;
+static long test_string_length = 16;
+static const ucp_tag_t tag = 0x1337a880u;
+static const ucp_tag_t tag_mask = UINT64_MAX;
+static const char *addr_msg_str = "UCX address message";
+static const char *data_msg_str = "UCX data message";
static ucp_address_t *local_addr;
static ucp_address_t *peer_addr;
@@ -220,9 +226,14 @@ static int run_ucx_client(ucp_worker_h ucp_worker)
/* Send client UCX address to server */
ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS |
- UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE;
+ UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE |
+ UCP_EP_PARAM_FIELD_ERR_HANDLER |
+ UCP_EP_PARAM_FIELD_USER_DATA;
ep_params.address = peer_addr;
ep_params.err_mode = err_handling_opt.ucp_err_mode;
+ ep_params.err_handler.cb = failure_handler;
+ ep_params.err_handler.arg = NULL;
+ ep_params.user_data = &ep_status;
status = ucp_ep_create(ucp_worker, &ep_params, &server_ep);
CHKERR_JUMP(status != UCS_OK, "ucp_ep_create\n", err);
@@ -250,14 +261,14 @@ static int run_ucx_client(ucp_worker_h ucp_worker)
free(msg);
- if (err_handling_opt.failure) {
- fprintf(stderr, "Emulating unexpected failure on client side\n");
+ if (err_handling_opt.failure_mode == FAILURE_MODE_RECV) {
+ fprintf(stderr, "Emulating failure before receive operation on client side\n");
raise(SIGKILL);
}
/* Receive test string from server */
for (;;) {
-
+ CHKERR_JUMP(ep_status != UCS_OK, "receive data: EP disconnected\n", err_ep);
/* Probing incoming events in non-block mode */
msg_tag = ucp_tag_probe_nb(ucp_worker, tag, tag_mask, 1, &info_tag);
if (msg_tag != NULL) {
@@ -281,6 +292,13 @@ static int run_ucx_client(ucp_worker_h ucp_worker)
CHKERR_JUMP(status != UCS_OK, "test_poll_wait\n", err_ep);
}
}
+
+ if (err_handling_opt.failure_mode == FAILURE_MODE_KEEPALIVE) {
+ fprintf(stderr, "Emulating unexpected failure after receive completion "
+ "on client side, server should detect error by "
+ "keepalive mechanism\n");
+ raise(SIGKILL);
+ }
msg = mem_type_malloc(info_tag.length);
CHKERR_JUMP(msg == NULL, "allocate memory\n", err_ep);
@@ -295,25 +313,23 @@ static int run_ucx_client(ucp_worker_h ucp_worker)
}
str = calloc(1, test_string_length);
- if (str != NULL) {
- mem_type_memcpy(str, msg + 1, test_string_length);
- printf("\n\n----- UCP TEST SUCCESS ----\n\n");
- printf("%s", str);
- printf("\n\n---------------------------\n\n");
- free(str);
- } else {
+ if (str == NULL) {
fprintf(stderr, "Memory allocation failed\n");
- mem_type_free(msg);
- goto err_ep;
+ ret = -1;
+ goto err_msg;
}
- mem_type_free(msg);
-
+ mem_type_memcpy(str, msg + 1, test_string_length);
+ printf("\n\n----- UCP TEST SUCCESS ----\n\n");
+ printf("%s", str);
+ printf("\n\n---------------------------\n\n");
+ free(str);
ret = 0;
+err_msg:
+ mem_type_free(msg);
err_ep:
ucp_ep_destroy(server_ep);
-
err:
return ret;
}
@@ -378,6 +394,14 @@ static int run_ucx_server(ucp_worker_h ucp_worker)
goto err;
}
+ if (err_handling_opt.failure_mode == FAILURE_MODE_SEND) {
+ fprintf(stderr, "Emulating unexpected failure on server side, client "
+ "should detect error by keepalive mechanism\n");
+ free(msg);
+ raise(SIGKILL);
+ exit(1);
+ }
+
peer_addr_len = msg->data_len;
peer_addr = malloc(peer_addr_len);
if (peer_addr == NULL) {
@@ -400,13 +424,13 @@ static int run_ucx_server(ucp_worker_h ucp_worker)
ep_params.err_mode = err_handling_opt.ucp_err_mode;
ep_params.err_handler.cb = failure_handler;
ep_params.err_handler.arg = NULL;
- ep_params.user_data = &client_status;
+ ep_params.user_data = &ep_status;
status = ucp_ep_create(ucp_worker, &ep_params, &client_ep);
/* If peer failure testing was requested, it could be possible that UCP EP
* couldn't be created; in this case set `ret = 0` to report success */
- CHKERR_ACTION(status != UCS_OK, "ucp_ep_create\n",
- ret = (err_handling_opt.failure) ? 0 : -1; goto err);
+ ret = (err_handling_opt.failure_mode != FAILURE_MODE_NONE) ? 0 : -1;
+ CHKERR_ACTION(status != UCS_OK, "ucp_ep_create\n", goto err);
msg_len = sizeof(*msg) + test_string_length;
msg = mem_type_malloc(msg_len);
@@ -417,12 +441,14 @@ static int run_ucx_server(ucp_worker_h ucp_worker)
ret = generate_test_string((char *)(msg + 1), test_string_length);
CHKERR_JUMP(ret < 0, "generate test string", err_free_mem_type_msg);
- if (err_handling_opt.failure) {
- /* Sleep for small amount of time to ensure that server was killed
+ if (err_handling_opt.failure_mode == FAILURE_MODE_RECV) {
+ /* Sleep for small amount of time to ensure that client was killed
* and peer failure handling is covered */
sleep(5);
}
+ ucp_worker_progress(ucp_worker);
+
send_param.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK |
UCP_OP_ATTR_FIELD_USER_DATA |
UCP_OP_ATTR_FIELD_MEMORY_TYPE;
@@ -434,7 +460,7 @@ static int run_ucx_server(ucp_worker_h ucp_worker)
status = ucx_wait(ucp_worker, request, "send",
data_msg_str);
if (status != UCS_OK) {
- if (!err_handling_opt.failure) {
+ if (err_handling_opt.failure_mode == FAILURE_MODE_NONE) {
ret = -1;
} else {
/* If peer failure testing was requested, set `ret = 0` to report
@@ -442,13 +468,20 @@ static int run_ucx_server(ucp_worker_h ucp_worker)
ret = 0;
/* Make sure that failure_handler was called */
- while (client_status == UCS_OK) {
+ while (ep_status == UCS_OK) {
ucp_worker_progress(ucp_worker);
}
}
goto err_free_mem_type_msg;
}
+ if (err_handling_opt.failure_mode == FAILURE_MODE_KEEPALIVE) {
+ fprintf(stderr, "Waiting for client is terminated\n");
+ while (ep_status == UCS_OK) {
+ ucp_worker_progress(ucp_worker);
+ }
+ }
+
status = flush_ep(ucp_worker, client_ep);
printf("flush_ep completed with status %d (%s)\n",
status, ucs_status_string(status));
@@ -564,7 +597,7 @@ int main(int argc, char **argv)
ret = run_test(client_target_name, ucp_worker);
- if (!ret && !err_handling_opt.failure) {
+ if (!ret && (err_handling_opt.failure_mode == FAILURE_MODE_NONE)) {
/* Make sure remote is disconnected before destroying local worker */
ret = barrier(oob_sock);
}
@@ -586,15 +619,40 @@ int main(int argc, char **argv)
return ret;
}
+/*
+ * Print the command-line help of the ucp_hello_world example to stderr:
+ * the test-mode switches (-w, -f, -b), the server name option (-n), the
+ * failure emulation option (-e) with its send / recv / keepalive modes,
+ * followed by whatever print_common_help() emits and a final empty line.
+ */
+static void print_usage()
+{
+ fprintf(stderr, "Usage: ucp_hello_world [parameters]\n");
+ fprintf(stderr, "UCP hello world client/server example utility\n");
+ fprintf(stderr, "\nParameters are:\n");
+ fprintf(stderr, " -w Select test mode \"wait\" to test "
+ "ucp_worker_wait function\n");
+ fprintf(stderr, " -f Select test mode \"event fd\" to test "
+ "ucp_worker_get_efd function with later poll\n");
+ fprintf(stderr, " -b Select test mode \"busy polling\" to test "
+ "ucp_tag_probe_nb and ucp_worker_progress (default)\n");
+ fprintf(stderr, " -n Set node name or IP address "
+ "of the server (required for client and should be ignored "
+ "for server)\n");
+ fprintf(stderr, " -e Emulate unexpected failure and handle an "
+ "error with enabled UCP_ERR_HANDLING_MODE_PEER\n");
+ fprintf(stderr, " send - send failure on server side "
+ "before send initiated\n");
+ fprintf(stderr, " recv - receive failure on client side "
+ "before receive completed\n");
+ fprintf(stderr, " keepalive - keepalive failure on client side "
+ "after communication completed\n");
+ /* options shared with the other examples are printed by the common helper */
+ print_common_help();
+ fprintf(stderr, "\n");
+}
+
ucs_status_t parse_cmd(int argc, char * const argv[], char **server_name)
{
int c = 0, idx = 0;
- opterr = 0;
- err_handling_opt.ucp_err_mode = UCP_ERR_HANDLING_MODE_NONE;
- err_handling_opt.failure = 0;
+ err_handling_opt.ucp_err_mode = UCP_ERR_HANDLING_MODE_NONE;
+ err_handling_opt.failure_mode = FAILURE_MODE_NONE;
- while ((c = getopt(argc, argv, "wfben:p:s:m:h")) != -1) {
+ while ((c = getopt(argc, argv, "wfbe:n:p:s:m:h")) != -1) {
switch (c) {
case 'w':
ucp_test_mode = TEST_MODE_WAIT;
@@ -606,8 +664,17 @@ ucs_status_t parse_cmd(int argc, char * const argv[], char **server_name)
ucp_test_mode = TEST_MODE_PROBE;
break;
case 'e':
- err_handling_opt.ucp_err_mode = UCP_ERR_HANDLING_MODE_PEER;
- err_handling_opt.failure = 1;
+ err_handling_opt.ucp_err_mode = UCP_ERR_HANDLING_MODE_PEER;
+ if (!strcmp(optarg, "recv")) {
+ err_handling_opt.failure_mode = FAILURE_MODE_RECV;
+ } else if (!strcmp(optarg, "send")) {
+ err_handling_opt.failure_mode = FAILURE_MODE_SEND;
+ } else if (!strcmp(optarg, "keepalive")) {
+ err_handling_opt.failure_mode = FAILURE_MODE_KEEPALIVE;
+ } else {
+ print_usage();
+ return UCS_ERR_UNSUPPORTED;
+ }
break;
case 'n':
*server_name = optarg;
@@ -621,7 +688,7 @@ ucs_status_t parse_cmd(int argc, char * const argv[], char **server_name)
break;
case 's':
test_string_length = atol(optarg);
- if (test_string_length <= 0) {
+ if (test_string_length < 0) {
fprintf(stderr, "Wrong string size %ld\n", test_string_length);
return UCS_ERR_UNSUPPORTED;
}
@@ -632,36 +699,14 @@ ucs_status_t parse_cmd(int argc, char * const argv[], char **server_name)
return UCS_ERR_UNSUPPORTED;
}
break;
- case '?':
- if (optopt == 's') {
- fprintf(stderr, "Option -%c requires an argument.\n", optopt);
- } else if (isprint (optopt)) {
- fprintf(stderr, "Unknown option `-%c'.\n", optopt);
- } else {
- fprintf(stderr, "Unknown option character `\\x%x'.\n", optopt);
- }
- /* Fall through */
case 'h':
default:
- fprintf(stderr, "Usage: ucp_hello_world [parameters]\n");
- fprintf(stderr, "UCP hello world client/server example utility\n");
- fprintf(stderr, "\nParameters are:\n");
- fprintf(stderr, " -w Select test mode \"wait\" to test "
- "ucp_worker_wait function\n");
- fprintf(stderr, " -f Select test mode \"event fd\" to test "
- "ucp_worker_get_efd function with later poll\n");
- fprintf(stderr, " -b Select test mode \"busy polling\" to test "
- "ucp_tag_probe_nb and ucp_worker_progress (default)\n");
- fprintf(stderr, " -e Emulate unexpected failure on server side"
- "and handle an error on client side with enabled "
- "UCP_ERR_HANDLING_MODE_PEER\n");
- print_common_help();
- fprintf(stderr, "\n");
+ print_usage();
return UCS_ERR_UNSUPPORTED;
}
}
- fprintf(stderr, "INFO: UCP_HELLO_WORLD mode = %d server = %s port = %d\n",
- ucp_test_mode, *server_name, server_port);
+ fprintf(stderr, "INFO: UCP_HELLO_WORLD mode = %d server = %s port = %d, pid = %d\n",
+ ucp_test_mode, *server_name, server_port, getpid());
for (idx = optind; idx < argc; idx++) {
fprintf(stderr, "WARNING: Non-option argument %s\n", argv[idx]);
diff --git a/examples/uct_hello_world.c b/examples/uct_hello_world.c
index ceaff6380b3..66a51d81f39 100644
--- a/examples/uct_hello_world.c
+++ b/examples/uct_hello_world.c
@@ -10,7 +10,8 @@
#include
#include
-#include
+#include
+
typedef enum {
FUNC_AM_SHORT,
@@ -206,7 +207,8 @@ static void print_strings(const char *label, const char *local_str,
const char *remote_str, size_t length)
{
fprintf(stdout, "\n\n----- UCT TEST SUCCESS ----\n\n");
- fprintf(stdout, "[%s] %s sent %s", label, local_str, remote_str);
+ fprintf(stdout, "[%s] %s sent %s (%zu bytes)", label, local_str,
+ (length != 0) ? remote_str : "", length);
fprintf(stdout, "\n\n---------------------------\n");
fflush(stdout);
}
@@ -424,8 +426,11 @@ int print_err_usage()
fprintf(stderr, func_template, 'i', func_am_t_str(FUNC_AM_SHORT), " (default)");
fprintf(stderr, func_template, 'b', func_am_t_str(FUNC_AM_BCOPY), "");
fprintf(stderr, func_template, 'z', func_am_t_str(FUNC_AM_ZCOPY), "");
- fprintf(stderr, " -d Select device name\n");
- fprintf(stderr, " -t Select transport layer\n");
+ fprintf(stderr, " -d Select device name\n");
+ fprintf(stderr, " -t Select transport layer\n");
+ fprintf(stderr, " -n Set node name or IP address "
+ "of the server (required for client and should be ignored "
+ "for server)\n");
print_common_help();
fprintf(stderr, "\nExample:\n");
fprintf(stderr, " Server: uct_hello_world -d eth0 -t tcp\n");
@@ -446,7 +451,6 @@ int parse_cmd(int argc, char * const argv[], cmd_args_t *args)
args->func_am_type = FUNC_AM_SHORT;
args->test_strlen = 16;
- opterr = 0;
while ((c = getopt(argc, argv, "ibzd:t:n:p:s:m:h")) != -1) {
switch (c) {
case 'i':
@@ -477,7 +481,7 @@ int parse_cmd(int argc, char * const argv[], cmd_args_t *args)
break;
case 's':
args->test_strlen = atol(optarg);
- if (args->test_strlen <= 0) {
+ if (args->test_strlen < 0) {
fprintf(stderr, "Wrong string size %ld\n", args->test_strlen);
return UCS_ERR_UNSUPPORTED;
}
@@ -488,14 +492,6 @@ int parse_cmd(int argc, char * const argv[], cmd_args_t *args)
return UCS_ERR_UNSUPPORTED;
}
break;
- case '?':
- if (optopt == 's') {
- fprintf(stderr, "Option -%c requires an argument.\n", optopt);
- } else if (isprint (optopt)) {
- fprintf(stderr, "Unknown option `-%c'.\n", optopt);
- } else {
- fprintf(stderr, "Unknown option character `\\x%x'.\n", optopt);
- }
case 'h':
default:
return print_err_usage();
diff --git a/src/tools/info/proto_info.c b/src/tools/info/proto_info.c
index 4c74cd0dc8c..c58577e029b 100644
--- a/src/tools/info/proto_info.c
+++ b/src/tools/info/proto_info.c
@@ -13,6 +13,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -93,27 +94,206 @@ static void print_resource_usage(const resource_usage_t *usage_before,
printf("#\n");
}
-void print_ucp_info(int print_opts, ucs_config_print_flags_t print_flags,
- uint64_t ctx_features, const ucp_ep_params_t *base_ep_params,
- size_t estimated_num_eps, size_t estimated_num_ppn,
- unsigned dev_type_bitmap, const char *mem_size)
+/*
+ * Listener accept handler: stores the accepted endpoint handle into the
+ * ucp_ep_h variable that 'arg' points to.
+ */
+static void listener_accept_callback(ucp_ep_h ep, void *arg)
{
- ucp_config_t *config;
+ *(ucp_ep_h*)arg = ep;
+}
+
+/*
+ * Fill an IPv4 socket address from a dotted-decimal string and a port.
+ *
+ * The structure is zeroed first, then the family is set to AF_INET, the
+ * address is converted with inet_addr() and the port is stored in network
+ * byte order.
+ *
+ * NOTE(review): the inet_addr() result is stored without being validated, so
+ * a malformed addr_str is not detected here.
+ */
+static void
+set_saddr(const char *addr_str, uint16_t port, struct sockaddr_in *saddr)
+{
+ memset(saddr, 0, sizeof(*saddr));
+ saddr->sin_family = AF_INET;
+ saddr->sin_addr.s_addr = inet_addr(addr_str);
+ saddr->sin_port = htons(port);
+}
+
+/*
+ * Wait for a non-blocking UCP operation to finish and return its status.
+ *
+ * - NULL status_ptr: returns UCS_OK.
+ * - request pointer: progresses the worker until ucp_request_test() no longer
+ *   returns UCS_INPROGRESS, releases the request and returns the last status.
+ * - otherwise: returns the status extracted with UCS_PTR_STATUS().
+ */
+static ucs_status_t
+wait_completion(ucp_worker_h worker, ucs_status_ptr_t status_ptr)
+{
+ ucs_status_t status;
+
+ if (status_ptr == NULL) {
+ status = UCS_OK;
+ } else if (UCS_PTR_IS_PTR(status_ptr)) {
+ /* busy-poll: keep progressing the worker while the request is pending */
+ do {
+ ucp_worker_progress(worker);
+ status = ucp_request_test(status_ptr, NULL);
+ } while (status == UCS_INPROGRESS);
+ ucp_request_release(status_ptr);
+ } else {
+ status = UCS_PTR_STATUS(status_ptr);
+ }
+
+ return status;
+}
+
+/*
+ * Close an endpoint with the given close flags and wait until the close
+ * operation has finished.
+ *
+ * worker   - worker that is progressed while waiting for completion
+ * ep       - endpoint to close
+ * flags    - flags passed to ucp_ep_close_nbx() through the request params
+ * ep_type  - human-readable endpoint name, used only in the failure message
+ *
+ * The function returns void, so a failed close is reported on stdout instead
+ * of being silently dropped.
+ */
+static void
+ep_close(ucp_worker_h worker, ucp_ep_h ep, ucp_ep_close_flags_t flags,
+         const char *ep_type)
+{
+    ucp_request_param_t request_param;
+    ucs_status_ptr_t status_ptr;
+    ucs_status_t status;
+
+    request_param.op_attr_mask = UCP_OP_ATTR_FIELD_FLAGS;
+    request_param.flags        = flags;
+
+    status_ptr = ucp_ep_close_nbx(ep, &request_param);
+    status     = wait_completion(worker, status_ptr);
+    if (status != UCS_OK) {
+        printf("<Failed to close %s endpoint>\n", ep_type);
+    }
+}
+
+/*
+ * Create a UCP listener on the worker and report the port it is bound to.
+ *
+ * The listener is created on address 0.0.0.0 with port 0 (presumably so the
+ * system picks a free port - confirm) and listener_accept_callback is
+ * registered as the accept handler with accept_cb_arg as its argument. The
+ * bound socket address is then queried from the listener and its port is
+ * written to *listen_port_p.
+ *
+ * On success the listener handle is stored in *listener_p and UCS_OK is
+ * returned. If the query or the port extraction fails, the listener is
+ * destroyed and the failing status is returned; *listener_p is not written.
+ */
+static ucs_status_t
+create_listener(ucp_worker_h worker, ucp_listener_h *listener_p,
+ uint16_t *listen_port_p, void *accept_cb_arg)
+{
+ ucp_listener_h listener;
+ struct sockaddr_in listen_saddr;
+ ucp_listener_params_t listen_params;
+ ucp_listener_attr_t listen_attr;
+ ucs_status_t status;
+
+ set_saddr("0.0.0.0", 0, &listen_saddr);
+
+ listen_params.field_mask = UCP_LISTENER_PARAM_FIELD_SOCK_ADDR |
+ UCP_LISTENER_PARAM_FIELD_ACCEPT_HANDLER;
+ listen_params.sockaddr.addr = (const struct sockaddr*)&listen_saddr;
+ listen_params.sockaddr.addrlen = sizeof(listen_saddr);
+ listen_params.accept_handler.cb = listener_accept_callback;
+ listen_params.accept_handler.arg = accept_cb_arg;
+
+ status = ucp_listener_create(worker, &listen_params, &listener);
+ if (status != UCS_OK) {
+ printf("\n");
+ goto out;
+ }
+
+ /* ask the listener which socket address it ended up with */
+ listen_attr.field_mask = UCP_LISTENER_ATTR_FIELD_SOCKADDR;
+
+ status = ucp_listener_query(listener, &listen_attr);
+ if (status != UCS_OK) {
+ printf("\n");
+ goto out_destroy_listener;
+ }
+
+ status = ucs_sockaddr_get_port((struct sockaddr*)&listen_attr.sockaddr,
+ listen_port_p);
+ if (status != UCS_OK) {
+ printf("\n");
+ goto out_destroy_listener;
+ }
+
+ *listener_p = listener;
+out:
+ return status;
+
+ /* error path only: placed after the return and jumps back to 'out' */
+out_destroy_listener:
+ ucp_listener_destroy(listener);
+ goto out;
+}
+
+/*
+ * Create a UCP endpoint on the worker, flush it and print its information
+ * to stdout.
+ *
+ * When ip_addr is not NULL, a listener is created on the worker and the
+ * endpoint connects to it through that IPv4 address (client/server flow);
+ * the endpoint accepted by the listener, if any, is force-closed afterwards.
+ * When ip_addr is NULL, the endpoint is connected to the worker's own
+ * address.
+ *
+ * worker          - worker on which the endpoint(s) are created
+ * base_ep_params  - endpoint parameters used as the starting point
+ * ip_addr         - IPv4 address string for the client/server flow, or NULL
+ *
+ * Returns UCS_OK on success, otherwise the status of the step that failed.
+ */
+ucs_status_t
+print_ucp_ep_info(ucp_worker_h worker, const ucp_ep_params_t *base_ep_params,
+                  const char *ip_addr)
+{
+    ucp_listener_h listener    = NULL;
+    ucp_ep_h server_ep         = NULL;
+    ucp_address_t *worker_addr = NULL;
+    ucp_ep_params_t ep_params  = *base_ep_params;
     ucs_status_t status;
     ucs_status_ptr_t status_ptr;
+    size_t worker_addr_length;
+    struct sockaddr_in connect_saddr;
+    uint16_t listen_port;
+    ucp_ep_h ep;
+    char ep_name[64];
+    ucp_request_param_t request_param;
+
+    if (ip_addr != NULL) {
+        status = create_listener(worker, &listener, &listen_port, &server_ep);
+        if (status != UCS_OK) {
+            return status;
+        }
+
+        ucs_strncpy_zero(ep_name, "client", sizeof(ep_name));
+
+        set_saddr(ip_addr, listen_port, &connect_saddr);
+
+        ep_params.field_mask      |= UCP_EP_PARAM_FIELD_FLAGS |
+                                     UCP_EP_PARAM_FIELD_SOCK_ADDR;
+        ep_params.flags            = UCP_EP_PARAMS_FLAGS_CLIENT_SERVER;
+        ep_params.sockaddr.addr    = (struct sockaddr*)&connect_saddr;
+        ep_params.sockaddr.addrlen = sizeof(connect_saddr);
+    } else {
+        status = ucp_worker_get_address(worker, &worker_addr,
+                                        &worker_addr_length);
+        if (status != UCS_OK) {
+            printf("\n");
+            return status;
+        }
+
+        ucs_strncpy_zero(ep_name, "connected to UCP worker", sizeof(ep_name));
+
+        ep_params.field_mask |= UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
+        ep_params.address     = worker_addr;
+    }
+
+    status = ucp_ep_create(worker, &ep_params, &ep);
+    if (status != UCS_OK) {
+        printf("\n");
+        goto out;
+    }
+
+    request_param.op_attr_mask = 0;
+    /* do EP flush to make sure that fully completed to a peer and final
+     * configuration is applied */
+    status_ptr = ucp_ep_flush_nbx(ep, &request_param);
+    status     = wait_completion(worker, status_ptr);
+    if (status != UCS_OK) {
+        printf("\n");
+        goto out_close_eps;
+    }
+
+    ucp_ep_print_info(ep, stdout);
+
+out_close_eps:
+    ep_close(worker, ep, 0, ep_name);
+
+    if (server_ep != NULL) {
+        ucs_assert(ip_addr != NULL); /* server EP is created only for sockaddr
+                                      * connection flow */
+        ep_close(worker, server_ep, UCP_EP_CLOSE_FLAG_FORCE, "server");
+    }
+
+out:
+    if (listener != NULL) {
+        ucp_listener_destroy(listener);
+    }
+
+    /* worker_addr is set only in the worker-address flow; release it only
+     * when it was actually obtained */
+    if (worker_addr != NULL) {
+        ucp_worker_release_address(worker, worker_addr);
+    }
+
+    return status;
+}
+
+ucs_status_t
+print_ucp_info(int print_opts, ucs_config_print_flags_t print_flags,
+ uint64_t ctx_features, const ucp_ep_params_t *base_ep_params,
+ size_t estimated_num_eps, size_t estimated_num_ppn,
+ unsigned dev_type_bitmap, const char *mem_size,
+ const char *ip_addr)
+{
+ ucp_config_t *config;
+ ucs_status_t status;
ucp_context_h context;
ucp_worker_h worker;
ucp_params_t params;
ucp_worker_params_t worker_params;
- ucp_ep_params_t ep_params;
- ucp_address_t *address;
- size_t address_length;
resource_usage_t usage;
- ucp_ep_h ep;
status = ucp_config_read(NULL, NULL, &config);
if (status != UCS_OK) {
- return;
+ goto out;
}
memset(&params, 0, sizeof(params));
@@ -172,40 +352,15 @@ void print_ucp_info(int print_opts, ucs_config_print_flags_t print_flags,
}
if (print_opts & PRINT_UCP_EP) {
- status = ucp_worker_get_address(worker, &address, &address_length);
- if (status != UCS_OK) {
- printf("\n");
- goto out_destroy_worker;
- }
-
- ep_params = *base_ep_params;
-
- ep_params.field_mask |= UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
- ep_params.address = address;
-
- status = ucp_ep_create(worker, &ep_params, &ep);
- ucp_worker_release_address(worker, address);
- if (status != UCS_OK) {
- printf("\n");
- goto out_destroy_worker;
- }
-
- ucp_ep_print_info(ep, stdout);
-
- status_ptr = ucp_disconnect_nb(ep);
- if (UCS_PTR_IS_PTR(status_ptr)) {
- do {
- ucp_worker_progress(worker);
- status = ucp_request_test(status_ptr, NULL);
- } while (status == UCS_INPROGRESS);
- ucp_request_release(status_ptr);
- }
+ status = print_ucp_ep_info(worker, base_ep_params, ip_addr);
}
-out_destroy_worker:
ucp_worker_destroy(worker);
-out_cleanup_context:
+
+ out_cleanup_context:
ucp_cleanup(context);
out_release_config:
ucp_config_release(config);
+out:
+ return status;
}
diff --git a/src/tools/info/sys_info.c b/src/tools/info/sys_info.c
index 88d31767046..7c355a264c2 100644
--- a/src/tools/info/sys_info.c
+++ b/src/tools/info/sys_info.c
@@ -1,5 +1,6 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
+* Copyright (C) Shanghai Zhaoxin Semiconductor Co., Ltd. 2020. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
@@ -11,6 +12,7 @@
#include "ucx_info.h"
#include
+#include
#include
#include
#include
@@ -19,17 +21,20 @@
static const char* cpu_model_names[] = {
- [UCS_CPU_MODEL_UNKNOWN] = "unknown",
- [UCS_CPU_MODEL_INTEL_IVYBRIDGE] = "IvyBridge",
- [UCS_CPU_MODEL_INTEL_SANDYBRIDGE] = "SandyBridge",
- [UCS_CPU_MODEL_INTEL_NEHALEM] = "Nehalem",
- [UCS_CPU_MODEL_INTEL_WESTMERE] = "Westmere",
- [UCS_CPU_MODEL_INTEL_HASWELL] = "Haswell",
- [UCS_CPU_MODEL_INTEL_BROADWELL] = "Broadwell",
- [UCS_CPU_MODEL_INTEL_SKYLAKE] = "Skylake",
- [UCS_CPU_MODEL_ARM_AARCH64] = "ARM 64-bit",
- [UCS_CPU_MODEL_AMD_NAPLES] = "Naples",
- [UCS_CPU_MODEL_AMD_ROME] = "Rome"
+ [UCS_CPU_MODEL_UNKNOWN] = "unknown",
+ [UCS_CPU_MODEL_INTEL_IVYBRIDGE] = "IvyBridge",
+ [UCS_CPU_MODEL_INTEL_SANDYBRIDGE] = "SandyBridge",
+ [UCS_CPU_MODEL_INTEL_NEHALEM] = "Nehalem",
+ [UCS_CPU_MODEL_INTEL_WESTMERE] = "Westmere",
+ [UCS_CPU_MODEL_INTEL_HASWELL] = "Haswell",
+ [UCS_CPU_MODEL_INTEL_BROADWELL] = "Broadwell",
+ [UCS_CPU_MODEL_INTEL_SKYLAKE] = "Skylake",
+ [UCS_CPU_MODEL_ARM_AARCH64] = "ARM 64-bit",
+ [UCS_CPU_MODEL_AMD_NAPLES] = "Naples",
+ [UCS_CPU_MODEL_AMD_ROME] = "Rome",
+ [UCS_CPU_MODEL_ZHAOXIN_ZHANGJIANG] = "Zhangjiang",
+ [UCS_CPU_MODEL_ZHAOXIN_WUDAOKOU] = "Wudaokou",
+ [UCS_CPU_MODEL_ZHAOXIN_LUJIAZUI] = "Lujiazui"
};
static const char* cpu_vendor_names[] = {
@@ -38,7 +43,8 @@ static const char* cpu_vendor_names[] = {
[UCS_CPU_VENDOR_AMD] = "AMD",
[UCS_CPU_VENDOR_GENERIC_ARM] = "Generic ARM",
[UCS_CPU_VENDOR_GENERIC_PPC] = "Generic PPC",
- [UCS_CPU_VENDOR_FUJITSU_ARM] = "Fujitsu ARM"
+ [UCS_CPU_VENDOR_FUJITSU_ARM] = "Fujitsu ARM",
+ [UCS_CPU_VENDOR_ZHAOXIN] = "Zhaoxin"
};
static double measure_memcpy_bandwidth(size_t size)
diff --git a/src/tools/info/tl_info.c b/src/tools/info/tl_info.c
index f0211abce8d..112ee2de7e4 100644
--- a/src/tools/info/tl_info.c
+++ b/src/tools/info/tl_info.c
@@ -118,7 +118,7 @@ static const char *size_limit_to_str(size_t min_size, size_t max_size)
static void print_iface_info(uct_worker_h worker, uct_md_h md,
uct_tl_resource_desc_t *resource)
{
- char buf[200] = {0};
+ char buf[256] = {0};
uct_iface_params_t iface_params = {
.field_mask = UCT_IFACE_PARAM_FIELD_OPEN_MODE |
UCT_IFACE_PARAM_FIELD_DEVICE |
@@ -291,12 +291,14 @@ static void print_iface_info(uct_worker_h worker, uct_md_h md,
}
buf[0] = '\0';
- if (iface_attr.cap.flags & (UCT_IFACE_FLAG_ERRHANDLE_SHORT_BUF |
- UCT_IFACE_FLAG_ERRHANDLE_BCOPY_BUF |
- UCT_IFACE_FLAG_ERRHANDLE_ZCOPY_BUF |
- UCT_IFACE_FLAG_ERRHANDLE_AM_ID |
- UCT_IFACE_FLAG_ERRHANDLE_REMOTE_MEM |
- UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE)) {
+ if (iface_attr.cap.flags & (UCT_IFACE_FLAG_ERRHANDLE_SHORT_BUF |
+ UCT_IFACE_FLAG_ERRHANDLE_BCOPY_BUF |
+ UCT_IFACE_FLAG_ERRHANDLE_ZCOPY_BUF |
+ UCT_IFACE_FLAG_ERRHANDLE_AM_ID |
+ UCT_IFACE_FLAG_ERRHANDLE_REMOTE_MEM |
+ UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE |
+ UCT_IFACE_FLAG_EP_CHECK |
+ UCT_IFACE_FLAG_EP_KEEPALIVE)) {
if (iface_attr.cap.flags & (UCT_IFACE_FLAG_ERRHANDLE_SHORT_BUF |
UCT_IFACE_FLAG_ERRHANDLE_BCOPY_BUF |
@@ -323,6 +325,12 @@ static void print_iface_info(uct_worker_h worker, uct_md_h md,
if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE) {
strncat(buf, " peer failure,", sizeof(buf) - strlen(buf) - 1);
}
+ if (iface_attr.cap.flags & UCT_IFACE_FLAG_EP_CHECK) {
+ strncat(buf, " ep_check,", sizeof(buf) - strlen(buf) - 1);
+ }
+ if (iface_attr.cap.flags & UCT_IFACE_FLAG_EP_KEEPALIVE) {
+ strncat(buf, " keepalive,", sizeof(buf) - strlen(buf) - 1);
+ }
buf[strlen(buf) - 1] = '\0';
} else {
strncat(buf, " none", sizeof(buf) - strlen(buf) - 1);
@@ -444,9 +452,6 @@ static void print_md_info(uct_component_h component,
if (md_attr.cap.flags & UCT_MD_FLAG_RKEY_PTR) {
printf("# rkey_ptr is supported\n");
}
- if (md_attr.cap.flags & UCT_MD_FLAG_SOCKADDR) {
- printf("# supports client-server connection establishment via sockaddr\n");
- }
}
if (num_resources == 0) {
diff --git a/src/tools/info/type_info.c b/src/tools/info/type_info.c
index f2b8abd75c1..b24154d21b5 100644
--- a/src/tools/info/type_info.c
+++ b/src/tools/info/type_info.c
@@ -17,6 +17,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -126,6 +127,7 @@ void print_type_info(const char * tl_name)
PRINT_SIZE(ucs_rcache_t);
PRINT_SIZE(ucs_rcache_region_t);
PRINT_SIZE(ucs_conn_match_elem_t);
+ PRINT_SIZE(ucs_memory_info_t);
printf("\nUCT:\n");
PRINT_SIZE(uct_am_handler_t);
diff --git a/src/tools/info/ucx_info.c b/src/tools/info/ucx_info.c
index 27be23d238d..0a231a23c5d 100644
--- a/src/tools/info/ucx_info.c
+++ b/src/tools/info/ucx_info.c
@@ -28,6 +28,7 @@ static void usage() {
printf(" -y Show type and structures information\n");
printf(" -s Show system information\n");
printf(" -c Show UCX configuration\n");
+ printf(" -C Comment-out default configuration values\n");
printf(" -a Show also hidden configuration\n");
printf(" -f Display fully decorated output\n");
printf("\nUCP information (-u is required):\n");
@@ -39,6 +40,7 @@ static void usage() {
printf(" 'a' : atomic operations\n");
printf(" 'r' : remote memory access\n");
printf(" 't' : tag matching \n");
+ printf(" 'm' : active messages \n");
printf(" 'w' : wakeup\n");
printf(" Modifiers to use in combination with above features:\n");
printf(" 'e' : error handling\n");
@@ -51,12 +53,16 @@ static void usage() {
printf(" 'shm' : shared memory devices only\n");
printf(" 'net' : network devices only\n");
printf(" 'self' : self transport only\n");
+ /* TODO: add IPv6 support */
+ printf(" -A Local IPv4 device address to use for creating\n"
+ " endpoint in client/server mode\n");
printf(" -h Show this help message\n");
printf("\n");
}
int main(int argc, char **argv)
{
+ char *ip_addr = NULL;
ucs_config_print_flags_t print_flags;
ucp_ep_params_t ucp_ep_params;
unsigned dev_type_bitmap;
@@ -77,7 +83,8 @@ int main(int argc, char **argv)
mem_size = NULL;
dev_type_bitmap = UINT_MAX;
ucp_ep_params.field_mask = 0;
- while ((c = getopt(argc, argv, "fahvcydbswpet:n:u:D:m:N:")) != -1) {
+
+ while ((c = getopt(argc, argv, "fahvcydbswpeCt:n:u:D:m:N:A:")) != -1) {
switch (c) {
case 'f':
print_flags |= UCS_CONFIG_PRINT_CONFIG | UCS_CONFIG_PRINT_HEADER | UCS_CONFIG_PRINT_DOC;
@@ -88,6 +95,9 @@ int main(int argc, char **argv)
case 'c':
print_flags |= UCS_CONFIG_PRINT_CONFIG;
break;
+ case 'C':
+ print_flags |= UCS_CONFIG_PRINT_COMMENT_DEFAULT;
+ break;
case 'v':
print_opts |= PRINT_VERSION;
break;
@@ -140,6 +150,9 @@ int main(int argc, char **argv)
case 'w':
ucp_features |= UCP_FEATURE_WAKEUP;
break;
+ case 'm':
+ ucp_features |= UCP_FEATURE_AM;
+ break;
case 'e':
ucp_ep_params.field_mask |= UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE;
ucp_ep_params.err_mode = UCP_ERR_HANDLING_MODE_PEER;
@@ -164,6 +177,9 @@ int main(int argc, char **argv)
return -1;
}
break;
+ case 'A':
+ ip_addr = optarg;
+ break;
case 'h':
usage();
return 0;
@@ -208,12 +224,14 @@ int main(int argc, char **argv)
if (print_opts & (PRINT_UCP_CONTEXT|PRINT_UCP_WORKER|PRINT_UCP_EP|PRINT_MEM_MAP)) {
if (ucp_features == 0) {
- printf("Please select UCP features using -u switch: a|r|t|w\n");
+ printf("Please select UCP features using -u switch: a|r|t|m|w\n");
usage();
return -1;
}
- print_ucp_info(print_opts, print_flags, ucp_features, &ucp_ep_params,
- ucp_num_eps, ucp_num_ppn, dev_type_bitmap, mem_size);
+
+ return print_ucp_info(print_opts, print_flags, ucp_features,
+ &ucp_ep_params, ucp_num_eps, ucp_num_ppn,
+ dev_type_bitmap, mem_size, ip_addr);
}
return 0;
diff --git a/src/tools/info/ucx_info.h b/src/tools/info/ucx_info.h
index 037de535c6f..4b9f96dc1c0 100644
--- a/src/tools/info/ucx_info.h
+++ b/src/tools/info/ucx_info.h
@@ -7,9 +7,12 @@
#ifndef UCX_INFO_H
#define UCX_INFO_H
+#include
#include
#include
+#include
+
enum {
PRINT_VERSION = UCS_BIT(0),
@@ -35,9 +38,11 @@ void print_uct_info(int print_opts, ucs_config_print_flags_t print_flags,
void print_type_info(const char * tl_name);
-void print_ucp_info(int print_opts, ucs_config_print_flags_t print_flags,
- uint64_t ctx_features, const ucp_ep_params_t *base_ep_params,
- size_t estimated_num_eps, size_t estimated_num_ppn,
- unsigned dev_type_bitmap, const char *mem_size);
+ucs_status_t
+print_ucp_info(int print_opts, ucs_config_print_flags_t print_flags,
+ uint64_t ctx_features, const ucp_ep_params_t *base_ep_params,
+ size_t estimated_num_eps, size_t estimated_num_ppn,
+ unsigned dev_type_bitmap, const char *mem_size,
+ const char *ip_addr);
#endif
diff --git a/src/tools/perf/api/libperf.h b/src/tools/perf/api/libperf.h
index 4e2bb9842f1..230e6416add 100644
--- a/src/tools/perf/api/libperf.h
+++ b/src/tools/perf/api/libperf.h
@@ -1,8 +1,9 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
-* Copyright (C) The University of Tennessee and The University
+* Copyright (C) The University of Tennessee and The University
* of Tennessee Research Foundation. 2015. ALL RIGHTS RESERVED.
+* Copyright (C) ARM Ltd. 2020. ALL RIGHTS RESERVED.
* See file LICENSE for terms.
*/
@@ -15,12 +16,8 @@ BEGIN_C_DECLS
/** @file libperf.h */
-#include
#include
#include
-#include
-#include
-#include
typedef enum {
@@ -47,6 +44,8 @@ typedef enum {
typedef enum {
UCX_PERF_TEST_TYPE_PINGPONG, /* Ping-pong mode */
+ UCX_PERF_TEST_TYPE_PINGPONG_WAIT_MEM,/* Ping-pong mode with
+ ucp_worker_wait_mem() */
UCX_PERF_TEST_TYPE_STREAM_UNI, /* Unidirectional stream */
UCX_PERF_TEST_TYPE_STREAM_BI, /* Bidirectional stream */
UCX_PERF_TEST_TYPE_LAST
@@ -61,6 +60,7 @@ typedef enum {
typedef enum {
UCT_PERF_DATA_LAYOUT_SHORT,
+ UCT_PERF_DATA_LAYOUT_SHORT_IOV,
UCT_PERF_DATA_LAYOUT_BCOPY,
UCT_PERF_DATA_LAYOUT_ZCOPY,
UCT_PERF_DATA_LAYOUT_LAST
@@ -68,7 +68,7 @@ typedef enum {
typedef enum {
- UCX_PERF_WAIT_MODE_PROGRESS, /* Repeatedly call progress */
+ UCX_PERF_WAIT_MODE_POLL, /* Repeatedly call progress */
UCX_PERF_WAIT_MODE_SLEEP, /* Go to sleep */
UCX_PERF_WAIT_MODE_SPIN, /* Spin without calling progress */
UCX_PERF_WAIT_MODE_LAST
@@ -85,7 +85,8 @@ enum ucx_perf_test_flags {
UCX_PERF_TEST_FLAG_VERBOSE = UCS_BIT(7), /* Print error messages */
UCX_PERF_TEST_FLAG_STREAM_RECV_DATA = UCS_BIT(8), /* For stream tests, use recv data API */
UCX_PERF_TEST_FLAG_FLUSH_EP = UCS_BIT(9), /* Issue flush on endpoint instead of worker */
- UCX_PERF_TEST_FLAG_WAKEUP = UCS_BIT(10) /* Create context with wakeup feature enabled */
+ UCX_PERF_TEST_FLAG_WAKEUP = UCS_BIT(10), /* Create context with wakeup feature enabled */
+ UCX_PERF_TEST_FLAG_ERR_HANDLING = UCS_BIT(11) /* Create UCP eps with error handling support */
};
@@ -188,7 +189,6 @@ typedef struct ucx_perf_params {
size_t iov_stride; /* Distance between starting address
of consecutive IOV entries. It is
similar to UCT uct_iov_t type stride */
- size_t am_hdr_size; /* Active message header size (included in message size) */
size_t alignment; /* Message buffer alignment */
unsigned max_outstanding; /* Maximal number of outstanding sends */
ucx_perf_counter_t warmup_iter; /* Number of warm-up iterations */
@@ -206,12 +206,16 @@ typedef struct ucx_perf_params {
char md_name[UCT_MD_NAME_MAX]; /* Memory domain name to use */
uct_perf_data_layout_t data_layout; /* Data layout to use */
unsigned fc_window; /* Window size for flow control <= UCX_PERF_TEST_MAX_FC_WINDOW */
+ size_t am_hdr_size; /* UCT Active Message header size
+ (included in message size) */
} uct;
struct {
unsigned nonblocking_mode; /* TBD */
ucp_perf_datatype_t send_datatype;
ucp_perf_datatype_t recv_datatype;
+ size_t am_hdr_size; /* UCP Active Message header size
+ (not included in message size) */
} ucp;
} ucx_perf_params_t;
diff --git a/src/tools/perf/cuda/Makefile.am b/src/tools/perf/cuda/Makefile.am
index aa6cb37065a..ecb7a33c0f7 100644
--- a/src/tools/perf/cuda/Makefile.am
+++ b/src/tools/perf/cuda/Makefile.am
@@ -10,6 +10,7 @@ module_LTLIBRARIES = libucx_perftest_cuda.la
libucx_perftest_cuda_la_CPPFLAGS = $(BASE_CPPFLAGS) $(CUDA_CPPFLAGS)
libucx_perftest_cuda_la_CFLAGS = $(BASE_CFLAGS) $(CUDA_CFLAGS)
libucx_perftest_cuda_la_LDFLAGS = $(CUDA_LDFLAGS) -version-info $(SOVERSION)
+libucx_perftest_cuda_la_LIBADD = $(CUDA_LIBS)
libucx_perftest_cuda_la_SOURCES = cuda_alloc.c
include $(top_srcdir)/config/module.am
diff --git a/src/tools/perf/lib/libperf.c b/src/tools/perf/lib/libperf.c
index 911a29b2e3f..134301f20e2 100644
--- a/src/tools/perf/lib/libperf.c
+++ b/src/tools/perf/lib/libperf.c
@@ -3,7 +3,8 @@
* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
* Copyright (C) The University of Tennessee and The University
* of Tennessee Research Foundation. 2015-2016. ALL RIGHTS RESERVED.
-* Copyright (C) ARM Ltd. 2017. ALL RIGHTS RESERVED.
+* Copyright (C) ARM Ltd. 2017-2020. ALL RIGHTS RESERVED.
+* Copyright (C) Huawei Technologies Co., Ltd. 2021. ALL RIGHTS RESERVED.
* See file LICENSE for terms.
*/
@@ -81,7 +82,9 @@ static const char *perf_iface_ops[] = {
[ucs_ilog2(UCT_IFACE_FLAG_TAG_EAGER_SHORT)] = "tag eager short",
[ucs_ilog2(UCT_IFACE_FLAG_TAG_EAGER_BCOPY)] = "tag eager bcopy",
[ucs_ilog2(UCT_IFACE_FLAG_TAG_EAGER_ZCOPY)] = "tag eager zcopy",
- [ucs_ilog2(UCT_IFACE_FLAG_TAG_RNDV_ZCOPY)] = "tag rndv zcopy"
+ [ucs_ilog2(UCT_IFACE_FLAG_TAG_RNDV_ZCOPY)] = "tag rndv zcopy",
+ [ucs_ilog2(UCT_IFACE_FLAG_EP_CHECK)] = "ep check",
+ [ucs_ilog2(UCT_IFACE_FLAG_EP_KEEPALIVE)] = "ep keepalive"
};
static const char *perf_atomic_op[] = {
@@ -167,7 +170,6 @@ uct_perf_test_alloc_host(const ucx_perf_context_t *perf, size_t length,
status = uct_iface_mem_alloc(perf->uct.iface, length,
flags, "perftest", alloc_mem);
if (status != UCS_OK) {
- ucs_free(alloc_mem);
ucs_error("failed to allocate memory: %s", ucs_status_string(status));
return status;
}
@@ -323,7 +325,8 @@ void ucx_perf_calc_result(ucx_perf_context_t *perf, ucx_perf_result_t *result)
ucs_time_t median;
double factor;
- if (perf->params.test_type == UCX_PERF_TEST_TYPE_PINGPONG) {
+ if ((perf->params.test_type == UCX_PERF_TEST_TYPE_PINGPONG) ||
+ (perf->params.test_type == UCX_PERF_TEST_TYPE_PINGPONG_WAIT_MEM)) {
factor = 2.0;
} else {
factor = 1.0;
@@ -479,7 +482,8 @@ void uct_perf_iface_flush_b(ucx_perf_context_t *perf)
static inline uint64_t __get_flag(uct_perf_data_layout_t layout, uint64_t short_f,
uint64_t bcopy_f, uint64_t zcopy_f)
{
- return (layout == UCT_PERF_DATA_LAYOUT_SHORT) ? short_f :
+ return ((layout == UCT_PERF_DATA_LAYOUT_SHORT) ||
+ (layout == UCT_PERF_DATA_LAYOUT_SHORT_IOV)) ? short_f :
(layout == UCT_PERF_DATA_LAYOUT_BCOPY) ? bcopy_f :
(layout == UCT_PERF_DATA_LAYOUT_ZCOPY) ? zcopy_f :
0;
@@ -501,7 +505,8 @@ static inline ucs_status_t __get_atomic_flag(size_t size, uint64_t *op32,
static inline size_t __get_max_size(uct_perf_data_layout_t layout, size_t short_m,
size_t bcopy_m, uint64_t zcopy_m)
{
- return (layout == UCT_PERF_DATA_LAYOUT_SHORT) ? short_m :
+ return ((layout == UCT_PERF_DATA_LAYOUT_SHORT) ||
+ (layout == UCT_PERF_DATA_LAYOUT_SHORT_IOV)) ? short_m :
(layout == UCT_PERF_DATA_LAYOUT_BCOPY) ? bcopy_m :
(layout == UCT_PERF_DATA_LAYOUT_ZCOPY) ? zcopy_m :
0;
@@ -650,8 +655,7 @@ static ucs_status_t uct_perf_test_check_capabilities(ucx_perf_params_t *params,
if (params->command == UCX_PERF_CMD_AM) {
if ((params->uct.data_layout == UCT_PERF_DATA_LAYOUT_SHORT) &&
- (params->am_hdr_size != sizeof(uint64_t)))
- {
+ (params->uct.am_hdr_size != sizeof(uint64_t))) {
if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) {
ucs_error("Short AM header size must be 8 bytes");
}
@@ -659,19 +663,20 @@ static ucs_status_t uct_perf_test_check_capabilities(ucx_perf_params_t *params,
}
if ((params->uct.data_layout == UCT_PERF_DATA_LAYOUT_ZCOPY) &&
- (params->am_hdr_size > attr.cap.am.max_hdr))
- {
+ (params->uct.am_hdr_size > attr.cap.am.max_hdr)) {
if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) {
- ucs_error("AM header size (%zu) is larger than max supported (%zu)",
- params->am_hdr_size, attr.cap.am.max_hdr);
+ ucs_error("AM header size (%zu) is larger than max supported "
+ "(%zu)",
+ params->uct.am_hdr_size, attr.cap.am.max_hdr);
}
return UCS_ERR_UNSUPPORTED;
}
- if (params->am_hdr_size > message_size) {
+ if (params->uct.am_hdr_size > message_size) {
if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) {
- ucs_error("AM header size (%zu) is larger than message size (%zu)",
- params->am_hdr_size, message_size);
+ ucs_error("AM header size (%zu) is larger than message size "
+ "(%zu)",
+ params->uct.am_hdr_size, message_size);
}
return UCS_ERR_INVALID_PARAM;
}
@@ -691,7 +696,8 @@ static ucs_status_t uct_perf_test_check_capabilities(ucx_perf_params_t *params,
}
}
- if (UCT_PERF_DATA_LAYOUT_ZCOPY == params->uct.data_layout) {
+ if ((UCT_PERF_DATA_LAYOUT_ZCOPY == params->uct.data_layout) ||
+ (UCT_PERF_DATA_LAYOUT_SHORT_IOV == params->uct.data_layout)) {
if (params->msg_size_cnt > max_iov) {
if ((params->flags & UCX_PERF_TEST_FLAG_VERBOSE) ||
!params->msg_size_cnt) {
@@ -702,11 +708,13 @@ static ucs_status_t uct_perf_test_check_capabilities(ucx_perf_params_t *params,
return UCS_ERR_UNSUPPORTED;
}
/* if msg_size_cnt == 1 the message size checked above */
- if ((UCX_PERF_CMD_AM == params->command) && (params->msg_size_cnt > 1)) {
- if (params->am_hdr_size > params->msg_size_list[0]) {
+ if ((UCT_PERF_DATA_LAYOUT_ZCOPY == params->uct.data_layout) &&
+ (UCX_PERF_CMD_AM == params->command) && (params->msg_size_cnt > 1)) {
+ if (params->uct.am_hdr_size > params->msg_size_list[0]) {
if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) {
ucs_error("AM header size (%lu) larger than the first IOV "
- "message size (%lu)", params->am_hdr_size,
+ "message size (%lu)",
+ params->uct.am_hdr_size,
params->msg_size_list[0]);
}
return UCS_ERR_INVALID_PARAM;
@@ -972,6 +980,9 @@ static ucs_status_t ucp_perf_test_fill_params(ucx_perf_params_t *params,
case UCX_PERF_CMD_STREAM:
ucp_params->features |= UCP_FEATURE_STREAM;
break;
+ case UCX_PERF_CMD_AM:
+ ucp_params->features |= UCP_FEATURE_AM;
+ break;
default:
if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) {
ucs_error("Invalid test command");
@@ -979,7 +990,8 @@ static ucs_status_t ucp_perf_test_fill_params(ucx_perf_params_t *params,
return UCS_ERR_INVALID_PARAM;
}
- if (params->flags & UCX_PERF_TEST_FLAG_WAKEUP) {
+ if ((params->flags & UCX_PERF_TEST_FLAG_WAKEUP) ||
+ (params->wait_mode == UCX_PERF_WAIT_MODE_SLEEP)) {
ucp_params->features |= UCP_FEATURE_WAKEUP;
}
@@ -1086,6 +1098,16 @@ static ucs_status_t ucp_perf_test_alloc_mem(ucx_perf_context_t *perf)
goto err_free_send_buffer;
}
+ /* Allocate AM header */
+ if (params->ucp.am_hdr_size != 0) {
+ perf->ucp.am_hdr = malloc(params->ucp.am_hdr_size);
+ if (perf->ucp.am_hdr == NULL) {
+ goto err_free_buffers;
+ }
+ } else {
+ perf->ucp.am_hdr = NULL;
+ }
+
/* Allocate IOV datatype memory */
perf->ucp.send_iov = NULL;
status = ucp_perf_test_alloc_iov_mem(params->ucp.send_datatype,
@@ -1093,7 +1115,7 @@ static ucs_status_t ucp_perf_test_alloc_mem(ucx_perf_context_t *perf)
params->thread_count,
&perf->ucp.send_iov);
if (UCS_OK != status) {
- goto err_free_buffers;
+ goto err_free_am_hdr;
}
perf->ucp.recv_iov = NULL;
@@ -1109,6 +1131,8 @@ static ucs_status_t ucp_perf_test_alloc_mem(ucx_perf_context_t *perf)
err_free_send_iov_buffers:
free(perf->ucp.send_iov);
+err_free_am_hdr:
+ free(perf->ucp.am_hdr);
err_free_buffers:
perf->allocator->ucp_free(perf, perf->recv_buffer, perf->ucp.recv_memh);
err_free_send_buffer:
@@ -1121,6 +1145,7 @@ static void ucp_perf_test_free_mem(ucx_perf_context_t *perf)
{
free(perf->ucp.recv_iov);
free(perf->ucp.send_iov);
+ free(perf->ucp.am_hdr);
perf->allocator->ucp_free(perf, perf->recv_buffer, perf->ucp.recv_memh);
perf->allocator->ucp_free(perf, perf->send_buffer, perf->ucp.send_memh);
}
@@ -1179,6 +1204,13 @@ static ucs_status_t ucp_perf_test_exchange_status(ucx_perf_context_t *perf,
return collective_status;
}
+/**
+ * Endpoint error callback, installed on endpoints when
+ * UCX_PERF_TEST_FLAG_ERR_HANDLING is set. Logs the reported status.
+ *
+ * @param arg     Not used.
+ * @param ep      Not used.
+ * @param status  Status reported to the handler.
+ */
+static void ucp_perf_test_err_handler(void *arg, ucp_ep_h ep,
+                                      ucs_status_t status)
+{
+    /* No trailing newline, like the other ucs_error() calls in this file */
+    ucs_error("error handler called with status %d (%s)", status,
+              ucs_status_string(status));
+}
+
static ucs_status_t ucp_perf_test_receive_remote_data(ucx_perf_context_t *perf)
{
unsigned thread_count = perf->params.thread_count;
@@ -1230,6 +1262,14 @@ static ucs_status_t ucp_perf_test_receive_remote_data(ucx_perf_context_t *perf)
ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
ep_params.address = address;
+ if (perf->params.flags & UCX_PERF_TEST_FLAG_ERR_HANDLING) {
+ ep_params.field_mask |= UCP_EP_PARAM_FIELD_ERR_HANDLER |
+ UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE;
+ ep_params.err_handler.cb = ucp_perf_test_err_handler;
+ ep_params.err_handler.arg = NULL;
+ ep_params.err_mode = UCP_ERR_HANDLING_MODE_PEER;
+ }
+
status = ucp_ep_create(perf->ucp.tctx[i].perf.ucp.worker, &ep_params,
&perf->ucp.tctx[i].perf.ucp.ep);
if (status != UCS_OK) {
@@ -1396,7 +1436,7 @@ static ucs_status_t ucp_perf_test_setup_endpoints(ucx_perf_context_t *perf,
for (i = 0; i < perf->params.thread_count; i++) {
status = ucp_worker_flush(perf->ucp.tctx[i].perf.ucp.worker);
if (status != UCS_OK) {
- ucs_warn("ucp_worker_flush() failed on theread %d: %s",
+ ucs_warn("ucp_worker_flush() failed on thread %d: %s",
i, ucs_status_string(status));
}
}
@@ -1599,7 +1639,7 @@ static ucs_status_t uct_perf_setup(ucx_perf_context_t *perf)
}
/* Enable progress before `uct_iface_flush` and `uct_worker_progress` called
- * to give a chance to finish connection for some tranports (ib/ud, tcp).
+ * to give a chance to finish connection for some transports (ib/ud, tcp).
* They may return UCS_INPROGRESS from `uct_iface_flush` when connections are
* in progress */
uct_iface_progress_enable(perf->uct.iface,
@@ -1648,6 +1688,7 @@ static ucs_status_t ucp_perf_setup(ucx_perf_context_t *perf)
{
ucp_params_t ucp_params;
ucp_worker_params_t worker_params;
+ ucp_worker_attr_t worker_attr;
ucp_config_t *config;
ucs_status_t status;
unsigned i, thread_count;
@@ -1663,7 +1704,7 @@ static ucs_status_t ucp_perf_setup(ucx_perf_context_t *perf)
if (perf->params.thread_count > 1) {
/* when there is more than one thread, a ucp_worker would be created for
* each. all of them will share the same ucp_context */
- ucp_params.features |= UCP_PARAM_FIELD_MT_WORKERS_SHARED;
+ ucp_params.field_mask |= UCP_PARAM_FIELD_MT_WORKERS_SHARED;
ucp_params.mt_workers_shared = 1;
}
@@ -1717,6 +1758,23 @@ static ucs_status_t ucp_perf_setup(ucx_perf_context_t *perf)
}
}
+ if (perf->params.command == UCX_PERF_CMD_AM) {
+ /* Check that requested AM header size is not larger than max supported. */
+ worker_attr.field_mask = UCP_WORKER_ATTR_FIELD_MAX_AM_HEADER;
+ status = ucp_worker_query(perf->ucp.tctx[0].perf.ucp.worker,
+ &worker_attr);
+ if (status != UCS_OK) {
+ goto err_free_tctx_destroy_workers;
+ }
+
+ if (worker_attr.max_am_header < perf->params.ucp.am_hdr_size) {
+ ucs_error("AM header size (%zu) is larger than max supported (%zu)",
+ perf->params.ucp.am_hdr_size, worker_attr.max_am_header);
+ status = UCS_ERR_INVALID_PARAM;
+ goto err_free_tctx_destroy_workers;
+ }
+ }
+
status = ucp_perf_test_setup_endpoints(perf, ucp_params.features);
if (status != UCS_OK) {
if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) {
@@ -1865,6 +1923,12 @@ static ucs_status_t ucx_perf_thread_run_test(void* arg)
ucx_perf_params_t* params = &perf->params;
ucs_status_t status;
+ /* new threads need explicit device association */
+ status = perf->allocator->init(perf);
+ if (status != UCS_OK) {
+ goto out;
+ }
+
if (params->warmup_iter > 0) {
ucx_perf_set_warmup(perf, params);
status = ucx_perf_funcs[params->api].run(perf);
diff --git a/src/tools/perf/lib/libperf_int.h b/src/tools/perf/lib/libperf_int.h
index 74592000db0..498f9742778 100644
--- a/src/tools/perf/lib/libperf_int.h
+++ b/src/tools/perf/lib/libperf_int.h
@@ -15,8 +15,10 @@ BEGIN_C_DECLS
/** @file libperf_int.h */
-#include
#include
+#include
+#include
+
#if _OPENMP
#include
@@ -102,6 +104,7 @@ struct ucx_perf_context {
ucp_mem_h recv_memh;
ucp_dt_iov_t *send_iov;
ucp_dt_iov_t *recv_iov;
+ void *am_hdr;
} ucp;
};
};
diff --git a/src/tools/perf/lib/ucp_tests.cc b/src/tools/perf/lib/ucp_tests.cc
index 46e13e8ee15..1e27857c2b7 100644
--- a/src/tools/perf/lib/ucp_tests.cc
+++ b/src/tools/perf/lib/ucp_tests.cc
@@ -2,6 +2,7 @@
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
* Copyright (C) The University of Tennessee and The University
* of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED.
+* Copyright (C) ARM Ltd. 2020. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
@@ -10,15 +11,9 @@
# include "config.h"
#endif
-#include
+#include "libperf_int.h"
-extern "C" {
-#include
-#include
-#include
-}
#include
-
#include
@@ -28,19 +23,53 @@ extern "C" {
template
class ucp_perf_test_runner {
public:
+ static const unsigned AM_ID = 1;
static const ucp_tag_t TAG = 0x1337a880u;
static const ucp_tag_t TAG_MASK = (FLAGS & UCX_PERF_TEST_FLAG_TAG_WILDCARD) ?
0 : (ucp_tag_t)-1;
typedef uint8_t psn_t;
- ucp_perf_test_runner(ucx_perf_context_t &perf) :
- m_perf(perf),
- m_outstanding(0),
- m_max_outstanding(m_perf.params.max_outstanding)
+ ucp_perf_test_runner(ucx_perf_context_t &perf)
+ : m_perf(perf),
+ m_outstanding(0),
+ m_max_outstanding(m_perf.params.max_outstanding),
+ m_am_rx_buffer(NULL),
+ m_am_rx_length(0ul)
{
+ memset(&m_am_rx_params, 0, sizeof(m_am_rx_params));
+
ucs_assert_always(m_max_outstanding > 0);
+
+ set_am_handler(am_data_handler, this, UCP_AM_FLAG_WHOLE_MSG);
+ }
+
+ /* Pass a NULL callback for AM_ID through set_am_handler() (which acts only
+ * when CMD is UCX_PERF_CMD_AM); presumably this clears the handler that the
+ * constructor registered -- confirm against the UCP API */
+ ~ucp_perf_test_runner()
+ {
+ set_am_handler(NULL, this, 0);
+ }
+
+    /**
+     * Set @a cb as the receive handler for AM_ID on this runner's worker.
+     * Does nothing unless the test command is UCX_PERF_CMD_AM.
+     *
+     * @param cb     Receive callback to set (the destructor passes NULL).
+     * @param arg    User argument stored with the handler.
+     * @param flags  Handler flags; when 0, the flags field is not included in
+     *               the field mask and is left unset.
+     */
+    void set_am_handler(ucp_am_recv_callback_t cb, void *arg, unsigned flags)
+    {
+        if (CMD == UCX_PERF_CMD_AM) {
+            ucp_am_handler_param_t param;
+            param.field_mask = UCP_AM_HANDLER_PARAM_FIELD_ID |
+                               UCP_AM_HANDLER_PARAM_FIELD_CB |
+                               UCP_AM_HANDLER_PARAM_FIELD_ARG;
+            param.id         = AM_ID;
+            param.cb         = cb;
+            param.arg        = arg;
+
+            if (flags != 0) {
+                param.field_mask |= UCP_AM_HANDLER_PARAM_FIELD_FLAGS;
+                param.flags       = flags;
+            }
+
+            /* The handler parameters are passed by address */
+            ucs_status_t status = ucp_worker_set_am_recv_handler(
+                    m_perf.ucp.worker, &param);
+            ucs_assert_always(status == UCS_OK);
+        }
+    }
void create_iov_buffer(ucp_dt_iov_t *iov, void *buffer)
@@ -89,16 +118,65 @@ class ucp_perf_test_runner {
}
}
+ /*
+ * Common setup for the test loops: query the message size, prepare the IOV
+ * buffers and resolve the send/receive datatypes. For the AM command it
+ * also stores the receive parameters, buffer and length in the m_am_rx_*
+ * members, which am_rndv_recv() reads later.
+ *
+ * total_length, send_length, recv_length, send_dt and recv_dt are outputs.
+ * send_buffer and recv_buffer are set by the caller beforehand and passed
+ * by pointer to ucp_perf_test_get_datatype(), so they (and the two lengths)
+ * may be adjusted by that helper -- NOTE(review): helper is not visible
+ * here, confirm.
+ */
+ void ucp_perf_init_common_params(size_t *total_length, size_t *send_length,
+ ucp_datatype_t *send_dt,
+ void **send_buffer, size_t *recv_length,
+ ucp_datatype_t *recv_dt,
+ void **recv_buffer)
+ {
+ *total_length = ucx_perf_get_message_size(&m_perf.params);
+
+ /* PUT tests need room for at least one psn_t in the message */
+ if (CMD == UCX_PERF_CMD_PUT) {
+ ucs_assert(*total_length >= sizeof(psn_t));
+ }
+
+ ucp_perf_test_prepare_iov_buffers();
+
+ /* Both lengths start from the full message size before the datatype
+ * helper gets a chance to modify them */
+ *send_length = *recv_length = *total_length;
+
+ *send_dt = ucp_perf_test_get_datatype(m_perf.params.ucp.send_datatype,
+ m_perf.ucp.send_iov, send_length,
+ send_buffer);
+ *recv_dt = ucp_perf_test_get_datatype(m_perf.params.ucp.recv_datatype,
+ m_perf.ucp.recv_iov, recv_length,
+ recv_buffer);
+ /* AM receives are posted from the data callback, so keep what that
+ * callback needs in member fields */
+ if (CMD == UCX_PERF_CMD_AM) {
+ m_am_rx_params.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK |
+ UCP_OP_ATTR_FIELD_USER_DATA |
+ UCP_OP_ATTR_FIELD_DATATYPE |
+ UCP_OP_ATTR_FLAG_NO_IMM_CMPL;
+ m_am_rx_params.datatype = *recv_dt;
+ m_am_rx_params.cb.recv_am = am_data_recv_cb;
+ m_am_rx_params.user_data = this;
+ m_am_rx_buffer = *recv_buffer;
+ m_am_rx_length = *recv_length;
+ }
+ }
+
+    /* Progress the worker once; if that call returns 0, call
+     * ucp_worker_wait() on the same worker */
+    void UCS_F_ALWAYS_INLINE blocking_progress() {
+        if (ucp_worker_progress(m_perf.ucp.worker) != 0) {
+            return;
+        }
+
+        ucp_worker_wait(m_perf.ucp.worker);
+    }
+
+    /* Progress the worker according to the configured wait mode: a plain
+     * ucp_worker_progress() call, or blocking_progress() in sleep mode */
+    void UCS_F_ALWAYS_INLINE progress() {
+        if (ucs_likely(UCX_PERF_WAIT_MODE_SLEEP != m_perf.params.wait_mode)) {
+            ucp_worker_progress(m_perf.ucp.worker);
+            return;
+        }
+
+        blocking_progress();
+    }
+
void UCS_F_ALWAYS_INLINE progress_responder() {
if (!(FLAGS & UCX_PERF_TEST_FLAG_ONE_SIDED) &&
!(m_perf.params.flags & UCX_PERF_TEST_FLAG_ONE_SIDED))
{
- ucp_worker_progress(m_perf.ucp.worker);
+ progress();
}
}
void UCS_F_ALWAYS_INLINE progress_requestor() {
- ucp_worker_progress(m_perf.ucp.worker);
+ progress();
}
ssize_t UCS_F_ALWAYS_INLINE wait_stream_recv(void *request)
@@ -117,6 +195,25 @@ class ucp_perf_test_runner {
return ucs_likely(status == UCS_OK) ? length : status;
}
+ /*
+ * Receive path for a rendezvous AM, called from am_data_handler() when
+ * UCP_AM_RECV_ATTR_FLAG_RNDV is set. Posts ucp_am_recv_data_nbx() for the
+ * data descriptor using the buffer, length and parameters stored in the
+ * m_am_rx_* members, then returns UCS_INPROGRESS. Completion is reported
+ * through the recv_am callback set in m_am_rx_params (am_data_recv_cb).
+ */
+ ucs_status_t am_rndv_recv(void *data, size_t length,
+ const ucp_am_recv_param_t *rx_params)
+ {
+ ucs_assert(!(rx_params->recv_attr &
+ (UCP_AM_RECV_ATTR_FLAG_DATA | UCP_AM_RECV_ATTR_FLAG_FIRST |
+ UCP_AM_RECV_ATTR_FLAG_ONLY)));
+ ucs_assert(length == ucx_perf_get_message_size(&m_perf.params));
+
+ ucs_status_ptr_t sp = ucp_am_recv_data_nbx(m_perf.ucp.worker, data,
+ m_am_rx_buffer,
+ m_am_rx_length,
+ &m_am_rx_params);
+ /* NOTE(review): the returned pointer is validated only by ucs_assert()
+ * before being released; confirm it cannot be an error status when
+ * assertions are compiled out */
+ ucs_assert(UCS_PTR_IS_PTR(sp));
+ ucp_request_release(sp);
+
+ return UCS_INPROGRESS;
+ }
+
+
static void send_cb(void *request, ucs_status_t status)
{
ucp_perf_request_t *r = reinterpret_cast<ucp_perf_request_t*>(
@@ -128,6 +225,11 @@ class ucp_perf_test_runner {
ucp_request_free(request);
}
+ /* Send-completion callback with a user_data argument, assigned to
+ * param.cb.send for ucp_am_send_nbx(); forwards to send_cb() and ignores
+ * user_data */
+ static void send_nbx_cb(void *request, ucs_status_t status, void *user_data)
+ {
+ send_cb(request, status);
+ }
+
static void tag_recv_cb(void *request, ucs_status_t status,
ucp_tag_recv_info_t *info)
{
@@ -146,6 +248,28 @@ class ucp_perf_test_runner {
ucp_request_free(request);
}
+    /* Completion callback for the receive posted by am_rndv_recv(): marks one
+     * operation as completed on the runner passed in user_data. The request,
+     * status and length arguments are not examined. */
+    static void am_data_recv_cb(void *request, ucs_status_t status,
+                                size_t length, void *user_data)
+    {
+        static_cast<ucp_perf_test_runner*>(user_data)->op_completed();
+    }
+
+    /*
+     * AM receive handler that the constructor installs through
+     * set_am_handler(); arg is the runner instance. Messages flagged as
+     * rendezvous are handed to am_rndv_recv(); any other message is counted
+     * as a completed operation and UCS_OK is returned.
+     */
+    static ucs_status_t
+    am_data_handler(void *arg, const void *header, size_t header_length,
+                    void *data, size_t length, const ucp_am_recv_param_t *param)
+    {
+        ucp_perf_test_runner *runner = static_cast<ucp_perf_test_runner*>(arg);
+
+        if (!(param->recv_attr & UCP_AM_RECV_ATTR_FLAG_RNDV)) {
+            /* TODO: Add option to do memcopy here */
+            runner->op_completed();
+            return UCS_OK;
+        }
+
+        return runner->am_rndv_recv(data, length, param);
+    }
+
void UCS_F_ALWAYS_INLINE wait_window(unsigned n, bool is_requestor)
{
while (m_outstanding >= (m_max_outstanding - n + 1)) {
@@ -162,12 +286,14 @@ class ucp_perf_test_runner {
uint8_t sn, uint64_t remote_addr, ucp_rkey_h rkey)
{
void *request;
+ ucp_request_param_t param;
/* coverity[switch_selector_expr_is_constant] */
switch (CMD) {
case UCX_PERF_CMD_TAG:
case UCX_PERF_CMD_TAG_SYNC:
case UCX_PERF_CMD_STREAM:
+ case UCX_PERF_CMD_AM:
wait_window(1, true);
/* coverity[switch_selector_expr_is_constant] */
switch (CMD) {
@@ -183,6 +309,16 @@ class ucp_perf_test_runner {
request = ucp_stream_send_nb(ep, buffer, length, datatype,
send_cb, 0);
break;
+ case UCX_PERF_CMD_AM:
+ param.op_attr_mask = UCP_OP_ATTR_FIELD_DATATYPE |
+ UCP_OP_ATTR_FIELD_CALLBACK;
+ param.cb.send = send_nbx_cb;
+ param.datatype = datatype;
+ request = ucp_am_send_nbx(ep, AM_ID,
+ m_perf.ucp.am_hdr,
+ m_perf.params.ucp.am_hdr_size, buffer,
+ length, &param);
+ break;
default:
request = UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM);
break;
@@ -197,6 +333,7 @@ class ucp_perf_test_runner {
/* coverity[switch_selector_expr_is_constant] */
switch (TYPE) {
case UCX_PERF_TEST_TYPE_PINGPONG:
+ case UCX_PERF_TEST_TYPE_PINGPONG_WAIT_MEM:
*((uint8_t*)buffer + length - 1) = sn;
break;
case UCX_PERF_TEST_TYPE_STREAM_UNI:
@@ -276,6 +413,9 @@ class ucp_perf_test_runner {
reinterpret_cast<ucp_perf_request_t*>(request)->context = this;
op_started();
return UCS_OK;
+ case UCX_PERF_CMD_AM:
+ op_started();
+ return UCS_OK;
case UCX_PERF_CMD_PUT:
/* coverity[switch_selector_expr_is_constant] */
switch (TYPE) {
@@ -285,6 +425,13 @@ class ucp_perf_test_runner {
progress_responder();
}
return UCS_OK;
+ case UCX_PERF_TEST_TYPE_PINGPONG_WAIT_MEM:
+ ptr = (volatile uint8_t*)buffer + length - 1;
+ while (*ptr != sn) {
+ ucp_worker_wait_mem(worker, (void *)ptr);
+ progress_responder();
+ }
+ return UCS_OK;
case UCX_PERF_TEST_TYPE_STREAM_UNI:
return UCS_OK;
default:
@@ -372,10 +519,17 @@ class ucp_perf_test_runner {
ucp_rkey_h rkey;
size_t length, send_length, recv_length;
- length = ucx_perf_get_message_size(&m_perf.params);
- ucs_assert(length >= sizeof(psn_t));
+ send_buffer = m_perf.send_buffer;
+ recv_buffer = m_perf.recv_buffer;
+ worker = m_perf.ucp.worker;
+ ep = m_perf.ucp.ep;
+ remote_addr = m_perf.ucp.remote_addr;
+ rkey = m_perf.ucp.rkey;
+ sn = 0;
- ucp_perf_test_prepare_iov_buffers();
+ ucp_perf_init_common_params(&length, &send_length, &send_datatype,
+ &send_buffer, &recv_length, &recv_datatype,
+ &recv_buffer);
if (CMD == UCX_PERF_CMD_PUT) {
m_perf.allocator->memcpy((psn_t*)m_perf.recv_buffer + length - 1,
@@ -386,28 +540,12 @@ class ucp_perf_test_runner {
ucp_perf_barrier(&m_perf);
- my_index = rte_call(&m_perf, group_index);
+ my_index = rte_call(&m_perf, group_index);
ucx_perf_test_start_clock(&m_perf);
ucx_perf_omp_barrier(&m_perf);
- send_buffer = m_perf.send_buffer;
- recv_buffer = m_perf.recv_buffer;
- worker = m_perf.ucp.worker;
- ep = m_perf.ucp.ep;
- remote_addr = m_perf.ucp.remote_addr;
- rkey = m_perf.ucp.rkey;
- sn = 0;
- send_length = length;
- recv_length = length;
- send_datatype = ucp_perf_test_get_datatype(m_perf.params.ucp.send_datatype,
- m_perf.ucp.send_iov, &send_length,
- &send_buffer);
- recv_datatype = ucp_perf_test_get_datatype(m_perf.params.ucp.recv_datatype,
- m_perf.ucp.recv_iov, &recv_length,
- &recv_buffer);
-
if (my_index == 0) {
UCX_PERF_TEST_FOREACH(&m_perf) {
send(ep, send_buffer, send_length, send_datatype, sn, remote_addr, rkey);
@@ -446,35 +584,26 @@ class ucp_perf_test_runner {
size_t length, send_length, recv_length;
uint8_t sn;
- length = ucx_perf_get_message_size(&m_perf.params);
- ucs_assert(length >= sizeof(psn_t));
+ send_buffer = m_perf.send_buffer;
+ recv_buffer = m_perf.recv_buffer;
+ worker = m_perf.ucp.worker;
+ ep = m_perf.ucp.ep;
+ remote_addr = m_perf.ucp.remote_addr;
+ rkey = m_perf.ucp.rkey;
+ sn = 0;
- ucp_perf_test_prepare_iov_buffers();
+ ucp_perf_init_common_params(&length, &send_length, &send_datatype,
+ &send_buffer, &recv_length, &recv_datatype,
+ &recv_buffer);
ucp_perf_barrier(&m_perf);
- my_index = rte_call(&m_perf, group_index);
+ my_index = rte_call(&m_perf, group_index);
ucx_perf_test_start_clock(&m_perf);
ucx_perf_omp_barrier(&m_perf);
- send_buffer = m_perf.send_buffer;
- recv_buffer = m_perf.recv_buffer;
- worker = m_perf.ucp.worker;
- ep = m_perf.ucp.ep;
- remote_addr = m_perf.ucp.remote_addr;
- rkey = m_perf.ucp.rkey;
- sn = 0;
- send_length = length;
- recv_length = length;
- send_datatype = ucp_perf_test_get_datatype(m_perf.params.ucp.send_datatype,
- m_perf.ucp.send_iov, &send_length,
- &send_buffer);
- recv_datatype = ucp_perf_test_get_datatype(m_perf.params.ucp.recv_datatype,
- m_perf.ucp.recv_iov, &recv_length,
- &recv_buffer);
-
if (my_index == 0) {
UCX_PERF_TEST_FOREACH(&m_perf) {
recv(worker, ep, recv_buffer, recv_length, recv_datatype, sn);
@@ -510,6 +639,7 @@ class ucp_perf_test_runner {
/* coverity[switch_selector_expr_is_constant] */
switch (TYPE) {
case UCX_PERF_TEST_TYPE_PINGPONG:
+ case UCX_PERF_TEST_TYPE_PINGPONG_WAIT_MEM:
return run_pingpong();
case UCX_PERF_TEST_TYPE_STREAM_UNI:
return run_stream_uni();
@@ -578,9 +708,16 @@ class ucp_perf_test_runner {
--m_outstanding;
}
- ucx_perf_context_t &m_perf;
- unsigned m_outstanding;
- const unsigned m_max_outstanding;
+ ucx_perf_context_t &m_perf;
+ unsigned m_outstanding;
+ const unsigned m_max_outstanding;
+ /*
+ * These fields are used by UCP AM flow only, because receive operation is
+ * initiated from the data receive callback.
+ */
+ void *m_am_rx_buffer;
+ size_t m_am_rx_length;
+ ucp_request_param_t m_am_rx_params;
};
@@ -621,10 +758,14 @@ class ucp_perf_test_runner {
TEST_CASE(_perf, UCS_PP_TUPLE_0 _case, UCS_PP_TUPLE_1 _case, \
UCX_PERF_TEST_FLAG_ONE_SIDED, UCX_PERF_TEST_FLAG_ONE_SIDED)
+/* Expand TEST_CASE for the (command, test type) tuple given in _case, with 0
+ * for both trailing TEST_CASE arguments */
+#define TEST_CASE_ALL_AM(_perf, _case) \
+ TEST_CASE(_perf, UCS_PP_TUPLE_0 _case, UCS_PP_TUPLE_1 _case, 0, 0)
+
ucs_status_t ucp_perf_test_dispatch(ucx_perf_context_t *perf)
{
UCS_PP_FOREACH(TEST_CASE_ALL_OSD, perf,
(UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_PINGPONG),
+ (UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_PINGPONG_WAIT_MEM),
(UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_STREAM_UNI),
(UCX_PERF_CMD_GET, UCX_PERF_TEST_TYPE_STREAM_UNI),
(UCX_PERF_CMD_ADD, UCX_PERF_TEST_TYPE_STREAM_UNI),
@@ -645,6 +786,11 @@ ucs_status_t ucp_perf_test_dispatch(ucx_perf_context_t *perf)
(UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_PINGPONG)
);
+ UCS_PP_FOREACH(TEST_CASE_ALL_AM, perf,
+ (UCX_PERF_CMD_AM, UCX_PERF_TEST_TYPE_PINGPONG),
+ (UCX_PERF_CMD_AM, UCX_PERF_TEST_TYPE_STREAM_UNI)
+ );
+
ucs_error("Invalid test case: %d/%d/0x%x",
perf->params.command, perf->params.test_type,
perf->params.flags);
diff --git a/src/tools/perf/lib/uct_tests.cc b/src/tools/perf/lib/uct_tests.cc
index 81d7d227e03..591c4fe0eb7 100644
--- a/src/tools/perf/lib/uct_tests.cc
+++ b/src/tools/perf/lib/uct_tests.cc
@@ -2,6 +2,7 @@
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
* Copyright (C) The University of Tennessee and The University
* of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED.
+* Copyright (C) ARM Ltd. 2020. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
@@ -10,19 +11,13 @@
# include "config.h"
#endif
-#define __STDC_FORMAT_MACROS 1
-#include
-#include
+#define __STDC_FORMAT_MACROS /* For PRIu64 */
-extern "C" {
-#include
-#include
-#include
-#include
-}
+#include "libperf_int.h"
#include
+
template
class uct_perf_test_runner {
public:
@@ -72,7 +67,8 @@ class uct_perf_test_runner {
const size_t iovcnt = perf->params.msg_size_cnt;
size_t iov_length_it, iov_it;
- ucs_assert(UCT_PERF_DATA_LAYOUT_ZCOPY == DATA);
+ ucs_assert((UCT_PERF_DATA_LAYOUT_ZCOPY == DATA) ||
+ (UCT_PERF_DATA_LAYOUT_SHORT_IOV == DATA));
ucs_assert(NULL != perf->params.msg_size_list);
ucs_assert(iovcnt > 0);
ucs_assert(perf->params.msg_size_list[0] >= header_size);
@@ -99,10 +95,11 @@ class uct_perf_test_runner {
}
void uct_perf_test_prepare_iov_buffer() {
- if (UCT_PERF_DATA_LAYOUT_ZCOPY == DATA) {
+ if ((UCT_PERF_DATA_LAYOUT_ZCOPY == DATA) ||
+ (UCT_PERF_DATA_LAYOUT_SHORT_IOV == DATA)) {
size_t start_iov_buffer_size = 0;
- if (UCX_PERF_CMD_AM == CMD) {
- start_iov_buffer_size = m_perf.params.am_hdr_size;
+ if ((UCX_PERF_CMD_AM == CMD) && (UCT_PERF_DATA_LAYOUT_ZCOPY == DATA)) {
+ start_iov_buffer_size = m_perf.params.uct.am_hdr_size;
}
uct_perf_get_buffer_iov(m_perf.uct.iov, m_perf.send_buffer,
start_iov_buffer_size,
@@ -248,6 +245,10 @@ class uct_perf_test_runner {
return uct_ep_am_short(ep, UCT_PERF_TEST_AM_ID, am_short_hdr,
(char*)buffer + sizeof(am_short_hdr),
length - sizeof(am_short_hdr));
+ case UCT_PERF_DATA_LAYOUT_SHORT_IOV:
+ set_sn(buffer, m_perf.uct.send_mem.mem_type, &sn);
+ return uct_ep_am_short_iov(ep, UCT_PERF_TEST_AM_ID, m_perf.uct.iov,
+ m_perf.params.msg_size_cnt);
case UCT_PERF_DATA_LAYOUT_BCOPY:
set_sn(buffer, m_perf.uct.send_mem.mem_type, &sn);
packed_len = uct_ep_am_bcopy(ep, UCT_PERF_TEST_AM_ID, pack_cb,
@@ -255,7 +256,7 @@ class uct_perf_test_runner {
return (packed_len >= 0) ? UCS_OK : (ucs_status_t)packed_len;
case UCT_PERF_DATA_LAYOUT_ZCOPY:
set_sn(buffer, m_perf.uct.send_mem.mem_type, &sn);
- header_size = m_perf.params.am_hdr_size;
+ header_size = m_perf.params.uct.am_hdr_size;
return uct_ep_am_zcopy(ep, UCT_PERF_TEST_AM_ID, buffer, header_size,
m_perf.uct.iov, m_perf.params.msg_size_cnt,
0, comp);
@@ -263,7 +264,8 @@ class uct_perf_test_runner {
return UCS_ERR_INVALID_PARAM;
}
case UCX_PERF_CMD_PUT:
- if (TYPE == UCX_PERF_TEST_TYPE_PINGPONG) {
+ if ((TYPE == UCX_PERF_TEST_TYPE_PINGPONG) ||
+ (TYPE == UCX_PERF_TEST_TYPE_PINGPONG_WAIT_MEM)) {
/* Put the control word at the latest byte of the IOV message */
set_sn(UCS_PTR_BYTE_OFFSET(buffer,
uct_perf_get_buffer_extent(&m_perf.params) - 1),
@@ -621,6 +623,7 @@ class uct_perf_test_runner {
/* coverity[switch_selector_expr_is_constant] */
switch (TYPE) {
case UCX_PERF_TEST_TYPE_PINGPONG:
+ case UCX_PERF_TEST_TYPE_PINGPONG_WAIT_MEM:
return run_pingpong();
case UCX_PERF_TEST_TYPE_STREAM_UNI:
/* coverity[switch_selector_expr_is_constant] */
@@ -687,6 +690,7 @@ class uct_perf_test_runner {
TEST_CASE(_perf, UCS_PP_TUPLE_0 _case, UCS_PP_TUPLE_1 _case, _data, false)
#define TEST_CASE_ALL_DATA(_perf, _case) \
TEST_CASE_ALL_OSD(_perf, _case, UCT_PERF_DATA_LAYOUT_SHORT) \
+ TEST_CASE_ALL_OSD(_perf, _case, UCT_PERF_DATA_LAYOUT_SHORT_IOV) \
TEST_CASE_ALL_OSD(_perf, _case, UCT_PERF_DATA_LAYOUT_BCOPY) \
TEST_CASE_ALL_OSD(_perf, _case, UCT_PERF_DATA_LAYOUT_ZCOPY)
diff --git a/src/tools/perf/perftest.c b/src/tools/perf/perftest.c
index 9973e29b6dd..dad3f260cc7 100644
--- a/src/tools/perf/perftest.c
+++ b/src/tools/perf/perftest.c
@@ -40,7 +40,7 @@
#define MAX_BATCH_FILES 32
#define MAX_CPUS 1024
#define TL_RESOURCE_NAME_NONE ""
-#define TEST_PARAMS_ARGS "t:n:s:W:O:w:D:i:H:oSCIqM:r:T:d:x:A:BUm:"
+#define TEST_PARAMS_ARGS "t:n:s:W:O:w:D:i:H:oSCIqM:r:E:T:d:x:A:BUem:"
#define TEST_ID_UNDEFINED -1
enum {
@@ -162,7 +162,13 @@ test_type_t tests[] = {
{"stream_lat", UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_PINGPONG,
"stream latency", "latency", 1},
- {NULL}
+ {"ucp_am_lat", UCX_PERF_API_UCP, UCX_PERF_CMD_AM, UCX_PERF_TEST_TYPE_PINGPONG,
+ "am latency", "latency", 1},
+
+ {"ucp_am_bw", UCX_PERF_API_UCP, UCX_PERF_CMD_AM, UCX_PERF_TEST_TYPE_STREAM_UNI,
+ "am bandwidth / message rate", "overhead", 32},
+
+ {NULL}
};
static int sock_io(int sock, ssize_t (*sock_call)(int, void *, size_t, int),
@@ -296,6 +302,9 @@ static void print_header(struct perftest_context *ctx)
case UCT_PERF_DATA_LAYOUT_SHORT:
test_data_str = "short";
break;
+ case UCT_PERF_DATA_LAYOUT_SHORT_IOV:
+ test_data_str = "short iov";
+ break;
case UCT_PERF_DATA_LAYOUT_BCOPY:
test_data_str = "bcopy";
break;
@@ -320,6 +329,11 @@ static void print_header(struct perftest_context *ctx)
printf("| Send memory: %-60s |\n", ucs_memory_type_names[ctx->params.super.send_mem_type]);
printf("| Recv memory: %-60s |\n", ucs_memory_type_names[ctx->params.super.recv_mem_type]);
printf("| Message size: %-60zu |\n", ucx_perf_get_message_size(&ctx->params.super));
+ if ((test->api == UCX_PERF_API_UCP) &&
+ (test->command == UCX_PERF_CMD_AM)) {
+ printf("| AM header size: %-60zu |\n",
+ ctx->params.super.ucp.am_hdr_size);
+ }
}
if (ctx->flags & TEST_FLAG_PRINT_CSV) {
@@ -350,7 +364,7 @@ static void print_test_name(struct perftest_context *ctx)
unsigned i, pos;
if (!(ctx->flags & TEST_FLAG_PRINT_CSV) && (ctx->num_batch_files > 0)) {
- strcpy(buf, "+--------------+---------+---------+---------+----------+----------+-----------+-----------+");
+ strcpy(buf, "+--------------+--------------+---------+---------+---------+----------+----------+-----------+-----------+");
pos = 1;
for (i = 0; i < ctx->num_batch_files; ++i) {
@@ -447,14 +461,15 @@ static void usage(const struct perftest_context *ctx, const char *program)
printf(" -d device to use for testing\n");
printf(" -x transport to use for testing\n");
printf(" -D data layout for sender side:\n");
- printf(" short - short messages (default, cannot be used for get)\n");
- printf(" bcopy - copy-out (cannot be used for atomics)\n");
- printf(" zcopy - zero-copy (cannot be used for atomics)\n");
- printf(" iov - scatter-gather list (iovec)\n");
+ printf(" short - short messages (default, cannot be used for get)\n");
+ printf(" shortiov - short io-vector messages (only for active messages)\n");
+ printf(" bcopy - copy-out (cannot be used for atomics)\n");
+ printf(" zcopy - zero-copy (cannot be used for atomics)\n");
+ printf(" iov - scatter-gather list (iovec)\n");
printf(" -W flow control window size, for active messages (%u)\n",
ctx->params.super.uct.fc_window);
- printf(" -H active message header size (%zu)\n",
- ctx->params.super.am_hdr_size);
+ printf(" -H active message header size (%zu), included in message size\n",
+ ctx->params.super.uct.am_hdr_size);
printf(" -A asynchronous progress mode (thread_spinlock)\n");
printf(" thread_spinlock - separate progress thread with spin locking\n");
printf(" thread_mutex - separate progress thread with mutex locking\n");
@@ -475,6 +490,12 @@ static void usage(const struct perftest_context *ctx, const char *program)
printf(" recv : Use ucp_stream_recv_nb\n");
printf(" recv_data : Use ucp_stream_recv_data_nb\n");
printf(" -I create context with wakeup feature enabled\n");
+ printf(" -e create endpoints with error handling support\n");
+ printf(" -E wait mode for tests\n");
+ printf(" poll : repeatedly call worker_progress\n");
+ printf(" sleep : go to sleep after posting requests\n");
+ printf(" -H active message header size (%zu), not included in message size\n",
+ ctx->params.super.ucp.am_hdr_size);
printf("\n");
printf(" NOTE: When running UCP tests, transport and device should be specified by\n");
printf(" environment variables: UCX_TLS and UCX_[SELF|SHM|NET]_DEVICES.\n");
@@ -590,7 +611,6 @@ static ucs_status_t init_test_params(perftest_params_t *params)
params->super.wait_mode = UCX_PERF_WAIT_MODE_LAST;
params->super.max_outstanding = 0;
params->super.warmup_iter = 10000;
- params->super.am_hdr_size = 8;
params->super.alignment = ucs_get_page_size();
params->super.max_iter = 1000000l;
params->super.max_time = 0.0;
@@ -598,12 +618,14 @@ static ucs_status_t init_test_params(perftest_params_t *params)
params->super.flags = UCX_PERF_TEST_FLAG_VERBOSE;
params->super.uct.fc_window = UCT_PERF_TEST_MAX_FC_WINDOW;
params->super.uct.data_layout = UCT_PERF_DATA_LAYOUT_SHORT;
+ params->super.uct.am_hdr_size = 8;
params->super.send_mem_type = UCS_MEMORY_TYPE_HOST;
params->super.recv_mem_type = UCS_MEMORY_TYPE_HOST;
params->super.msg_size_cnt = 1;
params->super.iov_stride = 0;
params->super.ucp.send_datatype = UCP_PERF_DATATYPE_CONTIG;
params->super.ucp.recv_datatype = UCP_PERF_DATATYPE_CONTIG;
+ params->super.ucp.am_hdr_size = 0;
strcpy(params->super.uct.dev_name, TL_RESOURCE_NAME_NONE);
strcpy(params->super.uct.tl_name, TL_RESOURCE_NAME_NONE);
@@ -654,6 +676,8 @@ static ucs_status_t parse_test_params(perftest_params_t *params, char opt,
case 'D':
if (!strcmp(opt_arg, "short")) {
params->super.uct.data_layout = UCT_PERF_DATA_LAYOUT_SHORT;
+ } else if (!strcmp(opt_arg, "shortiov")) {
+ params->super.uct.data_layout = UCT_PERF_DATA_LAYOUT_SHORT_IOV;
} else if (!strcmp(opt_arg, "bcopy")) {
params->super.uct.data_layout = UCT_PERF_DATA_LAYOUT_BCOPY;
} else if (!strcmp(opt_arg, "zcopy")) {
@@ -672,6 +696,18 @@ static ucs_status_t parse_test_params(perftest_params_t *params, char opt,
return UCS_ERR_INVALID_PARAM;
}
return UCS_OK;
+ case 'E':
+ if (!strcmp(opt_arg, "poll")) {
+ params->super.wait_mode = UCX_PERF_WAIT_MODE_POLL;
+ return UCS_OK;
+ } else if (!strcmp(opt_arg, "sleep")) {
+ params->super.wait_mode = UCX_PERF_WAIT_MODE_SLEEP;
+ return UCS_OK;
+ } else {
+ ucs_error("Invalid option argument for -E");
+ return UCS_ERR_INVALID_PARAM;
+ }
+ return UCS_OK;
case 'i':
params->super.iov_stride = atol(opt_arg);
return UCS_OK;
@@ -681,7 +717,8 @@ static ucs_status_t parse_test_params(perftest_params_t *params, char opt,
case 's':
return parse_message_sizes_params(opt_arg, &params->super);
case 'H':
- params->super.am_hdr_size = atol(opt_arg);
+ params->super.uct.am_hdr_size = atol(opt_arg);
+ params->super.ucp.am_hdr_size = atol(opt_arg);
return UCS_OK;
case 'W':
params->super.uct.fc_window = atoi(opt_arg);
@@ -710,6 +747,9 @@ static ucs_status_t parse_test_params(perftest_params_t *params, char opt,
case 'I':
params->super.flags |= UCX_PERF_TEST_FLAG_WAKEUP;
return UCS_OK;
+ case 'e':
+ params->super.flags |= UCX_PERF_TEST_FLAG_ERR_HANDLING;
+ return UCS_OK;
case 'M':
if (!strcmp(opt_arg, "single")) {
params->super.thread_mode = UCS_THREAD_MODE_SINGLE;
@@ -1433,8 +1473,6 @@ static ucx_perf_rte_t ext_rte = {
static ucs_status_t setup_mpi_rte(struct perftest_context *ctx)
{
- ucs_trace_func("");
-
#if defined (HAVE_MPI)
static ucx_perf_rte_t mpi_rte = {
.group_size = mpi_rte_group_size,
@@ -1448,6 +1486,8 @@ static ucs_status_t setup_mpi_rte(struct perftest_context *ctx)
int size, rank;
+ ucs_trace_func("");
+
MPI_Comm_size(MPI_COMM_WORLD, &size);
if (size != 2) {
ucs_error("This test should run with exactly 2 processes (actual: %d)", size);
@@ -1463,6 +1503,8 @@ static ucs_status_t setup_mpi_rte(struct perftest_context *ctx)
ctx->params.super.rte = &mpi_rte;
ctx->params.super.report_arg = ctx;
#elif defined (HAVE_RTE)
+ ucs_trace_func("");
+
ctx->params.rte_group = NULL;
ctx->params.rte = &mpi_rte;
ctx->params.report_arg = ctx;
diff --git a/src/tools/vfs/Makefile.am b/src/tools/vfs/Makefile.am
new file mode 100644
index 00000000000..62408f8aea7
--- /dev/null
+++ b/src/tools/vfs/Makefile.am
@@ -0,0 +1,17 @@
+#
+# Copyright (C) Mellanox Technologies Ltd. 2020. ALL RIGHTS RESERVED.
+#
+# See file LICENSE for terms.
+#
+
+if HAVE_FUSE3
+
+bin_PROGRAMS = ucx_vfs
+ucx_vfs_CPPFLAGS = $(BASE_CPPFLAGS) $(FUSE3_CPPFLAGS)
+ucx_vfs_CFLAGS = $(BASE_CFLAGS)
+ucx_vfs_SOURCES = vfs_main.c vfs_server.c
+noinst_HEADERS = vfs_daemon.h
+ucx_vfs_LDADD = $(FUSE3_LIBS) \
+ $(top_builddir)/src/ucs/vfs/sock/libucs_vfs_sock.la
+
+endif
diff --git a/src/tools/vfs/vfs_daemon.h b/src/tools/vfs/vfs_daemon.h
new file mode 100644
index 00000000000..094a1e182c1
--- /dev/null
+++ b/src/tools/vfs/vfs_daemon.h
@@ -0,0 +1,61 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2020. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#ifndef VFS_DAEMON_H_
+#define VFS_DAEMON_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+#define VFS_DEFAULT_MOUNTPOINT_DIR "/tmp/ucx"
+#define VFS_FUSE_MOUNT_PROG "fusermount3"
+
+
+/*
+ * Daemon-only action. It shares the numeric value of UCS_VFS_SOCK_ACTION_NOP.
+ */
+enum {
+ VFS_DAEMON_ACTION_START = UCS_VFS_SOCK_ACTION_NOP
+};
+
+
+/*
+ * Print a message to stderr, prefixed with "Error: " and followed by a newline.
+ * The macro expands to a bare brace block (not do { } while (0)), so a trailing
+ * semicolon at the call site forms an additional empty statement.
+ */
+#define vfs_error(_fmt, ...) \
+ { \
+ fprintf(stderr, "Error: " _fmt "\n", ##__VA_ARGS__); \
+ }
+
+
+/*
+ * Print a message to stderr, prefixed with "Debug: " and followed by a
+ * newline, only when g_opts.verbose is nonzero. The macro expands to a bare
+ * brace block (not do { } while (0)).
+ */
+#define vfs_log(_fmt, ...) \
+ { \
+ if (g_opts.verbose) { \
+ fprintf(stderr, "Debug: " _fmt "\n", ##__VA_ARGS__); \
+ } \
+ }
+
+
+/* Command-line options of the daemon */
+typedef struct {
+ /* Requested action: VFS_DAEMON_ACTION_START or a UCS_VFS_SOCK_ACTION_* value */
+ int action;
+ /* Nonzero to stay in the foreground instead of daemonizing */
+ int foreground;
+ /* Verbosity level; nonzero enables vfs_log() output */
+ int verbose;
+ /* Parent directory under which per-process mount points are created */
+ const char *mountpoint_dir;
+ /* Extra mount options appended to the built-in ones; may be empty */
+ const char *mount_opts;
+ /* Path of the listening unix socket, or NULL to use the default address */
+ const char *sock_path;
+} vfs_opts_t;
+
+
+/* Global daemon options */
+extern vfs_opts_t g_opts;
+/* Printable action names, indexed by action value; some entries may be NULL */
+extern const char *vfs_action_names[];
+
+/* Mount a FUSE file system for process 'pid'; return its fd, or negative value */
+int vfs_mount(int pid);
+
+/* Unmount the file system of process 'pid' and remove its mount directory */
+int vfs_unmount(int pid);
+
+/* Serve client connections arriving on 'listen_fd' until asked to stop */
+int vfs_server_loop(int listen_fd);
+
+#endif
diff --git a/src/tools/vfs/vfs_main.c b/src/tools/vfs/vfs_main.c
new file mode 100644
index 00000000000..b6732c95454
--- /dev/null
+++ b/src/tools/vfs/vfs_main.c
@@ -0,0 +1,504 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2020. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "vfs_daemon.h"
+
+#include
+#include
+#include
+#include
+#include
+
+
+/* Default option values; vfs_parse_args() overrides them from the command line */
+vfs_opts_t g_opts = {
+ .action = VFS_DAEMON_ACTION_START,
+ .foreground = 0,
+ .verbose = 0,
+ .mountpoint_dir = VFS_DEFAULT_MOUNTPOINT_DIR,
+ .mount_opts = "",
+ .sock_path = NULL
+};
+
+/*
+ * Action names indexed by action value. Only the listed indices are
+ * initialized; any other index within the array holds NULL.
+ */
+const char *vfs_action_names[] = {
+ [UCS_VFS_SOCK_ACTION_STOP] = "stop",
+ [UCS_VFS_SOCK_ACTION_MOUNT] = "mount",
+ [VFS_DAEMON_ACTION_START] = "start"
+};
+
+/* Address of the daemon's unix socket; filled in by main() */
+static struct sockaddr_un g_sockaddr;
+
+
+/**
+ * Run the fusermount helper program with the given arguments and wait for it
+ * to finish.
+ *
+ * @param extra_argv  NULL-terminated array of arguments, appended after the
+ *                    program name (and after "-q", which is added unless
+ *                    verbose logging is enabled).
+ *
+ * @return 0 if the program exited with status 0, negative value otherwise.
+ */
+static int vfs_run_fusermount(char **extra_argv)
+{
+    char command[128];
+    pid_t child_pid;
+    int ret, status;
+    int devnull_fd;
+    char *p, *endp;
+    char *argv[16];
+    int i, argc;
+
+    argc         = 0;
+    argv[argc++] = VFS_FUSE_MOUNT_PROG;
+    if (!g_opts.verbose) {
+        argv[argc++] = "-q";
+    }
+    while (*extra_argv != NULL) {
+        /* Check the bound before storing, keeping one slot for the NULL
+         * terminator, so the array cannot overflow even without asserts */
+        if (argc >= (int)ucs_static_array_size(argv) - 1) {
+            vfs_error("too many arguments for '%s'", VFS_FUSE_MOUNT_PROG);
+            return -1;
+        }
+        argv[argc++] = *(extra_argv++);
+    }
+    argv[argc++] = NULL;
+
+    /* save the whole command to log */
+    p    = command;
+    endp = command + sizeof(command);
+    for (i = 0; argv[i] != NULL; ++i) {
+        snprintf(p, endp - p, "%s ", argv[i]);
+        p += strlen(p);
+    }
+    /* argv[0] is always present, so there is a last character to replace */
+    *(p - 1) = '\0';
+
+    vfs_log("exec '%s'", command);
+
+    child_pid = fork();
+    if (child_pid == -1) {
+        vfs_error("fork() failed: %m");
+        return -1;
+    }
+
+    if (child_pid == 0) {
+        if (!g_opts.verbose) {
+            /* Silence the helper's stdout and stderr */
+            devnull_fd = open("/dev/null", O_WRONLY);
+            if (devnull_fd < 0) {
+                vfs_error("failed open /dev/null: %m");
+                exit(1);
+            }
+
+            dup2(devnull_fd, 1);
+            dup2(devnull_fd, 2);
+            close(devnull_fd);
+        }
+        execvp(argv[0], argv);
+        /* execvp() returns only on failure */
+        vfs_error("failed to execute '%s': %m", command);
+        exit(1);
+    }
+
+    ret = waitpid(child_pid, &status, 0);
+    if (ret < 0) {
+        /* Save errno before logging, since the logging call may modify it */
+        ret = -errno;
+        vfs_error("waitpid(%d) failed: %m", child_pid);
+        return ret;
+    } else if (WIFEXITED(status) && (WEXITSTATUS(status) != 0)) {
+        vfs_error("'%s' exited with status %d", command, WEXITSTATUS(status));
+        return -1;
+    } else if (!WIFEXITED(status)) {
+        vfs_error("'%s' did not exit properly (%d)", command, status);
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Format the mount point path of process 'pid' as "<mountpoint_dir>/<pid>" */
+static void vfs_get_mountpoint(pid_t pid, char *mountpoint, size_t max_length)
+{
+    const char *parent_dir = g_opts.mountpoint_dir;
+
+    snprintf(mountpoint, max_length, "%s/%d", parent_dir, pid);
+}
+
+/**
+ * Build a printable name for a process: "<command>@pid:<pid>" when the command
+ * name can be read from procfs, or "pid:<pid>" otherwise.
+ *
+ * @param pid         Process id.
+ * @param buf         Buffer to write the name to.
+ * @param max_length  Size of the buffer.
+ *
+ * @return 'buf'.
+ */
+static const char *vfs_get_process_name(int pid, char *buf, size_t max_length)
+{
+    char procfs_comm[NAME_MAX];
+    size_t length;
+    FILE *file;
+
+    /* open /proc/<pid>/comm to read command name */
+    snprintf(procfs_comm, sizeof(procfs_comm), "/proc/%d/comm", pid);
+    file = fopen(procfs_comm, "r");
+    if (file == NULL) {
+        goto err;
+    }
+
+    /* read command to buffer */
+    if (fgets(buf, max_length, file) == NULL) {
+        goto err_close;
+    }
+
+    /* remove trailing space/newline. Index-based, so no pointer before the
+     * start of 'buf' is ever formed; cast since isspace() requires a value
+     * representable as unsigned char */
+    length = strlen(buf);
+    while ((length > 0) && isspace((unsigned char)buf[length - 1])) {
+        buf[--length] = '\0';
+    }
+
+    /* append process id */
+    snprintf(buf + length, max_length - length, "@pid:%d", pid);
+    fclose(file);
+    goto out;
+
+err_close:
+    fclose(file);
+err:
+    snprintf(buf, max_length, "pid:%d", pid);
+out:
+    return buf;
+}
+
+/**
+ * Create the mount point directory of process 'pid' and open a FUSE channel
+ * on it.
+ *
+ * @return File descriptor of the FUSE channel, or negative value on failure.
+ */
+int vfs_mount(int pid)
+{
+    char mountpoint[PATH_MAX];
+    char mountopts[1024];
+    char name[NAME_MAX];
+    const char *separator;
+    int fuse_fd, ret;
+
+    /* Add common mount options:
+     * - File system name (source) : process name and pid
+     * - File system type          : ucx_vfs
+     * - Enable permissions check  : yes
+     * - Direct IO (no caching)    : yes
+     * User-provided options, if any, are appended after a comma.
+     */
+    separator = (strlen(g_opts.mount_opts) > 0) ? "," : "";
+    ret       = snprintf(mountopts, sizeof(mountopts),
+                         "fsname=%s,subtype=ucx_vfs,default_permissions,"
+                         "direct_io%s%s",
+                         vfs_get_process_name(pid, name, sizeof(name)),
+                         separator, g_opts.mount_opts);
+    if ((ret < 0) || ((size_t)ret >= sizeof(mountopts))) {
+        return -ENOMEM;
+    }
+
+    /* Create the mount point directory, and ignore "already exists" error */
+    vfs_get_mountpoint(pid, mountpoint, sizeof(mountpoint));
+    if ((mkdir(mountpoint, S_IRWXU) < 0) && (errno != EEXIST)) {
+        ret = -errno;
+        vfs_error("failed to create directory '%s': %m", mountpoint);
+        return ret;
+    }
+
+    /* Mount a new FUSE filesystem in the mount point directory */
+    vfs_log("mounting directory '%s' with options '%s'", mountpoint, mountopts);
+    fuse_fd = fuse_open_channel(mountpoint, mountopts);
+    if (fuse_fd < 0) {
+        vfs_error("fuse_open_channel(%s,opts=%s) failed: %m", mountpoint,
+                  mountopts);
+    } else {
+        vfs_log("mounted directory '%s' with fd %d", mountpoint, fuse_fd);
+    }
+
+    return fuse_fd;
+}
+
+/**
+ * Unmount the FUSE file system of process 'pid' by running the fusermount
+ * helper, then remove the mount point directory.
+ *
+ * @return 0 on success, negative value on failure.
+ */
+int vfs_unmount(int pid)
+{
+    char mountpoint[PATH_MAX];
+    char *argv[5] = {"-u", "-z", "--", mountpoint, NULL};
+    int ret;
+
+    /* Unmount FUSE file system */
+    vfs_get_mountpoint(pid, mountpoint, sizeof(mountpoint));
+    ret = vfs_run_fusermount(argv);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* Remove mount point directory */
+    vfs_log("removing directory '%s'", mountpoint);
+    ret = rmdir(mountpoint);
+    if (ret >= 0) {
+        return 0;
+    }
+
+    vfs_error("failed to remove directory '%s': %m", mountpoint);
+    return ret;
+}
+
+/**
+ * Remove the socket file named by g_sockaddr.
+ *
+ * @param silent_notexist  If nonzero, a missing file is reported through the
+ *                         debug log rather than as an error message.
+ *
+ * @return 0 on success, or the negative value of errno on failure.
+ */
+static int vfs_unlink_socket(int silent_notexist)
+{
+    const char *path = g_sockaddr.sun_path;
+    int status;
+
+    vfs_log("removing existing socket '%s'", path);
+
+    if (unlink(path) >= 0) {
+        return 0;
+    }
+
+    status = -errno;
+    if (silent_notexist && (status == -ENOENT)) {
+        vfs_log("could not unlink '%s': %m", path);
+    } else {
+        vfs_error("could not unlink '%s': %m", path);
+    }
+
+    return status;
+}
+
+/**
+ * Create a unix-domain listening socket bound to g_sockaddr and run the server
+ * loop on it until the loop returns.
+ *
+ * @param silent_addinuse_err If nonzero, a bind() failure with EADDRINUSE is
+ * not reported as an error message.
+ *
+ * @return The return value of vfs_server_loop(), or the negative value of errno
+ * if setting up the socket failed.
+ */
+static int vfs_listen(int silent_addinuse_err)
+{
+ int listen_fd, ret;
+
+ /* Mask out all permission bits except the owner's for files created from
+ * here on.
+ * NOTE(review): POSIX documents umask() as always succeeding and returning
+ * the previous mask, so the error branch below looks unreachable - confirm */
+ ret = umask(~S_IRWXU);
+ if (ret < 0) {
+ ret = -errno;
+ vfs_error("failed to set umask permissions: %m");
+ goto out;
+ }
+
+ listen_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (listen_fd < 0) {
+ ret = -errno;
+ vfs_error("failed to create listening socket: %m");
+ goto out;
+ }
+
+ ret = bind(listen_fd, (const struct sockaddr*)&g_sockaddr,
+ sizeof(g_sockaddr));
+ if (ret < 0) {
+ /* errno is saved first; the caller tests for -EADDRINUSE */
+ ret = -errno;
+ if ((errno != EADDRINUSE) || !silent_addinuse_err) {
+ vfs_error("bind(%s) failed: %m", g_sockaddr.sun_path);
+ }
+ goto out_close;
+ }
+
+ ret = listen(listen_fd, 128);
+ if (ret < 0) {
+ ret = -errno;
+ vfs_error("listen() failed: %m");
+ goto out_unlink;
+ }
+
+ vfs_log("listening for connections on '%s'", g_sockaddr.sun_path);
+ ret = vfs_server_loop(listen_fd);
+
+ /* Cleanup, also reached on normal return from the server loop */
+out_unlink:
+ vfs_unlink_socket(0);
+out_close:
+ close(listen_fd);
+out:
+ return ret;
+}
+
+/**
+ * Connect to the socket named by g_sockaddr and, if the configured action is
+ * below UCS_VFS_SOCK_ACTION_LAST, send it to the peer.
+ *
+ * @return 0 on success. On socket() or connect() failure, the negative value
+ * of errno; on send failure, the value returned by ucs_vfs_sock_send().
+ */
+static int vfs_connect_and_act()
+{
+ ucs_vfs_sock_message_t vfs_msg_out;
+ int connfd;
+ int ret;
+
+ vfs_log("connecting to '%s'", g_sockaddr.sun_path);
+
+ connfd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (connfd < 0) {
+ ret = -errno;
+ vfs_error("failed to create connection socket: %m");
+ goto out;
+ }
+
+ ret = connect(connfd, (const struct sockaddr*)&g_sockaddr,
+ sizeof(g_sockaddr));
+ if (ret < 0) {
+ ret = -errno;
+ /* A refused connection is only logged at debug level; the caller
+ * checks for -ECONNREFUSED */
+ if (errno == ECONNREFUSED) {
+ vfs_log("connect(%s) failed: %m", g_sockaddr.sun_path);
+ } else {
+ vfs_error("connect(%s) failed: %m", g_sockaddr.sun_path);
+ }
+ goto out_close;
+ }
+
+ if (g_opts.action < UCS_VFS_SOCK_ACTION_LAST) {
+ vfs_log("sending action '%s'", vfs_action_names[g_opts.action]);
+
+ /* send action */
+ /* NOTE(review): only 'action' is set; the other message fields are left
+ * uninitialized - confirm ucs_vfs_sock_send() does not read them here */
+ vfs_msg_out.action = g_opts.action;
+ ret = ucs_vfs_sock_send(connfd, &vfs_msg_out);
+ if (ret < 0) {
+ vfs_error("failed to send: %d", ret);
+ goto out_close;
+ }
+
+ ret = 0;
+ }
+
+out_close:
+ close(connfd);
+out:
+ return ret;
+}
+
+/**
+ * Start the daemon: listen on the socket; if its address is already in use,
+ * try connecting to it; if the connection is refused, remove the socket file
+ * and listen again.
+ *
+ * NOTE(review): this function is not declared static, and no prototype for it
+ * is visible in vfs_daemon.h - confirm whether external linkage is intended.
+ *
+ * @return 0 or negative value in case of error.
+ */
+int vfs_start()
+{
+ int ret;
+
+ ret = vfs_listen(1);
+ if (ret != -EADDRINUSE) {
+ return ret;
+ }
+
+ /* Failed to listen because 'socket_name' path already exists - try to
+ * connect */
+ ret = vfs_connect_and_act();
+ if (ret != -ECONNREFUSED) {
+ return ret;
+ }
+
+ /* Could not connect to the socket because no one is listening - remove the
+ * socket file and try listening again */
+ ret = vfs_unlink_socket(0);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return vfs_listen(0);
+}
+
+/*
+ * Print the command-line help to stdout, including the current mount point
+ * directory and the socket path obtained from ucs_vfs_sock_get_address().
+ * The return value of ucs_vfs_sock_get_address() is not checked; on failure
+ * the zero-initialized (empty) path is printed.
+ */
+static void vfs_usage()
+{
+ struct sockaddr_un sock_addr = {};
+
+ ucs_vfs_sock_get_address(&sock_addr);
+ printf("Usage: ucx_vfs [options] [action]\n");
+ printf("\n");
+ printf("Options:\n");
+ printf(" -d Set parent directory for mount points (default: %s)\n",
+ g_opts.mountpoint_dir);
+ printf(" -o Pass these mount options to mount.fuse\n");
+ printf(" -f Do not daemonize; run in foreground\n");
+ printf(" -v Enable verbose logging (requires -f)\n");
+ printf(" -l Set listening unix socket path (default: %s)\n",
+ sock_addr.sun_path);
+ printf("\n");
+ printf("Actions:\n");
+ printf(" start Run the daemon and listen for connection from UCX\n");
+ printf(" If a daemon is already running, do nothing\n");
+ printf(" This is the default action.\n");
+ printf(" stop Stop the running daemon\n");
+ printf("\n");
+}
+
+/**
+ * Parse command-line options and the optional action argument into g_opts.
+ *
+ * @param argc  Number of command-line arguments.
+ * @param argv  Command-line arguments.
+ *
+ * @return 0 on success, negative value if the arguments are invalid or help
+ *         was requested (usage is printed in both cases).
+ */
+static int vfs_parse_args(int argc, char **argv)
+{
+    const char *action_str;
+    int c, i;
+
+    while ((c = getopt(argc, argv, "d:o:vfl:h")) != -1) {
+        switch (c) {
+        case 'd':
+            g_opts.mountpoint_dir = optarg;
+            break;
+        case 'o':
+            g_opts.mount_opts = optarg;
+            break;
+        case 'v':
+            /* Each occurrence raises the verbosity level */
+            ++g_opts.verbose;
+            break;
+        case 'f':
+            g_opts.foreground = 1;
+            break;
+        case 'l':
+            g_opts.sock_path = optarg;
+            break;
+        case 'h':
+        default:
+            vfs_usage();
+            return -127;
+        }
+    }
+
+    if (g_opts.verbose && !g_opts.foreground) {
+        vfs_error("Option -v requires -f");
+        vfs_usage();
+        return -1;
+    }
+
+    if (optind < argc) {
+        /* Look up the action by name; UCS_VFS_SOCK_ACTION_LAST marks
+         * "not found". Entries without a name are NULL and are skipped */
+        action_str    = argv[optind];
+        g_opts.action = UCS_VFS_SOCK_ACTION_LAST;
+        for (i = 0; i < ucs_static_array_size(vfs_action_names); ++i) {
+            if ((vfs_action_names[i] != NULL) &&
+                !strcmp(action_str, vfs_action_names[i])) {
+                g_opts.action = i;
+            }
+        }
+        if (g_opts.action == UCS_VFS_SOCK_ACTION_LAST) {
+            vfs_error("invalid action '%s'", action_str);
+            vfs_usage();
+            /* Fail like the other argument errors, so the caller stops
+             * before creating directories or daemonizing */
+            return -1;
+        }
+        ++optind;
+    }
+
+    if (optind < argc) {
+        vfs_error("only one action can be specified");
+        vfs_usage();
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Check that the fusermount helper can be executed, by running it with "-V" */
+static int vfs_test_fuse()
+{
+ char *argv[] = {"-V", NULL};
+ return vfs_run_fusermount(argv);
+}
+
+/*
+ * Program entry point: parse arguments, verify the fusermount helper, create
+ * the mount point parent directory, optionally daemonize, set up the socket
+ * address, and perform the requested action.
+ */
+int main(int argc, char **argv)
+{
+ int ret;
+
+ /* Any negative result, including the one returned for -h, exits with -1 */
+ ret = vfs_parse_args(argc, argv);
+ if (ret < 0) {
+ return -1;
+ }
+
+ ret = vfs_test_fuse();
+ if (ret < 0) {
+ return -1;
+ }
+
+ /* An already existing directory is not an error */
+ ret = mkdir(g_opts.mountpoint_dir, S_IRWXU);
+ if ((ret < 0) && (errno != EEXIST)) {
+ vfs_error("could not create directory '%s': %m", g_opts.mountpoint_dir);
+ return -1;
+ }
+
+ /* NOTE(review): the return value of fuse_daemonize() is not checked */
+ if (!g_opts.foreground) {
+ fuse_daemonize(0);
+ }
+
+ if (g_opts.sock_path == NULL) {
+ ret = ucs_vfs_sock_get_address(&g_sockaddr);
+ if (ret < 0) {
+ vfs_error("failed to initialize socket address: %d", ret);
+ return -1;
+ }
+ } else {
+ /* The path is zero-filled first and at most size-1 bytes are copied,
+ * so it stays NUL-terminated; a longer path is silently truncated */
+ g_sockaddr.sun_family = AF_UNIX;
+ memset(g_sockaddr.sun_path, 0, sizeof(g_sockaddr.sun_path));
+ strncpy(g_sockaddr.sun_path, g_opts.sock_path,
+ sizeof(g_sockaddr.sun_path) - 1);
+ }
+
+ switch (g_opts.action) {
+ case VFS_DAEMON_ACTION_START:
+ return vfs_start();
+ case UCS_VFS_SOCK_ACTION_STOP:
+ return vfs_connect_and_act();
+ default:
+ vfs_error("unexpected action %d", g_opts.action);
+ return -1;
+ }
+}
diff --git a/src/tools/vfs/vfs_server.c b/src/tools/vfs/vfs_server.c
new file mode 100644
index 00000000000..e6a62541fbf
--- /dev/null
+++ b/src/tools/vfs/vfs_server.c
@@ -0,0 +1,378 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2020. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "vfs_daemon.h"
+
+#include
+#include
+#include
+
+
+#define VFS_MAX_FDS 1024
+
+/* State of a file descriptor tracked by the server */
+typedef enum {
+ VFS_FD_STATE_LISTENING, /* The listening socket */
+ VFS_FD_STATE_ACCEPTED, /* Accepted connection, no message handled yet */
+ VFS_FD_STATE_MOUNTED, /* File system mounted, fuse fd not sent yet */
+ VFS_FD_STATE_FD_SENT, /* Fuse fd was sent to the client */
+ VFS_FD_STATE_CLOSED /* Entry was removed */
+} vfs_socket_state_t;
+
+/* Per-descriptor server state; pid and fuse_fd are -1 when not set */
+typedef struct {
+ vfs_socket_state_t state;
+ pid_t pid; /* Process id received with the mount request */
+ int fuse_fd; /* File descriptor returned by vfs_mount() */
+} vfs_serever_fd_state_t;
+
+/* Server state: fd_state[i] and poll_fds[i] describe the same descriptor */
+typedef struct {
+ vfs_serever_fd_state_t fd_state[VFS_MAX_FDS];
+ struct pollfd poll_fds[VFS_MAX_FDS];
+ int nfds; /* Number of used entries in both arrays */
+ int stop; /* Set to nonzero to make the server loop exit */
+} vfs_server_context_t;
+
+static vfs_server_context_t vfs_server_context;
+
+/* Printable names of vfs_socket_state_t values, used for logging */
+static const char *vfs_server_fd_state_names[] = {
+ [VFS_FD_STATE_LISTENING] = "LISTENING",
+ [VFS_FD_STATE_ACCEPTED] = "ACCEPTED",
+ [VFS_FD_STATE_MOUNTED] = "MOUNTED",
+ [VFS_FD_STATE_FD_SENT] = "FD_SENT",
+ [VFS_FD_STATE_CLOSED] = "CLOSED"
+};
+
+/**
+ * Log a one-line summary of all tracked descriptors. Does nothing unless the
+ * verbosity level is at least 2.
+ *
+ * @param events If nonzero, also show which of POLLIN ("i") and POLLOUT ("o")
+ * are set in each descriptor's returned events.
+ */
+static void vfs_server_log_context(int events)
+{
+ vfs_serever_fd_state_t *fd_state;
+ char log_message[1024];
+ struct pollfd *pfd;
+ char *p, *endp;
+ int idx;
+
+ if (g_opts.verbose < 2) {
+ return;
+ }
+
+ p = log_message;
+ endp = log_message + sizeof(log_message);
+
+ /* Each entry is printed as: [index]{state-initial fd[i][o] pid} */
+ for (idx = 0; idx < vfs_server_context.nfds; ++idx) {
+ pfd = &vfs_server_context.poll_fds[idx];
+ fd_state = &vfs_server_context.fd_state[idx];
+ snprintf(p, endp - p, "[%d]{%c %d%s%s %d} ", idx,
+ vfs_server_fd_state_names[fd_state->state][0],
+ vfs_server_context.poll_fds[idx].fd,
+ (events && (pfd->revents & POLLIN)) ? "i" : "",
+ (events && (pfd->revents & POLLOUT)) ? "o" : "",
+ fd_state->pid);
+ /* Advance by what was actually stored, which snprintf() bounds to the
+ * remaining space */
+ p += strlen(p);
+ }
+
+ if (p == log_message) {
+ vfs_log("");
+ } else {
+ /* Drop the trailing space */
+ *(p - 1) = '\0';
+ vfs_log("%s", log_message);
+ }
+}
+
+/**
+ * Wait, without timeout, for events on all tracked descriptors.
+ *
+ * @return 0 on success, or the negative value of errno if poll() failed. An
+ *         interruption by a signal (-EINTR) is returned without an error
+ *         message.
+ */
+static int vfs_server_poll_events()
+{
+    int ret;
+
+    vfs_server_log_context(0);
+
+    ret = poll(vfs_server_context.poll_fds, vfs_server_context.nfds, -1);
+    if (ret < 0) {
+        /* Save errno before logging, since the logging call may modify it */
+        ret = -errno;
+        if (ret != -EINTR) {
+            /* Terminate the statement explicitly rather than relying on the
+             * logging macro expanding to a brace block */
+            vfs_error("poll(nfds=%d) failed: %m", vfs_server_context.nfds);
+        }
+        return ret;
+    }
+
+    vfs_server_log_context(1);
+    return 0;
+}
+
+/* Close a file descriptor and report a failure to do so */
+static void vfs_server_close_fd(int fd)
+{
+    if (close(fd) >= 0) {
+        return;
+    }
+
+    vfs_error("failed to close fd %d: %m", fd);
+}
+
+/**
+ * Log the state of one tracked descriptor.
+ *
+ * @param idx      Index of the descriptor in the server context arrays.
+ * @param message  Text to print before the descriptor details.
+ */
+static void vfs_server_log_fd(int idx, const char *message)
+{
+    vfs_serever_fd_state_t *fd_state = &vfs_server_context.fd_state[idx];
+    struct pollfd *pfd               = &vfs_server_context.poll_fds[idx];
+
+    /* Argument order must follow the format string: pid, then fuse_fd */
+    vfs_log("%s fd[%d]=%d %s, pid: %d fuse_fd: %d", message, idx, pfd->fd,
+            vfs_server_fd_state_names[fd_state->state], fd_state->pid,
+            fd_state->fuse_fd);
+}
+
+/**
+ * Switch a file descriptor to non-blocking mode and start tracking it, polling
+ * for input.
+ *
+ * @param fd     File descriptor to add.
+ * @param state  Initial state of the descriptor.
+ *
+ * @return 0 on success, negative error value otherwise. On failure the
+ *         descriptor is not tracked and remains owned by the caller.
+ */
+static int vfs_server_add_fd(int fd, vfs_socket_state_t state)
+{
+    int idx, ret;
+
+    /* Refuse to write past the end of the fixed-size arrays */
+    if (vfs_server_context.nfds >= VFS_MAX_FDS) {
+        vfs_error("cannot add fd %d: reached the limit of %d descriptors", fd,
+                  VFS_MAX_FDS);
+        return -ENOSPC;
+    }
+
+    ret = fcntl(fd, F_GETFL);
+    if (ret < 0) {
+        /* Save errno before logging, since the logging call may modify it */
+        ret = -errno;
+        vfs_error("fcntl(%d, F_GETFL) failed: %m", fd);
+        return ret;
+    }
+
+    ret = fcntl(fd, F_SETFL, ret | O_NONBLOCK);
+    if (ret < 0) {
+        ret = -errno;
+        vfs_error("fcntl(%d, F_SETFL) failed: %m", fd);
+        return ret;
+    }
+
+    idx                                       = vfs_server_context.nfds++;
+    vfs_server_context.fd_state[idx].state    = state;
+    vfs_server_context.fd_state[idx].pid      = -1;
+    vfs_server_context.fd_state[idx].fuse_fd  = -1;
+    vfs_server_context.poll_fds[idx].events   = POLLIN;
+    vfs_server_context.poll_fds[idx].fd       = fd;
+    vfs_server_context.poll_fds[idx].revents  = 0;
+
+    vfs_server_log_fd(idx, "added");
+    return 0;
+}
+
+/**
+ * Release the resources of a tracked descriptor according to its state and
+ * mark its entry as closed. The entry itself stays in the arrays.
+ *
+ * @param idx Index of the descriptor in the server context arrays.
+ */
+static void vfs_server_remove_fd(int idx)
+{
+ vfs_server_log_fd(idx, "removing");
+
+ /* Cases fall through on purpose: a mounted entry also owns the accepted
+ * connection. The listening socket is not closed here. */
+ switch (vfs_server_context.fd_state[idx].state) {
+ case VFS_FD_STATE_FD_SENT:
+ case VFS_FD_STATE_MOUNTED:
+ vfs_server_close_fd(vfs_server_context.fd_state[idx].fuse_fd);
+ vfs_unmount(vfs_server_context.fd_state[idx].pid);
+ /* Fall through */
+ case VFS_FD_STATE_ACCEPTED:
+ vfs_server_close_fd(vfs_server_context.poll_fds[idx].fd);
+ /* Fall through */
+ default:
+ break;
+ }
+
+ /* A negative fd makes poll() ignore this entry */
+ vfs_server_context.fd_state[idx].state = VFS_FD_STATE_CLOSED;
+ vfs_server_context.fd_state[idx].pid = -1;
+ vfs_server_context.fd_state[idx].fuse_fd = -1;
+ vfs_server_context.poll_fds[idx].events = 0;
+ vfs_server_context.poll_fds[idx].fd = -1;
+ vfs_server_context.poll_fds[idx].revents = 0;
+}
+
+/* Remove every tracked descriptor, from the last one to the first */
+static void vfs_server_remove_all_fds()
+{
+    int last_idx;
+
+    while (vfs_server_context.nfds > 0) {
+        /* The count is decremented before the entry is removed */
+        last_idx = vfs_server_context.nfds - 1;
+        vfs_server_context.nfds = last_idx;
+        vfs_server_remove_fd(last_idx);
+    }
+}
+
+/**
+ * Accept a new connection on the listening socket and start tracking it.
+ * If the connection cannot be set up, it is closed.
+ *
+ * @param listen_fd  Listening socket.
+ */
+static void vfs_server_accept(int listen_fd)
+{
+    int ret, connfd;
+
+    connfd = accept(listen_fd, NULL, NULL);
+    if (connfd < 0) {
+        vfs_error("accept(listen_fd=%d) failed: %m", listen_fd);
+        return;
+    }
+
+    ret = ucs_vfs_sock_setopt_passcred(connfd);
+    if (ret < 0) {
+        close(connfd);
+        return;
+    }
+
+    ret = vfs_server_add_fd(connfd, VFS_FD_STATE_ACCEPTED);
+    if (ret < 0) {
+        /* The descriptor was not registered, so nobody else would close it */
+        close(connfd);
+    }
+}
+
+/**
+ * Handle a mount request: mount a file system for process 'pid' and arrange
+ * for the resulting fuse fd to be sent once the connection is writable. On
+ * failure the connection is removed.
+ */
+static void vfs_server_mount(int idx, pid_t pid)
+{
+    vfs_serever_fd_state_t *entry = &vfs_server_context.fd_state[idx];
+    int mounted_fd;
+
+    if (pid < 0) {
+        vfs_error("received invalid pid: %d", pid);
+        vfs_server_remove_fd(idx);
+        return;
+    }
+
+    mounted_fd = vfs_mount(pid);
+    if (mounted_fd < 0) {
+        vfs_server_remove_fd(idx);
+        return;
+    }
+
+    entry->state   = VFS_FD_STATE_MOUNTED;
+    entry->pid     = pid;
+    entry->fuse_fd = mounted_fd;
+
+    /* Also wait for the socket to become writable, to send the reply */
+    vfs_server_context.poll_fds[idx].events |= POLLOUT;
+}
+
+/**
+ * Receive a message from an accepted connection and act on it. The connection
+ * is removed if receiving fails or the action is not one the server handles.
+ *
+ * @param idx  Index of the connection in the server context arrays.
+ */
+static void vfs_server_recv(int idx)
+{
+    ucs_vfs_sock_message_t vfs_msg_in;
+    char message[64];
+    int ret;
+
+    ret = ucs_vfs_sock_recv(vfs_server_context.poll_fds[idx].fd, &vfs_msg_in);
+    if (ret < 0) {
+        vfs_error("failed to receive a message: %d (%s)", ret, strerror(-ret));
+        vfs_server_remove_fd(idx);
+        return;
+    }
+
+    /* The action comes from the peer, so validate it before using it as an
+     * index: only these actions have an entry in vfs_action_names */
+    switch (vfs_msg_in.action) {
+    case UCS_VFS_SOCK_ACTION_STOP:
+    case UCS_VFS_SOCK_ACTION_MOUNT:
+    case UCS_VFS_SOCK_ACTION_NOP:
+        break;
+    default:
+        vfs_error("ignoring invalid action %d", vfs_msg_in.action);
+        vfs_server_remove_fd(idx);
+        return;
+    }
+
+    snprintf(message, sizeof(message), "got action '%s' on",
+             vfs_action_names[vfs_msg_in.action]);
+    vfs_server_log_fd(idx, message);
+
+    switch (vfs_msg_in.action) {
+    case UCS_VFS_SOCK_ACTION_STOP:
+        vfs_server_context.stop = 1;
+        break;
+    case UCS_VFS_SOCK_ACTION_MOUNT:
+        vfs_server_mount(idx, vfs_msg_in.pid);
+        break;
+    default:
+        /* UCS_VFS_SOCK_ACTION_NOP: nothing to do, drop the connection */
+        vfs_server_remove_fd(idx);
+        break;
+    }
+}
+
+/**
+ * Handle input readiness on a tracked descriptor: accept on the listening
+ * socket, receive on an accepted connection, and remove the entry in any
+ * other state (logging it as unexpected unless the fuse fd was already sent).
+ */
+static void vfs_server_handle_pollin(int idx)
+{
+    const vfs_socket_state_t current = vfs_server_context.fd_state[idx].state;
+
+    if (current == VFS_FD_STATE_LISTENING) {
+        vfs_server_accept(vfs_server_context.poll_fds[idx].fd);
+        return;
+    }
+
+    if (current == VFS_FD_STATE_ACCEPTED) {
+        vfs_server_recv(idx);
+        return;
+    }
+
+    if (current != VFS_FD_STATE_FD_SENT) {
+        vfs_server_log_fd(idx, "unexpected POLLIN event on");
+    }
+    vfs_server_remove_fd(idx);
+}
+
+/**
+ * Handle output readiness on a tracked descriptor: send the fuse fd of a
+ * mounted entry to the client. In any other state, or if sending fails, the
+ * entry is removed.
+ *
+ * @param idx Index of the descriptor in the server context arrays.
+ */
+static void vfs_server_handle_pollout(int idx)
+{
+ ucs_vfs_sock_message_t vfs_msg_out;
+ int ret;
+
+ if (vfs_server_context.fd_state[idx].state != VFS_FD_STATE_MOUNTED) {
+ vfs_server_log_fd(idx, "unexpected POLLOUT event on");
+ vfs_server_remove_fd(idx);
+ return;
+ }
+
+ /* Send reply with file descriptor from fuse mount */
+ vfs_msg_out.action = UCS_VFS_SOCK_ACTION_MOUNT_REPLY;
+ vfs_msg_out.fd = vfs_server_context.fd_state[idx].fuse_fd;
+ ret = ucs_vfs_sock_send(vfs_server_context.poll_fds[idx].fd, &vfs_msg_out);
+ if (ret < 0) {
+ vfs_error("failed to send a message: %d", ret);
+ vfs_server_remove_fd(idx);
+ return;
+ }
+
+ /* The reply was sent; stop polling this connection for writability */
+ vfs_server_log_fd(idx, "sent fuse_fd on");
+ vfs_server_context.fd_state[idx].state = VFS_FD_STATE_FD_SENT;
+ vfs_server_context.poll_fds[idx].events &= ~POLLOUT;
+}
+
+/* Copy both the state and the poll entry from 'src_idx' to 'dest_idx' */
+static void vfs_server_copy_fd_state(int dest_idx, int src_idx)
+{
+    vfs_server_context_t *ctx = &vfs_server_context;
+
+    if (dest_idx == src_idx) {
+        return;
+    }
+
+    ctx->fd_state[dest_idx] = ctx->fd_state[src_idx];
+    ctx->poll_fds[dest_idx] = ctx->poll_fds[src_idx];
+}
+
+/*
+ * Signal handler: request the server loop to stop.
+ * NOTE(review): the flag written here is a plain int member rather than a
+ * volatile sig_atomic_t - confirm this is acceptable.
+ */
+static void vfs_server_sighandler(int signo)
+{
+ vfs_server_context.stop = 1;
+}
+
+/* Install the stop handler for SIGINT, SIGHUP and SIGTERM */
+static void vfs_server_set_sighandler()
+{
+    static const int stop_signals[] = {SIGINT, SIGHUP, SIGTERM};
+    struct sigaction action;
+    size_t i;
+
+    action.sa_handler = vfs_server_sighandler;
+    action.sa_flags   = 0;
+    sigemptyset(&action.sa_mask);
+
+    for (i = 0; i < (sizeof(stop_signals) / sizeof(stop_signals[0])); ++i) {
+        sigaction(stop_signals[i], &action, NULL);
+    }
+}
+
+/**
+ * Run the server: poll the listening socket and all client connections, and
+ * dispatch their events, until a stop is requested or polling fails. All
+ * tracked descriptors are released before returning, on every exit path.
+ *
+ * @param listen_fd  Listening socket. It is tracked but not closed here.
+ *
+ * @return 0 if the server was stopped, negative error value otherwise.
+ */
+int vfs_server_loop(int listen_fd)
+{
+    int idx, valid_idx;
+    int ret;
+
+    vfs_server_context.nfds = 0;
+    vfs_server_context.stop = 0;
+
+    vfs_server_set_sighandler();
+
+    ret = vfs_server_add_fd(listen_fd, VFS_FD_STATE_LISTENING);
+    if (ret < 0) {
+        /* Nothing to poll on */
+        return ret;
+    }
+
+    while (!vfs_server_context.stop) {
+        ret = vfs_server_poll_events();
+        if (ret == -EINTR) {
+            /* Interrupted by a signal: not an error, re-check the stop flag */
+            ret = 0;
+            continue;
+        } else if (ret < 0) {
+            /* Leave the loop instead of returning, so that client
+             * connections are closed and file systems are unmounted */
+            break;
+        }
+
+        /* Handle events, and compact the arrays by moving every entry which
+         * is still open to the next free position */
+        valid_idx = 0;
+        for (idx = 0; idx < vfs_server_context.nfds; ++idx) {
+            if (vfs_server_context.poll_fds[idx].events == 0) {
+                vfs_server_copy_fd_state(valid_idx++, idx);
+                continue;
+            }
+
+            if (vfs_server_context.poll_fds[idx].revents & POLLIN) {
+                vfs_server_handle_pollin(idx);
+            }
+            if (vfs_server_context.poll_fds[idx].revents & POLLOUT) {
+                vfs_server_handle_pollout(idx);
+            }
+
+            if (vfs_server_context.fd_state[idx].state != VFS_FD_STATE_CLOSED) {
+                vfs_server_copy_fd_state(valid_idx++, idx);
+            }
+        }
+
+        vfs_server_context.nfds = valid_idx;
+    }
+
+    vfs_server_remove_all_fds();
+
+    return ret;
+}
diff --git a/src/ucm/api/ucm.h b/src/ucm/api/ucm.h
index 6308c369e89..65ff4b29a3b 100644
--- a/src/ucm/api/ucm.h
+++ b/src/ucm/api/ucm.h
@@ -37,6 +37,7 @@ typedef enum ucm_event_type {
UCM_EVENT_SHMDT = UCS_BIT(4),
UCM_EVENT_SBRK = UCS_BIT(5),
UCM_EVENT_MADVISE = UCS_BIT(6),
+ UCM_EVENT_BRK = UCS_BIT(7),
/* Aggregate events */
UCM_EVENT_VM_MAPPED = UCS_BIT(16),
@@ -161,6 +162,15 @@ typedef union ucm_event {
int advice;
} madvise;
+ /*
+ * UCM_EVENT_BRK
+ * brk() is called.
+ */
+ struct {
+ int result;
+ void *addr;
+ } brk;
+
/*
* UCM_EVENT_VM_MAPPED, UCM_EVENT_VM_UNMAPPED
*
@@ -203,7 +213,7 @@ typedef struct ucm_global_config {
ucm_mmap_hook_mode_t mmap_hook_mode; /* MMAP hook mode */
int enable_malloc_hooks; /* Enable installing malloc hooks */
int enable_malloc_reloc; /* Enable installing malloc relocations */
- int enable_cuda_reloc; /* Enable installing CUDA relocations */
+ ucm_mmap_hook_mode_t cuda_hook_mode; /* Cuda hooks mode */
int enable_dynamic_mmap_thresh; /* Enable adaptive mmap threshold */
size_t alloc_alignment; /* Alignment for memory allocations */
int dlopen_process_rpath; /* Process RPATH section in dlopen hook */
@@ -211,7 +221,10 @@ typedef struct ucm_global_config {
} ucm_global_config_t;
-/* Global UCM configuration */
+/*
+ * Global UCM configuration to be set externally.
+ * @deprecated replaced by @ref ucm_library_init.
+ */
extern ucm_global_config_t ucm_global_opts;
@@ -253,6 +266,17 @@ typedef void (*ucm_event_callback_t)(ucm_event_type_t event_type,
ucm_event_t *event, void *arg);
+/**
+ * Initialize UCM library and set its configuration.
+ *
+ * @param [in] ucm_opts UCM library global configuration. If NULL, default
+ * configuration is applied.
+ *
+ * @note Calling this function more than once in the same process has no effect.
+ */
+void ucm_library_init(const ucm_global_config_t *ucm_opts);
+
+
/**
* @brief Install a handler for memory events.
*
diff --git a/src/ucm/bistro/bistro.c b/src/ucm/bistro/bistro.c
index 51a807e91d5..c31741aa8d5 100644
--- a/src/ucm/bistro/bistro.c
+++ b/src/ucm/bistro/bistro.c
@@ -10,13 +10,16 @@
#include
#include
+#include
#include
#include
+#include
+
ucs_status_t ucm_bistro_remove_restore_point(ucm_bistro_restore_point_t *rp)
{
- ucs_assert(rp != NULL);
+ ucm_assert(rp != NULL);
free(rp);
return UCS_OK;
}
@@ -103,8 +106,52 @@ ucs_status_t ucm_bistro_restore(ucm_bistro_restore_point_t *rp)
void *ucm_bistro_restore_addr(ucm_bistro_restore_point_t *rp)
{
- ucs_assert(rp != NULL);
+ ucm_assert(rp != NULL);
return rp->addr;
}
+/*
+ * Allocate 'size' bytes, rounded up to UCS_SYS_PARAGRAPH_SIZE, from a single
+ * read/write/execute memory area which is mapped on first use. Allocations are
+ * serialized by a mutex and are never released.
+ *
+ * Returns a pointer to the allocated memory, or NULL if the area could not be
+ * mapped or does not have enough space left.
+ */
+void *ucm_bistro_allocate_code(size_t size)
+{
+    static const size_t mmap_size = 16 * UCS_KBYTE;
+    static pthread_mutex_t mutex  = PTHREAD_MUTEX_INITIALIZER;
+    static void *mem_area         = MAP_FAILED;
+    static size_t alloc_offset    = 0;
+    size_t alloc_size;
+    void *result;
+
+    pthread_mutex_lock(&mutex);
+
+    if (mem_area == MAP_FAILED) {
+        /* Allocate executable memory block once, and reuse it for
+         * subsequent allocations. We assume bistro would not really need
+         * more than 'mmap_size' in total, since it's used for limited number
+         * of library functions. Also, the memory is never really released, so
+         * our allocator is very simple.
+         *
+         * An anonymous mapping has no backing file, so pass -1 as the file
+         * descriptor, as portable code is expected to.
+         */
+        mem_area = mmap(NULL, ucs_align_up_pow2(mmap_size, ucm_get_page_size()),
+                        PROT_READ | PROT_WRITE | PROT_EXEC,
+                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        if (mem_area == MAP_FAILED) {
+            ucm_error("failed to allocated executable memory of %zu bytes: %m",
+                      mmap_size);
+            result = NULL;
+            goto out;
+        }
+    }
+
+    alloc_size = ucs_align_up_pow2(size, UCS_SYS_PARAGRAPH_SIZE);
+    if ((alloc_size + alloc_offset) > mmap_size) {
+        result = NULL;
+        goto out;
+    }
+
+    /* Allocate next memory block in the mmap-ed area */
+    result        = UCS_PTR_BYTE_OFFSET(mem_area, alloc_offset);
+    alloc_offset += alloc_size;
+
+out:
+    pthread_mutex_unlock(&mutex);
+    return result;
+}
+
#endif
diff --git a/src/ucm/bistro/bistro.h b/src/ucm/bistro/bistro.h
index 16e988700c3..101000455e6 100644
--- a/src/ucm/bistro/bistro.h
+++ b/src/ucm/bistro/bistro.h
@@ -36,6 +36,7 @@ typedef struct ucm_bistro_restore_point ucm_bistro_restore_point_t;
*/
ucs_status_t ucm_bistro_restore(ucm_bistro_restore_point_t *rp);
+
/**
* Remove resore point created by @ref ucm_bistro_patch witout
* restore original function body
@@ -46,6 +47,7 @@ ucs_status_t ucm_bistro_restore(ucm_bistro_restore_point_t *rp);
*/
ucs_status_t ucm_bistro_remove_restore_point(ucm_bistro_restore_point_t *rp);
+
/**
* Get patch address for restore point
*
@@ -55,4 +57,15 @@ ucs_status_t ucm_bistro_remove_restore_point(ucm_bistro_restore_point_t *rp);
*/
void *ucm_bistro_restore_addr(ucm_bistro_restore_point_t *rp);
+
+/**
+ * Allocate executable memory which can be used to create trampolines or
+ * temporary functions.
+ *
+ * @param size Memory size to allocate
+ *
+ * @return Pointer to allocated memory, or NULL if failed.
+ */
+void *ucm_bistro_allocate_code(size_t size);
+
#endif
diff --git a/src/ucm/bistro/bistro_aarch64.c b/src/ucm/bistro/bistro_aarch64.c
index e161292f9ae..8659c0ee6ab 100644
--- a/src/ucm/bistro/bistro_aarch64.c
+++ b/src/ucm/bistro/bistro_aarch64.c
@@ -59,12 +59,10 @@
*/
#define BR(_reg) ((0xd61f << 16) + ((_reg) << 5))
-ucs_status_t ucm_bistro_patch(const char *symbol, void *hook,
+ucs_status_t ucm_bistro_patch(void *func_ptr, void *hook, const char *symbol,
+ void **orig_func_p,
ucm_bistro_restore_point_t **rp)
{
- void *func;
- ucs_status_t status;
-
ucm_bistro_patch_t patch = {
.reg3 = MOVZ(R15, 3, (uintptr_t)hook >> 48),
.reg2 = MOVK(R15, 2, (uintptr_t)hook >> 32),
@@ -72,15 +70,18 @@ ucs_status_t ucm_bistro_patch(const char *symbol, void *hook,
.reg0 = MOVK(R15, 0, (uintptr_t)hook),
.br = BR(R15)
};
+ ucs_status_t status;
- UCM_LOOKUP_SYMBOL(func, symbol);
+ if (orig_func_p != NULL) {
+ return UCS_ERR_UNSUPPORTED;
+ }
- status = ucm_bistro_create_restore_point(func, sizeof(patch), rp);
+ status = ucm_bistro_create_restore_point(func_ptr, sizeof(patch), rp);
if (UCS_STATUS_IS_ERR(status)) {
return status;
}
- return ucm_bistro_apply_patch(func, &patch, sizeof(patch));
+ return ucm_bistro_apply_patch(func_ptr, &patch, sizeof(patch));
}
#endif
diff --git a/src/ucm/bistro/bistro_aarch64.h b/src/ucm/bistro/bistro_aarch64.h
index 487aa923d08..b0770b30fe7 100644
--- a/src/ucm/bistro/bistro_aarch64.h
+++ b/src/ucm/bistro/bistro_aarch64.h
@@ -28,14 +28,19 @@ typedef struct ucm_bistro_patch {
* Set library function call hook using Binary Instrumentation
* method (BISTRO): replace function body by user defined call
*
- * @param symbol function name to replace
- * @param hook user-defined function-replacer
- * @param rp restore point used to restore original function,
- * optional, may be NULL
+ * @param func_ptr Pointer to function to patch.
+ * @param hook User-defined function-replacer.
+ * @param symbol Function name to replace.
+ * @param orig_func_p Unsupported on this architecture and must be NULL.
+ * If set to a non-NULL value, this function returns
+ * @ref UCS_ERR_UNSUPPORTED.
+ * @param rp Restore point used to restore original function.
+ * Optional, may be NULL.
*
* @return Error code as defined by @ref ucs_status_t
*/
-ucs_status_t ucm_bistro_patch(const char *symbol, void *hook,
+ucs_status_t ucm_bistro_patch(void *func_ptr, void *hook, const char *symbol,
+ void **orig_func_p,
ucm_bistro_restore_point_t **rp);
#endif
diff --git a/src/ucm/bistro/bistro_int.h b/src/ucm/bistro/bistro_int.h
index e6c08a4994a..80b423b8cd7 100644
--- a/src/ucm/bistro/bistro_int.h
+++ b/src/ucm/bistro/bistro_int.h
@@ -33,17 +33,4 @@ ucs_status_t ucm_bistro_apply_patch(void *dst, void *patch, size_t len);
ucs_status_t ucm_bistro_create_restore_point(void *addr, size_t len,
ucm_bistro_restore_point_t **rp);
-static inline void *ucm_bistro_lookup(const char *symbol)
-{
- void *addr;
-
- ucs_assert(symbol != NULL);
-
- addr = dlsym(RTLD_NEXT, symbol);
- if (!addr) {
- addr = dlsym(RTLD_DEFAULT, symbol);
- }
- return addr;
-}
-
#endif
diff --git a/src/ucm/bistro/bistro_ppc64.c b/src/ucm/bistro/bistro_ppc64.c
index 4b14250cd97..942346c8c07 100644
--- a/src/ucm/bistro/bistro_ppc64.c
+++ b/src/ucm/bistro/bistro_ppc64.c
@@ -76,7 +76,7 @@ struct ucm_bistro_restore_point {
static void ucm_bistro_fill_base_patch(ucm_bistro_base_patch_t *patch,
uint32_t reg, uintptr_t value)
{
- ucs_assert(patch != NULL);
+ ucm_assert(patch != NULL);
patch->addis = ADDIS ( reg, 0, (value >> 48));
patch->ori1 = ORI ( reg, reg, (value >> 32));
@@ -88,7 +88,7 @@ static void ucm_bistro_fill_base_patch(ucm_bistro_base_patch_t *patch,
static void ucm_bistro_fill_patch(ucm_bistro_patch_t *patch,
uint32_t reg, uintptr_t value)
{
- ucs_assert(patch != NULL);
+ ucm_assert(patch != NULL);
ucm_bistro_fill_base_patch(&patch->super, reg, value);
@@ -134,7 +134,7 @@ static void *ucm_bistro_get_text_addr(void *addr)
#endif
}
-ucs_status_t ucm_bistro_patch_toc(const char *symbol, void *hook,
+ucs_status_t ucm_bistro_patch_toc(void *func_ptr, void *hook,
ucm_bistro_restore_point_t **rp,
uint64_t toc)
{
@@ -143,11 +143,9 @@ ucs_status_t ucm_bistro_patch_toc(const char *symbol, void *hook,
ucm_bistro_restore_point_t restore;
ucm_bistro_patch_t patch;
- UCM_LOOKUP_SYMBOL(func, symbol);
+ restore.entry = func_ptr;
- restore.entry = func;
-
- func = ucm_bistro_get_text_addr(func);
+ func = ucm_bistro_get_text_addr(func_ptr);
hook = ucm_bistro_get_text_addr(hook);
status = ucm_bistro_patch_hook(hook, &restore, toc);
@@ -185,7 +183,7 @@ ucs_status_t ucm_bistro_restore(ucm_bistro_restore_point_t *rp)
{
ucs_status_t status;
- ucs_assert(rp != NULL);
+ ucm_assert(rp != NULL);
status = ucm_bistro_apply_patch(rp->func, &rp->func_patch, sizeof(rp->func_patch));
if (UCS_STATUS_IS_ERR(status)) {
@@ -202,7 +200,7 @@ ucs_status_t ucm_bistro_restore(ucm_bistro_restore_point_t *rp)
void *ucm_bistro_restore_addr(ucm_bistro_restore_point_t *rp)
{
- ucs_assert(rp != NULL);
+ ucm_assert(rp != NULL);
return rp->entry;
}
diff --git a/src/ucm/bistro/bistro_ppc64.h b/src/ucm/bistro/bistro_ppc64.h
index 7b5c3b46e7e..e0d4635ca5a 100644
--- a/src/ucm/bistro/bistro_ppc64.h
+++ b/src/ucm/bistro/bistro_ppc64.h
@@ -4,13 +4,15 @@
* See file LICENSE for terms.
*/
-
#ifndef UCM_BISTRO_BISTRO_PPC64_H_
#define UCM_BISTRO_BISTRO_PPC64_H_
-#include
+#include "bistro.h"
#include
+#include
+#include
+
/* special processing for ppc64 to save and restore TOC (r2)
* Reference: "64-bit PowerPC ELF Application Binary Interface Supplement 1.9" */
@@ -26,26 +28,31 @@
* Set library function call hook using Binary Instrumentation
* method (BISTRO): replace function body by user defined call
*
- * @param symbol function name to replace
- * @param hook user-defined function-replacer
- * @param rp restore point used to restore original function,
- * optional, may be NULL
+ * @param func_ptr Function to patch.
+ * @param hook User-defined function-replacer.
+ * @param rp Restore point used to restore original function.
+ * Optional, may be NULL.
*
* @return Error code as defined by @ref ucs_status_t
*/
/* we have to use inline proxy call to save TOC register
* value - PPC is very sensible to this register value */
-ucs_status_t ucm_bistro_patch_toc(const char *symbol, void *hook,
+ucs_status_t ucm_bistro_patch_toc(void *func_ptr, void *hook,
ucm_bistro_restore_point_t **rp,
uint64_t toc);
-static inline
-ucs_status_t ucm_bistro_patch(const char *symbol, void *hook,
- ucm_bistro_restore_point_t **rp)
+static inline ucs_status_t
+ucm_bistro_patch(void *func_ptr, void *hook, const char *symbol,
+ void **orig_func_p, ucm_bistro_restore_point_t **rp)
{
uint64_t toc;
+
+ if (orig_func_p != NULL) {
+ return UCS_ERR_UNSUPPORTED;
+ }
+
asm volatile ("std 2, %0" : "=m" (toc));
- return ucm_bistro_patch_toc(symbol, hook, rp, toc);
+ return ucm_bistro_patch_toc(func_ptr, hook, rp, toc);
}
#endif
diff --git a/src/ucm/bistro/bistro_x86_64.c b/src/ucm/bistro/bistro_x86_64.c
index da67dbc517f..5b0f7ace0f6 100644
--- a/src/ucm/bistro/bistro_x86_64.c
+++ b/src/ucm/bistro/bistro_x86_64.c
@@ -5,16 +5,16 @@
*/
#ifdef HAVE_CONFIG_H
-# include "config.h"
+#include "config.h"
#endif
/* *******************************************************
* x86 processors family *
* ***************************************************** */
+
#if defined(__x86_64__)
#include
-#include
#include
#include
@@ -22,47 +22,218 @@
#include
#include
#include
-#include
-#include
-ucs_status_t ucm_bistro_patch(const char *symbol, void *hook,
+typedef struct {
+ void *jmp_addr; /* address in the original function to continue from */
+ char code[]; /* copied function prologue, followed by the jump back */
+} ucm_bistro_orig_func_t;
+
+typedef struct {
+ uint8_t opcode; /* 0xff */
+ uint8_t modrm; /* 0x25 */
+ int32_t displ; /* byte distance from the end of this instruction to the
+ * pointer holding the jump target */
+} UCS_S_PACKED ucm_bistro_jmp_indirect_t;
+
+
+/* REX prefix */
+#define UCM_BISTRO_X86_REX_MASK 0xF0 /* Mask */
+#define UCM_BISTRO_X86_REX 0x40 /* Value */
+
+#define UCM_BISTRO_X86_REX_W 0x48 /* REX.W value */
+#define UCM_BISTRO_X86_REX_B 0x41 /* REX.B value */
+
+/* PUSH general register
+ * "push $reg"
+ */
+#define UCM_BISTRO_X86_PUSH_R_MASK 0xF0 /* Mask */
+#define UCM_BISTRO_X86_PUSH_R 0x50 /* Value */
+
+/* Immediate Grp 1(1A), Ev, Iz */
+#define UCM_BISTRO_X86_IMM_GRP1_EV_IZ 0x81
+
+/* MOV Ev,Gv */
+#define UCM_BISTRO_X86_MOV_EV_GV 0x89
+
+/* MOV immediate word or double into word, double, or quad register
+ * "mov $imm32, %reg"
+ */
+#define UCM_BISTRO_X86_MOV_IR_MASK 0xF8 /* Mask */
+#define UCM_BISTRO_X86_MOV_IR 0xB8 /* Value */
+
+/* ModR/M encoding:
+ * [ mod | reg | r/m ]
+ * [ 7 6 | 5 4 3 | 2 1 0 ]
+ */
+#define UCM_BISTRO_X86_MODRM_MOD_SHIFT 6 /* mod */
+#define UCM_BISTRO_X86_MODRM_REG_SHIFT 3 /* reg */
+#define UCM_BISTRO_X86_MODRM_RM_BITS 3 /* r/m */
+
+/* Table 2-2 */
+#define UCM_BISTRO_X86_MODRM_MOD_DISP8 1 /* 0b01 */
+#define UCM_BISTRO_X86_MODRM_MOD_DISP32 2 /* 0b10 */
+#define UCM_BISTRO_X86_MODRM_MOD_REG 3 /* 0b11 */
+#define UCM_BISTRO_X86_MODRM_RM_SIB 4 /* 0b100 */
+
+/* ModR/M encoding for SUB RSP
+ * mod=0b11, reg=0b101 (SUB as opcode extension), r/m=0b100
+ */
+#define UCM_BISTRO_X86_MODRM_SUB_SP 0xEC /* 11 101 100 */
+
+/* ModR/M encoding for EBP/BP/CH/MM5/XMM5, AH/SP/ESP/MM4/XMM4 */
+#define UCM_BISTRO_X86_MODRM_BP_SP 0xE5 /* 11 100 101 */
+
+
+/*
+ * Find the minimal length of initial instructions in the function which can be
+ * safely executed from any memory location.
+ * Uses a very simplified disassembler which supports only the typical
+ * instructions found in function prologue.
+ *
+ * @param func Address of the first instruction to decode.
+ * @param min_length Minimal number of bytes which must be covered by whole
+ * instructions.
+ *
+ * @return If every decoded instruction was recognized: offset of the first
+ * instruction boundary at or beyond @a min_length. Otherwise: offset
+ * of the first unrecognized instruction, which is always smaller
+ * than @a min_length.
+ */
+static size_t ucm_bistro_detect_pic_prefix(const void *func, size_t min_length)
+{
+ uint8_t rex, opcode, modrm, mod;
+ size_t offset, prev_offset;
+
+ offset = 0;
+ while (offset < min_length) {
+ prev_offset = offset;
+ opcode = *(uint8_t*)UCS_PTR_BYTE_OFFSET(func, offset++);
+
+ /* check for REX prefix: a byte whose high nibble is 0x4. If found, it
+ * is remembered in 'rex' and the opcode is re-read from the next byte
+ */
+ if ((opcode & UCM_BISTRO_X86_REX_MASK) == UCM_BISTRO_X86_REX) {
+ rex = opcode;
+ opcode = *(uint8_t*)UCS_PTR_BYTE_OFFSET(func, offset++);
+ } else {
+ rex = 0;
+ }
+
+ /* check the opcode: each recognized instruction advances 'offset' past
+ * its remaining bytes and continues with the next instruction, while
+ * anything else falls through to the bail-out at the end of the loop.
+ * The first case is "push %reg" (no REX, or REX.B), which has no
+ * bytes after the opcode.
+ */
+ if (((rex == 0) || rex == UCM_BISTRO_X86_REX_B) &&
+ ((opcode & UCM_BISTRO_X86_PUSH_R_MASK) == UCM_BISTRO_X86_PUSH_R)) {
+ continue;
+ } else if ((rex == UCM_BISTRO_X86_REX_W) &&
+ (opcode == UCM_BISTRO_X86_IMM_GRP1_EV_IZ)) {
+ modrm = *(uint8_t*)UCS_PTR_BYTE_OFFSET(func, offset++);
+ if (modrm == UCM_BISTRO_X86_MODRM_SUB_SP) {
+ /* sub $imm32, %rsp */
+ offset += sizeof(uint32_t);
+ continue;
+ }
+ } else if ((rex == UCM_BISTRO_X86_REX_W) &&
+ (opcode == UCM_BISTRO_X86_MOV_EV_GV)) {
+ modrm = *(uint8_t*)UCS_PTR_BYTE_OFFSET(func, offset++);
+ if (modrm == UCM_BISTRO_X86_MODRM_BP_SP) {
+ /* mov %rsp, %rbp */
+ continue;
+ }
+ mod = modrm >> UCM_BISTRO_X86_MODRM_MOD_SHIFT;
+ if ((mod != UCM_BISTRO_X86_MODRM_MOD_REG) &&
+ ((modrm & UCS_MASK(UCM_BISTRO_X86_MODRM_RM_BITS)) ==
+ UCM_BISTRO_X86_MODRM_RM_SIB)) {
+ /* r/m = 0b100, mod = 0b00/0b01/0b10: memory operand addressed
+ * through a SIB byte, optionally followed by a displacement.
+ * NOTE(review): with mod = 0b00 and SIB base = 0b101 the
+ * encoding also carries a disp32 which is not skipped here -
+ * confirm against the SIB addressing table. */
+ ++offset; /* skip SIB */
+ if (mod == UCM_BISTRO_X86_MODRM_MOD_DISP8) {
+ offset += sizeof(uint8_t); /* skip disp8 */
+ } else if (mod == UCM_BISTRO_X86_MODRM_MOD_DISP32) {
+ offset += sizeof(uint32_t); /* skip disp32 */
+ }
+ continue;
+ }
+ } else if ((rex == 0) &&
+ ((opcode & UCM_BISTRO_X86_MOV_IR_MASK) == UCM_BISTRO_X86_MOV_IR)) {
+ offset += sizeof(uint32_t); /* mov $imm32, %reg: skip imm32 */
+ continue;
+ }
+
+ /* unsupported instruction - bail, reporting the offset at which this
+ * instruction starts */
+ return prev_offset;
+ }
+
+ return offset;
+}
+
+/*
+ * Build a trampoline which behaves like the original (unpatched) function:
+ * the position-independent prologue of the function is copied to a newly
+ * allocated code buffer, followed by an indirect jump to the rest of the
+ * function body.
+ *
+ * @param func_ptr    Function which is about to be patched.
+ * @param patch_len   Number of bytes at @a func_ptr the patch will overwrite.
+ * @param symbol      Function name, used only for debug output.
+ * @param orig_func_p Filled with the trampoline entry point on success.
+ *
+ * @return UCS_OK on success, UCS_ERR_UNSUPPORTED if the function prologue
+ *         could not be decoded for at least @a patch_len bytes,
+ *         UCS_ERR_NO_MEMORY if the code buffer could not be allocated.
+ */
+static ucs_status_t
+ucm_bistro_construct_orig_func(const void *func_ptr, size_t patch_len,
+                               const char *symbol, void **orig_func_p)
+{
+    ucm_bistro_jmp_indirect_t *jmp_back;
+    ucm_bistro_orig_func_t *orig_func;
+    size_t prefix_len, code_size;
+
+    prefix_len = ucm_bistro_detect_pic_prefix(func_ptr, patch_len);
+    ucm_debug("'%s' at %p prefix length %zu/%zu", symbol, func_ptr, prefix_len,
+              patch_len);
+    if (prefix_len < patch_len) {
+        return UCS_ERR_UNSUPPORTED;
+    }
+
+    /* Allocate executable memory. The prefix ends on an instruction boundary
+     * and may therefore be longer than the patch, so the buffer must be sized
+     * by prefix_len: this many bytes are copied, and the jump is placed
+     * right after them. */
+    code_size = sizeof(*orig_func) + prefix_len + sizeof(*jmp_back);
+    orig_func = ucm_bistro_allocate_code(code_size);
+    if (orig_func == NULL) {
+        return UCS_ERR_NO_MEMORY;
+    }
+
+    /* Copy code fragment from original function */
+    memcpy(orig_func->code, func_ptr, prefix_len);
+
+    /* Indirect jump to *orig_func->jmp_addr, which points to the first
+     * instruction after the copied prefix. The displacement is counted from
+     * the end of the jump instruction (jmp_back + 1). */
+    orig_func->jmp_addr = UCS_PTR_BYTE_OFFSET(func_ptr, prefix_len);
+    jmp_back            = UCS_PTR_BYTE_OFFSET(orig_func->code, prefix_len);
+    jmp_back->opcode    = 0xff;
+    jmp_back->modrm     = 0x25;
+    jmp_back->displ     = UCS_PTR_BYTE_DIFF(jmp_back + 1, &orig_func->jmp_addr);
+    *orig_func_p        = orig_func->code;
+
+    return UCS_OK;
+}
+
+ucs_status_t ucm_bistro_patch(void *func_ptr, void *hook, const char *symbol,
+ void **orig_func_p,
ucm_bistro_restore_point_t **rp)
{
- ucm_bistro_jmp_r11_patch_t patch_jmp_r11 = {
+ ucm_bistro_jmp_r11_patch_t jmp_r11 = {
.mov_r11 = {0x49, 0xbb},
.jmp_r11 = {0x41, 0xff, 0xe3}
};
- ucm_bistro_jmp_near_patch_t patch_jmp_near = {
+ ucm_bistro_jmp_near_patch_t jmp_near = {
.jmp_rel = 0xe9
};
- void *func, *patch, *jmp_base;
+ void *patch, *jmp_base;
ucs_status_t status;
ptrdiff_t jmp_disp;
size_t patch_len;
- UCM_LOOKUP_SYMBOL(func, symbol);
-
- jmp_base = UCS_PTR_BYTE_OFFSET(func, sizeof(patch_jmp_near));
+ jmp_base = UCS_PTR_BYTE_OFFSET(func_ptr, sizeof(jmp_near));
jmp_disp = UCS_PTR_BYTE_DIFF(jmp_base, hook);
if (labs(jmp_disp) < INT32_MAX) {
/* if 32-bit near jump is possible, use it, since it's a short 5-byte
* instruction which reduces the chances of racing with other thread
*/
- patch_jmp_near.disp = jmp_disp;
- patch = &patch_jmp_near;
- patch_len = sizeof(patch_jmp_near);
+ jmp_near.disp = jmp_disp;
+ patch = &jmp_near;
+ patch_len = sizeof(jmp_near);
} else {
- patch_jmp_r11.ptr = hook;
- patch = &patch_jmp_r11;
- patch_len = sizeof(patch_jmp_r11);
+ jmp_r11.ptr = hook;
+ patch = &jmp_r11;
+ patch_len = sizeof(jmp_r11);
}
- status = ucm_bistro_create_restore_point(func, patch_len, rp);
+ if (orig_func_p != NULL) {
+ status = ucm_bistro_construct_orig_func(func_ptr, patch_len, symbol,
+ orig_func_p);
+ if (status != UCS_OK) {
+ return status;
+ }
+ }
+
+ status = ucm_bistro_create_restore_point(func_ptr, patch_len, rp);
if (UCS_STATUS_IS_ERR(status)) {
return status;
}
- return ucm_bistro_apply_patch(func, patch, patch_len);
+ return ucm_bistro_apply_patch(func_ptr, patch, patch_len);
}
+
#endif
diff --git a/src/ucm/bistro/bistro_x86_64.h b/src/ucm/bistro/bistro_x86_64.h
index 04f09b87415..87ef636cc8a 100644
--- a/src/ucm/bistro/bistro_x86_64.h
+++ b/src/ucm/bistro/bistro_x86_64.h
@@ -35,14 +35,20 @@ typedef struct ucm_bistro_jmp_near_patch {
* Set library function call hook using Binary Instrumentation
* method (BISTRO): replace function body by user defined call
*
- * @param symbol function name to replace
- * @param hook user-defined function-replacer
- * @param rp restore point used to restore original function,
- * optional, may be NULL
+ * @param func_ptr Pointer to function to patch.
+ * @param hook User-defined function-replacer.
+ * @param symbol Function name to replace.
+ * @param orig_func_p If non-NULL, set to pointer to a trampoline which calls
+ * the original function (before patching). If it's not
+ * possible to create such trampoline, the function returns
+ * @ref UCS_ERR_UNSUPPORTED.
+ * @param rp Restore point used to restore original function.
+ * Optional, may be NULL.
*
* @return Error code as defined by @ref ucs_status_t
*/
-ucs_status_t ucm_bistro_patch(const char *symbol, void *hook,
+ucs_status_t ucm_bistro_patch(void *func_ptr, void *hook, const char *symbol,
+ void **orig_func_p,
ucm_bistro_restore_point_t **rp);
#endif
diff --git a/src/ucm/cuda/Makefile.am b/src/ucm/cuda/Makefile.am
index 438960e2c28..03fd621f97a 100644
--- a/src/ucm/cuda/Makefile.am
+++ b/src/ucm/cuda/Makefile.am
@@ -9,7 +9,7 @@ if HAVE_CUDA
module_LTLIBRARIES = libucm_cuda.la
libucm_cuda_la_CPPFLAGS = $(BASE_CPPFLAGS) $(CUDA_CPPFLAGS)
libucm_cuda_la_CFLAGS = $(BASE_CFLAGS) $(CUDA_CFLAGS)
-libucm_cuda_la_LIBADD = ../libucm.la
+libucm_cuda_la_LIBADD = ../libucm.la $(CUDA_LIBS)
libucm_cuda_la_LDFLAGS = $(UCM_MODULE_LDFLAGS) \
$(patsubst %, -Xlinker %, $(CUDA_LDFLAGS)) \
-version-info $(SOVERSION)
diff --git a/src/ucm/cuda/cudamem.c b/src/ucm/cuda/cudamem.c
index 02fd8c32f03..dfab90b886f 100644
--- a/src/ucm/cuda/cudamem.c
+++ b/src/ucm/cuda/cudamem.c
@@ -6,392 +6,276 @@
*/
#ifdef HAVE_CONFIG_H
-# include "config.h"
+#include "config.h"
#endif
-#include
+#include "cudamem.h"
#include
+#include
#include
#include
#include
#include
-#include
#include
#include
#include
-#include
#include
-#include
-
-
-UCM_DEFINE_REPLACE_DLSYM_FUNC(cuMemFree, CUresult, -1, CUdeviceptr)
-UCM_DEFINE_REPLACE_DLSYM_FUNC(cuMemFreeHost, CUresult, -1, void *)
-UCM_DEFINE_REPLACE_DLSYM_FUNC(cuMemAlloc, CUresult, -1, CUdeviceptr *, size_t)
-UCM_DEFINE_REPLACE_DLSYM_FUNC(cuMemAllocManaged, CUresult, -1, CUdeviceptr *,
- size_t, unsigned int)
-UCM_DEFINE_REPLACE_DLSYM_FUNC(cuMemAllocPitch, CUresult, -1, CUdeviceptr *, size_t *,
- size_t, size_t, unsigned int)
-UCM_DEFINE_REPLACE_DLSYM_FUNC(cuMemHostGetDevicePointer, CUresult, -1, CUdeviceptr *,
- void *, unsigned int)
-UCM_DEFINE_REPLACE_DLSYM_FUNC(cuMemHostUnregister, CUresult, -1, void *)
-UCM_DEFINE_REPLACE_DLSYM_FUNC(cudaFree, cudaError_t, -1, void*)
-UCM_DEFINE_REPLACE_DLSYM_FUNC(cudaFreeHost, cudaError_t, -1, void*)
-UCM_DEFINE_REPLACE_DLSYM_FUNC(cudaMalloc, cudaError_t, -1, void**, size_t)
-UCM_DEFINE_REPLACE_DLSYM_FUNC(cudaMallocManaged, cudaError_t, -1, void**, size_t, unsigned int)
-UCM_DEFINE_REPLACE_DLSYM_FUNC(cudaMallocPitch, cudaError_t, -1, void**, size_t *,
- size_t, size_t)
-UCM_DEFINE_REPLACE_DLSYM_FUNC(cudaHostGetDevicePointer, cudaError_t, -1, void**,
- void *, unsigned int)
-UCM_DEFINE_REPLACE_DLSYM_FUNC(cudaHostUnregister, cudaError_t, -1, void*)
-
-#if ENABLE_SYMBOL_OVERRIDE
-UCM_OVERRIDE_FUNC(cuMemFree, CUresult)
-UCM_OVERRIDE_FUNC(cuMemFreeHost, CUresult)
-UCM_OVERRIDE_FUNC(cuMemAlloc, CUresult)
-UCM_OVERRIDE_FUNC(cuMemAllocManaged, CUresult)
-UCM_OVERRIDE_FUNC(cuMemAllocPitch, CUresult)
-UCM_OVERRIDE_FUNC(cuMemHostGetDevicePointer, CUresult)
-UCM_OVERRIDE_FUNC(cuMemHostUnregister, CUresult)
-UCM_OVERRIDE_FUNC(cudaFree, cudaError_t)
-UCM_OVERRIDE_FUNC(cudaFreeHost, cudaError_t)
-UCM_OVERRIDE_FUNC(cudaMalloc, cudaError_t)
-UCM_OVERRIDE_FUNC(cudaMallocManaged, cudaError_t)
-UCM_OVERRIDE_FUNC(cudaMallocPitch, cudaError_t)
-UCM_OVERRIDE_FUNC(cudaHostGetDevicePointer, cudaError_t)
-UCM_OVERRIDE_FUNC(cudaHostUnregister, cudaError_t)
-#endif
-static void ucm_cuda_set_ptr_attr(CUdeviceptr dptr)
-{
- if ((void*)dptr == NULL) {
- ucm_trace("skipping cuPointerSetAttribute for null pointer");
- return;
+/* Create a body of CUDA memory allocation replacement function.
+ * The generated ucm_<_name>() calls ucm_orig_<_name>() and, only if it
+ * returned _success, reads the allocated pointer from *ptr_p and dispatches a
+ * memory allocation event of type _mem_type with length _size.
+ * _size is an expression over the generated argument names (the uses in this
+ * file pass arg0, or arg1 * arg2) - presumably those names are produced by
+ * UCM_FUNC_DEFINE_ARGS; confirm against its definition.
+ * _args_fmt is a printf-style format for the arguments following ptr_p. */
+#define UCM_CUDA_ALLOC_FUNC(_name, _mem_type, _retval, _success, _size, \
+ _ptr_type, _args_fmt, ...) \
+ _retval ucm_##_name(_ptr_type *ptr_p, UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)) \
+ { \
+ _ptr_type ptr; \
+ _retval ret; \
+ \
+ ucm_event_enter(); \
+ ret = ucm_orig_##_name(ptr_p, UCM_FUNC_PASS_ARGS(__VA_ARGS__)); \
+ if (ret == (_success)) { \
+ ptr = *ptr_p; \
+ ucm_trace("%s(" _args_fmt ") allocated %p", __FUNCTION__, \
+ UCM_FUNC_PASS_ARGS(__VA_ARGS__), (void*)ptr); \
+ ucm_cuda_dispatch_mem_alloc((CUdeviceptr)ptr, (_size), \
+ (_mem_type)); \
+ } \
+ ucm_event_leave(); \
+ return ret; \
}
- unsigned int value = 1;
- CUresult ret;
- const char *cu_err_str;
+/* Create a body of CUDA memory release replacement function.
+ * The generated ucm_<_name>() dispatches the memory free event of type
+ * _mem_type first, and only then calls ucm_orig_<_name>() to actually release
+ * the memory. The return value is the one of the original function. */
+#define UCM_CUDA_FREE_FUNC(_name, _retval, _ptr_type, _mem_type) \
+ _retval ucm_##_name(_ptr_type ptr) \
+ { \
+ _retval ret; \
+ \
+ ucm_event_enter(); \
+ ucm_trace("%s(ptr=%p)", __FUNCTION__, (void*)ptr); \
+ ucm_cuda_dispatch_mem_free((CUdeviceptr)ptr, _mem_type, #_name); \
+ ret = ucm_orig_##_name(ptr); \
+ ucm_event_leave(); \
+ return ret; \
+ }
- ret = cuPointerSetAttribute(&value, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, dptr);
- if (ret != CUDA_SUCCESS) {
- cuGetErrorString(ret, &cu_err_str);
- ucm_warn("cuPointerSetAttribute(%p) failed: %s", (void *) dptr, cu_err_str);
+#define UCM_CUDA_FUNC_ENTRY(_func) \
+ { \
+ {#_func, ucm_override_##_func}, (void**)&ucm_orig_##_func \
}
-}
-static UCS_F_ALWAYS_INLINE void
-ucm_dispatch_mem_type_alloc(void *addr, size_t length, ucs_memory_type_t mem_type)
+typedef struct {
+ ucm_reloc_patch_t patch; /* symbol name and its replacement function */
+ void **orig_func_ptr; /* location of the pointer to the original function */
+} ucm_cuda_func_t;
+
+
+/* Driver API */
+UCM_DEFINE_REPLACE_DLSYM_PTR_FUNC(cuMemAlloc, CUresult, -1, CUdeviceptr*,
+ size_t)
+UCM_DEFINE_REPLACE_DLSYM_PTR_FUNC(cuMemAlloc_v2, CUresult, -1, CUdeviceptr*,
+ size_t)
+UCM_DEFINE_REPLACE_DLSYM_PTR_FUNC(cuMemAllocManaged, CUresult, -1, CUdeviceptr*,
+ size_t, unsigned int)
+UCM_DEFINE_REPLACE_DLSYM_PTR_FUNC(cuMemAllocPitch, CUresult, -1, CUdeviceptr*,
+ size_t*, size_t, size_t, unsigned int)
+UCM_DEFINE_REPLACE_DLSYM_PTR_FUNC(cuMemAllocPitch_v2, CUresult, -1,
+ CUdeviceptr*, size_t*, size_t, size_t,
+ unsigned int)
+UCM_DEFINE_REPLACE_DLSYM_PTR_FUNC(cuMemFree, CUresult, -1, CUdeviceptr)
+UCM_DEFINE_REPLACE_DLSYM_PTR_FUNC(cuMemFree_v2, CUresult, -1, CUdeviceptr)
+UCM_DEFINE_REPLACE_DLSYM_PTR_FUNC(cuMemFreeHost, CUresult, -1, void*)
+UCM_DEFINE_REPLACE_DLSYM_PTR_FUNC(cuMemFreeHost_v2, CUresult, -1, void*)
+
+/* Runtime API */
+UCM_DEFINE_REPLACE_DLSYM_PTR_FUNC(cudaFree, cudaError_t, -1, void*)
+UCM_DEFINE_REPLACE_DLSYM_PTR_FUNC(cudaFreeHost, cudaError_t, -1, void*)
+UCM_DEFINE_REPLACE_DLSYM_PTR_FUNC(cudaMalloc, cudaError_t, -1, void**, size_t)
+UCM_DEFINE_REPLACE_DLSYM_PTR_FUNC(cudaMallocManaged, cudaError_t, -1, void**,
+ size_t, unsigned int)
+UCM_DEFINE_REPLACE_DLSYM_PTR_FUNC(cudaMallocPitch, cudaError_t, -1, void**,
+ size_t*, size_t, size_t)
+
+static void ucm_cuda_dispatch_mem_alloc(CUdeviceptr ptr, size_t length,
+ ucs_memory_type_t mem_type)
{
+ unsigned sync_atr_value = 1;
+ const char *cu_err_str;
ucm_event_t event;
+ CUresult ret;
- event.mem_type.address = addr;
+ if ((ptr != 0) && (mem_type == UCS_MEMORY_TYPE_CUDA)) {
+ /* Synchronous operation for GPU direct */
+ ret = cuPointerSetAttribute(&sync_atr_value,
+ CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, ptr);
+ if (ret != CUDA_SUCCESS) {
+ cuGetErrorString(ret, &cu_err_str);
+ ucm_warn("cuPointerSetAttribute(%p) failed: %s", (void*)ptr,
+ cu_err_str);
+ }
+ }
+
+ event.mem_type.address = (void*)ptr;
event.mem_type.size = length;
event.mem_type.mem_type = mem_type;
ucm_event_dispatch(UCM_EVENT_MEM_TYPE_ALLOC, &event);
}
-static UCS_F_ALWAYS_INLINE void
-ucm_dispatch_mem_type_free(void *addr, size_t length, ucs_memory_type_t mem_type)
+static void ucm_cuda_dispatch_mem_free(CUdeviceptr ptr,
+ ucs_memory_type_t mem_type,
+ const char *func_name)
{
ucm_event_t event;
-
- event.mem_type.address = addr;
- event.mem_type.size = length;
- event.mem_type.mem_type = mem_type;
- ucm_event_dispatch(UCM_EVENT_MEM_TYPE_FREE, &event);
-}
-
-static void ucm_cudafree_dispatch_events(CUdeviceptr dptr, const char *func_name)
-{
- CUresult ret;
CUdeviceptr pbase;
- size_t psize;
+ size_t length;
+ CUresult ret;
- if (dptr == 0) {
+ if (ptr == 0) {
return;
}
- ret = cuMemGetAddressRange(&pbase, &psize, dptr);
+ ret = cuMemGetAddressRange(&pbase, &length, ptr);
if (ret == CUDA_SUCCESS) {
- if (dptr != pbase) {
+ if (ptr != pbase) {
ucm_warn("%s(%p) called with unexpected pointer (expected: %p)",
- func_name, (void*)dptr, (void*)pbase);
+ func_name, (void*)ptr, (void*)pbase);
}
} else {
- ucm_debug("cuMemGetAddressRange(devPtr=%p) failed", (void*)dptr);
- psize = 1; /* set minimum length */
+ ucm_debug("cuMemGetAddressRange(devPtr=%p) failed", (void*)ptr);
+ length = 1; /* set minimum length */
}
- ucm_dispatch_mem_type_free((void *)dptr, psize, UCS_MEMORY_TYPE_CUDA);
-}
-
-CUresult ucm_cuMemFree(CUdeviceptr dptr)
-{
- CUresult ret;
-
- ucm_event_enter();
-
- ucm_trace("ucm_cuMemFree(dptr=%p)",(void*)dptr);
-
- ucm_cudafree_dispatch_events(dptr, "cuMemFree");
-
- ret = ucm_orig_cuMemFree(dptr);
-
- ucm_event_leave();
- return ret;
-}
-
-CUresult ucm_cuMemFreeHost(void *p)
-{
- CUresult ret;
-
- ucm_event_enter();
-
- ucm_trace("ucm_cuMemFreeHost(ptr=%p)", p);
-
- ucm_dispatch_vm_munmap(p, 0);
-
- ret = ucm_orig_cuMemFreeHost(p);
-
- ucm_event_leave();
- return ret;
-}
-
-CUresult ucm_cuMemAlloc(CUdeviceptr *dptr, size_t size)
-{
- CUresult ret;
-
- ucm_event_enter();
-
- ret = ucm_orig_cuMemAlloc(dptr, size);
- if (ret == CUDA_SUCCESS) {
- ucm_trace("ucm_cuMemAlloc(dptr=%p size:%lu)",(void *)*dptr, size);
- ucm_dispatch_mem_type_alloc((void *)*dptr, size, UCS_MEMORY_TYPE_CUDA);
- ucm_cuda_set_ptr_attr(*dptr);
- }
-
- ucm_event_leave();
- return ret;
-}
-
-CUresult ucm_cuMemAllocManaged(CUdeviceptr *dptr, size_t size, unsigned int flags)
-{
- CUresult ret;
-
- ucm_event_enter();
-
- ret = ucm_orig_cuMemAllocManaged(dptr, size, flags);
- if (ret == CUDA_SUCCESS) {
- ucm_trace("ucm_cuMemAllocManaged(dptr=%p size:%lu, flags:%d)",
- (void *)*dptr, size, flags);
- ucm_dispatch_mem_type_alloc((void *)*dptr, size,
- UCS_MEMORY_TYPE_CUDA_MANAGED);
- }
-
- ucm_event_leave();
- return ret;
-}
-
-CUresult ucm_cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch,
- size_t WidthInBytes, size_t Height,
- unsigned int ElementSizeBytes)
-{
- CUresult ret;
-
- ucm_event_enter();
-
- ret = ucm_orig_cuMemAllocPitch(dptr, pPitch, WidthInBytes, Height, ElementSizeBytes);
- if (ret == CUDA_SUCCESS) {
- ucm_trace("ucm_cuMemAllocPitch(dptr=%p size:%lu)",(void *)*dptr,
- (WidthInBytes * Height));
- ucm_dispatch_mem_type_alloc((void *)*dptr, WidthInBytes * Height,
- UCS_MEMORY_TYPE_CUDA);
- ucm_cuda_set_ptr_attr(*dptr);
- }
-
- ucm_event_leave();
- return ret;
-}
-
-CUresult ucm_cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags)
-{
- CUresult ret;
-
- ucm_event_enter();
-
- ret = ucm_orig_cuMemHostGetDevicePointer(pdptr, p, Flags);
- if (ret == CUDA_SUCCESS) {
- ucm_trace("ucm_cuMemHostGetDevicePointer(pdptr=%p p=%p)",(void *)*pdptr, p);
- }
-
- ucm_event_leave();
- return ret;
-}
-
-CUresult ucm_cuMemHostUnregister(void *p)
-{
- CUresult ret;
-
- ucm_event_enter();
-
- ucm_trace("ucm_cuMemHostUnregister(ptr=%p)", p);
-
- ret = ucm_orig_cuMemHostUnregister(p);
-
- ucm_event_leave();
- return ret;
-}
-
-cudaError_t ucm_cudaFree(void *devPtr)
-{
- cudaError_t ret;
-
- ucm_event_enter();
-
- ucm_trace("ucm_cudaFree(devPtr=%p)", devPtr);
-
- ucm_cudafree_dispatch_events((CUdeviceptr)devPtr, "cudaFree");
-
- ret = ucm_orig_cudaFree(devPtr);
-
- ucm_event_leave();
-
- return ret;
-}
-
-cudaError_t ucm_cudaFreeHost(void *ptr)
-{
- cudaError_t ret;
-
- ucm_event_enter();
-
- ucm_trace("ucm_cudaFreeHost(ptr=%p)", ptr);
-
- ucm_dispatch_vm_munmap(ptr, 0);
-
- ret = ucm_orig_cudaFreeHost(ptr);
-
- ucm_event_leave();
- return ret;
+ event.mem_type.address = (void*)ptr;
+ event.mem_type.size = length;
+ event.mem_type.mem_type = mem_type;
+ ucm_event_dispatch(UCM_EVENT_MEM_TYPE_FREE, &event);
}
-cudaError_t ucm_cudaMalloc(void **devPtr, size_t size)
-{
- cudaError_t ret;
-
- ucm_event_enter();
-
- ret = ucm_orig_cudaMalloc(devPtr, size);
- if (ret == cudaSuccess) {
- ucm_trace("ucm_cudaMalloc(devPtr=%p size:%lu)", *devPtr, size);
- ucm_dispatch_mem_type_alloc(*devPtr, size, UCS_MEMORY_TYPE_CUDA);
- ucm_cuda_set_ptr_attr((CUdeviceptr) *devPtr);
- }
-
- ucm_event_leave();
+/* Driver API replacements */
+UCM_CUDA_ALLOC_FUNC(cuMemAlloc, UCS_MEMORY_TYPE_CUDA, CUresult, CUDA_SUCCESS,
+ arg0, CUdeviceptr, "size=%zu", size_t)
+UCM_CUDA_ALLOC_FUNC(cuMemAlloc_v2, UCS_MEMORY_TYPE_CUDA, CUresult, CUDA_SUCCESS,
+ arg0, CUdeviceptr, "size=%zu", size_t)
+UCM_CUDA_ALLOC_FUNC(cuMemAllocManaged, UCS_MEMORY_TYPE_CUDA_MANAGED, CUresult,
+ CUDA_SUCCESS, arg0, CUdeviceptr, "size=%zu flags=0x%x",
+ size_t, unsigned)
+UCM_CUDA_ALLOC_FUNC(cuMemAllocPitch, UCS_MEMORY_TYPE_CUDA, CUresult,
+ CUDA_SUCCESS, (size_t)arg1 * arg2, CUdeviceptr,
+ "pitch=%p width=%zu height=%zu elem=%u", size_t*, size_t,
+ size_t, unsigned)
+UCM_CUDA_ALLOC_FUNC(cuMemAllocPitch_v2, UCS_MEMORY_TYPE_CUDA, CUresult,
+ CUDA_SUCCESS, (size_t)arg1 * arg2, CUdeviceptr,
+ "pitch=%p width=%zu height=%zu elem=%u", size_t*, size_t,
+ size_t, unsigned)
+UCM_CUDA_FREE_FUNC(cuMemFree, CUresult, CUdeviceptr, UCS_MEMORY_TYPE_CUDA)
+UCM_CUDA_FREE_FUNC(cuMemFree_v2, CUresult, CUdeviceptr, UCS_MEMORY_TYPE_CUDA)
+UCM_CUDA_FREE_FUNC(cuMemFreeHost, CUresult, void*, UCS_MEMORY_TYPE_HOST)
+UCM_CUDA_FREE_FUNC(cuMemFreeHost_v2, CUresult, void*, UCS_MEMORY_TYPE_HOST)
+
+static ucm_cuda_func_t ucm_cuda_driver_funcs[] = {
+ UCM_CUDA_FUNC_ENTRY(cuMemAlloc),
+ UCM_CUDA_FUNC_ENTRY(cuMemAlloc_v2),
+ UCM_CUDA_FUNC_ENTRY(cuMemAllocManaged),
+ UCM_CUDA_FUNC_ENTRY(cuMemAllocPitch),
+ UCM_CUDA_FUNC_ENTRY(cuMemAllocPitch_v2),
+ UCM_CUDA_FUNC_ENTRY(cuMemFree),
+ UCM_CUDA_FUNC_ENTRY(cuMemFree_v2),
+ UCM_CUDA_FUNC_ENTRY(cuMemFreeHost),
+ UCM_CUDA_FUNC_ENTRY(cuMemFreeHost_v2),
+ {{NULL}, NULL} /* end marker: NULL patch.symbol stops the iteration */
+};
- return ret;
-}
+/* Runtime API replacements */
+UCM_CUDA_ALLOC_FUNC(cudaMalloc, UCS_MEMORY_TYPE_CUDA, cudaError_t, cudaSuccess,
+ arg0, void*, "size=%zu", size_t)
+UCM_CUDA_ALLOC_FUNC(cudaMallocManaged, UCS_MEMORY_TYPE_CUDA_MANAGED,
+ cudaError_t, cudaSuccess, arg0, void*,
+ "size=%zu flags=0x%x", size_t, unsigned)
+UCM_CUDA_ALLOC_FUNC(cudaMallocPitch, UCS_MEMORY_TYPE_CUDA, cudaError_t,
+ cudaSuccess, (size_t)arg1 * arg2, void*,
+ "pitch=%p width=%zu height=%zu", size_t*, size_t, size_t)
+UCM_CUDA_FREE_FUNC(cudaFree, cudaError_t, void*, UCS_MEMORY_TYPE_CUDA)
+UCM_CUDA_FREE_FUNC(cudaFreeHost, cudaError_t, void*, UCS_MEMORY_TYPE_HOST)
+
+static ucm_cuda_func_t ucm_cuda_runtime_funcs[] = {
+ UCM_CUDA_FUNC_ENTRY(cudaFree),
+ UCM_CUDA_FUNC_ENTRY(cudaFreeHost),
+ UCM_CUDA_FUNC_ENTRY(cudaMalloc),
+ UCM_CUDA_FUNC_ENTRY(cudaMallocManaged),
+ UCM_CUDA_FUNC_ENTRY(cudaMallocPitch),
+ {{NULL}, NULL} /* end marker: NULL patch.symbol stops the iteration */
+};
-cudaError_t ucm_cudaMallocManaged(void **devPtr, size_t size, unsigned int flags)
+static ucm_mmap_hook_mode_t ucm_cuda_hook_mode()
{
- cudaError_t ret;
-
- ucm_event_enter();
-
- ret = ucm_orig_cudaMallocManaged(devPtr, size, flags);
- if (ret == cudaSuccess) {
- ucm_trace("ucm_cudaMallocManaged(devPtr=%p size:%lu flags:%d)",
- *devPtr, size, flags);
- ucm_dispatch_mem_type_alloc(*devPtr, size, UCS_MEMORY_TYPE_CUDA_MANAGED);
- }
-
- ucm_event_leave();
-
- return ret;
+ return ucm_get_hook_mode(ucm_global_opts.cuda_hook_mode);
}
-cudaError_t ucm_cudaMallocPitch(void **devPtr, size_t *pitch,
- size_t width, size_t height)
+static ucs_status_t
+ucm_cuda_install_hooks(ucm_cuda_func_t *funcs, int *used_reloc,
+ const char *name)
{
- cudaError_t ret;
-
- ucm_event_enter();
-
- ret = ucm_orig_cudaMallocPitch(devPtr, pitch, width, height);
- if (ret == cudaSuccess) {
- ucm_trace("ucm_cudaMallocPitch(devPtr=%p size:%lu)",*devPtr, (width * height));
- ucm_dispatch_mem_type_alloc(*devPtr, (width * height), UCS_MEMORY_TYPE_CUDA);
- ucm_cuda_set_ptr_attr((CUdeviceptr) *devPtr);
- }
+ const char UCS_V_UNUSED *hook_mode;
+ unsigned num_bistro, num_reloc;
+ ucm_cuda_func_t *func;
+ ucs_status_t status;
+ void *func_ptr;
+
+ num_bistro = 0;
+ num_reloc = 0;
+ for (func = funcs; func->patch.symbol != NULL; ++func) {
+ func_ptr = ucm_reloc_get_orig(func->patch.symbol, func->patch.value);
+ if (func_ptr == NULL) {
+ continue;
+ }
- ucm_event_leave();
- return ret;
-}
+ status = UCS_ERR_UNSUPPORTED;
-cudaError_t ucm_cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags)
-{
- cudaError_t ret;
+ if (ucm_cuda_hook_mode() == UCM_MMAP_HOOK_BISTRO) {
+ status = ucm_bistro_patch(func_ptr, func->patch.value,
+ func->patch.symbol, func->orig_func_ptr,
+ NULL);
+ if (status == UCS_OK) {
+ ucm_trace("installed bistro hook for '%s': %s",
+ func->patch.symbol, ucs_status_string(status));
+ ++num_bistro;
+ continue;
+ }
+
+ ucm_debug("failed to install bistro hook for '%s', trying reloc",
+ func->patch.symbol);
+ }
- ucm_event_enter();
+ status = ucm_reloc_modify(&func->patch);
+ if (status != UCS_OK) {
+ ucm_diag("failed to install relocation table entry for '%s'",
+ func->patch.symbol);
+ return status;
+ }
- ret = ucm_orig_cudaHostGetDevicePointer(pDevice, pHost, flags);
- if (ret == cudaSuccess) {
- ucm_trace("ucm_cuMemHostGetDevicePointer(pDevice=%p pHost=%p)", pDevice, pHost);
+ ++num_reloc;
+ ucm_trace("installed reloc hook on '%s'", func->patch.symbol);
}
- ucm_event_leave();
- return ret;
+ *used_reloc = num_reloc > 0;
+ ucm_info("cuda memory hooks on %s API: installed %u bistro and %u reloc",
+ name, num_bistro, num_reloc);
+ return UCS_OK;
}
-cudaError_t ucm_cudaHostUnregister(void *ptr)
-{
- cudaError_t ret;
-
- ucm_event_enter();
-
- ucm_trace("ucm_cudaHostUnregister(ptr=%p)", ptr);
-
- ret = ucm_orig_cudaHostUnregister(ptr);
-
- ucm_event_leave();
- return ret;
-}
-
-static ucm_reloc_patch_t patches[] = {
- {UCS_PP_MAKE_STRING(cuMemFree), ucm_override_cuMemFree},
- {UCS_PP_MAKE_STRING(cuMemFreeHost), ucm_override_cuMemFreeHost},
- {UCS_PP_MAKE_STRING(cuMemAlloc), ucm_override_cuMemAlloc},
- {UCS_PP_MAKE_STRING(cuMemAllocManaged), ucm_override_cuMemAllocManaged},
- {UCS_PP_MAKE_STRING(cuMemAllocPitch), ucm_override_cuMemAllocPitch},
- {UCS_PP_MAKE_STRING(cuMemHostGetDevicePointer), ucm_override_cuMemHostGetDevicePointer},
- {UCS_PP_MAKE_STRING(cuMemHostUnregister), ucm_override_cuMemHostUnregister},
- {UCS_PP_MAKE_STRING(cudaFree), ucm_override_cudaFree},
- {UCS_PP_MAKE_STRING(cudaFreeHost), ucm_override_cudaFreeHost},
- {UCS_PP_MAKE_STRING(cudaMalloc), ucm_override_cudaMalloc},
- {UCS_PP_MAKE_STRING(cudaMallocManaged), ucm_override_cudaMallocManaged},
- {UCS_PP_MAKE_STRING(cudaMallocPitch), ucm_override_cudaMallocPitch},
- {UCS_PP_MAKE_STRING(cudaHostGetDevicePointer), ucm_override_cudaHostGetDevicePointer},
- {UCS_PP_MAKE_STRING(cudaHostUnregister), ucm_override_cudaHostUnregister},
- {NULL, NULL}
-};
-
static ucs_status_t ucm_cudamem_install(int events)
{
- static int ucm_cudamem_installed = 0;
+ static int ucm_cudamem_installed = 0;
static pthread_mutex_t install_mutex = PTHREAD_MUTEX_INITIALIZER;
- ucm_reloc_patch_t *patch;
- ucs_status_t status = UCS_OK;
+ ucs_status_t status = UCS_OK;
+ int used_reloc;
if (!(events & (UCM_EVENT_MEM_TYPE_ALLOC | UCM_EVENT_MEM_TYPE_FREE))) {
goto out;
}
- if (!ucm_global_opts.enable_cuda_reloc) {
- ucm_debug("installing cudamem relocations is disabled by configuration");
+ if (ucm_cuda_hook_mode() == UCM_MMAP_HOOK_NONE) {
+ ucm_info("cuda memory hooks are disabled by configuration");
status = UCS_ERR_UNSUPPORTED;
goto out;
}
@@ -402,17 +286,24 @@ static ucs_status_t ucm_cudamem_install(int events)
goto out_unlock;
}
- for (patch = patches; patch->symbol != NULL; ++patch) {
- status = ucm_reloc_modify(patch);
- if (status != UCS_OK) {
- ucm_warn("failed to install relocation table entry for '%s'", patch->symbol);
- goto out_unlock;
+    status = ucm_cuda_install_hooks(ucm_cuda_driver_funcs, &used_reloc,
+                                    "driver");
+    if (status != UCS_OK) {
+        ucm_warn("failed to install cuda memory hooks on driver API");
+    } else if (!used_reloc) {
+        ucm_cudamem_installed = 1;
+    } else {
+        /* At least one driver API function was hooked by relocation patching
+           instead of bistro, so need to install hooks on runtime APIs too. */
+        status = ucm_cuda_install_hooks(ucm_cuda_runtime_funcs, &used_reloc,
+                                        "runtime");
+        if (status == UCS_OK) {
+            ucm_cudamem_installed = 1;
+        } else {
+            ucm_warn("failed to install cuda memory hooks on runtime API");
}
}
- ucm_debug("cudaFree hooks are ready");
- ucm_cudamem_installed = 1;
-
out_unlock:
pthread_mutex_unlock(&install_mutex);
out:
@@ -429,13 +320,13 @@ static int ucm_cudamem_scan_regions_cb(void *arg, void *addr, size_t length,
/* we are interested in blocks which don't have any access permissions, or
* mapped to nvidia device.
*/
- if ((prot & (PROT_READ|PROT_WRITE|PROT_EXEC)) &&
+ if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
strncmp(path, cuda_path_pattern, strlen(cuda_path_pattern))) {
return 0;
}
- ucm_debug("dispatching initial memtype allocation for %p..%p %s",
- addr, UCS_PTR_BYTE_OFFSET(addr, length), path);
+ ucm_trace("dispatching initial memtype allocation for %p..%p %s", addr,
+ UCS_PTR_BYTE_OFFSET(addr, length), path);
event.mem_type.address = addr;
event.mem_type.size = length;
@@ -460,10 +351,12 @@ static ucm_event_installer_t ucm_cuda_initializer = {
.get_existing_alloc = ucm_cudamem_get_existing_alloc
};
-UCS_STATIC_INIT {
+UCS_STATIC_INIT
+{
ucs_list_add_tail(&ucm_event_installer_list, &ucm_cuda_initializer.list);
}
-UCS_STATIC_CLEANUP {
+UCS_STATIC_CLEANUP
+{
ucs_list_del(&ucm_cuda_initializer.list);
}
diff --git a/src/ucm/cuda/cudamem.h b/src/ucm/cuda/cudamem.h
index 03268231067..cd5a4087e27 100644
--- a/src/ucm/cuda/cudamem.h
+++ b/src/ucm/cuda/cudamem.h
@@ -7,95 +7,29 @@
#ifndef UCM_CUDAMEM_H_
#define UCM_CUDAMEM_H_
-#include
#include
#include
-/*cuMemFree */
-CUresult ucm_override_cuMemFree(CUdeviceptr dptr);
-CUresult ucm_orig_cuMemFree(CUdeviceptr dptr);
-CUresult ucm_cuMemFree(CUdeviceptr dptr);
-
-/*cuMemFreeHost */
-CUresult ucm_override_cuMemFreeHost(void *p);
-CUresult ucm_orig_cuMemFreeHost(void *p);
-CUresult ucm_cuMemFreeHost(void *p);
-
-/*cuMemAlloc*/
-CUresult ucm_override_cuMemAlloc(CUdeviceptr *dptr, size_t size);
-CUresult ucm_orig_cuMemAlloc(CUdeviceptr *dptr, size_t size);
CUresult ucm_cuMemAlloc(CUdeviceptr *dptr, size_t size);
-
-/*cuMemAllocManaged*/
-CUresult ucm_override_cuMemAllocManaged(CUdeviceptr *dptr, size_t size,
- unsigned int flags);
-CUresult ucm_orig_cuMemAllocManaged(CUdeviceptr *dptr, size_t size, unsigned int flags);
+CUresult ucm_cuMemAlloc_v2(CUdeviceptr *dptr, size_t size);
CUresult ucm_cuMemAllocManaged(CUdeviceptr *dptr, size_t size, unsigned int flags);
-
-/*cuMemAllocPitch*/
-CUresult ucm_override_cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch,
- size_t WidthInBytes, size_t Height,
- unsigned int ElementSizeBytes);
-CUresult ucm_orig_cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch,
- size_t WidthInBytes, size_t Height,
- unsigned int ElementSizeBytes);
CUresult ucm_cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch,
size_t WidthInBytes, size_t Height,
unsigned int ElementSizeBytes);
+CUresult ucm_cuMemAllocPitch_v2(CUdeviceptr *dptr, size_t *pPitch,
+ size_t WidthInBytes, size_t Height,
+ unsigned int ElementSizeBytes);
+CUresult ucm_cuMemFree(CUdeviceptr dptr);
+CUresult ucm_cuMemFree_v2(CUdeviceptr dptr);
+CUresult ucm_cuMemFreeHost(void *p);
+CUresult ucm_cuMemFreeHost_v2(void *p);
-/*cuMemHostGetDevicePointer*/
-CUresult ucm_override_cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p,
- unsigned int Flags);
-CUresult ucm_orig_cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p,
- unsigned int Flags);
-CUresult ucm_cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags);
-
-/*cuMemHostUnregister */
-CUresult ucm_override_cuMemHostUnregister(void *p);
-CUresult ucm_orig_cuMemHostUnregister(void *p);
-CUresult ucm_cuMemHostUnregister(void *p);
-
-/*cudaFree*/
-cudaError_t ucm_override_cudaFree(void *devPtr);
-cudaError_t ucm_orig_cudaFree(void *devPtr);
cudaError_t ucm_cudaFree(void *devPtr);
-
-/*cudaFreeHost*/
-cudaError_t ucm_override_cudaFreeHost(void *ptr);
-cudaError_t ucm_orig_cudaFreeHost(void *ptr);
cudaError_t ucm_cudaFreeHost(void *ptr);
-
-/*cudaMalloc*/
-cudaError_t ucm_override_cudaMalloc(void **devPtr, size_t size);
-cudaError_t ucm_orig_cudaMalloc(void **devPtr, size_t size);
cudaError_t ucm_cudaMalloc(void **devPtr, size_t size);
-
-/*cudaMallocManaged*/
-cudaError_t ucm_override_cudaMallocManaged(void **devPtr, size_t size,
- unsigned int flags);
-cudaError_t ucm_orig_cudaMallocManaged(void **devPtr, size_t size, unsigned int flags);
cudaError_t ucm_cudaMallocManaged(void **devPtr, size_t size, unsigned int flags);
-
-/*cudaMallocPitch*/
-cudaError_t ucm_override_cudaMallocPitch(void **devPtr, size_t *pitch,
- size_t width, size_t height);
-cudaError_t ucm_orig_cudaMallocPitch(void **devPtr, size_t *pitch,
- size_t width, size_t height);
cudaError_t ucm_cudaMallocPitch(void **devPtr, size_t *pitch,
size_t width, size_t height);
-/*cudaHostGetDevicePointer*/
-cudaError_t ucm_override_cudaHostGetDevicePointer(void **pDevice, void *pHost,
- unsigned int flags);
-cudaError_t ucm_orig_cudaHostGetDevicePointer(void **pDevice, void *pHost,
- unsigned int flags);
-cudaError_t ucm_cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags);
-
-
-/*cudaHostUnregister*/
-cudaError_t ucm_override_cudaHostUnregister(void *ptr);
-cudaError_t ucm_orig_cudaHostUnregister(void *ptr);
-cudaError_t ucm_cudaHostUnregister(void *ptr);
-
#endif
diff --git a/src/ucm/event/event.c b/src/ucm/event/event.c
index 1985b9b254f..d98dad64210 100644
--- a/src/ucm/event/event.c
+++ b/src/ucm/event/event.c
@@ -94,6 +94,11 @@ static void ucm_event_call_orig(ucm_event_type_t event_type, ucm_event_t *event,
event->shmdt.result = ucm_orig_shmdt(event->shmdt.shmaddr);
}
break;
+ case UCM_EVENT_BRK:
+ if (event->brk.result == -1) {
+ event->brk.result = ucm_orig_brk(event->brk.addr);
+ }
+ break;
case UCM_EVENT_SBRK:
if (event->sbrk.result == MAP_FAILED) {
event->sbrk.result = ucm_orig_sbrk(event->sbrk.increment);
@@ -120,8 +125,8 @@ static ucm_event_handler_t ucm_event_orig_handler = {
.list = UCS_LIST_INITIALIZER(&ucm_event_handlers, &ucm_event_handlers),
.events = UCM_EVENT_MMAP | UCM_EVENT_MUNMAP | UCM_EVENT_MREMAP |
UCM_EVENT_SHMAT | UCM_EVENT_SHMDT | UCM_EVENT_SBRK |
- UCM_EVENT_MADVISE, /* All events */
- .priority = 0, /* Between negative and positive handlers */
+ UCM_EVENT_MADVISE | UCM_EVENT_BRK, /* All events */
+ .priority = 0, /* Between negative and positive handlers */
.cb = ucm_event_call_orig
};
static ucs_list_link_t ucm_event_handlers =
@@ -166,6 +171,7 @@ void ucm_event_leave()
pthread_rwlock_unlock(&ucm_event_lock);
}
+UCS_F_NOINLINE
void *ucm_mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
{
ucm_event_t event;
@@ -198,6 +204,7 @@ void *ucm_mmap(void *addr, size_t length, int prot, int flags, int fd, off_t off
return event.mmap.result;
}
+UCS_F_NOINLINE
int ucm_munmap(void *addr, size_t length)
{
ucm_event_t event;
@@ -238,6 +245,7 @@ void ucm_vm_munmap(void *addr, size_t length)
ucm_event_leave();
}
+UCS_F_NOINLINE
void *ucm_mremap(void *old_address, size_t old_size, size_t new_size, int flags)
{
ucm_event_t event;
@@ -285,6 +293,7 @@ static int ucm_shm_del_entry_from_khash(const void *addr, size_t *size)
return 0;
}
+UCS_F_NOINLINE
void *ucm_shmat(int shmid, const void *shmaddr, int shmflg)
{
#ifdef SHM_REMAP
@@ -334,6 +343,7 @@ void *ucm_shmat(int shmid, const void *shmaddr, int shmflg)
return event.shmat.result;
}
+UCS_F_NOINLINE
int ucm_shmdt(const void *shmaddr)
{
ucm_event_t event;
@@ -341,7 +351,7 @@ int ucm_shmdt(const void *shmaddr)
ucm_event_enter();
- ucm_debug("ucm_shmdt(shmaddr=%p)", shmaddr);
+ ucm_trace("ucm_shmdt(shmaddr=%p)", shmaddr);
if (!ucm_shm_del_entry_from_khash(shmaddr, &size)) {
size = ucm_get_shm_seg_size(shmaddr);
@@ -358,6 +368,7 @@ int ucm_shmdt(const void *shmaddr)
return event.shmdt.result;
}
+UCS_F_NOINLINE
void *ucm_sbrk(intptr_t increment)
{
ucm_event_t event;
@@ -367,7 +378,8 @@ void *ucm_sbrk(intptr_t increment)
ucm_trace("ucm_sbrk(increment=%+ld)", increment);
if (increment < 0) {
- ucm_dispatch_vm_munmap(UCS_PTR_BYTE_OFFSET(ucm_orig_sbrk(0), increment),
+ ucm_dispatch_vm_munmap(UCS_PTR_BYTE_OFFSET(ucm_get_current_brk(),
+ increment),
-increment);
}
@@ -376,7 +388,8 @@ void *ucm_sbrk(intptr_t increment)
ucm_event_dispatch(UCM_EVENT_SBRK, &event);
if ((increment > 0) && (event.sbrk.result != MAP_FAILED)) {
- ucm_dispatch_vm_mmap(UCS_PTR_BYTE_OFFSET(ucm_orig_sbrk(0), -increment),
+ ucm_dispatch_vm_mmap(UCS_PTR_BYTE_OFFSET(ucm_get_current_brk(),
+ -increment),
increment);
}
@@ -385,42 +398,42 @@ void *ucm_sbrk(intptr_t increment)
return event.sbrk.result;
}
+UCS_F_NOINLINE
int ucm_brk(void *addr)
{
-#if UCM_BISTRO_HOOKS
- void *old_addr;
- intptr_t increment;
+ ptrdiff_t increment;
+ void *current_brk;
ucm_event_t event;
- old_addr = ucm_brk_syscall(0);
- /* in case if addr == NULL - it just returns current pointer */
- increment = addr ? ((intptr_t)addr - (intptr_t)old_addr) : 0;
-
ucm_event_enter();
ucm_trace("ucm_brk(addr=%p)", addr);
+ if (addr == NULL) {
+ increment = 0;
+ } else {
+ current_brk = ucm_get_current_brk();
+ increment = UCS_PTR_BYTE_DIFF(current_brk, addr);
+ }
+
if (increment < 0) {
- ucm_dispatch_vm_munmap(UCS_PTR_BYTE_OFFSET(old_addr, increment),
- -increment);
+ ucm_dispatch_vm_munmap(addr, -increment);
}
- event.sbrk.result = (void*)-1;
- event.sbrk.increment = increment;
- ucm_event_dispatch(UCM_EVENT_SBRK, &event);
+ event.brk.result = -1;
+ event.brk.addr = addr;
+ ucm_event_dispatch(UCM_EVENT_BRK, &event);
- if ((increment > 0) && (event.sbrk.result != MAP_FAILED)) {
- ucm_dispatch_vm_mmap(old_addr, increment);
+ if ((increment > 0) && (event.brk.result != -1)) {
+ ucm_dispatch_vm_mmap(current_brk, increment);
}
ucm_event_leave();
- return event.sbrk.result == MAP_FAILED ? -1 : 0;
-#else
- return -1;
-#endif
+ return event.brk.result;
}
+UCS_F_NOINLINE
int ucm_madvise(void *addr, size_t length, int advice)
{
ucm_event_t event;
@@ -455,6 +468,18 @@ int ucm_madvise(void *addr, size_t length, int advice)
return event.madvise.result;
}
+void ucm_library_init(const ucm_global_config_t *ucm_opts)
+{
+ static ucs_init_once_t init_once = UCS_INIT_ONCE_INITIALIZER;
+
+ UCS_INIT_ONCE(&init_once) {
+ if (ucm_opts != NULL) {
+ ucm_global_opts = *ucm_opts;
+ }
+ ucm_mmap_init();
+ }
+}
+
void ucm_event_handler_add(ucm_event_handler_t *handler)
{
ucm_event_handler_t *elem;
@@ -481,20 +506,17 @@ void ucm_event_handler_remove(ucm_event_handler_t *handler)
static ucs_status_t ucm_event_install(int events)
{
- static ucs_init_once_t init_once = UCS_INIT_ONCE_INITIALIZER;
UCS_MODULE_FRAMEWORK_DECLARE(ucm);
ucm_event_installer_t *event_installer;
int malloc_events;
ucs_status_t status;
- UCS_INIT_ONCE(&init_once) {
- ucm_prevent_dl_unload();
- }
+ ucm_prevent_dl_unload();
/* TODO lock */
- status = ucm_mmap_install(events);
+ status = ucm_mmap_install(events, 0);
if (status != UCS_OK) {
- ucm_debug("failed to install mmap events");
+ ucm_diag("failed to install mmap events");
goto out_unlock;
}
@@ -523,7 +545,6 @@ static ucs_status_t ucm_event_install(int events)
out_unlock:
return status;
-
}
ucs_status_t ucm_set_event_handler(int events, int priority,
@@ -536,7 +557,7 @@ ucs_status_t ucm_set_event_handler(int events, int priority,
if (events & ~(UCM_EVENT_MMAP|UCM_EVENT_MUNMAP|UCM_EVENT_MREMAP|
UCM_EVENT_SHMAT|UCM_EVENT_SHMDT|
- UCM_EVENT_SBRK|
+ UCM_EVENT_BRK|UCM_EVENT_SBRK|
UCM_EVENT_MADVISE|
UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED|
UCM_EVENT_MEM_TYPE_ALLOC|UCM_EVENT_MEM_TYPE_FREE|
@@ -549,6 +570,8 @@ ucs_status_t ucm_set_event_handler(int events, int priority,
return UCS_ERR_UNSUPPORTED;
}
+ ucm_library_init(NULL);
+
/* separate event flags from real events */
flags = events & (UCM_EVENT_FLAG_NO_INSTALL |
UCM_EVENT_FLAG_EXISTING_ALLOC);
@@ -587,6 +610,7 @@ ucs_status_t ucm_set_event_handler(int events, int priority,
void ucm_set_external_event(int events)
{
ucm_event_enter_exclusive();
+ ucm_debug("set external events: 0x%x", events);
ucm_external_events |= events;
ucm_event_leave();
}
@@ -594,6 +618,7 @@ void ucm_set_external_event(int events)
void ucm_unset_external_event(int events)
{
ucm_event_enter_exclusive();
+ ucm_debug("unset external events: 0x%x", events);
ucm_external_events &= ~events;
ucm_event_leave();
}
@@ -623,11 +648,13 @@ void ucm_unset_event_handler(int events, ucm_event_callback_t cb, void *arg)
ucs_status_t ucm_test_events(int events)
{
+ ucm_library_init(NULL);
return ucm_mmap_test_installed_events(events);
}
ucs_status_t ucm_test_external_events(int events)
{
+ ucm_library_init(NULL);
return ucm_mmap_test_events(events & ucm_external_events, "external");
}
diff --git a/src/ucm/event/event.h b/src/ucm/event/event.h
index 763ac3b2098..e7ae14ec6ad 100644
--- a/src/ucm/event/event.h
+++ b/src/ucm/event/event.h
@@ -13,12 +13,13 @@
#include
#define UCM_NATIVE_EVENT_VM_MAPPED (UCM_EVENT_MMAP | UCM_EVENT_MREMAP | \
- UCM_EVENT_SHMAT | UCM_EVENT_SBRK)
+ UCM_EVENT_SHMAT | UCM_EVENT_SBRK | \
+ UCM_EVENT_BRK)
#define UCM_NATIVE_EVENT_VM_UNMAPPED (UCM_EVENT_MMAP | UCM_EVENT_MUNMAP | \
UCM_EVENT_MREMAP | UCM_EVENT_SHMDT | \
UCM_EVENT_SHMAT | UCM_EVENT_SBRK | \
- UCM_EVENT_MADVISE)
+ UCM_EVENT_MADVISE | UCM_EVENT_BRK)
typedef struct ucm_event_handler {
diff --git a/src/ucm/malloc/malloc_hook.c b/src/ucm/malloc/malloc_hook.c
index 50f7b974a64..9bfbdb3bba7 100644
--- a/src/ucm/malloc/malloc_hook.c
+++ b/src/ucm/malloc/malloc_hook.c
@@ -141,8 +141,8 @@ static void ucm_malloc_mmaped_ptr_add(void *ptr)
hash_it = kh_put(mmap_ptrs, &ucm_malloc_hook_state.ptrs, ptr,
&hash_extra_status);
- ucs_assert_always(hash_extra_status >= 0);
- ucs_assert_always(hash_it != kh_end(&ucm_malloc_hook_state.ptrs));
+ ucm_assert_always(hash_extra_status >= 0);
+ ucm_assert_always(hash_it != kh_end(&ucm_malloc_hook_state.ptrs));
ucs_recursive_spin_unlock(&ucm_malloc_hook_state.lock);
}
@@ -550,7 +550,7 @@ static void ucm_malloc_sbrk(ucm_event_type_t event_type,
if (ucm_malloc_hook_state.heap_start == (void*)-1) {
ucm_malloc_hook_state.heap_start = event->sbrk.result; /* sbrk() returns the previous break */
}
- ucm_malloc_hook_state.heap_end = ucm_orig_sbrk(0);
+ ucm_malloc_hook_state.heap_end = ucm_get_current_brk();
ucm_trace("sbrk(%+ld)=%p - adjusting heap to [%p..%p]",
event->sbrk.increment, event->sbrk.result,
diff --git a/src/ucm/mmap/install.c b/src/ucm/mmap/install.c
index 83d4b7d37e5..e9573ac7bce 100644
--- a/src/ucm/mmap/install.c
+++ b/src/ucm/mmap/install.c
@@ -15,6 +15,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -30,8 +31,6 @@
#include
#include
-#define UCM_IS_HOOK_ENABLED(_entry) \
- ((_entry)->hook_type & UCS_BIT(ucm_mmap_hook_mode()))
#define UCM_HOOK_STR \
((ucm_mmap_hook_mode() == UCM_MMAP_HOOK_RELOC) ? "reloc" : "bistro")
@@ -43,7 +42,7 @@
_call; \
ucm_trace("after %s: got 0x%x/0x%x", UCS_PP_MAKE_STRING(_call), \
(_data)->fired_events, exp_events); \
- /* in case if any event is missed - set correcponding bit to 0 */ \
+ /* if any event is missed - set the corresponding bit to 0 */ \
/* same as equation: */ \
/* (_data)->out_events &= ~(exp_events ^ */ \
/* ((_data)->fired_events & exp_events)); */ \
@@ -61,17 +60,10 @@
extern const char *ucm_mmap_hook_modes[];
-typedef enum ucm_mmap_hook_type {
- UCM_HOOK_RELOC = UCS_BIT(UCM_MMAP_HOOK_RELOC),
- UCM_HOOK_BISTRO = UCS_BIT(UCM_MMAP_HOOK_BISTRO),
- UCM_HOOK_BOTH = UCM_HOOK_RELOC | UCM_HOOK_BISTRO
-} ucm_mmap_hook_type_t;
-
typedef struct ucm_mmap_func {
ucm_reloc_patch_t patch;
ucm_event_type_t event_type;
ucm_event_type_t deps;
- ucm_mmap_hook_type_t hook_type;
} ucm_mmap_func_t;
typedef struct ucm_mmap_test_events_data {
@@ -81,18 +73,16 @@ typedef struct ucm_mmap_test_events_data {
} ucm_mmap_test_events_data_t;
static ucm_mmap_func_t ucm_mmap_funcs[] = {
- { {"mmap", ucm_override_mmap}, UCM_EVENT_MMAP, UCM_EVENT_NONE, UCM_HOOK_BOTH},
- { {"munmap", ucm_override_munmap}, UCM_EVENT_MUNMAP, UCM_EVENT_NONE, UCM_HOOK_BOTH},
+ { {"mmap", ucm_override_mmap}, UCM_EVENT_MMAP, UCM_EVENT_NONE},
+ { {"munmap", ucm_override_munmap}, UCM_EVENT_MUNMAP, UCM_EVENT_NONE},
#if HAVE_MREMAP
- { {"mremap", ucm_override_mremap}, UCM_EVENT_MREMAP, UCM_EVENT_NONE, UCM_HOOK_BOTH},
-#endif
- { {"shmat", ucm_override_shmat}, UCM_EVENT_SHMAT, UCM_EVENT_NONE, UCM_HOOK_BOTH},
- { {"shmdt", ucm_override_shmdt}, UCM_EVENT_SHMDT, UCM_EVENT_SHMAT, UCM_HOOK_BOTH},
- { {"sbrk", ucm_override_sbrk}, UCM_EVENT_SBRK, UCM_EVENT_NONE, UCM_HOOK_RELOC},
-#if UCM_BISTRO_HOOKS
- { {"brk", ucm_override_brk}, UCM_EVENT_SBRK, UCM_EVENT_NONE, UCM_HOOK_BISTRO},
+ { {"mremap", ucm_override_mremap}, UCM_EVENT_MREMAP, UCM_EVENT_NONE},
#endif
- { {"madvise", ucm_override_madvise}, UCM_EVENT_MADVISE, UCM_EVENT_NONE, UCM_HOOK_BOTH},
+ { {"shmat", ucm_override_shmat}, UCM_EVENT_SHMAT, UCM_EVENT_NONE},
+ { {"shmdt", ucm_override_shmdt}, UCM_EVENT_SHMDT, UCM_EVENT_SHMAT},
+ { {"sbrk", ucm_override_sbrk}, UCM_EVENT_SBRK, UCM_EVENT_NONE},
+ { {"brk", ucm_override_brk}, UCM_EVENT_BRK, UCM_EVENT_NONE},
+ { {"madvise", ucm_override_madvise}, UCM_EVENT_MADVISE, UCM_EVENT_NONE},
{ {NULL, NULL}, UCM_EVENT_NONE}
};
@@ -108,6 +98,7 @@ static const char *ucm_mmap_event_name[] = {
UCM_MMAP_EVENT_NAME_ENTRY(SHMDT),
UCM_MMAP_EVENT_NAME_ENTRY(SBRK),
UCM_MMAP_EVENT_NAME_ENTRY(MADVISE),
+ UCM_MMAP_EVENT_NAME_ENTRY(BRK),
/* Aggregate events */
UCM_MMAP_EVENT_NAME_ENTRY(VM_MAPPED),
@@ -124,11 +115,20 @@ static void ucm_mmap_event_test_callback(ucm_event_type_t event_type,
* So ignore calls from other threads to ensure the only requested events
* are proceeded.
*/
- if (data->tid == ucs_get_tid()) {
+ if (data->tid == ucm_get_tid()) {
data->fired_events |= event_type;
}
}
+/* Call brk() and check return value, to avoid compile error of unused result */
+static void ucm_brk_checked(void *addr)
+{
+ int ret = brk(addr);
+ if ((ret != 0) && (addr != NULL)) {
+ ucm_diag("brk(addr=%p) failed: %m", addr);
+ }
+}
+
/* Fire events with pre/post action. The problem is in call sequence: we
* can't just fire single event - most of the system calls require set of
* calls to eliminate resource leaks or data corruption, such sequence
@@ -136,10 +136,10 @@ static void ucm_mmap_event_test_callback(ucm_event_type_t event_type,
* exclude additional events from processing used pre/post actions where
* set of handled events is cleared and evaluated for every system call */
static void
-ucm_fire_mmap_events_internal(int events, ucm_mmap_test_events_data_t *data)
+ucm_fire_mmap_events_internal(int events, ucm_mmap_test_events_data_t *data,
+ int exclusive)
{
size_t sbrk_size;
- int sbrk_mask;
int shmid;
void *p;
@@ -183,19 +183,29 @@ ucm_fire_mmap_events_internal(int events, ucm_mmap_test_events_data_t *data)
data, shmdt(p));
}
- if (events & (UCM_EVENT_SBRK|UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED)) {
- if (RUNNING_ON_VALGRIND) {
- /* on valgrind, doing a non-trivial sbrk() causes heap corruption */
- sbrk_size = 0;
- sbrk_mask = UCM_EVENT_SBRK;
- } else {
- sbrk_size = ucm_get_page_size();
- sbrk_mask = UCM_EVENT_SBRK|UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED;
+ if (exclusive && !RUNNING_ON_VALGRIND) {
+ sbrk_size = ucm_get_page_size();
+ if (events & (UCM_EVENT_BRK|UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED)) {
+ p = ucm_get_current_brk();
+ UCM_FIRE_EVENT(events, UCM_EVENT_BRK|UCM_EVENT_VM_MAPPED, data,
+ ucm_brk_checked(UCS_PTR_BYTE_OFFSET(p, sbrk_size)));
+ UCM_FIRE_EVENT(events, UCM_EVENT_BRK|UCM_EVENT_VM_UNMAPPED, data,
+ ucm_brk_checked(p));
+ }
+ if (events & (UCM_EVENT_SBRK|UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED)) {
+ UCM_FIRE_EVENT(events, UCM_EVENT_SBRK|UCM_EVENT_VM_MAPPED,
+ data, (void)sbrk(sbrk_size));
+ UCM_FIRE_EVENT(events, UCM_EVENT_SBRK|UCM_EVENT_VM_UNMAPPED,
+ data, (void)sbrk(-sbrk_size));
+ }
+ } else {
+ /* To avoid side effects on other threads and valgrind heap corruption,
+ * pass invalid parameters. We assume that if the native events are
+ * delivered, it means VM_MAPPED/UNMAPPED would be delivered as well.
+ */
+ if (events & UCM_EVENT_BRK) {
+ UCM_FIRE_EVENT(events, UCM_EVENT_BRK, data, ucm_brk_checked(NULL));
}
- UCM_FIRE_EVENT(events, (UCM_EVENT_SBRK|UCM_EVENT_VM_MAPPED) & sbrk_mask,
- data, (void)sbrk(sbrk_size));
- UCM_FIRE_EVENT(events, (UCM_EVENT_SBRK|UCM_EVENT_VM_UNMAPPED) & sbrk_mask,
- data, (void)sbrk(-sbrk_size));
}
if (events & (UCM_EVENT_MADVISE|UCM_EVENT_VM_UNMAPPED)) {
@@ -217,7 +227,7 @@ void ucm_fire_mmap_events(int events)
{
ucm_mmap_test_events_data_t data;
- ucm_fire_mmap_events_internal(events, &data);
+ ucm_fire_mmap_events_internal(events, &data, 0);
}
static void ucm_mmap_event_report_missing(int expected, int actual,
@@ -252,7 +262,7 @@ static void ucm_mmap_event_report_missing(int expected, int actual,
/* Called with lock held */
static ucs_status_t
-ucm_mmap_test_events_nolock(int events, const char *event_type)
+ucm_mmap_test_events_nolock(int events, int exclusive, const char *event_type)
{
ucm_event_handler_t handler;
ucm_mmap_test_events_data_t data;
@@ -262,13 +272,16 @@ ucm_mmap_test_events_nolock(int events, const char *event_type)
handler.cb = ucm_mmap_event_test_callback;
handler.arg = &data;
data.out_events = events;
- data.tid = ucs_get_tid();
+ data.tid = ucm_get_tid();
+
+ ucm_debug("testing mmap %s events 0x%x", event_type, events);
ucm_event_handler_add(&handler);
- ucm_fire_mmap_events_internal(events, &data);
+ ucm_fire_mmap_events_internal(events, &data, exclusive);
ucm_event_handler_remove(&handler);
- ucm_debug("mmap test: got 0x%x out of 0x%x", data.out_events, events);
+ ucm_debug("mmap %s events test: got 0x%x out of 0x%x", event_type,
+ data.out_events, events);
/* Return success if we caught all wanted events */
if (!ucs_test_all_flags(data.out_events, events)) {
@@ -279,6 +292,23 @@ ucm_mmap_test_events_nolock(int events, const char *event_type)
return UCS_OK;
}
+static int ucm_mmap_events_to_native_events(int events)
+{
+ int native_events;
+
+ native_events = events & ~(UCM_EVENT_MEM_TYPE_ALLOC |
+ UCM_EVENT_MEM_TYPE_FREE);
+
+ if (events & UCM_EVENT_VM_MAPPED) {
+ native_events |= UCM_NATIVE_EVENT_VM_MAPPED;
+ }
+ if (events & UCM_EVENT_VM_UNMAPPED) {
+ native_events |= UCM_NATIVE_EVENT_VM_UNMAPPED;
+ }
+
+ return native_events;
+}
+
ucs_status_t ucm_mmap_test_events(int events, const char *event_type)
{
ucs_status_t status;
@@ -287,7 +317,7 @@ ucs_status_t ucm_mmap_test_events(int events, const char *event_type)
* return UCS_OK iff all events are actually working
*/
pthread_mutex_lock(&ucm_mmap_install_mutex);
- status = ucm_mmap_test_events_nolock(events, event_type);
+ status = ucm_mmap_test_events_nolock(events, 0, event_type);
pthread_mutex_unlock(&ucm_mmap_install_mutex);
return status;
@@ -296,8 +326,11 @@ ucs_status_t ucm_mmap_test_events(int events, const char *event_type)
ucs_status_t ucm_mmap_test_installed_events(int events)
{
/*
- * return UCS_OK iff all installed events are actually working
- * we don't check the status of events which were not successfully installed
+ * Return UCS_OK iff all installed events are actually working.
+ * - We should not expand 'events' to native events, and test only the exact
+ * set of events the user asked to test.
+ * - We don't check the status of events which were not reported as
+ * successfully installed.
*/
return ucm_mmap_test_events(events & ucm_mmap_installed_events, "internal");
}
@@ -308,6 +341,7 @@ static ucs_status_t ucs_mmap_install_reloc(int events)
static int installed_events = 0;
ucm_mmap_func_t *entry;
ucs_status_t status;
+ void *func_ptr;
if (ucm_mmap_hook_mode() == UCM_MMAP_HOOK_NONE) {
ucm_debug("installing mmap hooks is disabled by configuration");
@@ -325,47 +359,36 @@ static ucs_status_t ucs_mmap_install_reloc(int events)
continue;
}
- if (UCM_IS_HOOK_ENABLED(entry)) {
- ucm_debug("mmap: installing %s hook for %s = %p for event 0x%x", UCM_HOOK_STR,
- entry->patch.symbol, entry->patch.value, entry->event_type);
+ ucm_debug("mmap: installing %s hook for %s = %p for event 0x%x",
+ UCM_HOOK_STR, entry->patch.symbol, entry->patch.value,
+ entry->event_type);
- if (ucm_mmap_hook_mode() == UCM_MMAP_HOOK_RELOC) {
- status = ucm_reloc_modify(&entry->patch);
+ if (ucm_mmap_hook_mode() == UCM_MMAP_HOOK_RELOC) {
+ status = ucm_reloc_modify(&entry->patch);
+ } else {
+ ucm_assert(ucm_mmap_hook_mode() == UCM_MMAP_HOOK_BISTRO);
+ func_ptr = ucm_reloc_get_orig(entry->patch.symbol,
+ entry->patch.value);
+ if (func_ptr == NULL) {
+ status = UCS_ERR_NO_ELEM;
} else {
- ucs_assert(ucm_mmap_hook_mode() == UCM_MMAP_HOOK_BISTRO);
- status = ucm_bistro_patch(entry->patch.symbol, entry->patch.value, NULL);
- }
- if (status != UCS_OK) {
- ucm_warn("failed to install %s hook for '%s'",
- UCM_HOOK_STR, entry->patch.symbol);
- return status;
+ status = ucm_bistro_patch(func_ptr, entry->patch.value,
+ entry->patch.symbol, NULL, NULL);
}
-
- installed_events |= entry->event_type;
}
- }
-
- return UCS_OK;
-}
-
-static int ucm_mmap_events_to_native_events(int events)
-{
- int native_events;
-
- native_events = events & ~(UCM_EVENT_MEM_TYPE_ALLOC |
- UCM_EVENT_MEM_TYPE_FREE);
+ if (status != UCS_OK) {
+ ucm_warn("failed to install %s hook for '%s'", UCM_HOOK_STR,
+ entry->patch.symbol);
+ return status;
+ }
- if (events & UCM_EVENT_VM_MAPPED) {
- native_events |= UCM_NATIVE_EVENT_VM_MAPPED;
- }
- if (events & UCM_EVENT_VM_UNMAPPED) {
- native_events |= UCM_NATIVE_EVENT_VM_UNMAPPED;
+ installed_events |= entry->event_type;
}
- return native_events;
+ return UCS_OK;
}
-ucs_status_t ucm_mmap_install(int events)
+ucs_status_t ucm_mmap_install(int events, int exclusive)
{
ucs_status_t status;
int native_events;
@@ -378,7 +401,8 @@ ucs_status_t ucm_mmap_install(int events)
/* if we already installed these events, check that they are still
* working, and if not - reinstall them.
*/
- status = ucm_mmap_test_events_nolock(native_events, 0);
+ status = ucm_mmap_test_events_nolock(native_events, exclusive,
+ "existing");
if (status == UCS_OK) {
goto out_unlock;
}
@@ -390,7 +414,7 @@ ucs_status_t ucm_mmap_install(int events)
goto out_unlock;
}
- status = ucm_mmap_test_events_nolock(native_events, 0);
+ status = ucm_mmap_test_events_nolock(native_events, exclusive, "installed");
if (status != UCS_OK) {
ucm_debug("failed to install mmap events");
goto out_unlock;
@@ -398,9 +422,38 @@ ucs_status_t ucm_mmap_install(int events)
/* status == UCS_OK */
ucm_mmap_installed_events |= native_events;
- ucm_debug("mmap installed events = 0x%x", ucm_mmap_installed_events);
+ ucm_info("mmap installed events = 0x%x", ucm_mmap_installed_events);
out_unlock:
pthread_mutex_unlock(&ucm_mmap_install_mutex);
return status;
}
+
+void ucm_mmap_init()
+{
+ ucm_event_type_t events;
+ ucm_mmap_func_t *entry;
+
+ if (!ucm_global_opts.enable_events ||
+ (ucm_mmap_hook_mode() != UCM_MMAP_HOOK_BISTRO)) {
+ return;
+ }
+
+ /* We must initialize bistro hooks during startup and not later, before
+ * other threads could execute the modified functions and fail on invalid
+ * instructions
+ */
+ events = 0;
+ for (entry = ucm_mmap_funcs; entry->patch.symbol != NULL; ++entry) {
+ events |= entry->event_type;
+ }
+ if (events & UCM_NATIVE_EVENT_VM_MAPPED) {
+ events |= UCM_EVENT_VM_MAPPED;
+ }
+ if (events & UCM_NATIVE_EVENT_VM_UNMAPPED) {
+ events |= UCM_EVENT_VM_UNMAPPED;
+ }
+
+ ucm_prevent_dl_unload();
+ ucm_mmap_install(events, 1);
+}
diff --git a/src/ucm/mmap/mmap.h b/src/ucm/mmap/mmap.h
index ed90a801238..c7b4e4e37b2 100644
--- a/src/ucm/mmap/mmap.h
+++ b/src/ucm/mmap/mmap.h
@@ -8,6 +8,7 @@
#define UCM_MMAP_H_
#include
+#include
#include
#define UCM_MMAP_HOOK_RELOC_STR "reloc"
@@ -21,7 +22,7 @@
# define UCM_DEFAULT_HOOK_MODE_STR UCM_MMAP_HOOK_RELOC_STR
#endif
-ucs_status_t ucm_mmap_install(int events);
+ucs_status_t ucm_mmap_install(int events, int exclusive);
void *ucm_override_mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset);
int ucm_override_munmap(void *addr, size_t length);
@@ -31,23 +32,16 @@ int ucm_override_shmdt(const void *shmaddr);
void *ucm_override_sbrk(intptr_t increment);
void *ucm_sbrk_select(intptr_t increment);
int ucm_override_brk(void *addr);
-void *ucm_brk_syscall(void *addr);
int ucm_override_madvise(void *addr, size_t length, int advice);
+void *ucm_get_current_brk();
void ucm_fire_mmap_events(int events);
ucs_status_t ucm_mmap_test_installed_events(int events);
ucs_status_t ucm_mmap_test_events(int events, const char *event_type);
+void ucm_mmap_init();
static UCS_F_ALWAYS_INLINE ucm_mmap_hook_mode_t ucm_mmap_hook_mode(void)
{
-#ifdef __SANITIZE_ADDRESS__
- return UCM_MMAP_HOOK_NONE;
-#else
- if (RUNNING_ON_VALGRIND && (ucm_global_opts.mmap_hook_mode == UCM_MMAP_HOOK_BISTRO)) {
- return UCM_MMAP_HOOK_RELOC;
- }
-
- return ucm_global_opts.mmap_hook_mode;
-#endif
+ return ucm_get_hook_mode(ucm_global_opts.mmap_hook_mode);
}
#endif
diff --git a/src/ucm/rocm/rocmmem.c b/src/ucm/rocm/rocmmem.c
index bf441d6057d..386f07fdcea 100644
--- a/src/ucm/rocm/rocmmem.c
+++ b/src/ucm/rocm/rocmmem.c
@@ -28,11 +28,6 @@ UCM_DEFINE_REPLACE_DLSYM_FUNC(hsa_amd_memory_pool_allocate, hsa_status_t,
UCM_DEFINE_REPLACE_DLSYM_FUNC(hsa_amd_memory_pool_free, hsa_status_t,
HSA_STATUS_ERROR, void*)
-#if ENABLE_SYMBOL_OVERRIDE
-UCM_OVERRIDE_FUNC(hsa_amd_memory_pool_allocate, hsa_status_t)
-UCM_OVERRIDE_FUNC(hsa_amd_memory_pool_free, hsa_status_t)
-#endif
-
static UCS_F_ALWAYS_INLINE void
ucm_dispatch_mem_type_alloc(void *addr, size_t length, ucs_memory_type_t mem_type)
{
@@ -172,7 +167,7 @@ static ucs_status_t ucm_rocmmem_install(int events)
}
}
- ucm_debug("rocm hooks are ready");
+ ucm_info("rocm hooks are ready");
ucm_rocmmem_installed = 1;
out_unlock:
diff --git a/src/ucm/util/log.c b/src/ucm/util/log.c
index ec41746a477..7325a341952 100644
--- a/src/ucm/util/log.c
+++ b/src/ucm/util/log.c
@@ -23,6 +23,7 @@
#include
#include
#include
+#include
#define UCM_LOG_BUG_SIZE 512
@@ -65,7 +66,7 @@ static char *ucm_log_ltoa(char *p, char *end, long n, int base, int flags,
int pad)
{
static const char digits[] = "0123456789abcdef";
- long divider;
+ long divider, top_divider;
if (((n < 0) || (flags & UCM_LOG_LTOA_FLAG_SIGN)) && (p < end)) {
*(p++) = (n < 0 ) ? '-' : '+';
@@ -80,9 +81,11 @@ static char *ucm_log_ltoa(char *p, char *end, long n, int base, int flags,
n = labs(n);
- divider = 1;
- while ((n / divider) != 0) {
- divider *= base;
+ divider = 1;
+ top_divider = 0;
+ while ((divider > 0) && ((n / divider) != 0)) {
+ top_divider = divider;
+ divider *= base;
--pad;
}
@@ -91,7 +94,7 @@ static char *ucm_log_ltoa(char *p, char *end, long n, int base, int flags,
(flags & UCM_LOG_LTOA_FLAG_PAD0) ? '0' : ' ');
}
- divider /= base;
+ divider = top_divider;
while ((p < end) && (divider > 0)) {
*(p++) = digits[(n / divider + base) % base];
divider /= base;
@@ -260,11 +263,15 @@ void __ucm_log(const char *file, unsigned line, const char *function,
va_list ap;
struct timeval tv;
ssize_t nwrite;
+ pid_t pid;
gettimeofday(&tv, NULL);
- ucm_log_snprintf(buf, UCM_LOG_BUG_SIZE - 1, "[%lu.%06lu] [%s:%d] %18s:%-4d UCX %s ",
- tv.tv_sec, tv.tv_usec, ucm_log_hostname, getpid(),
- ucs_basename(file), line, ucm_log_level_names[level]);
+ pid = getpid();
+ ucm_log_snprintf(buf, UCM_LOG_BUG_SIZE - 1,
+ "[%lu.%06lu] [%s:%d:%d] %18s:%-4d UCX %s ",
+ tv.tv_sec, tv.tv_usec, ucm_log_hostname, pid,
+ ucm_get_tid() - pid, ucs_basename(file), line,
+ ucm_log_level_names[level]);
buf[UCM_LOG_BUG_SIZE - 1] = '\0';
length = strlen(buf);
diff --git a/src/ucm/util/log.h b/src/ucm/util/log.h
index 9dcfd317406..ac0e32ab418 100644
--- a/src/ucm/util/log.h
+++ b/src/ucm/util/log.h
@@ -22,6 +22,7 @@
## __VA_ARGS__); \
}
+
#define ucm_fatal(_message, ...) ucm_log(UCS_LOG_LEVEL_FATAL, _message, ## __VA_ARGS__)
#define ucm_error(_message, ...) ucm_log(UCS_LOG_LEVEL_ERROR, _message, ## __VA_ARGS__)
#define ucm_warn(_message, ...) ucm_log(UCS_LOG_LEVEL_WARN, _message, ## __VA_ARGS__)
@@ -30,8 +31,25 @@
#define ucm_debug(_message, ...) ucm_log(UCS_LOG_LEVEL_DEBUG, _message, ## __VA_ARGS__)
#define ucm_trace(_message, ...) ucm_log(UCS_LOG_LEVEL_TRACE, _message, ## __VA_ARGS__)
+
+#define ucm_assert_always(_expression) \
+ do { \
+ if (!(_expression)) { \
+ ucm_fatal("Assertion `%s' failed", #_expression); \
+ } \
+ } while (0)
+
+
+#if ENABLE_ASSERT
+# define ucm_assert(...) ucm_assert_always(__VA_ARGS__)
+#else
+# define ucm_assert(...) do {} while (0) /* no-op; still needs ';' like ucm_assert_always */
+#endif
+
+
extern const char *ucm_log_level_names[];
+
void __ucm_log(const char *file, unsigned line, const char *function,
ucs_log_level_t level, const char *message, ...)
UCS_F_PRINTF(5, 6);
diff --git a/src/ucm/util/reloc.c b/src/ucm/util/reloc.c
index 4443a73b5c7..143c146148a 100644
--- a/src/ucm/util/reloc.c
+++ b/src/ucm/util/reloc.c
@@ -8,12 +8,6 @@
# include "config.h"
#endif
-#ifndef NVALGRIND
-# include
-#else
-# define RUNNING_ON_VALGRIND 0
-#endif
-
#include "reloc.h"
#include
@@ -35,6 +29,14 @@
#include
#include
+/* Ensure this macro is defined (from <link.h>) - otherwise, cppcheck might
+ fail with an "unknown macro" warning */
+#ifndef ElfW
+#define ElfW(type) _ElfW (Elf, __ELF_NATIVE_CLASS, type)
+#define _ElfW(e,w,t) _ElfW_1 (e, w, _##t)
+#define _ElfW_1(e,w,t) e##w##t
+#endif
+
typedef void * (*ucm_reloc_dlopen_func_t)(const char *, int);
typedef int (*ucm_reloc_dlclose_func_t)(void *);
@@ -226,7 +228,7 @@ ucm_reloc_dl_apply_patch(const ucm_dl_info_t *dl_info, const char *dl_basename,
/* modify the relocation to the new value */
*entry = patch->value;
- ucm_debug("symbol '%s' in %s at [%p] modified from %p to %p",
+ ucm_trace("symbol '%s' in %s at [%p] modified from %p to %p",
patch->symbol, dl_basename, entry, prev_value, patch->value);
/* store default entry to prev_value to guarantee valid pointers
@@ -238,7 +240,7 @@ ucm_reloc_dl_apply_patch(const ucm_dl_info_t *dl_info, const char *dl_basename,
!((prev_value >= (void*)dl_info->start) &&
(prev_value < (void*)dl_info->end))) {
patch->prev_value = prev_value;
- ucm_debug("'%s' prev_value is %p", patch->symbol, prev_value);
+ ucm_trace("'%s' prev_value is %p", patch->symbol, prev_value);
}
return UCS_OK;
@@ -387,7 +389,7 @@ static void ucm_reloc_dl_info_cleanup(ElfW(Addr) dlpi_addr, const char *dl_name)
khiter = kh_get(ucm_dl_info_hash, &ucm_dl_info_hash, dlpi_addr);
if (khiter == kh_end(&ucm_dl_info_hash)) {
- ucm_debug("no dl_info entry for address 0x%lx", dlpi_addr);
+ ucm_trace("no dl_info entry for address 0x%lx", dlpi_addr);
return;
}
diff --git a/src/ucm/util/replace.c b/src/ucm/util/replace.c
index 6d8abae9405..58a06325778 100644
--- a/src/ucm/util/replace.c
+++ b/src/ucm/util/replace.c
@@ -17,14 +17,20 @@
#include
#include
#include
+#include
#include
#include
#include
+
#ifndef MAP_FAILED
#define MAP_FAILED ((void*)-1)
#endif
+#if HAVE___CURBRK
+extern void *__curbrk; /* declaration only; returned by ucm_get_current_brk() */
+#endif /* HAVE___CURBRK */
+
#ifdef PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP
pthread_mutex_t ucm_reloc_get_orig_lock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
#else
@@ -113,17 +119,8 @@ int ucm_orig_shmdt(const void *shmaddr)
#endif
-#if HAVE___CURBRK
-extern void *__curbrk;
-#endif
-
_UCM_DEFINE_DLSYM_FUNC(brk, ucm_orig_dlsym_brk, ucm_override_brk, int, -1, void*)
-void *ucm_brk_syscall(void *addr)
-{
- return (void*)syscall(SYS_brk, addr);
-}
-
int ucm_orig_brk(void *addr)
{
void *new_addr;
@@ -133,7 +130,7 @@ int ucm_orig_brk(void *addr)
#endif
new_addr = ucm_brk_syscall(addr);
- if (new_addr < addr) {
+ if (new_addr != addr) {
errno = ENOMEM;
return -1;
} else {
@@ -151,15 +148,26 @@ void *ucm_orig_sbrk(intptr_t increment)
if (ucm_mmap_hook_mode() == UCM_MMAP_HOOK_RELOC) {
return ucm_orig_dlsym_sbrk(increment);
} else {
- prev = ucm_brk_syscall(0);
- return ucm_orig_brk(UCS_PTR_BYTE_OFFSET(prev, increment)) ? (void*)-1 : prev;
+ prev = ucm_get_current_brk();
+ return ucm_orig_brk(UCS_PTR_BYTE_OFFSET(prev, increment)) ?
+ (void*)-1 : prev;
}
}
#else /* UCM_BISTRO_HOOKS */
+UCM_DEFINE_DLSYM_FUNC(brk, int, -1, void*)
UCM_DEFINE_DLSYM_FUNC(sbrk, void*, MAP_FAILED, intptr_t)
UCM_DEFINE_DLSYM_FUNC(shmat, void*, MAP_FAILED, int, const void*, int)
UCM_DEFINE_DLSYM_FUNC(shmdt, int, -1, const void*)
#endif /* UCM_BISTRO_HOOKS */
+
+/**
+ * Report the current program break.
+ *
+ * @return The value of the external __curbrk variable when HAVE___CURBRK is
+ *         set; otherwise the result of ucm_brk_syscall() called with a zero
+ *         address.
+ */
+void *ucm_get_current_brk()
+{
+#if !HAVE___CURBRK
+    return ucm_brk_syscall(0);
+#else
+    return __curbrk;
+#endif
+}
diff --git a/src/ucm/util/replace.h b/src/ucm/util/replace.h
index 4b91b037d99..de060d1e048 100644
--- a/src/ucm/util/replace.h
+++ b/src/ucm/util/replace.h
@@ -79,6 +79,24 @@ extern pthread_t volatile ucm_reloc_get_orig_thread;
_UCM_DEFINE_REPLACE_FUNC(ucm_override_##_name, ucm_##_name, \
_rettype, _fail_val, __VA_ARGS__)
+/**
+ * Defines the following:
+ * - ucm_orig_##_name##_dlsym - calls original function by symbol lookup
+ * - ucm_orig_##_name - function pointer, initialized by default to
+ *   ucm_orig_##_name##_dlsym
+ * - ucm_override_##_name - calls ucm_##_name
+ *
+ * @param _name     Function name; pasted into every generated identifier.
+ * @param _rettype  Return type of the generated function pointer; also
+ *                  forwarded to both helper macros.
+ * @param _fail_val Forwarded unchanged to both helper macros (presumably the
+ *                  value returned on failure - confirm against their
+ *                  definitions).
+ * @param ...       Argument types; UCM_FUNC_DEFINE_ARGS() turns them into the
+ *                  parameter list of the function pointer.
+ *
+ * The ucm_orig_##_name pointer is defined without 'static', so it has
+ * external linkage when the macro is expanded at file scope.
+ */
+#define UCM_DEFINE_REPLACE_DLSYM_PTR_FUNC(_name, _rettype, _fail_val, ...) \
+ _UCM_DEFINE_DLSYM_FUNC(_name, ucm_orig_##_name##_dlsym, \
+ ucm_override_##_name, _rettype, _fail_val, \
+ __VA_ARGS__) \
+ \
+ _rettype (*ucm_orig_##_name)(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)) = \
+ ucm_orig_##_name##_dlsym; \
+ \
+ _UCM_DEFINE_REPLACE_FUNC(ucm_override_##_name, ucm_##_name, \
+ _rettype, _fail_val, __VA_ARGS__)
+
#define UCM_DEFINE_SYSCALL_FUNC(_name, _rettype, _syscall_id, ...) \
/* Call syscall */ \
_rettype ucm_orig_##_name(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)) \
diff --git a/src/ucm/util/sys.c b/src/ucm/util/sys.c
index 92bf834da5c..9f0bcacdffb 100644
--- a/src/ucm/util/sys.c
+++ b/src/ucm/util/sys.c
@@ -17,10 +17,12 @@
#include
#include
#include
+#include
#include
#include
#include
#include
+#include
#include
#include
#include
@@ -36,7 +38,7 @@ ucm_global_config_t ucm_global_opts = {
.mmap_hook_mode = UCM_DEFAULT_HOOK_MODE,
.enable_malloc_hooks = 1,
.enable_malloc_reloc = 0,
- .enable_cuda_reloc = 1,
+ .cuda_hook_mode = UCM_DEFAULT_HOOK_MODE,
.enable_dynamic_mmap_thresh = 1,
.alloc_alignment = 16,
.dlopen_process_rpath = 1
@@ -282,7 +284,8 @@ void ucm_strerror(int eno, char *buf, size_t max)
void ucm_prevent_dl_unload()
{
- int flags = RTLD_LOCAL | RTLD_NODELETE;
+ static ucs_init_once_t init_once = UCS_INIT_ONCE_INITIALIZER;
+ int flags = RTLD_LOCAL | RTLD_NODELETE;
Dl_info info;
void *dl;
int ret;
@@ -292,32 +295,34 @@ void ucm_prevent_dl_unload()
return;
}
- flags |= (ucm_global_opts.module_unload_prevent_mode ==
- UCM_UNLOAD_PREVENT_MODE_NOW) ? RTLD_NOW : RTLD_LAZY;
+ UCS_INIT_ONCE(&init_once) {
+ flags |= (ucm_global_opts.module_unload_prevent_mode ==
+ UCM_UNLOAD_PREVENT_MODE_NOW) ? RTLD_NOW : RTLD_LAZY;
- /* Get the path to current library by current function pointer */
- (void)dlerror();
- ret = dladdr(ucm_prevent_dl_unload, &info);
- if (ret == 0) {
- ucm_warn("could not find address of current library: %s", dlerror());
- return;
- }
+ /* Get the path to current library by current function pointer */
+ (void)dlerror();
+ ret = dladdr(ucm_prevent_dl_unload, &info);
+ if (ret == 0) {
+ ucm_warn("could not find address of current library: %s", dlerror());
+ return;
+ }
- /* Load the current library with NODELETE flag, to prevent it from being
- * unloaded. This will create extra reference to the library, but also add
- * NODELETE flag to the dynamic link map.
- */
- (void)dlerror();
- dl = dlopen(info.dli_fname, flags);
- if (dl == NULL) {
- ucm_warn("failed to load '%s': %s", info.dli_fname, dlerror());
- return;
- }
+ /* Load the current library with NODELETE flag, to prevent it from being
+ * unloaded. This will create extra reference to the library, but also add
+ * NODELETE flag to the dynamic link map.
+ */
+ (void)dlerror();
+ dl = dlopen(info.dli_fname, flags);
+ if (dl == NULL) {
+ ucm_warn("failed to load '%s': %s", info.dli_fname, dlerror());
+ return;
+ }
- ucm_debug("reloaded '%s' at %p with NODELETE flag", info.dli_fname, dl);
+ ucm_debug("loaded '%s' at %p with NODELETE flag", info.dli_fname, dl);
- /* Now we drop our reference to the lib, and it won't be unloaded anymore */
- dlclose(dl);
+ /* coverity[overwrite_var] */
+ dl = NULL;
+ }
}
char *ucm_concat_path(char *buffer, size_t max, const char *dir, const char *file)
@@ -349,3 +354,25 @@ char *ucm_concat_path(char *buffer, size_t max, const char *dir, const char *fil
return buffer;
}
+
+/**
+ * Invoke the brk system call directly.
+ *
+ * @param addr  Address handed to the kernel as the requested program break.
+ *
+ * @return The value produced by the system call, converted to a pointer.
+ */
+void *ucm_brk_syscall(void *addr)
+{
+    void *result;
+
+#ifdef __x86_64__
+    /* rax carries the syscall number in and the result out; rdi carries the
+     * first argument. Operands are placed by register constraints so that no
+     * register is modified behind the compiler's back. The 'syscall'
+     * instruction itself overwrites rcx and r11, hence the clobber list. */
+    __asm__ __volatile__("syscall"
+                         : "=a"(result)
+                         : "a"((long)SYS_brk), "D"(addr)
+                         : "rcx", "r11", "memory");
+#else
+    /* TODO implement 64-bit syscall for aarch64, ppc64le */
+    result = (void*)syscall(SYS_brk, addr);
+#endif
+    return result;
+}
+
+/**
+ * @return The id of the calling thread, as returned by the gettid system call.
+ */
+pid_t ucm_get_tid()
+{
+    const long tid = syscall(SYS_gettid);
+
+    return (pid_t)tid;
+}
diff --git a/src/ucm/util/sys.h b/src/ucm/util/sys.h
index 37a1d927ef3..838fb61a1c5 100644
--- a/src/ucm/util/sys.h
+++ b/src/ucm/util/sys.h
@@ -8,6 +8,9 @@
#ifndef UCM_UTIL_SYS_H_
#define UCM_UTIL_SYS_H_
+#include