Callback not reached, using callback group and Fast-RTPS #1611

anaelle-sw · 2021-03-31T13:51:44Z

Bug report

Required Info:

Operating System:
- Ubuntu 20.04
Installation type:
- Binaries
Version or commit hash:
- 8.1.0 ROS2 Rolling
- Edit: Also reproduced on latest release: 8.2.0 ROS2 Rolling
DDS implementation:
- Fast-RTPS
Client library (if applicable):
- rclcpp

Steps to reproduce issue

Hello,
we are currently using CycloneDDS as RWM implementation, but we would need to use Fast-RTPS.
Using Fast-RTPS changes our ROS2 application behavior. I found out it is related to the callback groups, which we use a lot.
I tried to isolate the issue with 3 nodes that are quite simple:

A service client

#include <chrono>
#include <functional>
#include <memory>
#include <string>

#include <rclcpp/rclcpp.hpp>
#include <std_msgs/msg/bool.hpp>
#include <std_srvs/srv/trigger.hpp>

class CallbackGroupSrvClient: public rclcpp::Node {
public:
  // Constructor
  CallbackGroupSrvClient()
  : Node("callback_goup_srv_client"){

    // Init callback group
    callback_group_ = this->create_callback_group(rclcpp::CallbackGroupType::MutuallyExclusive, false);
    callback_group_executor_thread_ = std::thread([this]() {
      callback_group_executor_.add_callback_group(callback_group_, this->get_node_base_interface());
      callback_group_executor_.spin();
    });

    // Init subscription
    subscription_ = this->create_subscription<std_msgs::msg::Bool>(
          "topic_bool",
          rclcpp::QoS(1),
          std::bind(
            &CallbackGroupSrvClient::sub_callback,
            this,
            std::placeholders::_1
            )
          );

    // Create service client and link it to callback group
    srv_client_ = this->create_client<std_srvs::srv::Trigger>(
          "trigger_srv",
          rmw_qos_profile_services_default,
          callback_group_
          );
  }

  // Destructor
  ~CallbackGroupSrvClient(){
    callback_group_executor_.cancel();
    callback_group_executor_thread_.join();
  }

private:
  // Methods
  void sub_callback(const std_msgs::msg::Bool::SharedPtr ){
    RCLCPP_INFO_STREAM(this->get_logger(), "Received new msg! ");

    // Send request and wait for future
    auto req = std::make_shared<std_srvs::srv::Trigger::Request>();
    auto future = srv_client_->async_send_request(req);
    RCLCPP_INFO_STREAM(this->get_logger(), "Sending request");
    auto future_status = future.wait_for(std::chrono::seconds(10));

    // Process result
    if (future_status == std::future_status::ready) {
      if (future.get()->success) {
        RCLCPP_INFO_STREAM(this->get_logger(), "Srv suceeded");
      }
      else {
        RCLCPP_INFO_STREAM(this->get_logger(), "Srv failed");
      }
    }
    else if (future_status == std::future_status::timeout) {
      RCLCPP_INFO_STREAM(this->get_logger(), "Response not received within timeout");
    }
    else if (future_status == std::future_status::deferred) {
      RCLCPP_INFO_STREAM(this->get_logger(), "Srv deferred");
    }

    return;
  }

  // Attributes
  rclcpp::Subscription<std_msgs::msg::Bool>::SharedPtr subscription_;
  rclcpp::CallbackGroup::SharedPtr callback_group_;
  rclcpp::executors::SingleThreadedExecutor callback_group_executor_;
  std::thread callback_group_executor_thread_;
  rclcpp::Client<std_srvs::srv::Trigger>::SharedPtr srv_client_;
};



int main(int argc, char * argv[]){
  rclcpp::init(argc, argv);
  auto client = std::make_shared<CallbackGroupSrvClient>();
  rclcpp::spin(client);
  rclcpp::shutdown();
  return 0;
}

A service server:

#include <chrono>
#include <functional>
#include <memory>
#include <string>

#include <rclcpp/rclcpp.hpp>
#include <std_msgs/msg/bool.hpp>
#include <std_srvs/srv/trigger.hpp>


class CallbackGroupSrvServer: public rclcpp::Node {
public:
  // Constructor
  CallbackGroupSrvServer()
  : Node("callback_goup_srv_server"){
    // Init callback group and spin it
    callback_group_ = this->create_callback_group(rclcpp::CallbackGroupType::MutuallyExclusive, false);
    callback_group_executor_thread_ = std::thread([this]() {
      callback_group_executor_.add_callback_group(callback_group_, this->get_node_base_interface());
      callback_group_executor_.spin();
    });

    // Init service server and link it to callback group
    service_ = this->create_service<std_srvs::srv::Trigger>(
          "trigger_srv",
          std::bind(
            &CallbackGroupSrvServer::srv_callback,
            this,
            std::placeholders::_1,
            std::placeholders::_2)
          );
  }

  // Destructor
  ~CallbackGroupSrvServer(){
    callback_group_executor_.cancel();
    callback_group_executor_thread_.join();
  }

private:
  // Methods
  void sub_callback(const std_msgs::msg::Bool::SharedPtr ){
    new_msg_ = true;
    RCLCPP_INFO_STREAM(this->get_logger(), "Received new msg!");
    return;
  }

  void srv_callback(const std_srvs::srv::Trigger::Request::SharedPtr, std_srvs::srv::Trigger::Response::SharedPtr res){
    RCLCPP_INFO_STREAM(this->get_logger(), "Received request");
    new_msg_ = false;

    // Create subscription
    auto sub_options_ = rclcpp::SubscriptionOptions();
    sub_options_.callback_group = callback_group_;
    auto subscription = this->create_subscription<std_msgs::msg::Bool>(
          "topic_bool",
          rclcpp::QoS(1),
          std::bind(
            &CallbackGroupSrvServer::sub_callback,
            this,
            std::placeholders::_1),
          sub_options_
          );

    // Wait for new_msg
    while (rclcpp::ok()
           && new_msg_ == false) {
      rclcpp::sleep_for(std::chrono::seconds(1));
      RCLCPP_INFO_STREAM(this->get_logger(), "Sleep...");
    }

    res->message = "";
    res->success = true;
    return;
  }

  // Attributes
  rclcpp::Service<std_srvs::srv::Trigger>::SharedPtr service_;
  rclcpp::CallbackGroup::SharedPtr callback_group_;
  rclcpp::executors::SingleThreadedExecutor callback_group_executor_;
  std::thread callback_group_executor_thread_;
  bool new_msg_;
};

int main(int argc, char * argv[]){
  rclcpp::init(argc, argv);
  auto srv_server = std::make_shared<CallbackGroupSrvServer>();
  rclcpp::spin(srv_server);
  rclcpp::shutdown();
  return 0;
}

And a minimal publisher:

#include <chrono>
#include <functional>
#include <memory>
#include <string>

#include <rclcpp/rclcpp.hpp>
#include <std_msgs/msg/bool.hpp>


class MinimalPublisher: public rclcpp::Node {
public:
  // constructor
  MinimalPublisher()
  : Node("minimal_publisher"){
    publisher_ = this->create_publisher<std_msgs::msg::Bool>("topic_bool", rclcpp::QoS(1));
    timer_ = this->create_wall_timer(
          std::chrono::seconds(5),
          std::bind(
            &MinimalPublisher::timer_callback,
            this
            )
          );
  }

private:
  // method
  void timer_callback(){
    std_msgs::msg::Bool msg;
    msg.data = false;
    publisher_->publish(msg);
    RCLCPP_INFO_STREAM(this->get_logger(), "Publishing data! ");
    return;
  }

  // attributes
  rclcpp::TimerBase::SharedPtr timer_;
  rclcpp::Publisher<std_msgs::msg::Bool>::SharedPtr publisher_;
};

int main(int argc, char * argv[]){
  rclcpp::init(argc, argv);
  auto pub = std::make_shared<MinimalPublisher>();
  rclcpp::spin(pub);
  rclcpp::shutdown();
  return 0;
}

Here's a small description of each:

The publisher publishes data every 5 seconds on topic /topic_bool.
The client subscribes to /topic_bool. When a new message is received on this topic, the client calls the service /trigger_srv and wait 10 seconds for a response. The service client is declared with a callback group (which is added to a dedicated spinning executor)
The server for service /trigger_srv subscribes to /topic_bool when a new service request is received. When a new message is received on this topic, the server sends the service response. The subscription is declared with a callback group (which is added to a dedicated spinning executor)

Then in 3 different shells I force Fast-RTPS as RMW implementation: export RMW_IMPLEMENTATION=rmw_fastrtps_cpp
And I launch each node in a shell.

Expected behavior

To get the expected behavior, you can launch the nodes using CycloneDDS.
Here's what should happen:

The publisher publishes data on /topic_bool.
The client calls on /trigger_srv.
The server wait for a new message on /topic_bool.
The publisher publishes data on /topic_bool.
The server sends the response
The client receives the response

Actual behavior

Here's what happens when using Fast-RTPS:

The publisher publishes data on /topic_bool.
The client calls on /trigger_srv.
The server wait for a new message on /topic_bool.
The publisher publishes data on /topic_bool.
The server doesn't get the new message on /topic_bool.
The client stop waiting for a response (timeout).

Additional information

I tried to reproduce this issue under different conditions:

On two different computers (to check it is not related to one particular setup)
With / without a domain Id (via setting environment variable ROS_DOMAIN_ID)
With WiFi On / Off (to be sure to not catch other ROS nodes / daemons on the network)
With CycloneDDS binaries installed / removed, and a rebuild

Between each test, I made sure to do ros2 daemon stop.
Under all these conditions, and all combinations of them, the issue can be reproduced.

Thanks for helping!

The text was updated successfully, but these errors were encountered:

ros2/rclcpp#1611 Signed-off-by: Tomoya Fujita <[email protected]>

fujitatomoya · 2021-04-18T04:02:37Z

I can reproduce this issue with provided samples. as far as i checked, on service server's subscription, fast-dds receives the event and notify wait_set via condition variable. (saying fast-dds seems to be okay)

clalancette · 2021-04-19T12:50:24Z

The weird thing, though, is that the upper layers also seem to be doing the correct thing, as it works in CycloneDDS. So I'm a bit confused as to where the issue is.

@MiguelCompany any thoughts on what might be happening here?

iuhilnehc-ynos · 2021-04-20T04:31:23Z

Hi all,

The main reason to cause this issue seems like the guard_conditions in rmw_wait can not be triggered after a new subscription created. (Notice that the rmw_wait in the new thread only care about guard_conditions at the beginning.)

We expected that rmw_create_subscription will update graph to call GraphCache::associate_reader that will call GraphCache::on_change_callback_ to trigger the waiting on the rmw_wait, but it seems

https://github.com/ros2/rmw_fastrtps/blob/7bb3563bebd20c3cae03231c2e321bf132651449/rmw_fastrtps_cpp/src/init_rmw_context_impl.cpp#L168:L173

didn't do the work as we expected.

The guard_condition created from
https://github.com/ros2/rmw_fastrtps/blob/7bb3563bebd20c3cae03231c2e321bf132651449/rmw_fastrtps_cpp/src/init_rmw_context_impl.cpp#L139:L149
doesn't add into the guard_conditions on rmw_wait.

What do you think about this?

iuhilnehc-ynos · 2021-04-20T04:53:06Z

I have added experimental source code as below, and then the behavior of rclcpp_1611_server on rmw_fastrtps_cpp is the same as rmw_cyclonedds_cpp.

https://github.com/fujitatomoya/ros2_test_prover/blob/6904c1f0d871c5ae225801a312bf3dae34ca83f9/prover_rclcpp/src/rclcpp_1611_server.cpp#L17:L22

    callback_group_ = this->create_callback_group(rclcpp::CallbackGroupType::MutuallyExclusive, false);

    // <add a timer with this callback_group_ before running the `spin`>
    // it just makes sure the rmw_wait can be triggered out to run the next cycle.
    timer_ = this->create_wall_timer(
          std::chrono::seconds(1),
          [](){},
          callback_group_
          );

    //callback_group_ = this->create_callback_group(rclcpp::CallbackGroupType::Reentrant, false);
    callback_group_executor_thread_ = std::thread([this]() {
      callback_group_executor_.add_callback_group(callback_group_, this->get_node_base_interface());
      callback_group_executor_.spin();
    });

iuhilnehc-ynos · 2021-04-20T05:03:05Z

I'll check whether there exist some other events that can be used.

iuhilnehc-ynos · 2021-04-21T07:51:26Z

After diving into the source code, I think I made a mistake at #1611 (comment).

Here is the correct location to notify the executor while creating a subscription, not the GraphCache::on_change_callback_
The example of this issue uses one node on two executors, I wonder if it is a limitation? Because it will lead to use the same guard condition (notify_guard_condition of a node) on two threads.
Run rmw_wait with the same guard condition in two threads, this is a problem in rmw_fastrtps_cpp, but not rmw_cyclonedds. 🤔

(Updated: use specific commit id instead of master)

MiguelCompany · 2021-04-21T08:09:25Z

@iuhilnehc-ynos Thanks for the thorough checks!

I guess changing this line to a notify_all would do the trick. Would you mind checking?

iuhilnehc-ynos · 2021-04-21T08:42:05Z

@MiguelCompany

Thanks for your reply. Actually, I have tried that before, but it didn't work.

rmw_cyclonedds

rmw_trigger_guard_condition
  dds_set_guardcondition
    dds_entity_trigger_set
      dds_entity_observers_signal   # it's the point

rmw_fastrtps_cpp

rmw_trigger_guard_condition
  __rmw_trigger_guard_condition
    GuardCondition::trigger
      if (conditionMutex_ != nullptr)
        conditionVariable_->notify_one();

Use notify_all instead of notify_one is not working. The conditionVariable created on two Executor::Executor used in two threads are different.

Executor::Executor
  rcl_wait_set_init
    rmw_create_wait_set
      __rmw_create_wait_set
        rmw_allocate(sizeof(CustomWaitsetInfo))

rmw_wait simple logic as follow,

one thread:
    attach the same GUARD with mutex(A1) and condition(A2)
                                      ^^^^^^^^^^//\  A2 is different from B2.
    wait
    detach the GUARD
    
another thread:
    attach the same GUARD with mutex(B1) and condition(B2)
    wait
    detach the GUARD

iuhilnehc-ynos · 2021-04-22T02:12:41Z

I am considering if we could update GuardCondition as

use a list to manage multiple conditionMutex_ and conditionVariable_
attachCondition(std::mutex *, std::condition_variable *) : add the item into the list
trigger() :trigger through the list
update the method detachCondition by adding arguments (std::mutex *, std::condition_variable *)

fujitatomoya · 2021-04-22T09:17:19Z

The conditionVariable created on two Executor::Executor used in two threads are different.

right.

https://github.com/fujitatomoya/ros2_test_prover/blob/080fb53d18ccaf6881ee9f155efbe0fd0b05c5cd/prover_rclcpp/src/rclcpp_1611_server.cpp#L49

this is taken care by

https://github.com/fujitatomoya/ros2_test_prover/blob/080fb53d18ccaf6881ee9f155efbe0fd0b05c5cd/prover_rclcpp/src/rclcpp_1611_server.cpp#L89

main thread rclcpp::spin() (thread id - 1)

callback_group_executor_thread_ will be in sleep and never be triggered when main thread takes service request. but as far as i see, cyclonedds wakes up other threads at this time, so that subscription event can be taken by callback_group_executor_thread_.

iuhilnehc-ynos · 2021-04-22T10:01:38Z

Let me make it more clear, the reason why the condition variable can't be triggered in callback_group_executor_thread_ is that detaching GuardCondition will set the mutex and condition_variable with nullptr in the main thread(spin). so calling rcl_trigger_guard_condition while creating subscription can only just set the hasTriggered_, but not to use conditionVariable_->notify_one.

MiguelCompany · 2021-04-22T10:08:02Z

The solution proposed on this comment may work. What bothers me is that getHasTriggered() will only return true for one of the threads. That said if the only reason to add them is to wake up the threads, the solution proposed by @iuhilnehc-ynos would work.

wjwwood · 2021-04-22T18:31:16Z

It would be great to have a new regression test to expose this problem.

iuhilnehc-ynos · 2021-04-25T05:41:08Z

I'll add a test case for rclcpp firstly, and then update rmw_fastrtps_cpp, finally, I'll update rmw_connextdds after ros2/rmw_fastrtps#527 is approved.

iuhilnehc-ynos · 2021-04-25T08:18:30Z

@wjwwood @MiguelCompany @fujitatomoya

Could you help to review these two PRs(#1640, ros2/rmw_fastrtps#527)?

asorbini · 2021-04-27T15:56:33Z

The example of this issue uses one node on two executors, I wonder if it is a limitation? Because it will lead to use the same guard condition (notify_guard_condition of a node) on two threads.

Is this allowed by design? Judging from the RMW implementations, it isn't a supported use case.

iuhilnehc-ynos · 2021-04-27T16:54:40Z

it isn't a supported use case.

Actually, I thought about this before.
I don't know whether a non-supported use case means ros2 not support anymore even if a use case seems a very normal case.

asorbini · 2021-04-27T17:14:03Z

I don't know whether a non-supported use case means ros2 not support anymore even if a use case seems a very normal case.

I think it could become a supported case, but it should be considered a new feature. As with any new feature, the implications of adding it should be vetted and qualified, designs should be created/updated, and the community should come to an agreement on whether to support it or not.

At this point, I read this issue like "this unsupported use case happens to work with one RMW implementation" more than "we should fix other implementations to support it".

Hopefully someone else can chime in to confirm whether attaching a node to multiple executors is currently a supported use case or not.

wjwwood · 2021-04-27T20:02:42Z

Using a single guard condition with two wait sets is not supported, but using two (or more) executors with ~~a single node~~ callback groups from a single node is supported. However, each executor used does not necessarily use the node's (single) graph guard condition. Usually that is waited on in one place and used to wake other waiting entities in turn with either a condition variable (in the case of the rclcpp::GraphListener) or by triggering other guard conditions to wake more wait sets. Note executors are implemented using wait sets.

wjwwood · 2021-04-27T20:07:37Z

I amended what I said.

asorbini · 2021-04-27T22:35:26Z

@wjwwood thank you for looking into my question.

Usually that is waited on in one place and used to wake other waiting entities in turn with either a condition variable (in the case of the rclcpp::GraphListener) or by triggering other guard conditions to wake more wait sets.

Could an alternative solution to this issue be rclcpp creating multiple graph conditions for each node, one per executor associated with the node?

This way RMW implementations would not have to support conditions attached to multiple waitsets.

wjwwood · 2021-04-27T22:44:27Z

This way RMW implementations would not have to support conditions attached to multiple waitsets.

That's definitely not supported. If that's happening then it's a bug in rclcpp.

asorbini · 2021-04-27T23:01:53Z

That's definitely not supported. If that's happening then it's a bug in rclcpp.

If I understand it correctly, the solution outlined by @iuhilnehc-ynos earlier in this thread seems to be going in that direction, hence why I asked (see also ros2/rmw_connextdds#51 which implements it for rmw_connextdds).

wjwwood · 2021-04-28T00:40:08Z

Ah, well then I misunderstood the fix. It is documented (though all the rmw docs are quite thin) that you cannot do this for a wait set:

It is not safe to wait for the same entity using different wait sets in two or
more threads concurrently.

https://github.com/ros2/rmw/blob/f9f4ef46f8dd3ef938526a1140a3c57c155a259f/rmw/include/rmw/rmw.h#L2400-L2401

So the bug is likely in rclcpp.

The (relatively) new rclcpp::GuardCondition class guards against this (as do the other entities) with the exchange_in_use_by_wait_set_state() method, but that's not used with the Executor classes yet.

iuhilnehc-ynos · 2021-04-28T05:45:09Z

Could an alternative solution to this issue be rclcpp creating multiple graph conditions for each node, one per executor associated with the node?

This way RMW implementations would not have to support conditions attached to multiple waitsets.

creating multiple graph conditions for each node, one per executor associated with one graph condition of the node seems work, if it's acceptable, I'll use this way. If not, I think the remaining method to resolve this issue is to add a limitation (e.g. throw an exception) while adding node's notify condition to an executor if the guard condition is in used.

fujitatomoya · 2021-04-28T06:40:38Z

It is not safe to wait for the same entity using different wait sets in two or
more threads concurrently.

Ah, it is clearly documented...i was thinking that this use case is okay, so i was expecting no constraints. @iuhilnehc-ynos
@ablasdel sorry 😢 my bad, appreciate for bringing discussion and reconsideration here 👍

creating multiple graph conditions for each node, one per executor associated with one graph condition of the node seems work, if it's acceptable

although this works, i guess that is an enhancement. so probably we would want to create another issue on this?

If not, I think the remaining method to resolve this issue is to add a limitation

i am bit inclined to do this, since this is not designed or recommended. so at least it should detect this operation for now.

wjwwood · 2021-04-28T22:27:39Z

creating multiple graph conditions for each node, one per executor associated with one graph condition of the node

I don't think this is a good solution, because it will require changes to the rmw API and it will be complicated because the creation and destruction of the graph guard conditions will need to be synchronized with the middleware which will need to trigger them from a different thread (most likely).

Instead, I think we should solve this by changing how this works in rclcpp. I'm not convinced we need the node's graph guard condition anyways, if I understand the issue correctly.

iuhilnehc-ynos · 2021-04-29T01:54:05Z

To add a new PR with a limitation (e.g. can not add a notify guard condition in one more executor.) for rclcpp make the reporter's source code not working entirely, even if using rmw_cyclonedds_cpp, and then ask him to change his source code not using the current way, this is not the way I want to help him.

If somebody could continue to help him, I appreciate that.

fujitatomoya · 2021-04-30T02:10:04Z

@wjwwood @iuhilnehc-ynos

i'd like to confirm (or try to summarize) the current situation and my understanding , any other comments from anyone welcome!

in application perspective, this should be supported.
this is bug in rclcpp but rmw, since rmw is designed that It is not safe to wait for the same entity using different wait sets in two or more threads concurrently.
that is saying, rclcpp needs to be make sure that the same entity is not used on different wait sets.

rclcpp make the reporter's source code not working entirely, even if using rmw_cyclonedds_cpp, and then ask him to change his source code not using the current way, this is not the way I want to help him.

me neither. if i am not mistaken, this sample code should be working.

Instead, I think we should solve this by changing how this works in rclcpp. I'm not convinced we need the node's graph guard condition anyways, if I understand the issue correctly.

i think the root cause is notify_guard_condition_ is used on different wait sets. so using rclcpp::GuardCondition with exchange_in_use_by_wait_set_state() method to make sure that does not happen. (this is introduced here, #1611 (comment))

i was bit confused before, but above is my understanding now.

wjwwood · 2021-04-30T23:35:12Z

that is saying, rclcpp needs to be make sure that the same entity is not used on different wait sets.

Yes, I think that's ideal.

i think the root cause is notify_guard_condition_ is used on different wait sets. so using rclcpp::GuardCondition with exchange_in_use_by_wait_set_state() method to make sure that does not happen. (this is introduced here, #1611 (comment))

Switching to rclcpp::GuardCondition will provide a more consistent and informative error, but I don't think it solves the problem, because I think the given code may still not work? I'm not sure about that though. I haven't had time to investigate it in detail.

To move forward on this issue we should ensure:

ensure guard conditions (or any entity) is not used with the same wait set simultaneously
make any changes necessary to then make it possible to use multiple executors with a single node
- importantly this means the node is added to (at most) one executor, but callback groups from the node can be attached to other executors if they were created with automatically_add_to_executor_with_node = false

We can do the first thing manually (make sure executors don't automatically use the node's sole graph guard condition) or by using the exchange_in_use_by_wait_set_state(), but that's meant to be used with the new rclcpp::WaitSet, which we should switch everything to eventually, but I haven't had time to do it.

For the second point, we can do that separately from the first, and I think we can do that by just avoiding the node's graph guard condition (if that is the root cause of the issue). Again, I think this should be possible conceptually, but I haven't looked into the details yet.

iuhilnehc-ynos · 2021-05-08T05:32:27Z

i think the root cause is notify_guard_condition_ is used on different wait sets.

ensure guard conditions (or any entity) is not used with the same wait set simultaneously

I agree about this.

From #1611 (comment) and #1611 (comment), I am sorry that I can't find out how to notify the thread to run the next cycle if a thread enters into rmw_wait only for the 3 guard conditions (shutdown_guard_condition_ and interrupt_guard_condition_ of Executor, the notify_guard_condition of NodeBase).
(Not adding notify_guard_condition into waitset if it's in_use, there exist 2 guard conditions(shutdown_guard_condition_, interrupt_guard_condition_), how can we notify this thread while creating a subscription.)

The following is my latest thought,

Remove notify_guard_condition from NodeBase. No idea why we must use notify_guard_condition before, it seems the interrupt_guard_condition_ of the Executor could work as expected as notify_guard_condition did.
Use a list in NodeBase to manage the interrupt_guard_condition_ of Executor instead of one notify_guard_condition, and the interrupt_guard_condition_ only be added into NodeBase at Executor::add_callback_group_to_map if Node is new for the executor.
Replace all locations(such as creating a subscription, creating a service, etc) of notify_guard_condition trigger notification with triggerring the list of interrupt_guard_condition_.
(The reason why I want to remove the notify_guard_condition is that calling two kinds of guard conditions in these locations seems not good.)

What do you think?

iuhilnehc-ynos · 2021-05-14T06:42:52Z

friendly ping @wjwwood

Could you give me some guidance or share a bit of advice on #1611 (comment) ?

wjwwood · 2021-05-19T03:55:17Z

I was thinking, perhaps naively, that instead of a node having a notify guard condition, we could have a "notify" guard condition for each callback group.

I think the interrupt guard condition in the executor should be left alone? It is used for the cancel() method. I suppose it could be used if you inverted the logic and exposed the cancel method or the interrupt guard condition of the executor to the callback group instead. I thought that was already what was happening, but again I haven't had time to investigate it in detail. It has been more than a year since I touched that part of the code, and even then it was my intern looking at it, and I was just reviewing.

iuhilnehc-ynos · 2021-05-19T05:25:20Z

Thank you.

we could have a "notify" guard condition for each callback group.

👍 , OK, I'll use this way.

Add a "notify" guard condition in Callbackgroup
Append "memory_strategy_->add_guard_condition(callback_group->get_notify_guard_condition());" after

rclcpp/rclcpp/src/rclcpp/executor.cpp

Line 247 in 2e4c97a

memory_strategy_->add_guard_condition(node_ptr->get_notify_guard_condition());

,

Append "rcl_trigger_guard_condition(callback_group->get_notify_guard_condition())" after

rclcpp/rclcpp/src/rclcpp/node_interfaces/node_topics.cpp

Lines 110 to 118 in 2e4c97a

    
           // Notify the executor that a new subscription was created using the parent Node. 
        
           { 
        
             auto notify_guard_condition_lock = node_base_->acquire_notify_guard_condition_lock(); 
        
             auto ret = rcl_trigger_guard_condition(node_base_->get_notify_guard_condition()); 
        
             if (ret != RCL_RET_OK) { 
        
               using rclcpp::exceptions::throw_from_rcl_error; 
        
               throw_from_rcl_error(ret, "failed to notify wait set on subscription creation"); 
        
             } 
        
           }

(other locations also need to append this)

I think the interrupt guard condition in the executor should be left alone?

Agreed.

iuhilnehc-ynos · 2021-05-19T05:30:06Z

One more question, do you think the "notify_guard_condition" in the Node is still necessary?

Remove it
or
Make sure only one waitset can use this "notify_guard_condition"

wjwwood · 2021-05-21T08:58:58Z

I think that sounds right. You might run into some undesirable circular references, but we iterate if that's the case.

I'd say, deprecate the notify guard condition of the node with the intention to remove it eventually.

iuhilnehc-ynos · 2021-05-24T03:43:13Z

the notify_guard_condition of Node is used in GraphListener, so I'll keep it. I'll remove the notify_guard_condition of Node used in Executor based on an entity that can't be used in multiple waitsets.

@wjwwood , Could you help to review #1640?

fujitatomoya · 2022-10-26T21:48:30Z

@anaelle-sw @iuhilnehc-ynos

i confirmed that this has been addressed by #1640, i will go ahead to close this.

fujitatomoya added a commit to fujitatomoya/ros2_test_prover that referenced this issue Apr 18, 2021

Callback not reached, using callback group and Fast-RTPS

6904c1f

ros2/rclcpp#1611 Signed-off-by: Tomoya Fujita <[email protected]>

This was referenced Apr 25, 2021

Fix bug that a callback not reached #1640

Merged

update guard condition by managing multiple items ros2/rmw_fastrtps#527

Closed

iuhilnehc-ynos mentioned this issue Apr 27, 2021

update guard condition by managing multiple items ros2/rmw_connextdds#51

Closed

clalancette assigned fujitatomoya Jun 10, 2021

MiguelCompany mentioned this issue May 13, 2022

Nested wall timers are not being executed on FastRTPS ros2/rmw_fastrtps#606

Closed

robotechvision mentioned this issue May 20, 2022

Fixed lifecycle manager startup (#2917) ros-navigation/navigation2#2960

Merged

7 tasks

afrixs mentioned this issue Jul 21, 2022

Subscriber which is created with dedicated callback group at runtime not working ros2/rmw_fastrtps#613

Closed

henningkayser mentioned this issue Aug 3, 2022

MGI CallbackGroup doesn't trigger when running external Executor + Fast-DDS moveit/moveit2#1474

Closed

Barry-Xu-2018 mentioned this issue Oct 18, 2022

Topic cannot be re-subscribed while use_sim_time is true #2028

Closed

fujitatomoya closed this as completed Oct 26, 2022

AlexeyMerzlyakov mentioned this issue Mar 2, 2023

Obstacle Layer, Voxel Layer, and Costmap Topic Collision Checker not working in Humble/Rolling due to Fast-DDS Regression ros-navigation/navigation2#3014

Closed

Callback not reached, using callback group and Fast-RTPS #1611

Callback not reached, using callback group and Fast-RTPS #1611

Comments

anaelle-sw commented Mar 31, 2021 • edited Loading

Bug report

Steps to reproduce issue

Expected behavior

Actual behavior

Additional information

fujitatomoya commented Apr 18, 2021

clalancette commented Apr 19, 2021

iuhilnehc-ynos commented Apr 20, 2021

iuhilnehc-ynos commented Apr 20, 2021 • edited Loading

iuhilnehc-ynos commented Apr 20, 2021

iuhilnehc-ynos commented Apr 21, 2021 • edited Loading

MiguelCompany commented Apr 21, 2021

iuhilnehc-ynos commented Apr 21, 2021 • edited Loading

iuhilnehc-ynos commented Apr 22, 2021

fujitatomoya commented Apr 22, 2021

iuhilnehc-ynos commented Apr 22, 2021

MiguelCompany commented Apr 22, 2021 • edited Loading

wjwwood commented Apr 22, 2021

iuhilnehc-ynos commented Apr 25, 2021 • edited Loading

iuhilnehc-ynos commented Apr 25, 2021

asorbini commented Apr 27, 2021

iuhilnehc-ynos commented Apr 27, 2021

asorbini commented Apr 27, 2021

wjwwood commented Apr 27, 2021 • edited Loading

wjwwood commented Apr 27, 2021

asorbini commented Apr 27, 2021

wjwwood commented Apr 27, 2021

asorbini commented Apr 27, 2021

wjwwood commented Apr 28, 2021

iuhilnehc-ynos commented Apr 28, 2021 • edited Loading

fujitatomoya commented Apr 28, 2021

wjwwood commented Apr 28, 2021

iuhilnehc-ynos commented Apr 29, 2021 • edited Loading

fujitatomoya commented Apr 30, 2021

wjwwood commented Apr 30, 2021

iuhilnehc-ynos commented May 8, 2021

iuhilnehc-ynos commented May 14, 2021

wjwwood commented May 19, 2021

iuhilnehc-ynos commented May 19, 2021 • edited Loading

iuhilnehc-ynos commented May 19, 2021

wjwwood commented May 21, 2021

iuhilnehc-ynos commented May 24, 2021

fujitatomoya commented Oct 26, 2022

anaelle-sw commented Mar 31, 2021 •

edited

Loading

iuhilnehc-ynos commented Apr 20, 2021 •

edited

Loading

iuhilnehc-ynos commented Apr 21, 2021 •

edited

Loading

iuhilnehc-ynos commented Apr 21, 2021 •

edited

Loading

MiguelCompany commented Apr 22, 2021 •

edited

Loading

iuhilnehc-ynos commented Apr 25, 2021 •

edited

Loading

wjwwood commented Apr 27, 2021 •

edited

Loading

iuhilnehc-ynos commented Apr 28, 2021 •

edited

Loading

iuhilnehc-ynos commented Apr 29, 2021 •

edited

Loading

iuhilnehc-ynos commented May 19, 2021 •

edited

Loading