diff --git a/src/runtime/thread_pool.cc b/src/runtime/thread_pool.cc index 5e7cf779c88f..e10738ab8bbe 100644 --- a/src/runtime/thread_pool.cc +++ b/src/runtime/thread_pool.cc @@ -253,9 +253,6 @@ class ThreadPool { num_workers_, [this](int worker_id) { this->RunWorker(worker_id); }, exclude_worker0_ /* include_main_thread */)); num_workers_used_ = threads_->Configure(threading::ThreadGroup::kBig, 0, exclude_worker0_); - // if MaxConcurrency restricted the number of workers (e.g., due to - // hyperthreading), respect the restriction - num_workers_used_ = std::min(num_workers_, num_workers_used_); } ~ThreadPool() { for (std::unique_ptr& q : queues_) { diff --git a/src/runtime/threading_backend.cc b/src/runtime/threading_backend.cc index 58d5fb4530dd..ad63493fb94f 100644 --- a/src/runtime/threading_backend.cc +++ b/src/runtime/threading_backend.cc @@ -54,10 +54,16 @@ class ThreadGroup::Impl { if (nthreads) { num_workers_used = nthreads; } + // if MaxConcurrency restricted the number of workers (e.g., due to + // hyperthreading), respect the restriction. On CPUs with N logical cores + // and N/2 physical cores this will set affinity to the first N/2 logical + // ones. + num_workers_used = std::min(num_workers_, num_workers_used); + const char *val = getenv("TVM_BIND_THREADS"); if (val == nullptr || atoi(val) == 1) { - // Skip if sorted_order.size() is bigger than the number of workers (threads_) - if (!(sorted_order_.size() > static_cast(num_workers_))) { + // Do not set affinity if there are more workers than found cores + if (sorted_order_.size() >= static_cast(num_workers_)) { SetAffinity(exclude_worker0, mode == kLittle); } else { LOG(WARNING)