From 3bd9fc08fd3624d51c6c5e34d7c8b84874be37a7 Mon Sep 17 00:00:00 2001
From: Jamie Davis
Date: Fri, 31 Aug 2018 14:53:16 -0400
Subject: [PATCH 01/31] PTP: Use uv_executor_queue_work and add options for
 zlib, crypto

---
 src/node_crypto.cc   |  2 +-
 src/node_internals.h | 16 ++++++++++++++--
 src/node_zlib.cc     |  1 +
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/node_crypto.cc b/src/node_crypto.cc
index fb0e1d6d8c5a31..cd1a08d764eb4c 100644
--- a/src/node_crypto.cc
+++ b/src/node_crypto.cc
@@ -4584,7 +4584,7 @@ bool ECDH::IsKeyPairValid() {
 struct CryptoJob : public ThreadPoolWork {
   Environment* const env;
   std::unique_ptr<AsyncWrap> async_wrap;
-  inline explicit CryptoJob(Environment* env) : ThreadPoolWork(env), env(env) {}
+  inline explicit CryptoJob(Environment* env) : ThreadPoolWork(env), env(env) { SetOptionsType(UV_WORK_USER_CPU); }
   inline void AfterThreadPoolWork(int status) final;
   virtual void AfterThreadPoolWork() = 0;
   static inline void Run(std::unique_ptr<CryptoJob> job, Local<Object> wrap);
diff --git a/src/node_internals.h b/src/node_internals.h
index eb9e79d9e8b522..62f5bad7f04f57 100644
--- a/src/node_internals.h
+++ b/src/node_internals.h
@@ -504,9 +504,19 @@ class InternalCallbackScope {
 class ThreadPoolWork {
  public:
-  explicit inline ThreadPoolWork(Environment* env) : env_(env) {}
+  explicit inline ThreadPoolWork(Environment* env) : env_(env) {
+    work_req_options_.type = UV_WORK_UNKNOWN;
+    work_req_options_.priority = -1;
+    work_req_options_.cancelable = 0;
+    work_req_options_.data = nullptr;
+  }
   inline virtual ~ThreadPoolWork() = default;
 
+  inline void SetOptionsType(uv_work_type type) { work_req_options_.type = type; }
+  inline void SetOptionsPriority(int priority) { work_req_options_.priority = priority; }
+  inline void SetOptionsCancelable(int cancelable) { work_req_options_.cancelable = cancelable; }
+  inline void SetOptionsData(void *data) { work_req_options_.data = data; }
+
   inline void ScheduleWork();
   inline int CancelWork();
 
@@ -516,13 +526,15 @@ class ThreadPoolWork {
  private:
   Environment* env_;
   uv_work_t work_req_;
+  uv_work_options_t work_req_options_;
 };
 
 void ThreadPoolWork::ScheduleWork() {
   env_->IncreaseWaitingRequestCounter();
-  int status = uv_queue_work(
+  int status = uv_executor_queue_work(
       env_->event_loop(),
       &work_req_,
+      &work_req_options_,
       [](uv_work_t* req) {
         ThreadPoolWork* self = ContainerOf(&ThreadPoolWork::work_req_, req);
         self->DoThreadPoolWork();
diff --git a/src/node_zlib.cc b/src/node_zlib.cc
index 774d319249ce63..ce28727ef7a8c1 100644
--- a/src/node_zlib.cc
+++ b/src/node_zlib.cc
@@ -91,6 +91,7 @@ class ZCtx : public AsyncWrap, public ThreadPoolWork {
         refs_(0),
         gzip_id_bytes_read_(0),
         write_result_(nullptr) {
+    SetOptionsType(UV_WORK_USER_CPU);
     MakeWeak();
   }
 

From bc0f42e917c8289bcb0c5ec398867333f976cf79 Mon Sep 17 00:00:00 2001
From: Jamie Davis
Date: Mon, 3 Sep 2018 17:21:03 -0400
Subject: [PATCH 02/31] PTP: First pass at node::threadpool

Summary: This commit outlines the general API for the node threadpool.
The current node threadpool is "plugged in" to libuv, but not V8.

Thoughts: I think the current API will generally suffice going forward.
For example, separate I/O and CPU pools can be implemented by sub-classing
TaskQueue and introducing multiple separate queues for I/O-bound and
CPU-bound Tasks.

Routing logic: I plug this TP into libuv during the call to Start(). I would
like to refactor out a 'LibuvExecutor' class similar to NodePlatform and have
them both use similar task routing logic. I have not yet routed the
v8::Platform's TP into this TP.
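To illustrate the intended client API, here is a rough sketch (MyCpuTask is
a hypothetical example for illustration only; the exact API may still
change):

    // Clients sub-class Task, set details_ in the constructor, and put
    // their work in Run(), which executes on one of the pool's Workers.
    class MyCpuTask : public node::threadpool::Task {
     public:
      MyCpuTask() { details_.type = node::threadpool::TaskDetails::CPU; }
      void Run() override { /* CPU-bound work */ }
    };

    // Post() takes ownership; the Threadpool destructor drains the queue.
    tp->Post(std::unique_ptr<Task>(new MyCpuTask()));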
Tests: I introduced (passing) unit tests in test/cctest/test_threadpool.cc.
In addition, this version passes much of the core test suite. There are some
failures, e.g. due to the lack of uv_cancel support comparable to that of
libuv's default executor in this iteration.
---
 node.gyp                       |   3 +
 src/node.cc                    |  17 +++
 src/node_crypto.cc             |   4 +-
 src/node_internals.h           |  21 ++-
 src/node_platform.cc           |   4 +
 src/node_platform.h            |   4 +
 src/node_threadpool.cc         | 225 +++++++++++++++++++++++++++++++++
 src/node_threadpool.h          | 180 ++++++++++++++++++++++++++
 test/cctest/test_threadpool.cc | 130 +++++++++++++++++++
 9 files changed, 583 insertions(+), 5 deletions(-)
 create mode 100644 src/node_threadpool.cc
 create mode 100644 src/node_threadpool.h
 create mode 100644 test/cctest/test_threadpool.cc

diff --git a/node.gyp b/node.gyp
index 5b8ca26bb2010d..b0a1b9aecae289 100644
--- a/node.gyp
+++ b/node.gyp
@@ -363,6 +363,7 @@
       'src/node_util.cc',
       'src/node_v8.cc',
       'src/node_stat_watcher.cc',
+      'src/node_threadpool.cc',
       'src/node_watchdog.cc',
       'src/node_worker.cc',
       'src/node_zlib.cc',
@@ -421,6 +422,7 @@
       'src/node_persistent.h',
       'src/node_platform.h',
       'src/node_root_certs.h',
+      'src/node_threadpool.h',
       'src/node_version.h',
       'src/node_watchdog.h',
       'src/node_wrap.h',
@@ -963,6 +965,7 @@
         'test/cctest/test_node_postmortem_metadata.cc',
         'test/cctest/test_environment.cc',
         'test/cctest/test_platform.cc',
+        'test/cctest/test_threadpool.cc',
         'test/cctest/test_traced_value.cc',
         'test/cctest/test_util.cc',
         'test/cctest/test_url.cc'
diff --git a/src/node.cc b/src/node.cc
index 25dc8d2bbef6ee..6349847bdab184 100644
--- a/src/node.cc
+++ b/src/node.cc
@@ -24,6 +24,7 @@
 #include "node_javascript.h"
 #include "node_code_cache.h"
 #include "node_platform.h"
+#include "node_threadpool.h"
 #include "node_version.h"
 #include "node_internals.h"
 #include "node_revert.h"
@@ -283,8 +284,20 @@ class NodeTraceStateObserver :
   v8::TracingController* controller_;
 };
 
+static struct {
+  // Returns zero on success
+  int Initialize(void) {
+    tp_.reset(new threadpool::Threadpool());
+    tp_->Initialize();
+    return uv_replace_executor(tp_->GetExecutor());
+  }
+
+  std::unique_ptr<threadpool::Threadpool> tp_;
+} node_threadpool;
+
 static struct {
 #if NODE_USE_V8_PLATFORM
+  // TODO(davisjam): Pass in the node_threadpool.
   void Initialize(int thread_pool_size) {
     tracing_agent_.reset(new tracing::Agent());
     auto controller = tracing_agent_->GetTracingController();
@@ -3338,6 +3351,10 @@ int Start(int argc, char** argv) {
   V8::SetEntropySource(crypto::EntropySource);
 #endif  // HAVE_OPENSSL
 
+  // This needs to run before any work is queued to the libuv executor.
+  CHECK_EQ(0, node_threadpool.Initialize());
+
+  // TODO(davisjam): Pass the v8_platform the node_threadpool.
v8_platform.Initialize( per_process_opts->v8_thread_pool_size); V8::Initialize(); diff --git a/src/node_crypto.cc b/src/node_crypto.cc index cd1a08d764eb4c..e4c6cfbdf70cab 100644 --- a/src/node_crypto.cc +++ b/src/node_crypto.cc @@ -4584,7 +4584,9 @@ bool ECDH::IsKeyPairValid() { struct CryptoJob : public ThreadPoolWork { Environment* const env; std::unique_ptr async_wrap; - inline explicit CryptoJob(Environment* env) : ThreadPoolWork(env), env(env) { SetOptionsType(UV_WORK_USER_CPU); } + inline explicit CryptoJob(Environment* env) : ThreadPoolWork(env), env(env) { + SetOptionsType(UV_WORK_USER_CPU); + } inline void AfterThreadPoolWork(int status) final; virtual void AfterThreadPoolWork() = 0; static inline void Run(std::unique_ptr job, Local wrap); diff --git a/src/node_internals.h b/src/node_internals.h index 62f5bad7f04f57..bcf45035fc4700 100644 --- a/src/node_internals.h +++ b/src/node_internals.h @@ -502,6 +502,7 @@ class InternalCallbackScope { bool closed_ = false; }; +// TODO(davisjam): Update to use node_threadpool. class ThreadPoolWork { public: explicit inline ThreadPoolWork(Environment* env) : env_(env) { @@ -512,10 +513,18 @@ class ThreadPoolWork { } inline virtual ~ThreadPoolWork() = default; - inline void SetOptionsType(uv_work_type type) { work_req_options_.type = type; } - inline void SetOptionsPriority(int priority) { work_req_options_.priority = priority; } - inline void SetOptionsCancelable(int cancelable) { work_req_options_.cancelable = cancelable; } - inline void SetOptionsData(void *data) { work_req_options_.data = data; } + inline void SetOptionsType(uv_work_type type) { + work_req_options_.type = type; + } + inline void SetOptionsPriority(int priority) { + work_req_options_.priority = priority; + } + inline void SetOptionsCancelable(int cancelable) { + work_req_options_.cancelable = cancelable; + } + inline void SetOptionsData(void* data) { + work_req_options_.data = data; + } inline void ScheduleWork(); inline int CancelWork(); @@ -531,6 +540,10 @@ class ThreadPoolWork { void ThreadPoolWork::ScheduleWork() { env_->IncreaseWaitingRequestCounter(); + // TODO(davisjam): Should we route to node TP instead? + // I don't think so. + // These are pending user requests with a CB for the event loop. + // So it makes sense for libuv to handle them start-to-finish. int status = uv_executor_queue_work( env_->event_loop(), &work_req_, diff --git a/src/node_platform.cc b/src/node_platform.cc index 92e9b371c5be6f..03753c4d2d5bf0 100644 --- a/src/node_platform.cc +++ b/src/node_platform.cc @@ -125,6 +125,8 @@ class WorkerThreadsTaskRunner::DelayedTaskScheduler { static void RunTask(uv_timer_t* timer) { DelayedTaskScheduler* scheduler = ContainerOf(&DelayedTaskScheduler::loop_, timer->loop); + // This adds the Task to the TP queue. + // TODO(davisjam): Plug in TP implementation. scheduler->pending_worker_tasks_->Push(scheduler->TakeTimerTask(timer)); } @@ -330,6 +332,7 @@ void NodePlatform::DrainTasks(Isolate* isolate) { do { // Worker tasks aren't associated with an Isolate. + // TODO(davisjam): This will require some dancing with the TP. 
worker_thread_task_runner_->BlockingDrain(); } while (per_isolate->FlushForegroundTasksInternal()); } @@ -371,6 +374,7 @@ bool PerIsolatePlatformData::FlushForegroundTasksInternal() { } void NodePlatform::CallOnWorkerThread(std::unique_ptr task) { + // TODO(davisjam): Plug in TP implementation worker_thread_task_runner_->PostTask(std::move(task)); } diff --git a/src/node_platform.h b/src/node_platform.h index 7297df7142242b..bc5f0ba92643d7 100644 --- a/src/node_platform.h +++ b/src/node_platform.h @@ -96,11 +96,14 @@ class PerIsolatePlatformData : }; // This acts as the single worker thread task runner for all Isolates. +// API is modeled on v8::TaskRunner. class WorkerThreadsTaskRunner { public: explicit WorkerThreadsTaskRunner(int thread_pool_size); + // Add task to queue for eventual Run() void PostTask(std::unique_ptr task); + // Add task to queue after at least delay_in_seconds void PostDelayedTask(std::unique_ptr task, double delay_in_seconds); @@ -110,6 +113,7 @@ class WorkerThreadsTaskRunner { int NumberOfWorkerThreads() const; private: + // Push'd directly by PostTask() and indirectly by PostDelayedTask. TaskQueue pending_worker_tasks_; class DelayedTaskScheduler; diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc new file mode 100644 index 00000000000000..77d2dbe8c74d81 --- /dev/null +++ b/src/node_threadpool.cc @@ -0,0 +1,225 @@ +#include "node_threadpool.h" +#include "node_internals.h" + +#include "env-inl.h" +#include "debug_utils.h" +#include "util.h" +#include + +// TODO(davisjam): DO NOT MERGE. Only for debugging. +// TODO(davisjam): There must be a better way to do this. +#define DEBUG_LOG 1 +#undef DEBUG_LOG + +#ifdef DEBUG_LOG +#include +#define LOG_0(fmt) fprintf(stderr, fmt) +#define LOG_1(fmt, a1) fprintf(stderr, fmt, a1) +#define LOG_2(fmt, a1, a2) fprintf(stderr, fmt, a1, a2) +#define LOG_3(fmt, a1, a2, a3) fprintf(stderr, fmt, a1, a2, a3) +#define LOG_4(fmt, a1, a2, a3, a4) fprintf(stderr, fmt, a1, a2, a3, a4) +#define LOG_5(fmt, a1, a2, a3, a4, a5) fprintf(stderr, fmt, a1, a2, a3, a4, a5) +#else +#define LOG_0(fmt) (void) 0 +#define LOG_1(fmt, a1) (void) 0 +#define LOG_2(fmt, a1, a2) (void) 0 +#define LOG_3(fmt, a1, a2, a3) (void) 0 +#define LOG_4(fmt, a1, a2, a3, a4) (void) 0 +#define LOG_5(fmt, a1, a2, a3, a4, a5) (void) 0 +#endif + +namespace node { +namespace threadpool { + +/************** + * Worker + ***************/ + +Worker::Worker() { +} + +void Worker::Start(TaskQueue* queue) { + CHECK_EQ(0, uv_thread_create(&self_, _Run, reinterpret_cast(queue))); +} + +void Worker::Join(void) { + CHECK_EQ(0, uv_thread_join(&self_)); +} + +void Worker::_Run(void* data) { + TaskQueue* queue = static_cast(data); + while (std::unique_ptr task = queue->BlockingPop()) { + task->Run(); + } +} + +/************** + * LibuvTask + ***************/ + +LibuvTask::LibuvTask(Threadpool* tp, + uv_work_t* req, + const uv_work_options_t* opts) + : Task(), tp_(tp), req_(req) { + req_ = req; + + // Copy opts. 
+ if (opts) { + switch (opts->type) { + case UV_WORK_FS: + details_.type = TaskDetails::FS; + break; + case UV_WORK_DNS: + details_.type = TaskDetails::DNS; + break; + case UV_WORK_USER_IO: + details_.type = TaskDetails::IO; + break; + case UV_WORK_USER_CPU: + details_.type = TaskDetails::CPU; + break; + default: + details_.type = TaskDetails::UNKNOWN; + } + + details_.priority = opts->priority; + details_.cancelable = opts->cancelable; + } else { + details_.type = TaskDetails::UNKNOWN; + details_.priority = -1; + details_.cancelable = false; + } + + LOG_1("LibuvTask::LibuvTask: type %d\n", details_.type); +} + +LibuvTask::~LibuvTask(void) { + LOG_1("LibuvTask::Run: Task %p done\n", req_); + tp_->GetExecutor()->done(req_); +} + +void LibuvTask::Run() { + LOG_1("LibuvTask::Run: Running Task %p\n", req_); + req_->work_cb(req_); +} + +/************** + * TaskQueue + ***************/ + +TaskQueue::TaskQueue() + : queue_(), stopped_(false), lock_(), tasks_available_() { +} + +bool TaskQueue::Push(std::unique_ptr task) { + Mutex::ScopedLock scoped_lock(lock_); + + if (stopped_) { + return false; + } + + queue_.push(std::move(task)); + tasks_available_.Signal(scoped_lock); + return true; +} + +std::unique_ptr TaskQueue::Pop(void) { + Mutex::ScopedLock scoped_lock(lock_); + + if (queue_.empty()) { + return std::unique_ptr(nullptr); + } + + std::unique_ptr task = std::move(queue_.front()); + queue_.pop(); + return task; +} + +std::unique_ptr TaskQueue::BlockingPop(void) { + Mutex::ScopedLock scoped_lock(lock_); + + while (queue_.empty() && !stopped_) { + tasks_available_.Wait(scoped_lock); + } + + if (queue_.empty()) { + return std::unique_ptr(nullptr); + } + + std::unique_ptr result = std::move(queue_.front()); + queue_.pop(); + return result; +} + +void TaskQueue::Stop(void) { + Mutex::ScopedLock scoped_lock(lock_); + stopped_ = true; + tasks_available_.Broadcast(scoped_lock); +} + +int TaskQueue::Length(void) const { + Mutex::ScopedLock scoped_lock(lock_); + return queue_.size(); +} + +/************** + * Threadpool + ***************/ + +Threadpool::Threadpool(void) + : queue_(), workers_() { + executor_.init = uv_executor_init; + executor_.destroy = nullptr; + executor_.submit = uv_executor_submit; + executor_.cancel = nullptr; + executor_.data = this; +} + +Threadpool::~Threadpool(void) { + // Block future Push's. + queue_.Stop(); + + // Wait for Workers to drain the queue. 
+  for (auto& worker : workers_) {
+    worker->Join();
+  }
+}
+
+// TODO(davisjam): Return early on multiple initialization
+void Threadpool::Initialize(void) {
+  int n_workers = 4;  // TODO(davisjam):
+
+  for (int i = 0; i < n_workers; i++) {
+    std::unique_ptr<Worker> worker(new Worker());
+    worker->Start(&queue_);
+    workers_.push_back(std::move(worker));
+  }
+}
+
+void Threadpool::Post(std::unique_ptr<Task> task) {
+  LOG_1("Threadpool::Post: Got task of type %d\n",
+    task->details_.type);
+  queue_.Push(std::move(task));
+}
+
+int Threadpool::QueueLength(void) const {
+  return queue_.Length();
+}
+
+void Threadpool::uv_executor_init(uv_executor_t* executor) {
+}
+
+void Threadpool::uv_executor_submit(uv_executor_t* executor,
+                                    uv_work_t* req,
+                                    const uv_work_options_t* opts) {
+  Threadpool* threadpool = reinterpret_cast<Threadpool*>(executor->data);
+  LOG_0("Threadpool::uv_executor_submit: Got some work!\n");
+  threadpool->Post(std::unique_ptr<Task>(new LibuvTask(threadpool, req, opts)));
+}
+
+uv_executor_t* Threadpool::GetExecutor() {
+  return &executor_;
+}
+
+}  // namespace threadpool
+}  // namespace node
diff --git a/src/node_threadpool.h b/src/node_threadpool.h
new file mode 100644
index 00000000000000..cea8d279792c8e
--- /dev/null
+++ b/src/node_threadpool.h
@@ -0,0 +1,180 @@
+#ifndef SRC_NODE_THREADPOOL_H_
+#define SRC_NODE_THREADPOOL_H_
+
+#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+#include <queue>
+#include <vector>
+#include <memory>
+
+#include "node.h"
+#include "node_mutex.h"
+#include "uv.h"
+
+namespace node {
+namespace threadpool {
+
+class Threadpool;
+class TaskQueue;
+class Task;
+class TaskDetails;
+class Worker;
+
+// Inhabited by a uv_thread_t.
+// Subclass to experiment, e.g.:
+//  - cancellation (a la Davis et al. 2018's Manager-Worker-Hangman approach)
+class Worker {
+ public:
+  Worker();
+
+  // Starts a thread and returns control to the caller.
+  void Start(TaskQueue* queue);
+  void Join(void);
+
+ protected:
+  // Override e.g. to implement cancellation.
+  static void _Run(void* data);
+
+  uv_thread_t self_;
+
+ private:
+};
+
+// This is basically a struct
+class TaskDetails {
+ public:
+  enum TaskType {
+    FS
+    , FS_LIKELY_CACHED  // Likely to be bound by memory or CPU
+    , OTHER_DISK_IO
+    , DNS
+    , OTHER_NETWORK_IO
+    , IO
+    , MEMORY
+    , CPU
+    , CPU_SLOW
+    , CPU_FAST
+    , UNKNOWN
+  };
+
+  TaskType type;
+  int priority;  // Larger numbers signal higher priority.
+                 // Does nothing in this class.
+  bool cancelable;  // If true, by some yet-to-be-determined mechanism we can
+                    // cancel this Task while it is scheduled.
+
+ protected:
+ private:
+};
+
+// Abstract notion of a Task.
+// Clients of node::Threadpool should sub-class this for their type of request.
+//  - V8::Platform Tasks
+//  - libuv async work
+//  - User work from the N-API
+class Task {
+ public:
+  Task() {}
+  // Invoked after Run().
+  virtual ~Task() {}
+
+  // Invoked on some thread in the Threadpool.
+  virtual void Run() = 0;
+
+  // Run() can access details.
+  // Should be set in subclass constructor.
+  TaskDetails details_;
+ private:
+};
+
+class LibuvTask : public Task {
+ public:
+  LibuvTask(Threadpool* tp, uv_work_t* req, const uv_work_options_t* opts);
+  ~LibuvTask();
+
+  void Run();
+
+ protected:
+ private:
+  Threadpool* tp_;
+  uv_work_t* req_;
+};
+
+// Abstract notion of a queue of Tasks.
+// The default implementation is a FIFO queue.
+// Subclass to experiment, e.g.:
+//  - prioritization
+//  - multi-queue e.g. for CPU-bound and I/O-bound Tasks or Fast and Slow ones.
+class TaskQueue {
+ public:
+  TaskQueue();
+
+  // Return true if Push succeeds, else false.
+  bool Push(std::unique_ptr<Task> task);
+  std::unique_ptr<Task> Pop(void);
+
+  // Returns nullptr when we're done.
+  std::unique_ptr<Task> BlockingPop(void);
+
+  // Subsequent Push() will fail.
+  // Pop calls will return nullptr once queue is drained.
+  void Stop();
+
+  int Length(void) const;
+
+ private:
+  // Structures.
+  std::queue<std::unique_ptr<Task>> queue_;
+  bool stopped_;
+
+  // Synchronization.
+  Mutex lock_;
+  ConditionVariable tasks_available_;
+};
+
+// A threadpool works on asynchronous Tasks.
+// It consists of:
+//  - a TaskQueue of pending Tasks
+//  - a set of Workers that handle Tasks from the TaskQueue
+// Subclass to experiment, e.g.:
+//  - Use a different type of TaskQueue
+//  - Elastic workers (scale up and down)
+class Threadpool {
+ public:
+  Threadpool(void);
+  // Waits for queue to drain.
+  ~Threadpool(void);
+
+  // Call once, before you Post.
+  // TODO(davisjam): RAII?
+  void Initialize(void);
+  // TODO(davisjam): Destroy.
+
+  void Post(std::unique_ptr<Task> task);
+  int QueueLength(void) const;
+
+  // To interact with libuv's executor API:
+  //  - For the call to uv_replace_executor
+  //  - A LibuvTask needs the uv_executor_done_cb
+  uv_executor_t* GetExecutor();
+
+ protected:
+  // TODO(davisjam): This should be in some separate interface class like
+  // NodePlatform::WorkerThreadsTaskRunner.
+  uv_executor_t executor_;  // So can be plugged in to libuv
+  static void uv_executor_init(uv_executor_t* executor);
+  static void uv_executor_submit(uv_executor_t* executor,
+                                 uv_work_t* req,
+                                 const uv_work_options_t* opts);
+
+ private:
+  TaskQueue queue_;
+  std::vector<std::unique_ptr<Worker>> workers_;
+};
+
+}  // namespace threadpool
+}  // namespace node
+
+#endif  // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+#endif  // SRC_NODE_THREADPOOL_H_
diff --git a/test/cctest/test_threadpool.cc b/test/cctest/test_threadpool.cc
new file mode 100644
index 00000000000000..79b10c1712564a
--- /dev/null
+++ b/test/cctest/test_threadpool.cc
@@ -0,0 +1,130 @@
+#include "node_internals.h"
+#include "node_threadpool.h"
+#include "libplatform/libplatform.h"
+
+#include
+#include "gtest/gtest.h"
+#include "node_test_fixture.h"
+
+#include <atomic>
+
+using node::threadpool::Task;
+using node::threadpool::TaskQueue;
+using node::threadpool::Worker;
+using node::threadpool::Threadpool;
+
+// Thread-safe counters
+static std::atomic<int> testTaskRunCount(0);
+static std::atomic<int> testTaskDestroyedCount(0);
+
+// TODO(davisjam): Do I need this?
+class ThreadpoolTest : public NodeTestFixture {
+ private:
+  virtual void TearDown() {
+    NodeTestFixture::TearDown();
+  }
+};
+
+// Helper so we have a type of Task
+class TestTask : public node::threadpool::Task {
+ public:
+  TestTask() {}
+  ~TestTask() {
+    testTaskDestroyedCount++;
+  }
+
+  void Run() {
+    testTaskRunCount++;
+  }
+};
+
+TEST_F(ThreadpoolTest, TaskQueueEndToEnd) {
+  int nTasks = 100;
+  TaskQueue tq;
+
+  // Reset globals
+  testTaskRunCount = 0;
+  testTaskDestroyedCount = 0;
+
+  // Push
+  EXPECT_EQ(tq.Length(), 0);
+  for (int i = 0; i < nTasks; i++) {
+    EXPECT_EQ(tq.Push(std::unique_ptr<Task>(new TestTask())),
+              true);
+  }
+  EXPECT_EQ(tq.Length(), nTasks);
+
+  // Successful Pop, BlockingPop
+  for (int i = 0; i < nTasks; i++) {
+    std::unique_ptr<Task> task;
+    if (i % 2)
+      task = tq.Pop();
+    else
+      task = tq.BlockingPop();
+    EXPECT_NE(task.get(), nullptr);
+    EXPECT_EQ(tq.Length(), nTasks - (i + 1));
+  }
+
+  // Pop fails when queue is empty
+  std::unique_ptr<Task> task = tq.Pop();  // Non-blocking
+  EXPECT_EQ(task.get(), nullptr);
+  EXPECT_EQ(tq.Length(), 0);
+
+  // Stop works
+  tq.Stop();
+  EXPECT_EQ(tq.Push(std::unique_ptr<Task>(new TestTask())), false);
+}
+
+TEST_F(ThreadpoolTest, WorkersWorkWithTaskQueue) {
+  int nTasks = 100;
+  TaskQueue tq;
+  Worker w;
+
+  // Reset globals
+  testTaskRunCount = 0;
+  testTaskDestroyedCount = 0;
+
+  // Push
+  EXPECT_EQ(tq.Length(), 0);
+  for (int i = 0; i < nTasks; i++) {
+    EXPECT_EQ(tq.Push(std::unique_ptr<Task>(new TestTask())),
+              true);
+  }
+  // Worker hasn't started yet, so tq should be "full".
+  EXPECT_EQ(tq.Length(), nTasks);
+
+  // Once we start the worker, it should empty tq.
+  w.Start(&tq);
+
+  tq.Stop();  // Signal Worker that we're done
+  w.Join();   // Wait for Worker to finish
+  EXPECT_EQ(tq.Length(), 0);
+
+  // And it should have run and destroyed every Task.
+  EXPECT_EQ(testTaskRunCount, nTasks);
+  EXPECT_EQ(testTaskDestroyedCount, nTasks);
+}
+
+TEST_F(ThreadpoolTest, ThreadpoolWorks) {
+  int nTasks = 100;
+
+  {
+    std::unique_ptr<Threadpool> tp(new Threadpool());
+
+    // Reset globals
+    testTaskRunCount = 0;
+    testTaskDestroyedCount = 0;
+
+    tp->Initialize();
+
+    // Push
+    EXPECT_EQ(tp->QueueLength(), 0);
+    for (int i = 0; i < nTasks; i++) {
+      tp->Post(std::unique_ptr<Task>(new TestTask()));
+    }
+  }
+  // tp leaves scope. In destructor it drains the queue.
+
+  EXPECT_EQ(testTaskRunCount, nTasks);
+  EXPECT_EQ(testTaskDestroyedCount, nTasks);
+}

From 72001114b1adc9b40ab3c02c821181b041e6726b Mon Sep 17 00:00:00 2001
From: Jamie Davis
Date: Wed, 5 Sep 2018 21:01:36 -0400
Subject: [PATCH 03/31] PTP: rewire NodePlatform to use Threadpool

Summary: In this commit I rewired NodePlatform to use the
threadpool::Threadpool I introduced in the previous commit.

Approach: I touched the existing NodePlatform implementation as little as
possible. Thus I wire WorkerThreadsTaskRunner to Post to a
threadpool::Threadpool.

Existing problems: Not all existing behaviors supported by
WorkerThreadsTaskRunner are supported by the current threadpool::Threadpool
implementation. Where they do not exist I replaced them with no-ops. Node
currently runs tests correctly (I tried a few) but segfaults during cleanup.
I believe this is because of the lack of support for a "BlockingDrain" API.

The CreatePlatform API is externalized. I do not know what this is for. This
somewhat complicates my plan to have it accept a threadpool::Threadpool as an
argument. Maybe I should overload this function and retain the existing
n_threads API too?

Next steps:
1.
Refactor out a LibuvExecutor in node_threadpool, analogous to the WorkerThreadsTaskRunner. 2. threadpool::Threadpool must support the union of the APIs needed by its various consumers (notably WorkerThreadsTaskRunner). 3. Possibly we could refactor out the WorkerThreadsTaskRunner as an optimization (?), but since it handles Delayed tasks as well as "do them right now" Tasks it is a useful front-end. I don't intend such a refactoring to be part of the eventual PR. 4. Consider overloading MultiIsolatePlatform/NodePlatform to retain the existing 'n_threads' API. When used, this should create and use a threadpool::Threadpool private to the NodePlatform. --- src/node.cc | 29 +++++++-------- src/node_platform.cc | 80 ++++++++++++++++++++---------------------- src/node_platform.h | 10 +++--- src/node_threadpool.cc | 26 +++++--------- src/node_threadpool.h | 2 ++ 5 files changed, 67 insertions(+), 80 deletions(-) diff --git a/src/node.cc b/src/node.cc index 6349847bdab184..8c17cd618b66d2 100644 --- a/src/node.cc +++ b/src/node.cc @@ -285,26 +285,23 @@ class NodeTraceStateObserver : }; static struct { - // Returns zero on success - int Initialize(void) { - tp_.reset(new threadpool::Threadpool()); + void Initialize(void) { + tp_ = std::make_shared(); tp_->Initialize(); - return uv_replace_executor(tp_->GetExecutor()); } - std::unique_ptr tp_; + std::shared_ptr tp_; } node_threadpool; static struct { #if NODE_USE_V8_PLATFORM - // TODO(davisjam): Pass in the node_threadpool. - void Initialize(int thread_pool_size) { + void Initialize(std::shared_ptr tp) { tracing_agent_.reset(new tracing::Agent()); auto controller = tracing_agent_->GetTracingController(); controller->AddTraceStateObserver(new NodeTraceStateObserver(controller)); tracing::TraceEventHelper::SetTracingController(controller); StartTracingAgent(); - platform_ = new NodePlatform(thread_pool_size, controller); + platform_ = new NodePlatform(tp, controller); V8::InitializePlatform(platform_); } @@ -3132,9 +3129,9 @@ MultiIsolatePlatform* GetMainThreadMultiIsolatePlatform() { MultiIsolatePlatform* CreatePlatform( - int thread_pool_size, + int thread_pool_size, // TODO(davisjam): ignored. Not sure what to do here. v8::TracingController* tracing_controller) { - return new NodePlatform(thread_pool_size, tracing_controller); + return new NodePlatform(node_threadpool.tp_, tracing_controller); } @@ -3351,12 +3348,16 @@ int Start(int argc, char** argv) { V8::SetEntropySource(crypto::EntropySource); #endif // HAVE_OPENSSL + // Initialize our threadpool. + node_threadpool.Initialize(); + + // Replace the default libuv executor with our threadpool. // This needs to run before any work is queued to the libuv executor. - CHECK_EQ(0, node_threadpool.Initialize()); + uv_replace_executor(node_threadpool.tp_->GetExecutor()); - // TODO(davisjam): Pass the v8_platform the node_threadpool. - v8_platform.Initialize( - per_process_opts->v8_thread_pool_size); + // Replace the default V8 platform with our implementation. + // Use our threadpool. 
+  v8_platform.Initialize(node_threadpool.tp_);
   V8::Initialize();
   performance::performance_v8_start = PERFORMANCE_NOW();
   v8_initialized = true;
diff --git a/src/node_platform.cc b/src/node_platform.cc
index 03753c4d2d5bf0..c115fe50c69560 100644
--- a/src/node_platform.cc
+++ b/src/node_platform.cc
@@ -16,24 +16,29 @@ using v8::Platform;
 using v8::Task;
 using v8::TracingController;
 
-namespace {
-
-static void PlatformWorkerThread(void* data) {
-  TRACE_EVENT_METADATA1("__metadata", "thread_name", "name",
-                        "PlatformWorkerThread");
-  TaskQueue<Task>* pending_worker_tasks = static_cast<TaskQueue<Task>*>(data);
-  while (std::unique_ptr<Task> task = pending_worker_tasks->BlockingPop()) {
-    task->Run();
-    pending_worker_tasks->NotifyOfCompletion();
+// Wrapper for delivery to threadpool::Threadpool.
+class V8Task : public threadpool::Task {
+ public:
+  V8Task(std::unique_ptr<Task> task) {
+    task_ = std::move(task);
+    details_.type = threadpool::TaskDetails::V8;
+    details_.priority = -1;
+    details_.cancelable = -1;
   }
-}
+  ~V8Task() {}
 
-}  // namespace
+  void Run() {
+    task_->Run();
+  }
+
+ private:
+  std::unique_ptr<Task> task_;
+};
 
 class WorkerThreadsTaskRunner::DelayedTaskScheduler {
  public:
-  explicit DelayedTaskScheduler(TaskQueue<Task>* tasks)
-      : pending_worker_tasks_(tasks) {}
+  explicit DelayedTaskScheduler(std::shared_ptr<threadpool::Threadpool> tp)
+      : tp_(tp) {}
 
   std::unique_ptr<uv_thread_t> Start() {
     auto start_thread = [](void* data) {
@@ -123,25 +128,25 @@ class WorkerThreadsTaskRunner::DelayedTaskScheduler {
   };
 
   static void RunTask(uv_timer_t* timer) {
+    // This DelayedTask is ready. Post it to the threadpool.
     DelayedTaskScheduler* scheduler =
         ContainerOf(&DelayedTaskScheduler::loop_, timer->loop);
-    // This adds the Task to the TP queue.
-    // TODO(davisjam): Plug in TP implementation.
-    scheduler->pending_worker_tasks_->Push(scheduler->TakeTimerTask(timer));
+    scheduler->tp_->Post(scheduler->TakeTimerTask(timer));
   }
 
-  std::unique_ptr<Task> TakeTimerTask(uv_timer_t* timer) {
+  std::unique_ptr<threadpool::Task> TakeTimerTask(uv_timer_t* timer) {
     std::unique_ptr<Task> task(static_cast<Task*>(timer->data));
     uv_timer_stop(timer);
     uv_close(reinterpret_cast<uv_handle_t*>(timer), [](uv_handle_t* handle) {
       delete reinterpret_cast<uv_timer_t*>(handle);
     });
     timers_.erase(timer);
-    return task;
+    return std::unique_ptr<threadpool::Task>(
+        new V8Task(std::move(task)));
   }
 
   uv_sem_t ready_;
-  TaskQueue<Task>* pending_worker_tasks_;
+  std::shared_ptr<threadpool::Threadpool> tp_;
 
   TaskQueue<Task> tasks_;
   uv_loop_t loop_;
@@ -149,22 +154,16 @@ class WorkerThreadsTaskRunner::DelayedTaskScheduler {
   std::unordered_set<uv_timer_t*> timers_;
 };
 
-WorkerThreadsTaskRunner::WorkerThreadsTaskRunner(int thread_pool_size) {
+WorkerThreadsTaskRunner::WorkerThreadsTaskRunner(std::shared_ptr<threadpool::Threadpool> tp) {
+  tp_ = tp;
   delayed_task_scheduler_.reset(
-      new DelayedTaskScheduler(&pending_worker_tasks_));
-  threads_.push_back(delayed_task_scheduler_->Start());
-  for (int i = 0; i < thread_pool_size; i++) {
-    std::unique_ptr<uv_thread_t> t { new uv_thread_t() };
-    if (uv_thread_create(t.get(), PlatformWorkerThread,
-                         &pending_worker_tasks_) != 0) {
-      break;
-    }
-    threads_.push_back(std::move(t));
-  }
+      new DelayedTaskScheduler(tp_));
 }
 
 void WorkerThreadsTaskRunner::PostTask(std::unique_ptr<Task> task) {
-  pending_worker_tasks_.Push(std::move(task));
+  fprintf(stderr, "Posting to threadpool!\n");
+  tp_->Post(std::unique_ptr<threadpool::Task>(
+      new V8Task(std::move(task))));
 }
 
 void WorkerThreadsTaskRunner::PostDelayedTask(std::unique_ptr<Task> task,
@@ -173,19 +172,18 @@ void WorkerThreadsTaskRunner::PostDelayedTask(std::unique_ptr<Task> task,
 }
 
 void WorkerThreadsTaskRunner::BlockingDrain() {
-  pending_worker_tasks_.BlockingDrain();
+  // TODO(davisjam): No support for this in
threadpool::Threadpool at the moment. + // I believe this is the cause of the segfaults at the end of running 'node'. + //pending_worker_tasks_.BlockingDrain(); } void WorkerThreadsTaskRunner::Shutdown() { - pending_worker_tasks_.Stop(); + // TODO(davisjam): More cleanup? delayed_task_scheduler_->Stop(); - for (size_t i = 0; i < threads_.size(); i++) { - CHECK_EQ(0, uv_thread_join(threads_[i].get())); - } } int WorkerThreadsTaskRunner::NumberOfWorkerThreads() const { - return threads_.size(); + return tp_->NWorkers(); } PerIsolatePlatformData::PerIsolatePlatformData( @@ -249,7 +247,7 @@ int PerIsolatePlatformData::unref() { return --ref_count_; } -NodePlatform::NodePlatform(int thread_pool_size, +NodePlatform::NodePlatform(std::shared_ptr tp, TracingController* tracing_controller) { if (tracing_controller) { tracing_controller_.reset(tracing_controller); @@ -258,7 +256,7 @@ NodePlatform::NodePlatform(int thread_pool_size, tracing_controller_.reset(controller); } worker_thread_task_runner_ = - std::make_shared(thread_pool_size); + std::make_shared(tp); } void NodePlatform::RegisterIsolate(IsolateData* isolate_data, uv_loop_t* loop) { @@ -331,8 +329,7 @@ void NodePlatform::DrainTasks(Isolate* isolate) { std::shared_ptr per_isolate = ForIsolate(isolate); do { - // Worker tasks aren't associated with an Isolate. - // TODO(davisjam): This will require some dancing with the TP. + // Worker tasks aren't associated with any particular Isolate. worker_thread_task_runner_->BlockingDrain(); } while (per_isolate->FlushForegroundTasksInternal()); } @@ -374,7 +371,6 @@ bool PerIsolatePlatformData::FlushForegroundTasksInternal() { } void NodePlatform::CallOnWorkerThread(std::unique_ptr task) { - // TODO(davisjam): Plug in TP implementation worker_thread_task_runner_->PostTask(std::move(task)); } diff --git a/src/node_platform.h b/src/node_platform.h index bc5f0ba92643d7..3a2fc6b95327a9 100644 --- a/src/node_platform.h +++ b/src/node_platform.h @@ -12,6 +12,7 @@ #include "node.h" #include "node_mutex.h" #include "uv.h" +#include "node_threadpool.h" namespace node { @@ -99,7 +100,7 @@ class PerIsolatePlatformData : // API is modeled on v8::TaskRunner. class WorkerThreadsTaskRunner { public: - explicit WorkerThreadsTaskRunner(int thread_pool_size); + explicit WorkerThreadsTaskRunner(std::shared_ptr tp); // Add task to queue for eventual Run() void PostTask(std::unique_ptr task); @@ -113,18 +114,15 @@ class WorkerThreadsTaskRunner { int NumberOfWorkerThreads() const; private: - // Push'd directly by PostTask() and indirectly by PostDelayedTask. - TaskQueue pending_worker_tasks_; - class DelayedTaskScheduler; std::unique_ptr delayed_task_scheduler_; - std::vector> threads_; + std::shared_ptr tp_; }; class NodePlatform : public MultiIsolatePlatform { public: - NodePlatform(int thread_pool_size, v8::TracingController* tracing_controller); + NodePlatform(std::shared_ptr tp, v8::TracingController* tracing_controller); virtual ~NodePlatform() {} void DrainTasks(v8::Isolate* isolate) override; diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc index 77d2dbe8c74d81..7574b832ccfa09 100644 --- a/src/node_threadpool.cc +++ b/src/node_threadpool.cc @@ -9,23 +9,13 @@ // TODO(davisjam): DO NOT MERGE. Only for debugging. // TODO(davisjam): There must be a better way to do this. 
#define DEBUG_LOG 1 -#undef DEBUG_LOG +//#undef DEBUG_LOG #ifdef DEBUG_LOG #include -#define LOG_0(fmt) fprintf(stderr, fmt) -#define LOG_1(fmt, a1) fprintf(stderr, fmt, a1) -#define LOG_2(fmt, a1, a2) fprintf(stderr, fmt, a1, a2) -#define LOG_3(fmt, a1, a2, a3) fprintf(stderr, fmt, a1, a2, a3) -#define LOG_4(fmt, a1, a2, a3, a4) fprintf(stderr, fmt, a1, a2, a3, a4) -#define LOG_5(fmt, a1, a2, a3, a4, a5) fprintf(stderr, fmt, a1, a2, a3, a4, a5) +#define LOG(...) fprintf(stderr, __VA_ARGS__) #else -#define LOG_0(fmt) (void) 0 -#define LOG_1(fmt, a1) (void) 0 -#define LOG_2(fmt, a1, a2) (void) 0 -#define LOG_3(fmt, a1, a2, a3) (void) 0 -#define LOG_4(fmt, a1, a2, a3, a4) (void) 0 -#define LOG_5(fmt, a1, a2, a3, a4, a5) (void) 0 +#define LOG(...) (void) 0 #endif namespace node { @@ -90,16 +80,16 @@ LibuvTask::LibuvTask(Threadpool* tp, details_.cancelable = false; } - LOG_1("LibuvTask::LibuvTask: type %d\n", details_.type); + LOG("LibuvTask::LibuvTask: type %d\n", details_.type); } LibuvTask::~LibuvTask(void) { - LOG_1("LibuvTask::Run: Task %p done\n", req_); + LOG("LibuvTask::Run: Task %p done\n", req_); tp_->GetExecutor()->done(req_); } void LibuvTask::Run() { - LOG_1("LibuvTask::Run: Running Task %p\n", req_); + LOG("LibuvTask::Run: Running Task %p\n", req_); req_->work_cb(req_); } @@ -197,7 +187,7 @@ void Threadpool::Initialize(void) { } void Threadpool::Post(std::unique_ptr task) { - LOG_1("Threadpool::Post: Got task of type %d\n", + LOG("Threadpool::Post: Got task of type %d\n", task->details_.type); queue_.Push(std::move(task)); } @@ -213,7 +203,7 @@ void Threadpool::uv_executor_submit(uv_executor_t* executor, uv_work_t* req, const uv_work_options_t* opts) { Threadpool* threadpool = reinterpret_cast(executor->data); - LOG_0("Threadpool::uv_executor_submit: Got some work!\n"); + LOG("Threadpool::uv_executor_submit: Got some work!\n"); threadpool->Post(std::unique_ptr(new LibuvTask(threadpool, req, opts))); } diff --git a/src/node_threadpool.h b/src/node_threadpool.h index cea8d279792c8e..1833f78caad072 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -54,6 +54,7 @@ class TaskDetails { , CPU , CPU_SLOW , CPU_FAST + , V8 , UNKNOWN }; @@ -152,6 +153,7 @@ class Threadpool { void Post(std::unique_ptr task); int QueueLength(void) const; + int NWorkers(void) const { return workers_.size(); } // To interact with libuv's executor API: // - For the call to uv_replace_executor From 662825fd11c17e1cff9c78fbea0931a765170623 Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Thu, 6 Sep 2018 16:06:37 -0400 Subject: [PATCH 04/31] PTP: Refactor out a LibuvExecutor No functional change in this commit. I added a standalone LibuvExecutor that I plug into libuv. Analogous to the NodePlatform's WorkerThreadsTaskRunner, this design decouples the duties of the Threadpool from the interface with libuv (V8). --- src/node.cc | 6 +++-- src/node_threadpool.cc | 61 ++++++++++++++++++++++++------------------ src/node_threadpool.h | 42 ++++++++++++++++++----------- 3 files changed, 65 insertions(+), 44 deletions(-) diff --git a/src/node.cc b/src/node.cc index 8c17cd618b66d2..5a4fffacb8bad3 100644 --- a/src/node.cc +++ b/src/node.cc @@ -288,9 +288,11 @@ static struct { void Initialize(void) { tp_ = std::make_shared(); tp_->Initialize(); + libuv_executor_ = std::unique_ptr(new threadpool::LibuvExecutor(tp_)); } std::shared_ptr tp_; + std::unique_ptr libuv_executor_; } node_threadpool; static struct { @@ -3351,9 +3353,9 @@ int Start(int argc, char** argv) { // Initialize our threadpool. 
node_threadpool.Initialize(); - // Replace the default libuv executor with our threadpool. + // Replace the default libuv executor with our executor. // This needs to run before any work is queued to the libuv executor. - uv_replace_executor(node_threadpool.tp_->GetExecutor()); + uv_replace_executor(node_threadpool.libuv_executor_->GetExecutor()); // Replace the default V8 platform with our implementation. // Use our threadpool. diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc index 7574b832ccfa09..5f433142a0082c 100644 --- a/src/node_threadpool.cc +++ b/src/node_threadpool.cc @@ -43,17 +43,46 @@ void Worker::_Run(void* data) { } } +/************** + * LibuvExecutor + ***************/ + +LibuvExecutor::LibuvExecutor(std::shared_ptr tp) + : tp_(tp) { + executor_.init = uv_executor_init; + executor_.destroy = nullptr; + executor_.submit = uv_executor_submit; + executor_.cancel = nullptr; + executor_.data = this; +} + +uv_executor_t* LibuvExecutor::GetExecutor() { + return &executor_; +} + +void LibuvExecutor::uv_executor_init(uv_executor_t* executor) { + // Already initialized. + // TODO(davisjam): I don't think we need this API in libuv. Nor destroy. +} + +void LibuvExecutor::uv_executor_submit(uv_executor_t* executor, + uv_work_t* req, + const uv_work_options_t* opts) { + LibuvExecutor* libuv_executor = reinterpret_cast(executor->data); + LOG("LibuvExecutor::uv_executor_submit: Got some work!\n"); + libuv_executor->tp_->Post(std::unique_ptr(new LibuvTask(libuv_executor, req, opts))); +} + + /************** * LibuvTask ***************/ -LibuvTask::LibuvTask(Threadpool* tp, +LibuvTask::LibuvTask(LibuvExecutor* libuv_executor, uv_work_t* req, const uv_work_options_t* opts) - : Task(), tp_(tp), req_(req) { - req_ = req; - - // Copy opts. + : Task(), libuv_executor_(libuv_executor), req_(req) { + // Fill in TaskDetails based on opts. if (opts) { switch (opts->type) { case UV_WORK_FS: @@ -85,7 +114,7 @@ LibuvTask::LibuvTask(Threadpool* tp, LibuvTask::~LibuvTask(void) { LOG("LibuvTask::Run: Task %p done\n", req_); - tp_->GetExecutor()->done(req_); + libuv_executor_->GetExecutor()->done(req_); } void LibuvTask::Run() { @@ -158,11 +187,6 @@ int TaskQueue::Length(void) const { Threadpool::Threadpool(void) : queue_(), workers_() { - executor_.init = uv_executor_init; - executor_.destroy = nullptr; - executor_.submit = uv_executor_submit; - executor_.cancel = nullptr; - executor_.data = this; } Threadpool::~Threadpool(void) { @@ -196,20 +220,5 @@ int Threadpool::QueueLength(void) const { return queue_.Length(); } -void Threadpool::uv_executor_init(uv_executor_t* executor) { -} - -void Threadpool::uv_executor_submit(uv_executor_t* executor, - uv_work_t* req, - const uv_work_options_t* opts) { - Threadpool* threadpool = reinterpret_cast(executor->data); - LOG("Threadpool::uv_executor_submit: Got some work!\n"); - threadpool->Post(std::unique_ptr(new LibuvTask(threadpool, req, opts))); -} - -uv_executor_t* Threadpool::GetExecutor() { - return &executor_; -} - } // namespace threadpool } // namespace node diff --git a/src/node_threadpool.h b/src/node_threadpool.h index 1833f78caad072..03efe5ad9f14ff 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -14,6 +14,7 @@ namespace node { namespace threadpool { +class LibuvExecutor; class Threadpool; class TaskQueue; class Task; @@ -88,16 +89,39 @@ class Task { private: }; +// Shim that we plug into the libuv "pluggable TP" interface. 
+// +// Like WorkerThreadsTaskRunner, this routes libuv requests to the +// internal Node.js Threadpool. +class LibuvExecutor { + public: + LibuvExecutor(std::shared_ptr tp); + + uv_executor_t* GetExecutor(); + + private: + + static void uv_executor_init(uv_executor_t* executor); + static void uv_executor_submit(uv_executor_t* executor, + uv_work_t* req, + const uv_work_options_t* opts); + + std::shared_ptr tp_; + uv_executor_t executor_; // executor_.data points to instance of LibuvExecutor. +}; + +// The LibuvExecutor wraps libuv uv_work_t's into LibuvTasks +// and routes them to the internal Threadpool. class LibuvTask : public Task { public: - LibuvTask(Threadpool* tp, uv_work_t* req, const uv_work_options_t* opts); + LibuvTask(LibuvExecutor *libuv_executor, uv_work_t* req, const uv_work_options_t* opts); ~LibuvTask(); void Run(); protected: private: - Threadpool* tp_; + LibuvExecutor* libuv_executor_; uv_work_t* req_; }; @@ -155,20 +179,6 @@ class Threadpool { int QueueLength(void) const; int NWorkers(void) const { return workers_.size(); } - // To interact with libuv's executor API: - // - For the call to uv_replace_executor - // - A LibuvTask needs the uv_executor_done_cb - uv_executor_t* GetExecutor(); - - protected: - // TODO(davisjam): This should be in some separate interface class like - // NodePlatform::WorkerThreadsTaskRunner. - uv_executor_t executor_; // So can be plugged in to libuv - static void uv_executor_init(uv_executor_t* executor); - static void uv_executor_submit(uv_executor_t* executor, - uv_work_t* req, - const uv_work_options_t* opts); - private: TaskQueue queue_; std::vector> workers_; From a89465565d375653ff63200a8c64ccdf773c6691 Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Thu, 6 Sep 2018 16:13:59 -0400 Subject: [PATCH 05/31] PTP: linting No functional change --- src/node.cc | 3 ++- src/node_platform.cc | 10 ++++++---- src/node_platform.h | 3 ++- src/node_threadpool.cc | 8 +++++--- src/node_threadpool.h | 13 ++++++------- 5 files changed, 21 insertions(+), 16 deletions(-) diff --git a/src/node.cc b/src/node.cc index 5a4fffacb8bad3..2752696fa63d3b 100644 --- a/src/node.cc +++ b/src/node.cc @@ -288,7 +288,8 @@ static struct { void Initialize(void) { tp_ = std::make_shared(); tp_->Initialize(); - libuv_executor_ = std::unique_ptr(new threadpool::LibuvExecutor(tp_)); + libuv_executor_ = std::unique_ptr( + new threadpool::LibuvExecutor(tp_)); } std::shared_ptr tp_; diff --git a/src/node_platform.cc b/src/node_platform.cc index c115fe50c69560..4f8e6e0b46108b 100644 --- a/src/node_platform.cc +++ b/src/node_platform.cc @@ -19,7 +19,7 @@ using v8::TracingController; // Wrapper for delivery to threadpool::Threadpool. class V8Task : public threadpool::Task { public: - V8Task(std::unique_ptr task) { + explicit V8Task(std::unique_ptr task) { task_ = std::move(task); details_.type = threadpool::TaskDetails::V8; details_.priority = -1; @@ -154,7 +154,8 @@ class WorkerThreadsTaskRunner::DelayedTaskScheduler { std::unordered_set timers_; }; -WorkerThreadsTaskRunner::WorkerThreadsTaskRunner(std::shared_ptr tp) { +WorkerThreadsTaskRunner::WorkerThreadsTaskRunner( + std::shared_ptr tp) { tp_ = tp; delayed_task_scheduler_.reset( new DelayedTaskScheduler(tp_)); @@ -172,9 +173,10 @@ void WorkerThreadsTaskRunner::PostDelayedTask(std::unique_ptr task, } void WorkerThreadsTaskRunner::BlockingDrain() { - // TODO(davisjam): No support for this in threadpool::Threadpool at the moment. + // TODO(davisjam): No support for this in threadpool::Threadpool + // at the moment. 
// I believe this is the cause of the segfaults at the end of running 'node'. - //pending_worker_tasks_.BlockingDrain(); + // pending_worker_tasks_.BlockingDrain(); } void WorkerThreadsTaskRunner::Shutdown() { diff --git a/src/node_platform.h b/src/node_platform.h index 3a2fc6b95327a9..d982d404cc382c 100644 --- a/src/node_platform.h +++ b/src/node_platform.h @@ -122,7 +122,8 @@ class WorkerThreadsTaskRunner { class NodePlatform : public MultiIsolatePlatform { public: - NodePlatform(std::shared_ptr tp, v8::TracingController* tracing_controller); + NodePlatform(std::shared_ptr tp, + v8::TracingController* tracing_controller); virtual ~NodePlatform() {} void DrainTasks(v8::Isolate* isolate) override; diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc index 5f433142a0082c..e3212f1d353cab 100644 --- a/src/node_threadpool.cc +++ b/src/node_threadpool.cc @@ -9,7 +9,7 @@ // TODO(davisjam): DO NOT MERGE. Only for debugging. // TODO(davisjam): There must be a better way to do this. #define DEBUG_LOG 1 -//#undef DEBUG_LOG +// #undef DEBUG_LOG #ifdef DEBUG_LOG #include @@ -68,9 +68,11 @@ void LibuvExecutor::uv_executor_init(uv_executor_t* executor) { void LibuvExecutor::uv_executor_submit(uv_executor_t* executor, uv_work_t* req, const uv_work_options_t* opts) { - LibuvExecutor* libuv_executor = reinterpret_cast(executor->data); + LibuvExecutor* libuv_executor = + reinterpret_cast(executor->data); LOG("LibuvExecutor::uv_executor_submit: Got some work!\n"); - libuv_executor->tp_->Post(std::unique_ptr(new LibuvTask(libuv_executor, req, opts))); + libuv_executor->tp_->Post(std::unique_ptr( + new LibuvTask(libuv_executor, req, opts))); } diff --git a/src/node_threadpool.h b/src/node_threadpool.h index 03efe5ad9f14ff..f6de8920891500 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -64,9 +64,6 @@ class TaskDetails { // Does nothing in this class. bool cancelable; // If true, by some yet-to-be-determined mechanism we can // cancel this Task while it is scheduled. - - protected: - private: }; // Abstract notion of a Task. @@ -95,26 +92,28 @@ class Task { // internal Node.js Threadpool. class LibuvExecutor { public: - LibuvExecutor(std::shared_ptr tp); + explicit LibuvExecutor(std::shared_ptr tp); uv_executor_t* GetExecutor(); private: - static void uv_executor_init(uv_executor_t* executor); static void uv_executor_submit(uv_executor_t* executor, uv_work_t* req, const uv_work_options_t* opts); std::shared_ptr tp_; - uv_executor_t executor_; // executor_.data points to instance of LibuvExecutor. + uv_executor_t executor_; // executor_.data points to + // instance of LibuvExecutor. }; // The LibuvExecutor wraps libuv uv_work_t's into LibuvTasks // and routes them to the internal Threadpool. 
class LibuvTask : public Task { public: - LibuvTask(LibuvExecutor *libuv_executor, uv_work_t* req, const uv_work_options_t* opts); + LibuvTask(LibuvExecutor* libuv_executor, + uv_work_t* req, + const uv_work_options_t* opts); ~LibuvTask(); void Run(); From 8771ad864ef209c1e388dda335e7f652e492570c Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Thu, 6 Sep 2018 16:30:06 -0400 Subject: [PATCH 06/31] PTP: Add state tracking for Tasks Tasks can be in one of these states: - QUEUED - ASSIGNED - COMPLETED --- src/node_threadpool.cc | 12 ++++++++++++ src/node_threadpool.h | 13 ++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc index e3212f1d353cab..2ab9e2fdbd397f 100644 --- a/src/node_threadpool.cc +++ b/src/node_threadpool.cc @@ -39,10 +39,20 @@ void Worker::Join(void) { void Worker::_Run(void* data) { TaskQueue* queue = static_cast(data); while (std::unique_ptr task = queue->BlockingPop()) { + task->UpdateState(Task::ASSIGNED); task->Run(); + task->UpdateState(Task::COMPLETED); } } +/************** + * Task + ***************/ + +void Task::UpdateState(enum State state) { + state_ = state; +} + /************** * LibuvExecutor ***************/ @@ -139,8 +149,10 @@ bool TaskQueue::Push(std::unique_ptr task) { return false; } + task->UpdateState(Task::QUEUED); queue_.push(std::move(task)); tasks_available_.Signal(scoped_lock); + return true; } diff --git a/src/node_threadpool.h b/src/node_threadpool.h index f6de8920891500..26d9ab64ae64f2 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -73,6 +73,12 @@ class TaskDetails { // - User work from the N-API class Task { public: + enum State { + QUEUED + , ASSIGNED + , COMPLETED + }; + Task() {} // Invoked after Run(). virtual ~Task() {} @@ -80,10 +86,15 @@ class Task { // Invoked on some thread in the Threadpool. virtual void Run() = 0; + // Different Threadpool components should update this as the Task travels around. + void UpdateState(enum State state); + // Run() can access details. // Should be set in subclass constructor. TaskDetails details_; - private: + + protected: + enum State state_; }; // Shim that we plug into the libuv "pluggable TP" interface. From 3a2bbb604eca9d685f923fc58a2b824780dc0f5e Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Thu, 6 Sep 2018 17:06:40 -0400 Subject: [PATCH 07/31] PTP: Bugfix: start and stop DelayedTaskScheduler In a previous commit I was too eager in deleting threads from the WorkerThreadsTaskRunner. The DelayedTaskScheduler is the responsibility of the WorkerThreadsTaskRunner, not the internal Threadpool. Thus we should start and stop it correctly. This was the cause of the segfault I mentioned earlier. 
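In outline, the lifecycle this patch restores looks like the following (a
sketch distilled from the diff below, not new behavior):

    // WorkerThreadsTaskRunner owns both the scheduler and its thread.
    delayed_task_scheduler_thread_ = delayed_task_scheduler_->Start();

    // On Shutdown(), stop the scheduler's loop and then join its thread,
    // rather than leaving either step to the internal Threadpool.
    delayed_task_scheduler_->Stop();
    CHECK_EQ(0, uv_thread_join(delayed_task_scheduler_thread_.get()));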
--- src/node_platform.cc | 5 ++++- src/node_platform.h | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/node_platform.cc b/src/node_platform.cc index 4f8e6e0b46108b..4f47f190760c30 100644 --- a/src/node_platform.cc +++ b/src/node_platform.cc @@ -70,6 +70,7 @@ class WorkerThreadsTaskRunner::DelayedTaskScheduler { loop_.data = this; CHECK_EQ(0, uv_loop_init(&loop_)); flush_tasks_.data = this; + fprintf(stderr, "WorkerThreadsTaskRunner::DelayedTaskScheduler: Initializing flush_tasks_ %p\n", &flush_tasks_); CHECK_EQ(0, uv_async_init(&loop_, &flush_tasks_, FlushTasks)); uv_sem_post(&ready_); @@ -159,6 +160,7 @@ WorkerThreadsTaskRunner::WorkerThreadsTaskRunner( tp_ = tp; delayed_task_scheduler_.reset( new DelayedTaskScheduler(tp_)); + delayed_task_scheduler_thread_ = delayed_task_scheduler_->Start(); } void WorkerThreadsTaskRunner::PostTask(std::unique_ptr task) { @@ -180,8 +182,9 @@ void WorkerThreadsTaskRunner::BlockingDrain() { } void WorkerThreadsTaskRunner::Shutdown() { - // TODO(davisjam): More cleanup? delayed_task_scheduler_->Stop(); + CHECK_EQ(0, uv_thread_join(delayed_task_scheduler_thread_.get())); + // TODO(davisjam): More cleanup? } int WorkerThreadsTaskRunner::NumberOfWorkerThreads() const { diff --git a/src/node_platform.h b/src/node_platform.h index d982d404cc382c..d75aaab87eb3a7 100644 --- a/src/node_platform.h +++ b/src/node_platform.h @@ -115,7 +115,9 @@ class WorkerThreadsTaskRunner { private: class DelayedTaskScheduler; + std::unique_ptr delayed_task_scheduler_; + std::unique_ptr delayed_task_scheduler_thread_; std::shared_ptr tp_; }; From 8f6df02789db8a2d35542f83a0c11b9333f8c9a4 Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Thu, 6 Sep 2018 17:08:12 -0400 Subject: [PATCH 08/31] PTP: BlockingDrain for Threadpool and TaskQueue This completes the NodePlatform rewiring begun in a previous commit. This BlockingDrain will wait on both V8 Tasks and libuv Tasks. It waits on all Tasks in the Threadpool, even though NodePlatform only cares about BlockingDrain'ing the V8 Tasks. --- src/node_platform.cc | 5 +---- src/node_threadpool.cc | 39 +++++++++++++++++++++++++++++++++++---- src/node_threadpool.h | 22 ++++++++++++++++++---- 3 files changed, 54 insertions(+), 12 deletions(-) diff --git a/src/node_platform.cc b/src/node_platform.cc index 4f47f190760c30..29043f56f82d16 100644 --- a/src/node_platform.cc +++ b/src/node_platform.cc @@ -175,10 +175,7 @@ void WorkerThreadsTaskRunner::PostDelayedTask(std::unique_ptr task, } void WorkerThreadsTaskRunner::BlockingDrain() { - // TODO(davisjam): No support for this in threadpool::Threadpool - // at the moment. - // I believe this is the cause of the segfaults at the end of running 'node'. - // pending_worker_tasks_.BlockingDrain(); + tp_->BlockingDrain(); } void WorkerThreadsTaskRunner::Shutdown() { diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc index 2ab9e2fdbd397f..b45fe79c3b8e05 100644 --- a/src/node_threadpool.cc +++ b/src/node_threadpool.cc @@ -4,6 +4,7 @@ #include "env-inl.h" #include "debug_utils.h" #include "util.h" + #include // TODO(davisjam): DO NOT MERGE. Only for debugging. 
@@ -42,6 +43,8 @@ void Worker::_Run(void* data) { task->UpdateState(Task::ASSIGNED); task->Run(); task->UpdateState(Task::COMPLETED); + + queue->NotifyOfCompletion(); } } @@ -139,7 +142,9 @@ void LibuvTask::Run() { ***************/ TaskQueue::TaskQueue() - : queue_(), stopped_(false), lock_(), tasks_available_() { + : queue_(), outstanding_tasks_(0), stopped_(false) + , lock_() + , task_available_(), tasks_drained_() { } bool TaskQueue::Push(std::unique_ptr task) { @@ -151,7 +156,8 @@ bool TaskQueue::Push(std::unique_ptr task) { task->UpdateState(Task::QUEUED); queue_.push(std::move(task)); - tasks_available_.Signal(scoped_lock); + outstanding_tasks_++; + task_available_.Signal(scoped_lock); return true; } @@ -172,7 +178,7 @@ std::unique_ptr TaskQueue::BlockingPop(void) { Mutex::ScopedLock scoped_lock(lock_); while (queue_.empty() && !stopped_) { - tasks_available_.Wait(scoped_lock); + task_available_.Wait(scoped_lock); } if (queue_.empty()) { @@ -184,10 +190,27 @@ std::unique_ptr TaskQueue::BlockingPop(void) { return result; } +void TaskQueue::NotifyOfCompletion(void) { + Mutex::ScopedLock scoped_lock(lock_); + outstanding_tasks_--; + CHECK_GE(outstanding_tasks_, 0); + if (!outstanding_tasks_) { + tasks_drained_.Broadcast(scoped_lock); + } +} + +void TaskQueue::BlockingDrain(void) { + Mutex::ScopedLock scoped_lock(lock_); + while (outstanding_tasks_) { + tasks_drained_.Wait(scoped_lock); + } + LOG("TaskQueue::BlockingDrain: Fully drained\n"); +} + void TaskQueue::Stop(void) { Mutex::ScopedLock scoped_lock(lock_); stopped_ = true; - tasks_available_.Broadcast(scoped_lock); + task_available_.Broadcast(scoped_lock); } int TaskQueue::Length(void) const { @@ -234,5 +257,13 @@ int Threadpool::QueueLength(void) const { return queue_.Length(); } +void Threadpool::BlockingDrain(void) { + queue_.BlockingDrain(); +} + +int Threadpool::NWorkers(void) const { + return workers_.size(); +} + } // namespace threadpool } // namespace node diff --git a/src/node_threadpool.h b/src/node_threadpool.h index 26d9ab64ae64f2..4d69dceed78a4c 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -146,11 +146,18 @@ class TaskQueue { // Return true if Push succeeds, else false. bool Push(std::unique_ptr task); - std::unique_ptr Pop(void); - // Returns nullptr when we're done. + // Non-blocking Pop. Returns nullptr if queue is empty. + std::unique_ptr Pop(void); + // Blocking Pop. Returns nullptr if queue is empty or Stop'd. std::unique_ptr BlockingPop(void); + // Workers should call this after completing a Task. + void NotifyOfCompletion(void); + + // Block until there are no Tasks pending or scheduled. + void BlockingDrain(void); + // Subsequent Push() will fail. // Pop calls will return nullptr once queue is drained. void Stop(); @@ -160,11 +167,15 @@ class TaskQueue { private: // Structures. std::queue> queue_; + int outstanding_tasks_; // Number of Tasks in non-COMPLETED states. bool stopped_; // Synchronization. Mutex lock_; - ConditionVariable tasks_available_; + // Signal'd when there is at least one task in the queue. + ConditionVariable task_available_; + // Signal'd when all Push'd Tasks are in COMPLETED state. + ConditionVariable tasks_drained_; }; // A threadpool works on asynchronous Tasks. @@ -187,7 +198,10 @@ class Threadpool { void Post(std::unique_ptr task); int QueueLength(void) const; - int NWorkers(void) const { return workers_.size(); } + // Block until there are no tasks pending or scheduled in the TP. 
+ void BlockingDrain(void); + + int NWorkers(void) const; private: TaskQueue queue_; From 5892ef7ab546983c1b3cc81688e9323d02c81c47 Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Thu, 6 Sep 2018 21:15:02 -0400 Subject: [PATCH 09/31] PTP: CreatePlatform creates its own TP This mostly-matches the old behavior, except that instead of using a self-managed TP the NodePlatform uses a private threadpool::Threadpool instance. It's not clear whether an embedder would like to plug in their own Threadpool, so play it safe for now. Hopefully I can get better insight into desired behavior from other community members. --- src/node.cc | 4 ++-- src/node_platform.cc | 16 ++++++++++++++++ src/node_platform.h | 3 +++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/node.cc b/src/node.cc index 2752696fa63d3b..95263c41856242 100644 --- a/src/node.cc +++ b/src/node.cc @@ -3132,9 +3132,9 @@ MultiIsolatePlatform* GetMainThreadMultiIsolatePlatform() { MultiIsolatePlatform* CreatePlatform( - int thread_pool_size, // TODO(davisjam): ignored. Not sure what to do here. + int thread_pool_size, v8::TracingController* tracing_controller) { - return new NodePlatform(node_threadpool.tp_, tracing_controller); + return new NodePlatform(thread_pool_size, tracing_controller); } diff --git a/src/node_platform.cc b/src/node_platform.cc index 29043f56f82d16..1015042804b8d9 100644 --- a/src/node_platform.cc +++ b/src/node_platform.cc @@ -249,6 +249,22 @@ int PerIsolatePlatformData::unref() { return --ref_count_; } +NodePlatform::NodePlatform(int thread_pool_size, + TracingController* tracing_controller) { + if (tracing_controller) { + tracing_controller_.reset(tracing_controller); + } else { + TracingController* controller = new TracingController(); + tracing_controller_.reset(controller); + } + + // Give wttr its own TP. + std::shared_ptr tp = + std::make_shared(); + tp->Initialize(); + worker_thread_task_runner_ = std::make_shared(tp); +} + NodePlatform::NodePlatform(std::shared_ptr tp, TracingController* tracing_controller) { if (tracing_controller) { diff --git a/src/node_platform.h b/src/node_platform.h index d75aaab87eb3a7..af2f08f99df989 100644 --- a/src/node_platform.h +++ b/src/node_platform.h @@ -124,6 +124,9 @@ class WorkerThreadsTaskRunner { class NodePlatform : public MultiIsolatePlatform { public: + // Create a NodePlatform with its own TP + NodePlatform(int thread_pool_size, v8::TracingController* tracing_controller); + // Create a NodePlatform using the provided TP NodePlatform(std::shared_ptr tp, v8::TracingController* tracing_controller); virtual ~NodePlatform() {} From 95bdaadc2ea684b3d47cf5d47280abb315d68777 Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Thu, 6 Sep 2018 21:16:45 -0400 Subject: [PATCH 10/31] PTP: linting, comments, etc. 
No functional change --- src/node_platform.cc | 2 -- src/node_threadpool.cc | 4 ++-- src/node_threadpool.h | 11 +++++++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/node_platform.cc b/src/node_platform.cc index 1015042804b8d9..48f16d3826e451 100644 --- a/src/node_platform.cc +++ b/src/node_platform.cc @@ -70,7 +70,6 @@ class WorkerThreadsTaskRunner::DelayedTaskScheduler { loop_.data = this; CHECK_EQ(0, uv_loop_init(&loop_)); flush_tasks_.data = this; - fprintf(stderr, "WorkerThreadsTaskRunner::DelayedTaskScheduler: Initializing flush_tasks_ %p\n", &flush_tasks_); CHECK_EQ(0, uv_async_init(&loop_, &flush_tasks_, FlushTasks)); uv_sem_post(&ready_); @@ -164,7 +163,6 @@ WorkerThreadsTaskRunner::WorkerThreadsTaskRunner( } void WorkerThreadsTaskRunner::PostTask(std::unique_ptr task) { - fprintf(stderr, "Posting to threadpool!\n"); tp_->Post(std::unique_ptr( new V8Task(std::move(task)))); } diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc index b45fe79c3b8e05..d92b45c0be6768 100644 --- a/src/node_threadpool.cc +++ b/src/node_threadpool.cc @@ -10,7 +10,7 @@ // TODO(davisjam): DO NOT MERGE. Only for debugging. // TODO(davisjam): There must be a better way to do this. #define DEBUG_LOG 1 -// #undef DEBUG_LOG +#undef DEBUG_LOG #ifdef DEBUG_LOG #include @@ -262,7 +262,7 @@ void Threadpool::BlockingDrain(void) { } int Threadpool::NWorkers(void) const { - return workers_.size(); + return workers_.size(); } } // namespace threadpool diff --git a/src/node_threadpool.h b/src/node_threadpool.h index 4d69dceed78a4c..d27db957c6f05f 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -86,7 +86,8 @@ class Task { // Invoked on some thread in the Threadpool. virtual void Run() = 0; - // Different Threadpool components should update this as the Task travels around. + // Different Threadpool components should update this as the + // Task travels around. void UpdateState(enum State state); // Run() can access details. @@ -167,7 +168,7 @@ class TaskQueue { private: // Structures. std::queue> queue_; - int outstanding_tasks_; // Number of Tasks in non-COMPLETED states. + int outstanding_tasks_; // Number of Tasks in non-COMPLETED states. bool stopped_; // Synchronization. @@ -185,16 +186,18 @@ class TaskQueue { // Subclass to experiment, e.g.: // - Use a different type of TaskQueue // - Elastic workers (scale up and down) +// +// TODO(davisjam): Thread pool size recommendation. class Threadpool { public: + // TODO(davisjam): RAII. Threadpool(void); // Waits for queue to drain. ~Threadpool(void); // Call once, before you Post. - // TODO(davisjam): RAII? + // TODO(davisjam): Remove, replace with RAII. void Initialize(void); - // TODO(davisjam): Destroy. void Post(std::unique_ptr task); int QueueLength(void) const; From 76ca76855235cd84de0f42f21c0883045522d37d Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Fri, 7 Sep 2018 08:31:41 -0400 Subject: [PATCH 11/31] PTP: More unit tests --- src/node_threadpool.h | 2 +- test/cctest/test_threadpool.cc | 26 +++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/src/node_threadpool.h b/src/node_threadpool.h index d27db957c6f05f..a87d126423a948 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -153,7 +153,7 @@ class TaskQueue { // Blocking Pop. Returns nullptr if queue is empty or Stop'd. std::unique_ptr BlockingPop(void); - // Workers should call this after completing a Task. + // Workers should call this after completing a Pop'd Task. 
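+  // Completion, not Pop, is what decrements outstanding_tasks_, so
+  // BlockingDrain waits for in-flight Tasks as well as queued ones.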
void NotifyOfCompletion(void); // Block until there are no Tasks pending or scheduled. diff --git a/test/cctest/test_threadpool.cc b/test/cctest/test_threadpool.cc index 79b10c1712564a..184bc70d81ab21 100644 --- a/test/cctest/test_threadpool.cc +++ b/test/cctest/test_threadpool.cc @@ -105,7 +105,7 @@ TEST_F(ThreadpoolTest, WorkersWorkWithTaskQueue) { EXPECT_EQ(testTaskDestroyedCount, nTasks); } -TEST_F(ThreadpoolTest, ThreadpoolWorks) { +TEST_F(ThreadpoolTest, ThreadpoolEndToEnd) { int nTasks = 100; { @@ -116,6 +116,7 @@ TEST_F(ThreadpoolTest, ThreadpoolWorks) { testTaskDestroyedCount = 0; tp->Initialize(); + EXPECT_GT(tp->NWorkers(), 0); // Push EXPECT_EQ(tp->QueueLength(), 0); @@ -128,3 +129,26 @@ TEST_F(ThreadpoolTest, ThreadpoolWorks) { EXPECT_EQ(testTaskRunCount, nTasks); EXPECT_EQ(testTaskDestroyedCount, nTasks); } + +TEST_F(ThreadpoolTest, ThreadpoolBlockingDrain) { + // Enough that we will probably have to wait for them to finish. + int nTasks = 10000; + + std::unique_ptr tp(new Threadpool()); + + // Reset globals + testTaskRunCount = 0; + testTaskDestroyedCount = 0; + + tp->Initialize(); + + // Push + EXPECT_EQ(tp->QueueLength(), 0); + for (int i = 0; i < nTasks; i++) { + tp->Post(std::unique_ptr(new TestTask())); + } + + tp->BlockingDrain(); + EXPECT_EQ(testTaskRunCount, nTasks); + EXPECT_EQ(testTaskDestroyedCount, nTasks); +} From 83e91b451436e41a81a2e03b25269518ef2ba4f8 Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Fri, 7 Sep 2018 09:45:59 -0400 Subject: [PATCH 12/31] PTP: Threadpool RAII; UV_THREADPOOL_SIZE 1. Use RAII for Threadpool. Don't have a separate Initialize phase. This was previously useful because the Threadpool knew about libuv. Now that there is a LibuvExecutor, we can use RAII. 2. Have Threadpool accept a size. When absent, try: - UV_THREADPOOL_SIZE (default libuv TP size) - # cores (default --v8-pool-size behavior) --- src/node.cc | 3 +- src/node_platform.cc | 4 +-- src/node_threadpool.cc | 56 +++++++++++++++++++++++++--------- src/node_threadpool.h | 17 ++++++----- test/cctest/test_threadpool.cc | 34 +++++++++++++++++---- 5 files changed, 82 insertions(+), 32 deletions(-) diff --git a/src/node.cc b/src/node.cc index 95263c41856242..4b4e07c5c9644d 100644 --- a/src/node.cc +++ b/src/node.cc @@ -286,8 +286,7 @@ class NodeTraceStateObserver : static struct { void Initialize(void) { - tp_ = std::make_shared(); - tp_->Initialize(); + tp_ = std::make_shared(-1); libuv_executor_ = std::unique_ptr( new threadpool::LibuvExecutor(tp_)); } diff --git a/src/node_platform.cc b/src/node_platform.cc index 48f16d3826e451..38d3f41953a88f 100644 --- a/src/node_platform.cc +++ b/src/node_platform.cc @@ -247,6 +247,7 @@ int PerIsolatePlatformData::unref() { return --ref_count_; } +// TODO(davisjam): Duplicated code between constructors. NodePlatform::NodePlatform(int thread_pool_size, TracingController* tracing_controller) { if (tracing_controller) { @@ -258,8 +259,7 @@ NodePlatform::NodePlatform(int thread_pool_size, // Give wttr its own TP. std::shared_ptr tp = - std::make_shared(); - tp->Initialize(); + std::make_shared(thread_pool_size); worker_thread_task_runner_ = std::make_shared(tp); } diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc index d92b45c0be6768..0597565c34793c 100644 --- a/src/node_threadpool.cc +++ b/src/node_threadpool.cc @@ -10,7 +10,7 @@ // TODO(davisjam): DO NOT MERGE. Only for debugging. // TODO(davisjam): There must be a better way to do this. 
#define DEBUG_LOG 1 -#undef DEBUG_LOG +// #undef DEBUG_LOG #ifdef DEBUG_LOG #include @@ -222,31 +222,59 @@ int TaskQueue::Length(void) const { * Threadpool ***************/ -Threadpool::Threadpool(void) - : queue_(), workers_() { +Threadpool::Threadpool(int threadpool_size) + : threadpool_size_(threadpool_size), queue_(), workers_() { + LOG("Threadpool::Threadpool: threadpool_size_ %d\n", threadpool_size_); + if (threadpool_size_ <= 0) { + // Check UV_THREADPOOL_SIZE + char buf[32]; + size_t buf_size = sizeof(buf); + if (uv_os_getenv("UV_THREADPOOL_SIZE", buf, &buf_size) == 0) { + threadpool_size_ = atoi(buf); + } + } + + if (threadpool_size_ <= 0) { + // No/bad UV_THREADPOOL_SIZE, so take a guess. + threadpool_size_ = GoodThreadpoolSize(); + } + LOG("Threadpool::Threadpool: threadpool_size_ %d\n", threadpool_size_); + CHECK_GT(threadpool_size_, 0); + + Initialize(); } -Threadpool::~Threadpool(void) { - // Block future Push's. - queue_.Stop(); +int Threadpool::GoodThreadpoolSize(void) { + // Ask libuv how many cores we have. + uv_cpu_info_t* cpu_infos; + int count; - // Wait for Workers to drain the queue. - for (auto& worker : workers_) { - worker->Join(); + if (uv_cpu_info(&cpu_infos, &count)) { + return 4; // Old libuv TP default. } -} -// TODO(davisjam): Return early on multiple initialization -void Threadpool::Initialize(void) { - int n_workers = 4; // TODO(davisjam): + uv_free_cpu_info(cpu_infos, count); + return count; +} - for (int i = 0; i < n_workers; i++) { +void Threadpool::Initialize() { + for (int i = 0; i < threadpool_size_; i++) { std::unique_ptr worker(new Worker()); worker->Start(&queue_); workers_.push_back(std::move(worker)); } } +Threadpool::~Threadpool(void) { + // Block future Push's. + queue_.Stop(); + + // Workers will drain the queue and then return. + for (auto& worker : workers_) { + worker->Join(); + } +} + void Threadpool::Post(std::unique_ptr task) { LOG("Threadpool::Post: Got task of type %d\n", task->details_.type); diff --git a/src/node_threadpool.h b/src/node_threadpool.h index a87d126423a948..e06cc2f7640dd0 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -186,19 +186,16 @@ class TaskQueue { // Subclass to experiment, e.g.: // - Use a different type of TaskQueue // - Elastic workers (scale up and down) -// -// TODO(davisjam): Thread pool size recommendation. class Threadpool { public: - // TODO(davisjam): RAII. - Threadpool(void); + // If threadpool_size <= 0: + // - checks UV_THREADPOOL_SIZE to determine threadpool_size + // - if this is not set, takes a guess + // TODO(davisjam): Ponder --v8-pool-size and UV_THREADPOOL_SIZE. + explicit Threadpool(int threadpool_size); // Waits for queue to drain. ~Threadpool(void); - // Call once, before you Post. - // TODO(davisjam): Remove, replace with RAII. - void Initialize(void); - void Post(std::unique_ptr task); int QueueLength(void) const; // Block until there are no tasks pending or scheduled in the TP. 
@@ -207,6 +204,10 @@ class Threadpool { int NWorkers(void) const; private: + int GoodThreadpoolSize(void); + void Initialize(void); + + int threadpool_size_; TaskQueue queue_; std::vector> workers_; }; diff --git a/test/cctest/test_threadpool.cc b/test/cctest/test_threadpool.cc index 184bc70d81ab21..983f23c78218c4 100644 --- a/test/cctest/test_threadpool.cc +++ b/test/cctest/test_threadpool.cc @@ -7,8 +7,10 @@ #include "node_test_fixture.h" #include +#include using node::threadpool::Task; +using node::threadpool::TaskDetails; using node::threadpool::TaskQueue; using node::threadpool::Worker; using node::threadpool::Threadpool; @@ -28,7 +30,11 @@ class ThreadpoolTest : public NodeTestFixture { // Helper so we have a type of Task class TestTask : public node::threadpool::Task { public: - TestTask() {} + TestTask() { + details_.type = TaskDetails::CPU_FAST; + details_.priority = -1; + details_.cancelable = false; + } ~TestTask() { testTaskDestroyedCount++; } @@ -109,13 +115,12 @@ TEST_F(ThreadpoolTest, ThreadpoolEndToEnd) { int nTasks = 100; { - std::unique_ptr tp(new Threadpool()); + std::unique_ptr tp(new Threadpool(-1)); // Reset globals testTaskRunCount = 0; testTaskDestroyedCount = 0; - tp->Initialize(); EXPECT_GT(tp->NWorkers(), 0); // Push @@ -134,14 +139,12 @@ TEST_F(ThreadpoolTest, ThreadpoolBlockingDrain) { // Enough that we will probably have to wait for them to finish. int nTasks = 10000; - std::unique_ptr tp(new Threadpool()); + std::unique_ptr tp(new Threadpool(-1)); // Reset globals testTaskRunCount = 0; testTaskDestroyedCount = 0; - tp->Initialize(); - // Push EXPECT_EQ(tp->QueueLength(), 0); for (int i = 0; i < nTasks; i++) { @@ -152,3 +155,22 @@ TEST_F(ThreadpoolTest, ThreadpoolBlockingDrain) { EXPECT_EQ(testTaskRunCount, nTasks); EXPECT_EQ(testTaskDestroyedCount, nTasks); } + +TEST_F(ThreadpoolTest, ThreadpoolSize) { + char* old = getenv("UV_THREADPOOL_SIZE"); + + int tp_size = 17; + char tp_size_str[4]; + snprintf(tp_size_str, sizeof(tp_size_str), "%d", tp_size); + + setenv("UV_THREADPOOL_SIZE", tp_size_str, 1); + std::unique_ptr tp(new Threadpool(-1)); + EXPECT_EQ(tp->NWorkers(), tp_size); + + // Restore previous value of UV_THREADPOOL_SIZE. + if (old) { + setenv("UV_THREADPOOL_SIZE", old, 1); + } else { + unsetenv("UV_THREADPOOL_SIZE"); + } +} From a654044a7a36dafe1133d7afd38049f29f757755 Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Fri, 7 Sep 2018 15:28:38 -0400 Subject: [PATCH 13/31] PTP: Cancel Tasks Post'ed to the Threadpool Feature: Ability to cancel Tasks Post'ed to the Threadpool. Need: A LibuvExecutor would like this. Approach: Fact: Threadpool::Post accepts a unique_ptr. Fact: In principle we can easily cancel Tasks that have not yet been queued. Fact: But it's hard to cancel a Task if we gave away our pointer to the Threadpool. Threadpool::Post now returns a shared_ptr to a TaskState object. You can TaskState.Cancel() and it might work. This is the behavior offered by the default libuv threadpool as well. 
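In caller terms the intended flow looks roughly like this (TestTask stands
in for any Task subclass; error handling elided):

    std::shared_ptr<TaskState> state =
        tp->Post(std::unique_ptr<Task>(new TestTask()));
    // ... later, the caller loses interest ...
    if (state->Cancel()) {
      // The Task reached CANCELLED. If no Worker had claimed it yet,
      // its Run() is skipped; it is still destroyed as usual.
    } else {
      // Too late: the Task already ran to completion.
    }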
---
 src/node_threadpool.cc         | 249 +++++++++++++++++++++++++--------
 src/node_threadpool.h          | 112 ++++++++++-----
 test/cctest/test_threadpool.cc | 113 ++++++++++++---
 3 files changed, 358 insertions(+), 116 deletions(-)

diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc
index 0597565c34793c..de181150de16b6 100644
--- a/src/node_threadpool.cc
+++ b/src/node_threadpool.cc
@@ -39,11 +39,18 @@ void Worker::Join(void) {
 void Worker::_Run(void* data) {
   TaskQueue* queue = static_cast<TaskQueue*>(data);
+  TaskState::State task_state;
 
   while (std::unique_ptr<Task> task = queue->BlockingPop()) {
-    task->UpdateState(Task::ASSIGNED);
-    task->Run();
-    task->UpdateState(Task::COMPLETED);
+    // May have been cancelled while queued.
+    task_state = task->TryUpdateState(TaskState::ASSIGNED);
+    if (task_state == TaskState::ASSIGNED) {
+      task->Run();
+    } else {
+      CHECK_EQ(task_state, TaskState::CANCELLED);
+    }
 
+    CHECK_EQ(task->TryUpdateState(TaskState::COMPLETED),
+             TaskState::COMPLETED);
     queue->NotifyOfCompletion();
   }
 }
@@ -52,20 +59,158 @@
  * Task
  ***************/
 
-void Task::UpdateState(enum State state) {
-  state_ = state;
+Task::Task() : task_state_() {
+}
+
+void Task::SetTaskState(std::shared_ptr<TaskState> task_state) {
+  task_state_ = task_state;
+}
+
+TaskState::State Task::TryUpdateState(TaskState::State new_state) {
+  return task_state_->TryUpdateState(new_state);
+}
+
+
+/**************
+ * TaskState
+ ***************/
+
+TaskState::TaskState() : state_(INITIAL) {
+}
+
+TaskState::State TaskState::GetState() const {
+  Mutex::ScopedLock scoped_lock(lock_);
+  return state_;
+}
+
+bool TaskState::Cancel() {
+  if (TryUpdateState(CANCELLED) == CANCELLED) {
+    LOG("TaskState::Cancel: Succeed\n");
+    return true;
+  }
+  LOG("TaskState::Cancel: Fail\n");
+  return false;
+}
+
+TaskState::State TaskState::TryUpdateState(TaskState::State new_state) {
+  Mutex::ScopedLock scoped_lock(lock_);
+  if (ValidStateTransition(state_, new_state)) {
+    state_ = new_state;
+  }
+  return state_;
+}
+
+bool TaskState::ValidStateTransition(TaskState::State old_state,
+                                     TaskState::State new_state) {
+  // Normal flow: INITIAL -> QUEUED -> ASSIGNED -> COMPLETED.
+  // Also: non-terminal state -> CANCELLED -> COMPLETED.
+  switch (old_state) {
+    case INITIAL:
+      return new_state == QUEUED || new_state == CANCELLED;
+    case QUEUED:
+      return new_state == ASSIGNED || new_state == CANCELLED;
+    case ASSIGNED:
+      return new_state == COMPLETED || new_state == CANCELLED;
+    case CANCELLED:
+      return new_state == COMPLETED;
+    // No transitions out of terminal state.
+    case COMPLETED:
+      return false;
+    default:
+      CHECK(0);
+  }
+  return false;
 }
 
+
 /**************
  * LibuvExecutor
  ***************/
 
+class LibuvTaskData;
+class LibuvTask;
+
+// Internal LibuvExecutor mechanism to enable uv_cancel.
+// Preserves task_state so the shared_ptr knows not to delete it.
+class LibuvTaskData {
+  friend class LibuvExecutor;
+
+ public:
+  explicit LibuvTaskData(std::shared_ptr<TaskState> state) : state_(state) {
+  }
+
+ private:
+  std::shared_ptr<TaskState> state_;
+};
+
+// The LibuvExecutor wraps libuv uv_work_t's into LibuvTasks
+// and routes them to the internal Threadpool.
+class LibuvTask : public Task {
+ public:
+  LibuvTask(LibuvExecutor* libuv_executor,
+            uv_work_t* req,
+            const uv_work_options_t* opts)
+    : Task(), libuv_executor_(libuv_executor), req_(req) {
+    CHECK(req_);
+    req_->reserved[0] = nullptr;
+
+    // Fill in TaskDetails based on opts.
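+    // Map libuv's type tags (UV_WORK_FS, UV_WORK_DNS, ...) onto the
+    // Threadpool's own TaskDetails vocabulary; anything unrecognized
+    // is tagged UNKNOWN rather than rejected.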
+    if (opts) {
+      switch (opts->type) {
+        case UV_WORK_FS:
+          details_.type = TaskDetails::FS;
+          break;
+        case UV_WORK_DNS:
+          details_.type = TaskDetails::DNS;
+          break;
+        case UV_WORK_USER_IO:
+          details_.type = TaskDetails::IO;
+          break;
+        case UV_WORK_USER_CPU:
+          details_.type = TaskDetails::CPU;
+          break;
+        default:
+          details_.type = TaskDetails::UNKNOWN;
+      }
+
+      details_.priority = opts->priority;
+      details_.cancelable = opts->cancelable;
+    } else {
+      details_.type = TaskDetails::UNKNOWN;
+      details_.priority = -1;
+      details_.cancelable = false;
+    }
+
+    LOG("LibuvTask::LibuvTask: type %d\n", details_.type);
+  }
+
+  ~LibuvTask() {
+    LOG("LibuvTask::~LibuvTask: Task %p done\n", req_);
+    // Clean up our storage.
+    LibuvTaskData* data = reinterpret_cast<LibuvTaskData*>(req_->reserved[0]);
+    delete data;
+    req_->reserved[0] = nullptr;
+
+    // Inform libuv.
+    libuv_executor_->GetExecutor()->done(req_);
+  }
+
+  void Run() {
+    LOG("LibuvTask::Run: Running Task %p\n", req_);
+    req_->work_cb(req_);
+  }
+
+ protected:
+ private:
+  LibuvExecutor* libuv_executor_;
+  uv_work_t* req_;
+};
+
 LibuvExecutor::LibuvExecutor(std::shared_ptr<Threadpool> tp)
   : tp_(tp) {
   executor_.init = uv_executor_init;
   executor_.destroy = nullptr;
   executor_.submit = uv_executor_submit;
-  executor_.cancel = nullptr;
+  executor_.cancel = uv_executor_cancel;
   executor_.data = this;
 }
 
@@ -73,6 +218,10 @@
 uv_executor_t* LibuvExecutor::GetExecutor() {
   return &executor_;
 }
 
+bool LibuvExecutor::Cancel(std::shared_ptr<TaskState> task_state) {
+  return task_state->Cancel();
+}
+
 void LibuvExecutor::uv_executor_init(uv_executor_t* executor) {
   // Already initialized.
   // TODO(davisjam): I don't think we need this API in libuv. Nor destroy.
@@ -83,58 +232,34 @@
 void LibuvExecutor::uv_executor_submit(uv_executor_t* executor,
                                        uv_work_t* req,
                                        const uv_work_options_t* opts) {
   LibuvExecutor* libuv_executor =
     reinterpret_cast<LibuvExecutor*>(executor->data);
-  LOG("LibuvExecutor::uv_executor_submit: Got some work!\n");
-  libuv_executor->tp_->Post(std::unique_ptr<Task>(
+  LOG("LibuvExecutor::uv_executor_submit: Got work %p\n", req);
+
+  auto task_state = libuv_executor->tp_->Post(std::unique_ptr<Task>(
     new LibuvTask(libuv_executor, req, opts)));
+  CHECK(task_state);  // Must not fail. We have no mechanism to tell libuv.
+
+  auto data = new LibuvTaskData(task_state);
+  req->reserved[0] = data;
+}
 
-/**************
- * LibuvTask
- ***************/
+// Remember, libuv user won't free uv_work_t until after its done_cb is called.
+// That won't happen until after the wrapping LibuvTask is destroyed.
+int LibuvExecutor::uv_executor_cancel(uv_executor_t* executor,
+                                      uv_work_t* req) {
+  if (!req || !req->reserved[0]) {
+    return UV_EINVAL;
+  }
 
-LibuvTask::LibuvTask(LibuvExecutor* libuv_executor,
-                     uv_work_t* req,
-                     const uv_work_options_t* opts)
-  : Task(), libuv_executor_(libuv_executor), req_(req) {
-  // Fill in TaskDetails based on opts.
- if (opts) { - switch (opts->type) { - case UV_WORK_FS: - details_.type = TaskDetails::FS; - break; - case UV_WORK_DNS: - details_.type = TaskDetails::DNS; - break; - case UV_WORK_USER_IO: - details_.type = TaskDetails::IO; - break; - case UV_WORK_USER_CPU: - details_.type = TaskDetails::CPU; - break; - default: - details_.type = TaskDetails::UNKNOWN; - } + LibuvExecutor* libuv_executor = + reinterpret_cast(executor->data); + LibuvTaskData* task_data = + reinterpret_cast(req->reserved[0]); - details_.priority = opts->priority; - details_.cancelable = opts->cancelable; + if (libuv_executor->Cancel(task_data->state_)) { + return 0; } else { - details_.type = TaskDetails::UNKNOWN; - details_.priority = -1; - details_.cancelable = false; + return UV_EBUSY; } - - LOG("LibuvTask::LibuvTask: type %d\n", details_.type); -} - -LibuvTask::~LibuvTask(void) { - LOG("LibuvTask::Run: Task %p done\n", req_); - libuv_executor_->GetExecutor()->done(req_); -} - -void LibuvTask::Run() { - LOG("LibuvTask::Run: Running Task %p\n", req_); - req_->work_cb(req_); } /************** @@ -142,9 +267,9 @@ void LibuvTask::Run() { ***************/ TaskQueue::TaskQueue() - : queue_(), outstanding_tasks_(0), stopped_(false) - , lock_() - , task_available_(), tasks_drained_() { + : lock_() + , task_available_(), tasks_drained_() + , queue_(), outstanding_tasks_(0), stopped_(false) { } bool TaskQueue::Push(std::unique_ptr task) { @@ -154,7 +279,11 @@ bool TaskQueue::Push(std::unique_ptr task) { return false; } - task->UpdateState(Task::QUEUED); + // The queue contains QUEUED or CANCELLED Tasks. + // There's little harm in queueing CANCELLED tasks. + TaskState::State task_state = task->TryUpdateState(TaskState::QUEUED); + CHECK(task_state == TaskState::QUEUED || task_state == TaskState::CANCELLED); + queue_.push(std::move(task)); outstanding_tasks_++; task_available_.Signal(scoped_lock); @@ -250,10 +379,12 @@ int Threadpool::GoodThreadpoolSize(void) { int count; if (uv_cpu_info(&cpu_infos, &count)) { + LOG("Threadpool::GoodThreadpoolSize: Huh, uv_cpu_info failed?\n"); return 4; // Old libuv TP default. } uv_free_cpu_info(cpu_infos, count); + LOG("Threadpool::GoodThreadpoolSize: cpu count %d\n", count); return count; } @@ -275,10 +406,16 @@ Threadpool::~Threadpool(void) { } } -void Threadpool::Post(std::unique_ptr task) { +std::shared_ptr Threadpool::Post(std::unique_ptr task) { LOG("Threadpool::Post: Got task of type %d\n", task->details_.type); + + std::shared_ptr task_state = std::make_shared(); + task->SetTaskState(task_state); + queue_.Push(std::move(task)); + + return task_state; } int Threadpool::QueueLength(void) const { diff --git a/src/node_threadpool.h b/src/node_threadpool.h index e06cc2f7640dd0..f76d9f1416ada2 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -14,11 +14,15 @@ namespace node { namespace threadpool { +// Consumer of Threadpool. class LibuvExecutor; + +// Threadpool components. class Threadpool; class TaskQueue; class Task; class TaskDetails; +class TaskState; class Worker; // Inhabited by a uv_thread_t. @@ -66,6 +70,51 @@ class TaskDetails { // cancel this Task while it is scheduled. }; +// Each TaskState is shared by a Task and its Post()'er. +// A TaskState is a two-way communication channel: +// - The threadpool updates its State +// - The Post'er can try to Cancel it +// +// TODO(davisjam): Could add tracking of how long +// it spent in QUEUED, ASSIGNED, COMPLETED states, +// and what its total lifetime was. +class TaskState { + // My friends can call TryUpdateState. 
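+  // TryUpdateState is deliberately not public: only the Threadpool's
+  // own machinery may drive the state machine, while a Post'er is
+  // limited to GetState and Cancel.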
+ friend class Task; + + public: + enum State { + INITIAL + , QUEUED + , ASSIGNED + , COMPLETED // Terminal state + , CANCELLED + }; + + TaskState(); + + // For the benefit of an impatient Post'er. + State GetState() const; + + // Attempt to cancel the associated Task. + bool Cancel(); + + protected: + // Synchronization. + Mutex lock_; + + // Different Threadpool components should update this as the + // Task travels around. + // Returns the state after the attempted update. + // Thread safe. + State TryUpdateState(State new_state); + + // Caller must hold lock_. + bool ValidStateTransition(State old_state, State new_state); + + State state_; +}; + // Abstract notion of a Task. // Clients of node::Threadpool should sub-class this for their type of request. // - V8::Platform Tasks @@ -73,29 +122,22 @@ class TaskDetails { // - User work from the N-API class Task { public: - enum State { - QUEUED - , ASSIGNED - , COMPLETED - }; - - Task() {} + // Subclasses should set details_ in their constructor. + Task(); // Invoked after Run(). virtual ~Task() {} + void SetTaskState(std::shared_ptr task_state); + // Invoked on some thread in the Threadpool. virtual void Run() = 0; - // Different Threadpool components should update this as the - // Task travels around. - void UpdateState(enum State state); + TaskState::State TryUpdateState(TaskState::State new_state); - // Run() can access details. - // Should be set in subclass constructor. TaskDetails details_; protected: - enum State state_; + std::shared_ptr task_state_; }; // Shim that we plug into the libuv "pluggable TP" interface. @@ -108,39 +150,32 @@ class LibuvExecutor { uv_executor_t* GetExecutor(); + // Returns true on success. + bool Cancel(std::shared_ptr task_state); + private: + // These redirect into appropriate public methods. static void uv_executor_init(uv_executor_t* executor); static void uv_executor_submit(uv_executor_t* executor, uv_work_t* req, const uv_work_options_t* opts); + static int uv_executor_cancel(uv_executor_t* executor, + uv_work_t* req); std::shared_ptr tp_; - uv_executor_t executor_; // executor_.data points to + uv_executor_t executor_; // executor_.data points to an // instance of LibuvExecutor. }; -// The LibuvExecutor wraps libuv uv_work_t's into LibuvTasks -// and routes them to the internal Threadpool. -class LibuvTask : public Task { - public: - LibuvTask(LibuvExecutor* libuv_executor, - uv_work_t* req, - const uv_work_options_t* opts); - ~LibuvTask(); - - void Run(); - - protected: - private: - LibuvExecutor* libuv_executor_; - uv_work_t* req_; -}; - // Abstract notion of a queue of Tasks. // The default implementation is a FIFO queue. // Subclass to experiment, e.g.: // - prioritization // - multi-queue e.g. for CPU-bound and I/O-bound Tasks or Fast and Slow ones. +// +// All Tasks Push'd to TaskQueue should have been assigned a TaskState. +// The TaskQueue contains both QUEUED and CANCELLED Tasks. +// Users should check the state of Tasks they Pop. class TaskQueue { public: TaskQueue(); @@ -166,17 +201,17 @@ class TaskQueue { int Length(void) const; private: - // Structures. - std::queue> queue_; - int outstanding_tasks_; // Number of Tasks in non-COMPLETED states. - bool stopped_; - // Synchronization. Mutex lock_; // Signal'd when there is at least one task in the queue. ConditionVariable task_available_; // Signal'd when all Push'd Tasks are in COMPLETED state. ConditionVariable tasks_drained_; + + // Structures. + std::queue> queue_; + int outstanding_tasks_; // Number of Tasks in non-COMPLETED states. 
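+  // NB: outstanding_tasks_ also counts Pop'd Tasks not yet reported
+  // via NotifyOfCompletion(), so it can exceed Length().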
+ bool stopped_; }; // A threadpool works on asynchronous Tasks. @@ -196,7 +231,10 @@ class Threadpool { // Waits for queue to drain. ~Threadpool(void); - void Post(std::unique_ptr task); + // Returns a TaskState by which caller can track the progress of the Task. + // Caller can also use the TaskState to cancel the Task. + // Returns nullptr on failure. + std::shared_ptr Post(std::unique_ptr task); int QueueLength(void) const; // Block until there are no tasks pending or scheduled in the TP. void BlockingDrain(void); diff --git a/test/cctest/test_threadpool.cc b/test/cctest/test_threadpool.cc index 983f23c78218c4..54074a3ceefbc2 100644 --- a/test/cctest/test_threadpool.cc +++ b/test/cctest/test_threadpool.cc @@ -1,16 +1,21 @@ #include "node_internals.h" #include "node_threadpool.h" -#include "libplatform/libplatform.h" -#include -#include "gtest/gtest.h" +#include "node.h" +#include "node_platform.h" +#include "node_internals.h" +#include "env.h" +#include "v8.h" #include "node_test_fixture.h" +#include "gtest/gtest.h" + +#include #include -#include -using node::threadpool::Task; using node::threadpool::TaskDetails; +using node::threadpool::TaskState; +using node::threadpool::Task; using node::threadpool::TaskQueue; using node::threadpool::Worker; using node::threadpool::Threadpool; @@ -19,28 +24,44 @@ using node::threadpool::Threadpool; static std::atomic testTaskRunCount(0); static std::atomic testTaskDestroyedCount(0); -// TODO(davisjam): Do I need this? -class ThreadpoolTest : public NodeTestFixture { - private: - virtual void TearDown() { - NodeTestFixture::TearDown(); +// Tests of internals: Worker, Task, TaskQueue, Threadpool. +// +// NB The node instance defined by NodeTestFixture does not use our Threadpool. +// So we can't easily test LibuvExecutor etc. +// Rely on higher-level tests for that. +class ThreadpoolTest : public NodeTestFixture { }; + +// Helpers so we have different Task types. 
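+// FastTestTask returns immediately; SlowTestTask spins long enough
+// that cancellation has a chance to beat the Workers to the queue.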
+class FastTestTask : public node::threadpool::Task { + public: + FastTestTask() { + details_.type = TaskDetails::CPU_FAST; + details_.priority = -1; + details_.cancelable = false; + } + ~FastTestTask() { + testTaskDestroyedCount++; + } + + void Run() { + testTaskRunCount++; } }; -// Helper so we have a type of Task -class TestTask : public node::threadpool::Task { +class SlowTestTask : public node::threadpool::Task { public: - TestTask() { - details_.type = TaskDetails::CPU_FAST; + SlowTestTask() { + details_.type = TaskDetails::CPU_SLOW; details_.priority = -1; details_.cancelable = false; } - ~TestTask() { + ~SlowTestTask() { testTaskDestroyedCount++; } void Run() { testTaskRunCount++; + for (int i = 0; i < 10000000; i++); } }; @@ -55,8 +76,10 @@ TEST_F(ThreadpoolTest, TaskQueueEndToEnd) { // Push EXPECT_EQ(tq.Length(), 0); for (int i = 0; i < nTasks; i++) { - EXPECT_EQ(tq.Push(std::unique_ptr(new TestTask())), - true); + auto task_state = std::make_shared(); + auto task = std::unique_ptr(new FastTestTask()); + task->SetTaskState(task_state); + EXPECT_EQ(tq.Push(std::move(task)), true); } EXPECT_EQ(tq.Length(), nTasks); @@ -78,7 +101,7 @@ TEST_F(ThreadpoolTest, TaskQueueEndToEnd) { // Stop works tq.Stop(); - EXPECT_EQ(tq.Push(std::unique_ptr(new TestTask())), false); + EXPECT_EQ(tq.Push(std::unique_ptr(new FastTestTask())), false); } TEST_F(ThreadpoolTest, WorkersWorkWithTaskQueue) { @@ -93,10 +116,12 @@ TEST_F(ThreadpoolTest, WorkersWorkWithTaskQueue) { // Push EXPECT_EQ(tq.Length(), 0); for (int i = 0; i < nTasks; i++) { - EXPECT_EQ(tq.Push(std::unique_ptr(new TestTask())), - true); + auto task_state = std::make_shared(); + auto task = std::unique_ptr(new FastTestTask()); + task->SetTaskState(task_state); + EXPECT_EQ(tq.Push(std::move(task)), true); } - // Worker hasn't started yet, so tq should be "full". + // Worker hasn't started yet, so tq should be at high water mark. EXPECT_EQ(tq.Length(), nTasks); // Once we start the worker, it should empty tq. @@ -126,7 +151,7 @@ TEST_F(ThreadpoolTest, ThreadpoolEndToEnd) { // Push EXPECT_EQ(tp->QueueLength(), 0); for (int i = 0; i < nTasks; i++) { - tp->Post(std::unique_ptr(new TestTask())); + tp->Post(std::unique_ptr(new FastTestTask())); } } // tp leaves scope. In destructor it drains the queue. @@ -148,7 +173,7 @@ TEST_F(ThreadpoolTest, ThreadpoolBlockingDrain) { // Push EXPECT_EQ(tp->QueueLength(), 0); for (int i = 0; i < nTasks; i++) { - tp->Post(std::unique_ptr(new TestTask())); + tp->Post(std::unique_ptr(new FastTestTask())); } tp->BlockingDrain(); @@ -174,3 +199,45 @@ TEST_F(ThreadpoolTest, ThreadpoolSize) { unsetenv("UV_THREADPOOL_SIZE"); } } + +TEST_F(ThreadpoolTest, ThreadpoolCancel) { + int nTasks = 10000; + int nCancelled = 0; + + { + std::shared_ptr states[nTasks]; + std::unique_ptr tp(new Threadpool(1)); + + // Reset globals + testTaskRunCount = 0; + testTaskDestroyedCount = 0; + + EXPECT_GT(tp->NWorkers(), 0); + + // Push + EXPECT_EQ(tp->QueueLength(), 0); + for (int i = 0; i < nTasks; i++) { + states[i] = tp->Post(std::unique_ptr(new SlowTestTask())); + } + + // Cancel + for (int i = nTasks - 1; i >= 0; i--) { + if (states[i]->Cancel()) { + nCancelled++; + } + } + fprintf(stderr, "DEBUG: cancelled %d\n", nCancelled); + } + // tp leaves scope. In destructor it drains the queue. + + // All Tasks, cancelled or not, should be destroyed. + EXPECT_EQ(testTaskDestroyedCount, nTasks); + + // 0 <= testTaskRunCount <= nTasks. + // We may have successfully cancelled all of them. 
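+  // (With a single Worker and SlowTestTasks, most Tasks should still
+  // be QUEUED when the Cancel loop reaches them.)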
+ EXPECT_GE(testTaskRunCount, 0); + EXPECT_LE(testTaskRunCount, nTasks); + + // We used SlowTestTasks so we should have managed to cancel at least 1. + EXPECT_GT(nCancelled, 0); +} From 706f2c62bd93c3c7306c814eabdaf70253dc60e7 Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Sun, 9 Sep 2018 13:42:23 -0400 Subject: [PATCH 14/31] PTP: Add WorkerGroup class to manage Workers This lets Threadpool operate at a higher level of abstraction. While I was in there, I switched to using smart pointers for the TaskQueue shared by the Workers. --- src/node_threadpool.cc | 64 +++++++++++++++++++++------------- src/node_threadpool.h | 29 +++++++++++---- test/cctest/test_threadpool.cc | 49 +++++++++++++++++++++----- 3 files changed, 104 insertions(+), 38 deletions(-) diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc index de181150de16b6..334fbc8f5cbb17 100644 --- a/src/node_threadpool.cc +++ b/src/node_threadpool.cc @@ -22,15 +22,38 @@ namespace node { namespace threadpool { +/************** + * WorkerGroup + ***************/ + +WorkerGroup::WorkerGroup(int n_workers, std::shared_ptr tq) + : workers_() { + for (int i = 0; i < n_workers; i++) { + std::unique_ptr worker(new Worker(tq)); + worker->Start(); + workers_.push_back(std::move(worker)); + } +} + +WorkerGroup::~WorkerGroup() { + for (auto& worker : workers_) { + worker->Join(); + } +} + +int WorkerGroup::Size() const { + return workers_.size(); +} + /************** * Worker ***************/ -Worker::Worker() { +Worker::Worker(std::shared_ptr tq) : tq_(tq) { } -void Worker::Start(TaskQueue* queue) { - CHECK_EQ(0, uv_thread_create(&self_, _Run, reinterpret_cast(queue))); +void Worker::Start() { + CHECK_EQ(0, uv_thread_create(&self_, _Run, reinterpret_cast(this))); } void Worker::Join(void) { @@ -38,9 +61,10 @@ void Worker::Join(void) { } void Worker::_Run(void* data) { - TaskQueue* queue = static_cast(data); + Worker* worker = static_cast(data); + TaskState::State task_state; - while (std::unique_ptr task = queue->BlockingPop()) { + while (std::unique_ptr task = worker->tq_->BlockingPop()) { // May have been cancelled while queued. task_state = task->TryUpdateState(TaskState::ASSIGNED); if (task_state == TaskState::ASSIGNED) { @@ -51,7 +75,7 @@ void Worker::_Run(void* data) { CHECK_EQ(task->TryUpdateState(TaskState::COMPLETED), TaskState::COMPLETED); - queue->NotifyOfCompletion(); + worker->tq_->NotifyOfCompletion(); } } @@ -85,10 +109,8 @@ TaskState::State TaskState::GetState() const { bool TaskState::Cancel() { if (TryUpdateState(CANCELLED) == CANCELLED) { - LOG("TaskState::Cancel: Succeed\n"); return true; } - LOG("TaskState::Cancel: Fail\n"); return false; } @@ -352,7 +374,7 @@ int TaskQueue::Length(void) const { ***************/ Threadpool::Threadpool(int threadpool_size) - : threadpool_size_(threadpool_size), queue_(), workers_() { + : threadpool_size_(threadpool_size) { LOG("Threadpool::Threadpool: threadpool_size_ %d\n", threadpool_size_); if (threadpool_size_ <= 0) { // Check UV_THREADPOOL_SIZE @@ -389,21 +411,15 @@ int Threadpool::GoodThreadpoolSize(void) { } void Threadpool::Initialize() { - for (int i = 0; i < threadpool_size_; i++) { - std::unique_ptr worker(new Worker()); - worker->Start(&queue_); - workers_.push_back(std::move(worker)); - } + task_queue_ = std::make_shared(); + worker_group_ = std::unique_ptr( + new WorkerGroup(threadpool_size_, task_queue_)); } Threadpool::~Threadpool(void) { // Block future Push's. - queue_.Stop(); - - // Workers will drain the queue and then return. 
- for (auto& worker : workers_) { - worker->Join(); - } + task_queue_->Stop(); + // As worker_group_ leaves scope, it drains tq and Join's its threads. } std::shared_ptr Threadpool::Post(std::unique_ptr task) { @@ -413,21 +429,21 @@ std::shared_ptr Threadpool::Post(std::unique_ptr task) { std::shared_ptr task_state = std::make_shared(); task->SetTaskState(task_state); - queue_.Push(std::move(task)); + task_queue_->Push(std::move(task)); return task_state; } int Threadpool::QueueLength(void) const { - return queue_.Length(); + return task_queue_->Length(); } void Threadpool::BlockingDrain(void) { - queue_.BlockingDrain(); + task_queue_->BlockingDrain(); } int Threadpool::NWorkers(void) const { - return workers_.size(); + return worker_group_->Size(); } } // namespace threadpool diff --git a/src/node_threadpool.h b/src/node_threadpool.h index f76d9f1416ada2..4fda68ad304775 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -19,21 +19,39 @@ class LibuvExecutor; // Threadpool components. class Threadpool; + class TaskQueue; +class WorkerGroup; + class Task; class TaskDetails; class TaskState; + class Worker; +// Represents a set of Workers +class WorkerGroup { + public: + WorkerGroup(int n_workers, std::shared_ptr tq); + // Assumes tq has been Stop'd by its owner. + ~WorkerGroup(); + + int Size() const; + + private: + std::vector> workers_; +}; + // Inhabited by a uv_thread_t. // Subclass to experiment, e.g.: // - cancellation (a la Davis et al. 2018's Manager-Worker-Hangman approach) class Worker { public: - Worker(); + Worker(std::shared_ptr tq); // Starts a thread and returns control to the caller. - void Start(TaskQueue* queue); + void Start(); + // Join the internal uv_thread_t. void Join(void); protected: @@ -41,8 +59,7 @@ class Worker { static void _Run(void* data); uv_thread_t self_; - - private: + std::shared_ptr tq_; }; // This is basically a struct @@ -246,8 +263,8 @@ class Threadpool { void Initialize(void); int threadpool_size_; - TaskQueue queue_; - std::vector> workers_; + std::shared_ptr task_queue_; + std::unique_ptr worker_group_; }; } // namespace threadpool diff --git a/test/cctest/test_threadpool.cc b/test/cctest/test_threadpool.cc index 54074a3ceefbc2..31bd533d3c2de6 100644 --- a/test/cctest/test_threadpool.cc +++ b/test/cctest/test_threadpool.cc @@ -18,6 +18,7 @@ using node::threadpool::TaskState; using node::threadpool::Task; using node::threadpool::TaskQueue; using node::threadpool::Worker; +using node::threadpool::WorkerGroup; using node::threadpool::Threadpool; // Thread-safe counters @@ -106,30 +107,62 @@ TEST_F(ThreadpoolTest, TaskQueueEndToEnd) { TEST_F(ThreadpoolTest, WorkersWorkWithTaskQueue) { int nTasks = 100; - TaskQueue tq; - Worker w; + std::shared_ptr tq = std::make_shared(); + Worker w(tq); // Reset globals testTaskRunCount = 0; testTaskDestroyedCount = 0; // Push - EXPECT_EQ(tq.Length(), 0); + EXPECT_EQ(tq->Length(), 0); for (int i = 0; i < nTasks; i++) { auto task_state = std::make_shared(); auto task = std::unique_ptr(new FastTestTask()); task->SetTaskState(task_state); - EXPECT_EQ(tq.Push(std::move(task)), true); + EXPECT_EQ(tq->Push(std::move(task)), true); } // Worker hasn't started yet, so tq should be at high water mark. - EXPECT_EQ(tq.Length(), nTasks); + EXPECT_EQ(tq->Length(), nTasks); // Once we start the worker, it should empty tq. 
- w.Start(&tq); + w.Start(); - tq.Stop(); // Signal Worker that we're done + tq->Stop(); // Signal Worker that we're done w.Join(); // Wait for Worker to finish - EXPECT_EQ(tq.Length(), 0); + EXPECT_EQ(tq->Length(), 0); + + // And it should have run and destroyed every Task. + EXPECT_EQ(testTaskRunCount, nTasks); + EXPECT_EQ(testTaskDestroyedCount, nTasks); +} + +TEST_F(ThreadpoolTest, WorkerGroupWorksWithTaskQueue) { + int nTasks = 100; + std::shared_ptr tq = std::make_shared(); + + // Reset globals + testTaskRunCount = 0; + testTaskDestroyedCount = 0; + + // Push + EXPECT_EQ(tq->Length(), 0); + for (int i = 0; i < nTasks; i++) { + auto task_state = std::make_shared(); + auto task = std::unique_ptr(new FastTestTask()); + task->SetTaskState(task_state); + EXPECT_EQ(tq->Push(std::move(task)), true); + } + // Worker hasn't started yet, so tq should be at high water mark. + EXPECT_EQ(tq->Length(), nTasks); + + { + // Once we create the WorkerGroup, it should empty tq. + WorkerGroup wg(4, tq); + tq->Stop(); + } // wg leaves scope + // wg destructor should drain tq + EXPECT_EQ(tq->Length(), 0); // And it should have run and destroyed every Task. EXPECT_EQ(testTaskRunCount, nTasks); From d79920f0f6b2ce9f0e76f4fd283586e13957a57a Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Sun, 9 Sep 2018 14:13:20 -0400 Subject: [PATCH 15/31] PTP: Expose NodeThreadpool as public TP face This permits the use of the existing Threadpool class as a building block for more sophisticated NodeThreadpools. The default NodeThreadpool is just a pass-thru for a Threadpool. --- src/node.cc | 6 +- src/node_platform.cc | 14 ++-- src/node_platform.h | 6 +- src/node_threadpool.cc | 126 +++++++++++++++++++-------------- src/node_threadpool.h | 76 ++++++++++++++------ test/cctest/test_threadpool.cc | 67 ++++++++++++------ 6 files changed, 187 insertions(+), 108 deletions(-) diff --git a/src/node.cc b/src/node.cc index 4b4e07c5c9644d..3a9f0585461d31 100644 --- a/src/node.cc +++ b/src/node.cc @@ -286,18 +286,18 @@ class NodeTraceStateObserver : static struct { void Initialize(void) { - tp_ = std::make_shared(-1); + tp_ = std::make_shared(4); libuv_executor_ = std::unique_ptr( new threadpool::LibuvExecutor(tp_)); } - std::shared_ptr tp_; + std::shared_ptr tp_; std::unique_ptr libuv_executor_; } node_threadpool; static struct { #if NODE_USE_V8_PLATFORM - void Initialize(std::shared_ptr tp) { + void Initialize(std::shared_ptr tp) { tracing_agent_.reset(new tracing::Agent()); auto controller = tracing_agent_->GetTracingController(); controller->AddTraceStateObserver(new NodeTraceStateObserver(controller)); diff --git a/src/node_platform.cc b/src/node_platform.cc index 38d3f41953a88f..ea1968b108e7b9 100644 --- a/src/node_platform.cc +++ b/src/node_platform.cc @@ -16,7 +16,7 @@ using v8::Platform; using v8::Task; using v8::TracingController; -// Wrapper for delivery to threadpool::Threadpool. +// Wrapper for delivery to threadpool::NodeThreadpool. 
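+// (V8Task carries a v8::Task through the threadpool by implementing
+// the threadpool::Task interface around it.)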
 class V8Task : public threadpool::Task {
  public:
   explicit V8Task(std::unique_ptr<v8::Task> task) {
@@ -37,7 +37,7 @@ class V8Task : public threadpool::Task {
 
 class WorkerThreadsTaskRunner::DelayedTaskScheduler {
  public:
-  explicit DelayedTaskScheduler(std::shared_ptr<Threadpool> tp)
+  explicit DelayedTaskScheduler(std::shared_ptr<NodeThreadpool> tp)
     : tp_(tp) {}
 
   std::unique_ptr<uv_thread_t> Start() {
@@ -146,7 +146,7 @@ class WorkerThreadsTaskRunner::DelayedTaskScheduler {
   }
 
   uv_sem_t ready_;
-  std::shared_ptr<Threadpool> tp_;
+  std::shared_ptr<NodeThreadpool> tp_;
   TaskQueue tasks_;
 
   uv_loop_t loop_;
@@ -155,7 +155,7 @@ };
 
 WorkerThreadsTaskRunner::WorkerThreadsTaskRunner(
-    std::shared_ptr<Threadpool> tp) {
+    std::shared_ptr<NodeThreadpool> tp) {
   tp_ = tp;
   delayed_task_scheduler_.reset(
       new DelayedTaskScheduler(tp_));
@@ -249,12 +249,12 @@ int PerIsolatePlatformData::unref() {
   }
 
   // Give wttr its own TP.
-  std::shared_ptr<Threadpool> tp =
-      std::make_shared<Threadpool>(thread_pool_size);
+  std::shared_ptr<NodeThreadpool> tp =
+      std::make_shared<NodeThreadpool>(thread_pool_size);
   worker_thread_task_runner_ = std::make_shared<WorkerThreadsTaskRunner>(tp);
 }
 
-NodePlatform::NodePlatform(std::shared_ptr<Threadpool> tp,
+NodePlatform::NodePlatform(std::shared_ptr<NodeThreadpool> tp,
                            TracingController* tracing_controller) {
   if (tracing_controller) {
     tracing_controller_.reset(tracing_controller);
diff --git a/src/node_platform.h b/src/node_platform.h
index af2f08f99df989..e464ca70cb5bea 100644
--- a/src/node_platform.h
+++ b/src/node_platform.h
@@ -100,7 +100,7 @@ class PerIsolatePlatformData :
 // API is modeled on v8::TaskRunner.
 class WorkerThreadsTaskRunner {
  public:
-  explicit WorkerThreadsTaskRunner(std::shared_ptr<Threadpool> tp);
+  explicit WorkerThreadsTaskRunner(std::shared_ptr<NodeThreadpool> tp);
 
   // Add task to queue for eventual Run()
   void PostTask(std::unique_ptr<v8::Task> task);
@@ -119,7 +119,7 @@
   std::unique_ptr<DelayedTaskScheduler> delayed_task_scheduler_;
   std::unique_ptr<uv_thread_t> delayed_task_scheduler_thread_;
 
-  std::shared_ptr<Threadpool> tp_;
+  std::shared_ptr<NodeThreadpool> tp_;
 };
 
 class NodePlatform : public MultiIsolatePlatform {
@@ -127,7 +127,7 @@
   // Create a NodePlatform with its own TP
   NodePlatform(int thread_pool_size, v8::TracingController* tracing_controller);
   // Create a NodePlatform using the provided TP
-  NodePlatform(std::shared_ptr<Threadpool> tp,
+  NodePlatform(std::shared_ptr<NodeThreadpool> tp,
                v8::TracingController* tracing_controller);
   virtual ~NodePlatform() {}
 
diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc
index 334fbc8f5cbb17..cc0821c77e8827 100644
--- a/src/node_threadpool.cc
+++ b/src/node_threadpool.cc
@@ -16,12 +16,70 @@
 #include <stdio.h>
 #define LOG(...) fprintf(stderr, __VA_ARGS__)
 #else
 #define LOG(...) (void) 0
 #endif
 
 namespace node {
 namespace threadpool {
 
+/**************
+ * NodeThreadpool
+ ***************/
+
+NodeThreadpool::NodeThreadpool(int threadpool_size) {
+  if (threadpool_size <= 0) {
+    // Check UV_THREADPOOL_SIZE
+    char buf[32];
+    size_t buf_size = sizeof(buf);
+    if (uv_os_getenv("UV_THREADPOOL_SIZE", buf, &buf_size) == 0) {
+      threadpool_size = atoi(buf);
+    }
+  }
+
+  if (threadpool_size <= 0) {
+    // No/bad UV_THREADPOOL_SIZE, so take a guess.
+    threadpool_size = GoodCPUThreadpoolSize();
+  }
+  LOG("Threadpool::Threadpool: threadpool_size %d\n", threadpool_size);
+  CHECK_GT(threadpool_size, 0);
+
+  tp_ = std::make_shared<Threadpool>(threadpool_size);
+}
+
+int NodeThreadpool::GoodCPUThreadpoolSize(void) {
+  // Ask libuv how many cores we have.
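+  // (uv_cpu_info allocates the cpu_infos array; we only need the
+  // count, so free it immediately.)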
+ uv_cpu_info_t* cpu_infos; + int cpu_count; + + if (uv_cpu_info(&cpu_infos, &cpu_count)) { + LOG("Threadpool::GoodThreadpoolSize: Huh, uv_cpu_info failed?\n"); + return 4; // Old libuv TP default. + } + + uv_free_cpu_info(cpu_infos, cpu_count); + LOG("Threadpool::GoodThreadpoolSize: cpu_count %d\n", cpu_count); + return cpu_count - 1; // Leave one core for main loop +} + +NodeThreadpool::~NodeThreadpool() { +} + +std::shared_ptr NodeThreadpool::Post(std::unique_ptr task) { + return tp_->Post(std::move(task)); +} + +void NodeThreadpool::BlockingDrain() { + return tp_->BlockingDrain(); +} + +int NodeThreadpool::QueueLength() const { + return tp_->QueueLength(); +} + +int NodeThreadpool::NWorkers() const { + return tp_->NWorkers(); +} + /************** * WorkerGroup ***************/ @@ -56,7 +114,7 @@ void Worker::Start() { CHECK_EQ(0, uv_thread_create(&self_, _Run, reinterpret_cast(this))); } -void Worker::Join(void) { +void Worker::Join() { CHECK_EQ(0, uv_thread_join(&self_)); } @@ -227,7 +285,7 @@ class LibuvTask : public Task { uv_work_t* req_; }; -LibuvExecutor::LibuvExecutor(std::shared_ptr tp) +LibuvExecutor::LibuvExecutor(std::shared_ptr tp) : tp_(tp) { executor_.init = uv_executor_init; executor_.destroy = nullptr; @@ -313,7 +371,7 @@ bool TaskQueue::Push(std::unique_ptr task) { return true; } -std::unique_ptr TaskQueue::Pop(void) { +std::unique_ptr TaskQueue::Pop() { Mutex::ScopedLock scoped_lock(lock_); if (queue_.empty()) { @@ -325,7 +383,7 @@ std::unique_ptr TaskQueue::Pop(void) { return task; } -std::unique_ptr TaskQueue::BlockingPop(void) { +std::unique_ptr TaskQueue::BlockingPop() { Mutex::ScopedLock scoped_lock(lock_); while (queue_.empty() && !stopped_) { @@ -341,7 +399,7 @@ std::unique_ptr TaskQueue::BlockingPop(void) { return result; } -void TaskQueue::NotifyOfCompletion(void) { +void TaskQueue::NotifyOfCompletion() { Mutex::ScopedLock scoped_lock(lock_); outstanding_tasks_--; CHECK_GE(outstanding_tasks_, 0); @@ -350,7 +408,7 @@ void TaskQueue::NotifyOfCompletion(void) { } } -void TaskQueue::BlockingDrain(void) { +void TaskQueue::BlockingDrain() { Mutex::ScopedLock scoped_lock(lock_); while (outstanding_tasks_) { tasks_drained_.Wait(scoped_lock); @@ -358,13 +416,13 @@ void TaskQueue::BlockingDrain(void) { LOG("TaskQueue::BlockingDrain: Fully drained\n"); } -void TaskQueue::Stop(void) { +void TaskQueue::Stop() { Mutex::ScopedLock scoped_lock(lock_); stopped_ = true; task_available_.Broadcast(scoped_lock); } -int TaskQueue::Length(void) const { +int TaskQueue::Length() const { Mutex::ScopedLock scoped_lock(lock_); return queue_.size(); } @@ -373,50 +431,14 @@ int TaskQueue::Length(void) const { * Threadpool ***************/ -Threadpool::Threadpool(int threadpool_size) - : threadpool_size_(threadpool_size) { - LOG("Threadpool::Threadpool: threadpool_size_ %d\n", threadpool_size_); - if (threadpool_size_ <= 0) { - // Check UV_THREADPOOL_SIZE - char buf[32]; - size_t buf_size = sizeof(buf); - if (uv_os_getenv("UV_THREADPOOL_SIZE", buf, &buf_size) == 0) { - threadpool_size_ = atoi(buf); - } - } - - if (threadpool_size_ <= 0) { - // No/bad UV_THREADPOOL_SIZE, so take a guess. - threadpool_size_ = GoodThreadpoolSize(); - } - LOG("Threadpool::Threadpool: threadpool_size_ %d\n", threadpool_size_); - CHECK_GT(threadpool_size_, 0); - - Initialize(); -} - -int Threadpool::GoodThreadpoolSize(void) { - // Ask libuv how many cores we have. 
- uv_cpu_info_t* cpu_infos; - int count; - - if (uv_cpu_info(&cpu_infos, &count)) { - LOG("Threadpool::GoodThreadpoolSize: Huh, uv_cpu_info failed?\n"); - return 4; // Old libuv TP default. - } - - uv_free_cpu_info(cpu_infos, count); - LOG("Threadpool::GoodThreadpoolSize: cpu count %d\n", count); - return count; -} - -void Threadpool::Initialize() { +Threadpool::Threadpool(int threadpool_size) { + CHECK_GT(threadpool_size, 0); task_queue_ = std::make_shared(); worker_group_ = std::unique_ptr( - new WorkerGroup(threadpool_size_, task_queue_)); + new WorkerGroup(threadpool_size, task_queue_)); } -Threadpool::~Threadpool(void) { +Threadpool::~Threadpool() { // Block future Push's. task_queue_->Stop(); // As worker_group_ leaves scope, it drains tq and Join's its threads. @@ -434,15 +456,15 @@ std::shared_ptr Threadpool::Post(std::unique_ptr task) { return task_state; } -int Threadpool::QueueLength(void) const { +int Threadpool::QueueLength() const { return task_queue_->Length(); } -void Threadpool::BlockingDrain(void) { +void Threadpool::BlockingDrain() { task_queue_->BlockingDrain(); } -int Threadpool::NWorkers(void) const { +int Threadpool::NWorkers() const { return worker_group_->Size(); } diff --git a/src/node_threadpool.h b/src/node_threadpool.h index 4fda68ad304775..179d9514118ec6 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -14,10 +14,12 @@ namespace node { namespace threadpool { -// Consumer of Threadpool. +class NodeThreadpool; + +// Consumer of NodeThreadpool. class LibuvExecutor; -// Threadpool components. +// NodeThreadpool components. class Threadpool; class TaskQueue; @@ -52,7 +54,7 @@ class Worker { // Starts a thread and returns control to the caller. void Start(); // Join the internal uv_thread_t. - void Join(void); + void Join(); protected: // Override e.g. to implement cancellation. @@ -163,7 +165,7 @@ class Task { // internal Node.js Threadpool. class LibuvExecutor { public: - explicit LibuvExecutor(std::shared_ptr tp); + explicit LibuvExecutor(std::shared_ptr tp); uv_executor_t* GetExecutor(); @@ -179,7 +181,7 @@ class LibuvExecutor { static int uv_executor_cancel(uv_executor_t* executor, uv_work_t* req); - std::shared_ptr tp_; + std::shared_ptr tp_; uv_executor_t executor_; // executor_.data points to an // instance of LibuvExecutor. }; @@ -201,21 +203,21 @@ class TaskQueue { bool Push(std::unique_ptr task); // Non-blocking Pop. Returns nullptr if queue is empty. - std::unique_ptr Pop(void); + std::unique_ptr Pop(); // Blocking Pop. Returns nullptr if queue is empty or Stop'd. - std::unique_ptr BlockingPop(void); + std::unique_ptr BlockingPop(); // Workers should call this after completing a Pop'd Task. - void NotifyOfCompletion(void); + void NotifyOfCompletion(); // Block until there are no Tasks pending or scheduled. - void BlockingDrain(void); + void BlockingDrain(); // Subsequent Push() will fail. // Pop calls will return nullptr once queue is drained. void Stop(); - int Length(void) const; + int Length() const; private: // Synchronization. @@ -240,33 +242,63 @@ class TaskQueue { // - Elastic workers (scale up and down) class Threadpool { public: - // If threadpool_size <= 0: - // - checks UV_THREADPOOL_SIZE to determine threadpool_size - // - if this is not set, takes a guess - // TODO(davisjam): Ponder --v8-pool-size and UV_THREADPOOL_SIZE. explicit Threadpool(int threadpool_size); // Waits for queue to drain. - ~Threadpool(void); + ~Threadpool(); // Returns a TaskState by which caller can track the progress of the Task. 
// Caller can also use the TaskState to cancel the Task. // Returns nullptr on failure. std::shared_ptr Post(std::unique_ptr task); - int QueueLength(void) const; // Block until there are no tasks pending or scheduled in the TP. - void BlockingDrain(void); + void BlockingDrain(); - int NWorkers(void) const; + // Status monitoring + int QueueLength() const; - private: - int GoodThreadpoolSize(void); - void Initialize(void); + // Attributes + int NWorkers() const; - int threadpool_size_; + protected: + void Initialize(); + + private: std::shared_ptr task_queue_; std::unique_ptr worker_group_; }; +// Public face of the threadpool. +// Subclass for customized threadpool(s). +class NodeThreadpool { + public: + // If threadpool_size <= 0: + // - checks UV_THREADPOOL_SIZE to determine threadpool_size + // - if this is not set, takes a guess + // TODO(davisjam): Ponder --v8-pool-size and UV_THREADPOOL_SIZE. + explicit NodeThreadpool(int threadpool_size); + // Waits for queue to drain. + ~NodeThreadpool(); + + // Returns a TaskState by which caller can track the progress of the Task. + // Caller can also use the TaskState to cancel the Task. + // Returns nullptr on failure. + virtual std::shared_ptr Post(std::unique_ptr task); + // Block until there are no tasks pending or scheduled in the TP. + virtual void BlockingDrain(); + + // Status monitoring + virtual int QueueLength() const; + + // Attributes + virtual int NWorkers() const; + + protected: + virtual int GoodCPUThreadpoolSize(); + + private: + std::shared_ptr tp_; +}; + } // namespace threadpool } // namespace node diff --git a/test/cctest/test_threadpool.cc b/test/cctest/test_threadpool.cc index 31bd533d3c2de6..54397c26a1be0b 100644 --- a/test/cctest/test_threadpool.cc +++ b/test/cctest/test_threadpool.cc @@ -20,6 +20,7 @@ using node::threadpool::TaskQueue; using node::threadpool::Worker; using node::threadpool::WorkerGroup; using node::threadpool::Threadpool; +using node::threadpool::NodeThreadpool; // Thread-safe counters static std::atomic testTaskRunCount(0); @@ -173,7 +174,7 @@ TEST_F(ThreadpoolTest, ThreadpoolEndToEnd) { int nTasks = 100; { - std::unique_ptr tp(new Threadpool(-1)); + std::unique_ptr tp(new Threadpool(10)); // Reset globals testTaskRunCount = 0; @@ -197,7 +198,7 @@ TEST_F(ThreadpoolTest, ThreadpoolBlockingDrain) { // Enough that we will probably have to wait for them to finish. int nTasks = 10000; - std::unique_ptr tp(new Threadpool(-1)); + std::unique_ptr tp(new Threadpool(10)); // Reset globals testTaskRunCount = 0; @@ -214,25 +215,6 @@ TEST_F(ThreadpoolTest, ThreadpoolBlockingDrain) { EXPECT_EQ(testTaskDestroyedCount, nTasks); } -TEST_F(ThreadpoolTest, ThreadpoolSize) { - char* old = getenv("UV_THREADPOOL_SIZE"); - - int tp_size = 17; - char tp_size_str[4]; - snprintf(tp_size_str, sizeof(tp_size_str), "%d", tp_size); - - setenv("UV_THREADPOOL_SIZE", tp_size_str, 1); - std::unique_ptr tp(new Threadpool(-1)); - EXPECT_EQ(tp->NWorkers(), tp_size); - - // Restore previous value of UV_THREADPOOL_SIZE. - if (old) { - setenv("UV_THREADPOOL_SIZE", old, 1); - } else { - unsetenv("UV_THREADPOOL_SIZE"); - } -} - TEST_F(ThreadpoolTest, ThreadpoolCancel) { int nTasks = 10000; int nCancelled = 0; @@ -274,3 +256,46 @@ TEST_F(ThreadpoolTest, ThreadpoolCancel) { // We used SlowTestTasks so we should have managed to cancel at least 1. 
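  // (Strictly timing-dependent, but with 10000 slow Tasks on a
  // one-Worker pool the cancel loop is effectively certain to win
  // at least once.)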
EXPECT_GT(nCancelled, 0); } + +TEST_F(ThreadpoolTest, NodeThreadpoolEndToEnd) { + int nTasks = 100; + + { + std::unique_ptr tp(new NodeThreadpool(10)); + + // Reset globals + testTaskRunCount = 0; + testTaskDestroyedCount = 0; + + EXPECT_GT(tp->NWorkers(), 0); + + // Push + EXPECT_EQ(tp->QueueLength(), 0); + for (int i = 0; i < nTasks; i++) { + tp->Post(std::unique_ptr(new FastTestTask())); + } + } + // tp leaves scope. In destructor it drains the queue. + + EXPECT_EQ(testTaskRunCount, nTasks); + EXPECT_EQ(testTaskDestroyedCount, nTasks); +} + +TEST_F(ThreadpoolTest, NodeThreadpoolSize) { + char* old = getenv("UV_THREADPOOL_SIZE"); + + int tp_size = 17; + char tp_size_str[4]; + snprintf(tp_size_str, sizeof(tp_size_str), "%d", tp_size); + + setenv("UV_THREADPOOL_SIZE", tp_size_str, 1); + std::unique_ptr tp(new NodeThreadpool(-1)); + EXPECT_EQ(tp->NWorkers(), tp_size); + + // Restore previous value of UV_THREADPOOL_SIZE. + if (old) { + setenv("UV_THREADPOOL_SIZE", old, 1); + } else { + unsetenv("UV_THREADPOOL_SIZE"); + } +} From 223cfc4a6a47f9010630524adb9d56090eac9aaf Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Sun, 9 Sep 2018 14:58:03 -0400 Subject: [PATCH 16/31] PTP: Subclass NodeThreadpool for CPU/IO split --- src/node.cc | 13 +++++++- src/node_threadpool.cc | 71 ++++++++++++++++++++++++++++++++++++++++-- src/node_threadpool.h | 28 ++++++++++++++++- 3 files changed, 107 insertions(+), 5 deletions(-) diff --git a/src/node.cc b/src/node.cc index 3a9f0585461d31..f63e45e1c02462 100644 --- a/src/node.cc +++ b/src/node.cc @@ -286,7 +286,18 @@ class NodeTraceStateObserver : static struct { void Initialize(void) { - tp_ = std::make_shared(4); + // What kind of threadpool is desired? + char *tp_type = getenv("NODE_THREADPOOL_TYPE"); + if (!tp_type || strcmp(tp_type, "SHARED") == 0) { + tp_ = std::make_shared(-1); + } else if (strcmp(tp_type, "SPLIT_IO_CPU") == 0) { + tp_ = std::make_shared(-1, -1); + } else if (strcmp(tp_type, "SPLIT_V8_LIBUV") == 0) { + CHECK(!"Not yet supported"); + } else { + CHECK(0); + } + libuv_executor_ = std::unique_ptr( new threadpool::LibuvExecutor(tp_)); } diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc index cc0821c77e8827..bcfc4e1f69c49f 100644 --- a/src/node_threadpool.cc +++ b/src/node_threadpool.cc @@ -26,6 +26,9 @@ namespace threadpool { * NodeThreadpool ***************/ +NodeThreadpool::NodeThreadpool() : tp_(nullptr) { +} + NodeThreadpool::NodeThreadpool(int threadpool_size) { if (threadpool_size <= 0) { // Check UV_THREADPOOL_SIZE @@ -40,7 +43,7 @@ NodeThreadpool::NodeThreadpool(int threadpool_size) { // No/bad UV_THREADPOOL_SIZE, so take a guess. threadpool_size = GoodCPUThreadpoolSize(); } - LOG("Threadpool::Threadpool: threadpool_size %d\n", threadpool_size); + LOG("NodeThreadpool::NodeThreadpool: threadpool_size %d\n", threadpool_size); CHECK_GT(threadpool_size, 0); tp_ = std::make_shared(threadpool_size); @@ -52,12 +55,12 @@ int NodeThreadpool::GoodCPUThreadpoolSize(void) { int cpu_count; if (uv_cpu_info(&cpu_infos, &cpu_count)) { - LOG("Threadpool::GoodThreadpoolSize: Huh, uv_cpu_info failed?\n"); + LOG("NodeThreadpool::GoodCPUThreadpoolSize: Huh, uv_cpu_info failed?\n"); return 4; // Old libuv TP default. 
} uv_free_cpu_info(cpu_infos, cpu_count); - LOG("Threadpool::GoodThreadpoolSize: cpu_count %d\n", cpu_count); + LOG("NodeThreadpool::GoodCPUThreadpoolSize: cpu_count %d\n", cpu_count); return cpu_count - 1; // Leave one core for main loop } @@ -80,6 +83,68 @@ int NodeThreadpool::NWorkers() const { return tp_->NWorkers(); } +/************** + * SplitTaskTypeNodeThreadpool + ***************/ + +SplitTaskTypeNodeThreadpool::SplitTaskTypeNodeThreadpool(int cpu_pool_size, int io_pool_size) { + if (cpu_pool_size <= 0) { + // Check UV_THREADPOOL_SIZE + char buf[32]; + size_t buf_size = sizeof(buf); + if (uv_os_getenv("UV_THREADPOOL_SIZE", buf, &buf_size) == 0) { + cpu_pool_size = atoi(buf); + } + } + + if (cpu_pool_size <= 0) { + // No/bad UV_THREADPOOL_SIZE, so take a guess. + cpu_pool_size = GoodCPUThreadpoolSize(); + } + LOG("SplitTaskTypeNodeThreadpool::SplitTaskTypeNodeThreadpool: cpu_pool_size %d\n", cpu_pool_size); + CHECK_GT(cpu_pool_size, 0); + + cpu_tp_ = std::make_shared(cpu_pool_size); + + if (io_pool_size < 0) { + io_pool_size = 4 * cpu_pool_size; + } + LOG("SplitTaskTypeNodeThreadpool::SplitTaskTypeNodeThreadpool: io_pool_size %d\n", io_pool_size); + CHECK_GT(io_pool_size, 0); + io_tp_ = std::make_shared(io_pool_size); +} + +SplitTaskTypeNodeThreadpool::~SplitTaskTypeNodeThreadpool() { +} + +std::shared_ptr SplitTaskTypeNodeThreadpool::Post(std::unique_ptr task) { + switch (task->details_.type) { + case TaskDetails::MEMORY: + case TaskDetails::CPU: + case TaskDetails::CPU_SLOW: + case TaskDetails::CPU_FAST: + case TaskDetails::V8: + LOG("SplitTaskTypeNodeThreadpool::Post: CPU\n"); + return cpu_tp_->Post(std::move(task)); + default: + LOG("SplitTaskTypeNodeThreadpool::Post: IO\n"); + return io_tp_->Post(std::move(task)); + } +} + +void SplitTaskTypeNodeThreadpool::BlockingDrain() { + io_tp_->BlockingDrain(); + cpu_tp_->BlockingDrain(); +} + +int SplitTaskTypeNodeThreadpool::QueueLength() const { + return cpu_tp_->QueueLength() + io_tp_->QueueLength(); +} + +int SplitTaskTypeNodeThreadpool::NWorkers() const { + return cpu_tp_->NWorkers() + io_tp_->NWorkers(); +} + /************** * WorkerGroup ***************/ diff --git a/src/node_threadpool.h b/src/node_threadpool.h index 179d9514118ec6..7f34e8992519c5 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -271,6 +271,10 @@ class Threadpool { // Subclass for customized threadpool(s). class NodeThreadpool { public: + // TODO(davisjam): Is this OK? It permits sub-classing. + // But maybe we should take an interface approach and have all of these virtual + // methods be pure virtual? + NodeThreadpool(); // If threadpool_size <= 0: // - checks UV_THREADPOOL_SIZE to determine threadpool_size // - if this is not set, takes a guess @@ -293,12 +297,34 @@ class NodeThreadpool { virtual int NWorkers() const; protected: - virtual int GoodCPUThreadpoolSize(); + int GoodCPUThreadpoolSize(); private: std::shared_ptr tp_; }; +// Splits based on task type: CPU or I/O +class SplitTaskTypeNodeThreadpool : public NodeThreadpool { + public: + // If cpu_pool_size == -1, check UV_THREADPOOL_SIZE and then guess + // based on # cores. + // If io_pool_size == -1, uses 4x cpu_pool_size. + explicit SplitTaskTypeNodeThreadpool(int cpu_pool_size, int io_pool_size); + // Waits for queue to drain. 
+ ~SplitTaskTypeNodeThreadpool(); + + virtual std::shared_ptr Post(std::unique_ptr task) override; + virtual void BlockingDrain() override; + + virtual int QueueLength() const override; + + virtual int NWorkers() const override; + + private: + std::shared_ptr cpu_tp_; + std::shared_ptr io_tp_; +}; + } // namespace threadpool } // namespace node From f462078e450b8826b3081ccd8dbc29ca667d76c3 Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Sun, 9 Sep 2018 15:34:09 -0400 Subject: [PATCH 17/31] PTP: Generalize subclass type Splitting by I/O and CPU is a form of PartitionedNodeThreadpool. A PartitionedNodeThreadpool basically follows the "threadpool handles" proposed by saghul for libuv, but implemented in the Node-land executor. --- src/node.cc | 3 +- src/node_threadpool.cc | 113 +++++++++++++++++++++++++++-------------- src/node_threadpool.h | 38 ++++++++++---- 3 files changed, 105 insertions(+), 49 deletions(-) diff --git a/src/node.cc b/src/node.cc index f63e45e1c02462..72580d962e0cd8 100644 --- a/src/node.cc +++ b/src/node.cc @@ -291,7 +291,8 @@ static struct { if (!tp_type || strcmp(tp_type, "SHARED") == 0) { tp_ = std::make_shared(-1); } else if (strcmp(tp_type, "SPLIT_IO_CPU") == 0) { - tp_ = std::make_shared(-1, -1); + std::vector sizes{-1, -1}; + tp_ = std::make_shared(sizes); } else if (strcmp(tp_type, "SPLIT_V8_LIBUV") == 0) { CHECK(!"Not yet supported"); } else { diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc index bcfc4e1f69c49f..721e89d86fd6f4 100644 --- a/src/node_threadpool.cc +++ b/src/node_threadpool.cc @@ -84,67 +84,104 @@ int NodeThreadpool::NWorkers() const { } /************** - * SplitTaskTypeNodeThreadpool + * PartitionedNodeThreadpool ***************/ -SplitTaskTypeNodeThreadpool::SplitTaskTypeNodeThreadpool(int cpu_pool_size, int io_pool_size) { - if (cpu_pool_size <= 0) { - // Check UV_THREADPOOL_SIZE +PartitionedNodeThreadpool::PartitionedNodeThreadpool() { + LOG("PartitionedNodeThreadpool::PartitionedNodeThreadpool: default constructor\n"); +} + +PartitionedNodeThreadpool::PartitionedNodeThreadpool(std::vector tp_sizes) { + LOG("PartitionedNodeThreadpool::PartitionedNodeThreadpool: vector constructor\n"); + Initialize(tp_sizes); +} + +void PartitionedNodeThreadpool::Initialize(const std::vector& tp_sizes) { + int i = 0; + for (auto size : tp_sizes) { + LOG("PartitionedNodeThreadpool::Initialize: tp %d: %d threads\n", i, size); + std::shared_ptr tp = std::make_shared(size); + tps_.push_back(tp); + i++; + } +} + +PartitionedNodeThreadpool::~PartitionedNodeThreadpool() { +} + +void PartitionedNodeThreadpool::BlockingDrain() { + for (auto &tp : tps_) { + tp->BlockingDrain(); + } +} + +int PartitionedNodeThreadpool::QueueLength() const { + int sum = 0; + for (auto &tp : tps_) { + sum += tp->QueueLength(); + } + return sum; +} + +int PartitionedNodeThreadpool::NWorkers() const { + int sum = 0; + for (auto &tp : tps_) { + sum += tp->NWorkers(); + } + return sum; +} + +/************** + * ByTaskTypePartitionedNodeThreadpool + ***************/ + +ByTaskTypePartitionedNodeThreadpool::ByTaskTypePartitionedNodeThreadpool( + std::vector tp_sizes) : CPU_TP_IX(0), IO_TP_IX(1) { + CHECK_EQ(tp_sizes.size(), 2); + + // CPU TP size + if (tp_sizes[CPU_TP_IX] <= 0) { char buf[32]; size_t buf_size = sizeof(buf); - if (uv_os_getenv("UV_THREADPOOL_SIZE", buf, &buf_size) == 0) { - cpu_pool_size = atoi(buf); + if (uv_os_getenv("NODE_THREADPOOL_CPU_TP_SIZE", buf, &buf_size) == 0) { + tp_sizes[CPU_TP_IX] = atoi(buf); } } - - if (cpu_pool_size <= 0) { - // No/bad 
UV_THREADPOOL_SIZE, so take a guess. - cpu_pool_size = GoodCPUThreadpoolSize(); + if (tp_sizes[CPU_TP_IX] <= 0) { + // No/bad env var, so take a guess. + tp_sizes[CPU_TP_IX] = GoodCPUThreadpoolSize(); } - LOG("SplitTaskTypeNodeThreadpool::SplitTaskTypeNodeThreadpool: cpu_pool_size %d\n", cpu_pool_size); - CHECK_GT(cpu_pool_size, 0); + LOG("ByTaskTypePartitionedNodeThreadpool::ByTaskTypePartitionedNodeThreadpool: cpu_pool_size %d\n", tp_sizes[CPU_TP_IX]); + CHECK_GT(tp_sizes[CPU_TP_IX], 0); - cpu_tp_ = std::make_shared(cpu_pool_size); - - if (io_pool_size < 0) { - io_pool_size = 4 * cpu_pool_size; + // IO TP size + if (tp_sizes[IO_TP_IX] < 0) { + tp_sizes[IO_TP_IX] = 1 * tp_sizes[CPU_TP_IX]; } - LOG("SplitTaskTypeNodeThreadpool::SplitTaskTypeNodeThreadpool: io_pool_size %d\n", io_pool_size); - CHECK_GT(io_pool_size, 0); - io_tp_ = std::make_shared(io_pool_size); + LOG("ByTaskTypePartitionedNodeThreadpool::ByTaskTypePartitionedNodeThreadpool: io_pool_size %d\n", tp_sizes[IO_TP_IX]); + CHECK_GT(tp_sizes[IO_TP_IX], 0); + + Initialize(tp_sizes); } -SplitTaskTypeNodeThreadpool::~SplitTaskTypeNodeThreadpool() { +ByTaskTypePartitionedNodeThreadpool::~ByTaskTypePartitionedNodeThreadpool() { } -std::shared_ptr SplitTaskTypeNodeThreadpool::Post(std::unique_ptr task) { +std::shared_ptr ByTaskTypePartitionedNodeThreadpool::Post(std::unique_ptr task) { switch (task->details_.type) { case TaskDetails::MEMORY: case TaskDetails::CPU: case TaskDetails::CPU_SLOW: case TaskDetails::CPU_FAST: case TaskDetails::V8: - LOG("SplitTaskTypeNodeThreadpool::Post: CPU\n"); - return cpu_tp_->Post(std::move(task)); + LOG("ByTaskTypePartitionedNodeThreadpool::Post: CPU\n"); + return tps_[CPU_TP_IX]->Post(std::move(task)); default: - LOG("SplitTaskTypeNodeThreadpool::Post: IO\n"); - return io_tp_->Post(std::move(task)); + LOG("ByTaskTypePartitionedNodeThreadpool::Post: IO\n"); + return tps_[IO_TP_IX]->Post(std::move(task)); } } -void SplitTaskTypeNodeThreadpool::BlockingDrain() { - io_tp_->BlockingDrain(); - cpu_tp_->BlockingDrain(); -} - -int SplitTaskTypeNodeThreadpool::QueueLength() const { - return cpu_tp_->QueueLength() + io_tp_->QueueLength(); -} - -int SplitTaskTypeNodeThreadpool::NWorkers() const { - return cpu_tp_->NWorkers() + io_tp_->NWorkers(); -} - /************** * WorkerGroup ***************/ diff --git a/src/node_threadpool.h b/src/node_threadpool.h index 7f34e8992519c5..6aaaa47844eacc 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -303,26 +303,44 @@ class NodeThreadpool { std::shared_ptr tp_; }; -// Splits based on task type: CPU or I/O -class SplitTaskTypeNodeThreadpool : public NodeThreadpool { +// Maintains multiple Threadpools +class PartitionedNodeThreadpool : public NodeThreadpool { public: - // If cpu_pool_size == -1, check UV_THREADPOOL_SIZE and then guess - // based on # cores. - // If io_pool_size == -1, uses 4x cpu_pool_size. - explicit SplitTaskTypeNodeThreadpool(int cpu_pool_size, int io_pool_size); + // So sub-classes can define their own constructors. + PartitionedNodeThreadpool(); + // Create tp_sizes.size() TPs with these sizes. + explicit PartitionedNodeThreadpool(std::vector tp_sizes); // Waits for queue to drain. 
-  ~SplitTaskTypeNodeThreadpool();
+  ~PartitionedNodeThreadpool();
 
-  virtual std::shared_ptr<TaskState> Post(std::unique_ptr<Task> task) override;
+  virtual std::shared_ptr<TaskState> Post(std::unique_ptr<Task> task) = 0;
   virtual void BlockingDrain() override;
 
   virtual int QueueLength() const override;
 
   virtual int NWorkers() const override;
 
+ protected:
+  // Permits sub-classes to compute tp_sizes as needed.
+  void Initialize(const std::vector<int>& tp_sizes);
+  std::vector<std::shared_ptr<Threadpool>> tps_;
+};
+
+// Splits based on task type: CPU or I/O
+class ByTaskTypePartitionedNodeThreadpool : public PartitionedNodeThreadpool {
+ public:
+  // tp_sizes[0] is CPU, tp_sizes[1] is I/O
+  // tp_sizes[0] -1: reads NODE_THREADPOOL_CPU_TP_SIZE, or guesses based on # cores
+  // tp_sizes[1] -1: reads NODE_THREADPOOL_IO_TP_SIZE, or guesses based on # cores
+  explicit ByTaskTypePartitionedNodeThreadpool(std::vector<int> tp_sizes);
+  // Waits for queue to drain.
+  ~ByTaskTypePartitionedNodeThreadpool();
+
+  virtual std::shared_ptr<TaskState> Post(std::unique_ptr<Task> task) override;
+
  private:
-  std::shared_ptr<Threadpool> cpu_tp_;
-  std::shared_ptr<Threadpool> io_tp_;
+  int CPU_TP_IX;
+  int IO_TP_IX;
 };
 
 } // namespace threadpool

From 153dcdb352b77cec4b9cb3ea565d9020e62c0974 Mon Sep 17 00:00:00 2001
From: Jamie Davis
Date: Sun, 9 Sep 2018 15:59:05 -0400
Subject: [PATCH 18/31] PTP: ByTaskOriginPartitionedNodeThreadpool

This TP partitions the node TP by origin: V8 or libuv.

This TP mirrors the baseline Node.js TP, with extra overheads due to
the indirection and the additional memory de/allocation involved.
---
 src/node.cc                    |  7 +--
 src/node_platform.cc           |  4 +-
 src/node_threadpool.cc         | 80 +++++++++++++++++++++++++++++++++---
 src/node_threadpool.h          | 42 ++++++++++++++++----
 test/cctest/test_threadpool.cc |  8 +++-
 5 files changed, 120 insertions(+), 21 deletions(-)

diff --git a/src/node.cc b/src/node.cc
index 72580d962e0cd8..d16aea5b8b090a 100644
--- a/src/node.cc
+++ b/src/node.cc
@@ -290,11 +290,12 @@ static struct {
     char *tp_type = getenv("NODE_THREADPOOL_TYPE");
     if (!tp_type || strcmp(tp_type, "SHARED") == 0) {
       tp_ = std::make_shared<NodeThreadpool>(-1);
-    } else if (strcmp(tp_type, "SPLIT_IO_CPU") == 0) {
+    } else if (strcmp(tp_type, "SPLIT_BY_TYPE") == 0) {
       std::vector<int> sizes{-1, -1};
       tp_ = std::make_shared<ByTaskTypePartitionedNodeThreadpool>(sizes);
-    } else if (strcmp(tp_type, "SPLIT_V8_LIBUV") == 0) {
-      CHECK(!"Not yet supported");
+    } else if (strcmp(tp_type, "SPLIT_BY_ORIGIN") == 0) {
+      std::vector<int> sizes{-1, -1};
+      tp_ = std::make_shared<ByTaskOriginPartitionedNodeThreadpool>(sizes);
     } else {
       CHECK(0);
     }
diff --git a/src/node_platform.cc b/src/node_platform.cc
index ea1968b108e7b9..6f13461ee9b007 100644
--- a/src/node_platform.cc
+++ b/src/node_platform.cc
@@ -21,7 +21,9 @@ class V8Task : public threadpool::Task {
  public:
   explicit V8Task(std::unique_ptr<v8::Task> task) {
     task_ = std::move(task);
-    details_.type = threadpool::TaskDetails::V8;
+    details_.origin = threadpool::TaskDetails::V8;
+    details_.type = threadpool::TaskDetails::CPU;
+    details_.size = threadpool::TaskDetails::TASK_SIZE_UNKNOWN;
     details_.priority = -1;
     details_.cancelable = -1;
   }
diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc
index 721e89d86fd6f4..c3dccd970fa3ae 100644
--- a/src/node_threadpool.cc
+++ b/src/node_threadpool.cc
@@ -155,6 +155,13 @@ ByTaskTypePartitionedNodeThreadpool::ByTaskTypePartitionedNodeThreadpool(
   CHECK_GT(tp_sizes[CPU_TP_IX], 0);
 
   // IO TP size
+  if (tp_sizes[IO_TP_IX] <= 0) {
+    char buf[32];
+    size_t buf_size = sizeof(buf);
+    if (uv_os_getenv("NODE_THREADPOOL_IO_TP_SIZE", buf, &buf_size) == 0) {
+      tp_sizes[IO_TP_IX] = atoi(buf);
+    }
+  }
   if (tp_sizes[IO_TP_IX] < 0) {
     tp_sizes[IO_TP_IX] = 1 * tp_sizes[CPU_TP_IX];
} @@ -169,11 +176,8 @@ ByTaskTypePartitionedNodeThreadpool::~ByTaskTypePartitionedNodeThreadpool() { std::shared_ptr ByTaskTypePartitionedNodeThreadpool::Post(std::unique_ptr task) { switch (task->details_.type) { - case TaskDetails::MEMORY: case TaskDetails::CPU: - case TaskDetails::CPU_SLOW: - case TaskDetails::CPU_FAST: - case TaskDetails::V8: + case TaskDetails::MEMORY: LOG("ByTaskTypePartitionedNodeThreadpool::Post: CPU\n"); return tps_[CPU_TP_IX]->Post(std::move(task)); default: @@ -182,6 +186,60 @@ std::shared_ptr ByTaskTypePartitionedNodeThreadpool::Post(std::unique } } +/************** + * ByTaskOriginPartitionedNodeThreadpool + ***************/ + +ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool( + std::vector tp_sizes) : V8_TP_IX(0), LIBUV_TP_IX(1) { + CHECK_EQ(tp_sizes.size(), 2); + + // V8 TP size + if (tp_sizes[V8_TP_IX] <= 0) { + char buf[32]; + size_t buf_size = sizeof(buf); + if (uv_os_getenv("NODE_THREADPOOL_V8_TP_SIZE", buf, &buf_size) == 0) { + tp_sizes[V8_TP_IX] = atoi(buf); + } + } + if (tp_sizes[V8_TP_IX] <= 0) { + // No/bad env var, so take a guess. + tp_sizes[V8_TP_IX] = GoodCPUThreadpoolSize(); + } + LOG("ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool: v8 tp size %d\n", tp_sizes[V8_TP_IX]); + CHECK_GT(tp_sizes[V8_TP_IX], 0); + + // LIBUV TP size + if (tp_sizes[LIBUV_TP_IX] <= 0) { + char buf[32]; + size_t buf_size = sizeof(buf); + if (uv_os_getenv("UV_THREADPOOL_SIZE", buf, &buf_size) == 0) { + tp_sizes[LIBUV_TP_IX] = atoi(buf); + } + } + if (tp_sizes[LIBUV_TP_IX] <= 0) { + tp_sizes[LIBUV_TP_IX] = 1 * tp_sizes[V8_TP_IX]; + } + LOG("ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool: libuv tp size %d\n", tp_sizes[LIBUV_TP_IX]); + CHECK_GT(tp_sizes[LIBUV_TP_IX], 0); + + Initialize(tp_sizes); +} + +ByTaskOriginPartitionedNodeThreadpool::~ByTaskOriginPartitionedNodeThreadpool() { +} + +std::shared_ptr ByTaskOriginPartitionedNodeThreadpool::Post(std::unique_ptr task) { + switch (task->details_.origin) { + case TaskDetails::V8: + LOG("ByTaskOriginPartitionedNodeThreadpool::Post: V8\n"); + return tps_[V8_TP_IX]->Post(std::move(task)); + default: + LOG("ByTaskOriginPartitionedNodeThreadpool::Post: LIBUV\n"); + return tps_[LIBUV_TP_IX]->Post(std::move(task)); + } +} + /************** * WorkerGroup ***************/ @@ -244,6 +302,11 @@ void Worker::_Run(void* data) { ***************/ Task::Task() : task_state_() { + details_.origin = TaskDetails::TASK_ORIGIN_UNKNOWN; + details_.type = TaskDetails::TASK_TYPE_UNKNOWN; + details_.size = TaskDetails::TASK_SIZE_UNKNOWN; + details_.priority = -1; + details_.cancelable = false; } void Task::SetTaskState(std::shared_ptr task_state) { @@ -335,7 +398,10 @@ class LibuvTask : public Task { CHECK(req_); req_->reserved[0] = nullptr; - // Fill in TaskDetails based on opts. 
+ details_.origin = TaskDetails::LIBUV; + details_.size = TaskDetails::TASK_SIZE_UNKNOWN; + + // type if (opts) { switch (opts->type) { case UV_WORK_FS: @@ -351,13 +417,13 @@ class LibuvTask : public Task { details_.type = TaskDetails::CPU; break; default: - details_.type = TaskDetails::UNKNOWN; + details_.type = TaskDetails::TASK_TYPE_UNKNOWN; } details_.priority = opts->priority; details_.cancelable = opts->cancelable; } else { - details_.type = TaskDetails::UNKNOWN; + details_.type = TaskDetails::TASK_TYPE_UNKNOWN; details_.priority = -1; details_.cancelable = false; } diff --git a/src/node_threadpool.h b/src/node_threadpool.h index 6aaaa47844eacc..8a45284cf0d2e3 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -67,26 +67,35 @@ class Worker { // This is basically a struct class TaskDetails { public: + enum TaskOrigin { + V8 + , LIBUV + , USER // N-API + , TASK_ORIGIN_UNKNOWN + }; + enum TaskType { FS - , FS_LIKELY_CACHED // Likely to be bound by memory or CPU - , OTHER_DISK_IO , DNS - , OTHER_NETWORK_IO , IO , MEMORY , CPU - , CPU_SLOW - , CPU_FAST - , V8 - , UNKNOWN + , TASK_TYPE_UNKNOWN + }; + + enum TaskSize { + SMALL + , LARGE + , TASK_SIZE_UNKNOWN }; + TaskOrigin origin; TaskType type; + TaskSize size; int priority; // Larger numbers signal higher priority.i // Does nothing in this class. bool cancelable; // If true, by some yet-to-be-determined mechanism we can - // cancel this Task while it is scheduled. + // cancel this Task *while* it is scheduled. }; // Each TaskState is shared by a Task and its Post()'er. @@ -343,6 +352,23 @@ class ByTaskTypePartitionedNodeThreadpool : public PartitionedNodeThreadpool { int IO_TP_IX; }; +// Splits based on task origin: V8 or libuv +class ByTaskOriginPartitionedNodeThreadpool : public PartitionedNodeThreadpool { + public: + // tp_sizes[0] is V8, tp_sizes[1] is libuv + // tp_sizes[0] -1: reads NODE_THREADPOOL_V8_TP_SIZE, or guesses based on # cores + // tp_sizes[1] -1: reads UV_THREADPOOL_SIZE, defaults to 4 + explicit ByTaskOriginPartitionedNodeThreadpool(std::vector tp_sizes); + // Waits for queue to drain. + ~ByTaskOriginPartitionedNodeThreadpool(); + + virtual std::shared_ptr Post(std::unique_ptr task) override; + + private: + int V8_TP_IX; + int LIBUV_TP_IX; +}; + } // namespace threadpool } // namespace node diff --git a/test/cctest/test_threadpool.cc b/test/cctest/test_threadpool.cc index 54397c26a1be0b..984a74bf62ea0c 100644 --- a/test/cctest/test_threadpool.cc +++ b/test/cctest/test_threadpool.cc @@ -37,7 +37,9 @@ class ThreadpoolTest : public NodeTestFixture { }; class FastTestTask : public node::threadpool::Task { public: FastTestTask() { - details_.type = TaskDetails::CPU_FAST; + details_.origin = TaskDetails::USER; + details_.type = TaskDetails::CPU; + details_.size = TaskDetails::SMALL; details_.priority = -1; details_.cancelable = false; } @@ -53,7 +55,9 @@ class FastTestTask : public node::threadpool::Task { class SlowTestTask : public node::threadpool::Task { public: SlowTestTask() { - details_.type = TaskDetails::CPU_SLOW; + details_.origin = TaskDetails::USER; + details_.type = TaskDetails::CPU; + details_.size = TaskDetails::LARGE; details_.priority = -1; details_.cancelable = false; } From 8cf91cda9c05f29ab20f032f4bb38aedd156e79f Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Mon, 10 Sep 2018 16:17:51 -0400 Subject: [PATCH 19/31] PTP: ByTaskOriginAndTypePartitonedNodeThreadpool This TP has 3 pools: V8, libuv-{CPU, IO}. 
The idea is that V8 tasks are higher-priority than libuv-CPU tasks since
they pay longer-term dividends. Proper priority scheduling is harder to
reason about than simply giving V8 and libuv-CPU tasks separate pools, so
I just used 3 pools.
---
 src/node.cc            |   9 ++--
 src/node_threadpool.cc | 120 +++++++++++++++++++++++++++++++++--------
 src/node_threadpool.h  |  34 +++++++++---
 3 files changed, 131 insertions(+), 32 deletions(-)

diff --git a/src/node.cc b/src/node.cc
index d16aea5b8b090a..c65d57cbe01516 100644
--- a/src/node.cc
+++ b/src/node.cc
@@ -290,12 +290,15 @@ static struct {
     char *tp_type = getenv("NODE_THREADPOOL_TYPE");
     if (!tp_type || strcmp(tp_type, "SHARED") == 0) {
       tp_ = std::make_shared<NodeThreadpool>(-1);
-    } else if (strcmp(tp_type, "SPLIT_BY_TYPE") == 0) {
-      std::vector<int> sizes{-1, -1};
-      tp_ = std::make_shared<ByTaskTypePartitionedNodeThreadpool>(sizes);
     } else if (strcmp(tp_type, "SPLIT_BY_ORIGIN") == 0) {
       std::vector<int> sizes{-1, -1};
       tp_ = std::make_shared<ByTaskOriginPartitionedNodeThreadpool>(sizes);
+    } else if (strcmp(tp_type, "SPLIT_BY_TYPE") == 0) {
+      std::vector<int> sizes{-1, -1};
+      tp_ = std::make_shared<ByTaskTypePartitionedNodeThreadpool>(sizes);
+    } else if (strcmp(tp_type, "SPLIT_BY_ORIGIN_AND_TYPE") == 0) {
+      std::vector<int> sizes{-1, -1, -1};
+      tp_ = std::make_shared<ByTaskOriginAndTypePartitionedNodeThreadpool>(sizes);
     } else {
       CHECK(0);
     }
diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc
index c3dccd970fa3ae..963fe8b94245dc 100644
--- a/src/node_threadpool.cc
+++ b/src/node_threadpool.cc
@@ -131,6 +131,60 @@ int PartitionedNodeThreadpool::NWorkers() const {
   return sum;
 }
 
+/**************
+ * ByTaskOriginPartitionedNodeThreadpool
+ ***************/
+
+ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool(
+  std::vector<int> tp_sizes) : V8_TP_IX(0), LIBUV_TP_IX(1) {
+  CHECK_EQ(tp_sizes.size(), 2);
+
+  // V8 TP size
+  if (tp_sizes[V8_TP_IX] <= 0) {
+    char buf[32];
+    size_t buf_size = sizeof(buf);
+    if (uv_os_getenv("NODE_THREADPOOL_V8_TP_SIZE", buf, &buf_size) == 0) {
+      tp_sizes[V8_TP_IX] = atoi(buf);
+    }
+  }
+  if (tp_sizes[V8_TP_IX] <= 0) {
+    // No/bad env var, so take a guess.
+ tp_sizes[V8_TP_IX] = GoodCPUThreadpoolSize(); + } + LOG("ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool: v8 tp size %d\n", tp_sizes[V8_TP_IX]); + CHECK_GT(tp_sizes[V8_TP_IX], 0); + + // LIBUV TP size + if (tp_sizes[LIBUV_TP_IX] <= 0) { + char buf[32]; + size_t buf_size = sizeof(buf); + if (uv_os_getenv("UV_THREADPOOL_SIZE", buf, &buf_size) == 0) { + tp_sizes[LIBUV_TP_IX] = atoi(buf); + } + } + if (tp_sizes[LIBUV_TP_IX] <= 0) { + tp_sizes[LIBUV_TP_IX] = 1 * tp_sizes[V8_TP_IX]; + } + LOG("ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool: libuv tp size %d\n", tp_sizes[LIBUV_TP_IX]); + CHECK_GT(tp_sizes[LIBUV_TP_IX], 0); + + Initialize(tp_sizes); +} + +ByTaskOriginPartitionedNodeThreadpool::~ByTaskOriginPartitionedNodeThreadpool() { +} + +std::shared_ptr ByTaskOriginPartitionedNodeThreadpool::Post(std::unique_ptr task) { + switch (task->details_.origin) { + case TaskDetails::V8: + LOG("ByTaskOriginPartitionedNodeThreadpool::Post: V8\n"); + return tps_[V8_TP_IX]->Post(std::move(task)); + default: + LOG("ByTaskOriginPartitionedNodeThreadpool::Post: LIBUV\n"); + return tps_[LIBUV_TP_IX]->Post(std::move(task)); + } +} + /************** * ByTaskTypePartitionedNodeThreadpool ***************/ @@ -187,12 +241,12 @@ std::shared_ptr ByTaskTypePartitionedNodeThreadpool::Post(std::unique } /************** - * ByTaskOriginPartitionedNodeThreadpool + * ByTaskOriginAndTypePartitionedNodeThreadpool ***************/ -ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool( - std::vector tp_sizes) : V8_TP_IX(0), LIBUV_TP_IX(1) { - CHECK_EQ(tp_sizes.size(), 2); +ByTaskOriginAndTypePartitionedNodeThreadpool::ByTaskOriginAndTypePartitionedNodeThreadpool( + std::vector tp_sizes) : V8_TP_IX(0), LIBUV_CPU_TP_IX(1), LIBUV_IO_TP_IX(2) { + CHECK_EQ(tp_sizes.size(), 3); // V8 TP size if (tp_sizes[V8_TP_IX] <= 0) { @@ -209,34 +263,58 @@ ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool( LOG("ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool: v8 tp size %d\n", tp_sizes[V8_TP_IX]); CHECK_GT(tp_sizes[V8_TP_IX], 0); - // LIBUV TP size - if (tp_sizes[LIBUV_TP_IX] <= 0) { + // LIBUV-CPU TP size + if (tp_sizes[LIBUV_CPU_TP_IX] <= 0) { char buf[32]; size_t buf_size = sizeof(buf); - if (uv_os_getenv("UV_THREADPOOL_SIZE", buf, &buf_size) == 0) { - tp_sizes[LIBUV_TP_IX] = atoi(buf); + if (uv_os_getenv("NODE_THREADPOOL_UVTP_CPU_TP_SIZE", buf, &buf_size) == 0) { + tp_sizes[LIBUV_CPU_TP_IX] = atoi(buf); } } - if (tp_sizes[LIBUV_TP_IX] <= 0) { - tp_sizes[LIBUV_TP_IX] = 1 * tp_sizes[V8_TP_IX]; + if (tp_sizes[LIBUV_CPU_TP_IX] <= 0) { + // No/bad env var, so take a guess. 
+ tp_sizes[LIBUV_CPU_TP_IX] = GoodCPUThreadpoolSize(); } - LOG("ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool: libuv tp size %d\n", tp_sizes[LIBUV_TP_IX]); - CHECK_GT(tp_sizes[LIBUV_TP_IX], 0); + LOG("ByTaskOriginAndTypePartitionedNodeThreadpool::ByTaskOriginAndTypePartitionedNodeThreadpool: libuv cpu pool size %d\n", tp_sizes[LIBUV_CPU_TP_IX]); + CHECK_GT(tp_sizes[LIBUV_CPU_TP_IX], 0); + + // IO TP size + if (tp_sizes[LIBUV_IO_TP_IX] <= 0) { + char buf[32]; + size_t buf_size = sizeof(buf); + if (uv_os_getenv("NODE_THREADPOOL_UVTP_IO_TP_SIZE", buf, &buf_size) == 0) { + tp_sizes[LIBUV_IO_TP_IX] = atoi(buf); + } + } + if (tp_sizes[LIBUV_IO_TP_IX] < 0) { + tp_sizes[LIBUV_IO_TP_IX] = 1 * tp_sizes[LIBUV_CPU_TP_IX]; + } + LOG("ByTaskOriginAndTypePartitionedNodeThreadpool::ByTaskOriginAndTypePartitionedNodeThreadpool: libuv io pool size %d\n", tp_sizes[LIBUV_IO_TP_IX]); + CHECK_GT(tp_sizes[LIBUV_IO_TP_IX], 0); Initialize(tp_sizes); } -ByTaskOriginPartitionedNodeThreadpool::~ByTaskOriginPartitionedNodeThreadpool() { +ByTaskOriginAndTypePartitionedNodeThreadpool::~ByTaskOriginAndTypePartitionedNodeThreadpool() { } -std::shared_ptr ByTaskOriginPartitionedNodeThreadpool::Post(std::unique_ptr task) { - switch (task->details_.origin) { - case TaskDetails::V8: - LOG("ByTaskOriginPartitionedNodeThreadpool::Post: V8\n"); - return tps_[V8_TP_IX]->Post(std::move(task)); - default: - LOG("ByTaskOriginPartitionedNodeThreadpool::Post: LIBUV\n"); - return tps_[LIBUV_TP_IX]->Post(std::move(task)); +std::shared_ptr ByTaskOriginAndTypePartitionedNodeThreadpool::Post(std::unique_ptr task) { + if (task->details_.origin == TaskDetails::V8) { + LOG("ByTaskOriginAndTypePartitionedNodeThreadpool::Post: V8\n"); + return tps_[V8_TP_IX]->Post(std::move(task)); + } else if (task->details_.origin == TaskDetails::LIBUV) { + switch (task->details_.type) { + case TaskDetails::CPU: + case TaskDetails::MEMORY: + LOG("ByTaskTypePartitionedNodeThreadpool::Post: CPU\n"); + return tps_[LIBUV_CPU_TP_IX]->Post(std::move(task)); + default: + LOG("ByTaskOriginAndTypePartitionedNodeThreadpool::Post: I/O\n"); + return tps_[LIBUV_IO_TP_IX]->Post(std::move(task)); + } + } else { + LOG("ByTaskOriginAndTypePartitionedNodeThreadpool::Post: Unexpected origin %d. Using libuv I/O pool\n", task->details_.origin); + return tps_[LIBUV_IO_TP_IX]->Post(std::move(task)); } } diff --git a/src/node_threadpool.h b/src/node_threadpool.h index 8a45284cf0d2e3..eb7bcbfbbf9c67 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -335,6 +335,23 @@ class PartitionedNodeThreadpool : public NodeThreadpool { std::vector> tps_; }; +// Splits based on task origin: V8 or libuv +class ByTaskOriginPartitionedNodeThreadpool : public PartitionedNodeThreadpool { + public: + // tp_sizes[0] is V8, tp_sizes[1] is libuv + // tp_sizes[0] -1: reads NODE_THREADPOOL_V8_TP_SIZE, or guesses based on # cores + // tp_sizes[1] -1: reads UV_THREADPOOL_SIZE, defaults to 4 + explicit ByTaskOriginPartitionedNodeThreadpool(std::vector tp_sizes); + // Waits for queue to drain. 
+ ~ByTaskOriginPartitionedNodeThreadpool(); + + virtual std::shared_ptr Post(std::unique_ptr task) override; + + private: + int V8_TP_IX; + int LIBUV_TP_IX; +}; + // Splits based on task type: CPU or I/O class ByTaskTypePartitionedNodeThreadpool : public PartitionedNodeThreadpool { public: @@ -352,21 +369,22 @@ class ByTaskTypePartitionedNodeThreadpool : public PartitionedNodeThreadpool { int IO_TP_IX; }; -// Splits based on task origin: V8 or libuv -class ByTaskOriginPartitionedNodeThreadpool : public PartitionedNodeThreadpool { +// Splits based on task origin and type: V8 or libuv-{CPU or I/O} +class ByTaskOriginAndTypePartitionedNodeThreadpool : public PartitionedNodeThreadpool { public: - // tp_sizes[0] is V8, tp_sizes[1] is libuv - // tp_sizes[0] -1: reads NODE_THREADPOOL_V8_TP_SIZE, or guesses based on # cores - // tp_sizes[1] -1: reads UV_THREADPOOL_SIZE, defaults to 4 - explicit ByTaskOriginPartitionedNodeThreadpool(std::vector tp_sizes); + // tp_sizes[0] is V8, tp_sizes[1] is libuv-CPU, tp_sizes[2] is libuv-I/O + // tp_sizes[1] -1: reads NODE_THREADPOOL_UVTP_CPU_TP_SIZE, or guesses based on # cores + // tp_sizes[2] -1: reads NODE_THREADPOOL_UVTP_IO_TP_SIZE, or guesses based on # cores + explicit ByTaskOriginAndTypePartitionedNodeThreadpool(std::vector tp_sizes); // Waits for queue to drain. - ~ByTaskOriginPartitionedNodeThreadpool(); + ~ByTaskOriginAndTypePartitionedNodeThreadpool(); virtual std::shared_ptr Post(std::unique_ptr task) override; private: int V8_TP_IX; - int LIBUV_TP_IX; + int LIBUV_CPU_TP_IX; + int LIBUV_IO_TP_IX; }; } // namespace threadpool From 21a931a75a5d9b39fd5ade4d9d7e84b70673d45c Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Mon, 10 Sep 2018 16:57:52 -0400 Subject: [PATCH 20/31] PTP: Track Task queue and run times These are dumped in the PartitionedThreadpool d'tor. Useful for profiling an application to check its worthiness for PoolParty. --- src/node_threadpool.cc | 102 +++++++++++++++++++++++++++++++++++++---- src/node_threadpool.h | 70 ++++++++++++++++++++++++---- 2 files changed, 155 insertions(+), 17 deletions(-) diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc index 963fe8b94245dc..1dc955d8830e8f 100644 --- a/src/node_threadpool.cc +++ b/src/node_threadpool.cc @@ -10,13 +10,13 @@ // TODO(davisjam): DO NOT MERGE. Only for debugging. // TODO(davisjam): There must be a better way to do this. #define DEBUG_LOG 1 -// #undef DEBUG_LOG +//#undef DEBUG_LOG #ifdef DEBUG_LOG #include #define LOG(...) fprintf(stderr, __VA_ARGS__) #else -#define LOG(...) () 0 +#define LOG(...) (void) 0 #endif namespace node { @@ -107,6 +107,20 @@ void PartitionedNodeThreadpool::Initialize(const std::vector& tp_sizes) { } PartitionedNodeThreadpool::~PartitionedNodeThreadpool() { + // If we just return, the destructors of the tp's will drain them. + // But if we want to report meaningful statistics we must drain them first. + for (auto &tp : tps_) { + tp->BlockingDrain(); + } + + for (size_t i = 0; i < tps_.size(); i++) { + auto &tp = tps_[i]; + LOG("TP %lu taskSummaries:\n", i); + const std::vector> &summaries = tp->GetTaskSummaries(); + for (const std::unique_ptr &summary : summaries) { + LOG(" origin %d type %d queue_time %lu run_time %lu\n", summary->details_.origin, summary->details_.type, summary->time_in_queue_, summary->time_in_run_); + } + } } void PartitionedNodeThreadpool::BlockingDrain() { @@ -364,14 +378,18 @@ void Worker::_Run(void* data) { // May have been cancelled while queued. 
task_state = task->TryUpdateState(TaskState::ASSIGNED); if (task_state == TaskState::ASSIGNED) { + task->task_state_->MarkEnteredRun(); task->Run(); + task->task_state_->MarkExitedRun(); } else { CHECK_EQ(task_state, TaskState::CANCELLED); + task->task_state_->MarkEnteredRun(); + task->task_state_->MarkExitedRun(); } CHECK_EQ(task->TryUpdateState(TaskState::COMPLETED), TaskState::COMPLETED); - worker->tq_->NotifyOfCompletion(); + worker->tq_->NotifyOfCompletion(std::move(task)); } } @@ -395,12 +413,24 @@ TaskState::State Task::TryUpdateState(TaskState::State new_state) { return task_state_->TryUpdateState(new_state); } +/************** + * TaskSummary + ***************/ + +TaskSummary::TaskSummary(Task* completed_task) { + details_ = completed_task->details_; + time_in_queue_ = completed_task->task_state_->TimeInQueue(); + time_in_run_ = completed_task->task_state_->TimeInRun(); +} /************** * TaskState ***************/ -TaskState::TaskState() : state_(INITIAL) { +TaskState::TaskState() : state_(INITIAL) + , time_in_queue_(0), time_in_run_(0) + , time_entered_queue_(0), time_exited_queue_(0) + , time_entered_run_(0), time_exited_run_(0) { } TaskState::State TaskState::GetState() const { @@ -415,6 +445,23 @@ bool TaskState::Cancel() { return false; } +uint64_t TaskState::TimeInQueue() const { + Mutex::ScopedLock scoped_lock(lock_); + return time_in_queue_; +} + +uint64_t TaskState::TimeInRun() const { + Mutex::ScopedLock scoped_lock(lock_); + return time_in_run_; +} + +uint64_t TaskState::TimeInThreadpool() const { + Mutex::ScopedLock scoped_lock(lock_); + return time_in_queue_ + time_in_run_; +} + + + TaskState::State TaskState::TryUpdateState(TaskState::State new_state) { Mutex::ScopedLock scoped_lock(lock_); if (ValidStateTransition(state_, new_state)) { @@ -444,6 +491,29 @@ bool TaskState::ValidStateTransition(TaskState::State old_state, TaskState::Stat return false; } +void TaskState::MarkEnteredQueue() { + Mutex::ScopedLock scoped_lock(lock_); + time_entered_queue_ = uv_hrtime(); +} + +void TaskState::MarkExitedQueue() { + Mutex::ScopedLock scoped_lock(lock_); + time_exited_queue_ = uv_hrtime(); + CHECK_GE(time_exited_queue_, time_entered_queue_); + time_in_queue_ = time_exited_queue_ - time_entered_queue_; +} + +void TaskState::MarkEnteredRun() { + Mutex::ScopedLock scoped_lock(lock_); + time_entered_run_ = uv_hrtime(); +} + +void TaskState::MarkExitedRun() { + Mutex::ScopedLock scoped_lock(lock_); + time_exited_run_ = uv_hrtime(); + CHECK_GE(time_exited_run_, time_entered_run_); + time_in_run_ = time_exited_run_ - time_entered_run_; +} /************** * LibuvExecutor @@ -595,11 +665,12 @@ int LibuvExecutor::uv_executor_cancel(uv_executor_t* executor, TaskQueue::TaskQueue() : lock_() , task_available_(), tasks_drained_() - , queue_(), outstanding_tasks_(0), stopped_(false) { + , queue_(), outstanding_tasks_(0), stopped_(false), task_summaries_() { } bool TaskQueue::Push(std::unique_ptr task) { Mutex::ScopedLock scoped_lock(lock_); + task->task_state_->MarkEnteredQueue(); if (stopped_) { return false; @@ -625,6 +696,8 @@ std::unique_ptr TaskQueue::Pop() { } std::unique_ptr task = std::move(queue_.front()); + task->task_state_->MarkExitedQueue(); + queue_.pop(); return task; } @@ -640,18 +713,23 @@ std::unique_ptr TaskQueue::BlockingPop() { return std::unique_ptr(nullptr); } - std::unique_ptr result = std::move(queue_.front()); + std::unique_ptr task = std::move(queue_.front()); + task->task_state_->MarkExitedQueue(); + queue_.pop(); - return result; + return task; } -void 
TaskQueue::NotifyOfCompletion() { +void TaskQueue::NotifyOfCompletion(std::unique_ptr completed_task) { Mutex::ScopedLock scoped_lock(lock_); outstanding_tasks_--; CHECK_GE(outstanding_tasks_, 0); if (!outstanding_tasks_) { tasks_drained_.Broadcast(scoped_lock); } + + task_summaries_.push_back( + std::unique_ptr(new TaskSummary(completed_task.get()))); } void TaskQueue::BlockingDrain() { @@ -673,6 +751,10 @@ int TaskQueue::Length() const { return queue_.size(); } +std::vector> const& TaskQueue::GetTaskSummaries() const { + return task_summaries_; +} + /************** * Threadpool ***************/ @@ -714,5 +796,9 @@ int Threadpool::NWorkers() const { return worker_group_->Size(); } +std::vector> const& Threadpool::GetTaskSummaries() const { + return task_queue_->GetTaskSummaries(); +} + } // namespace threadpool } // namespace node diff --git a/src/node_threadpool.h b/src/node_threadpool.h index eb7bcbfbbf9c67..f20db48d87b1f5 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -28,6 +28,7 @@ class WorkerGroup; class Task; class TaskDetails; class TaskState; +class TaskSummary; class Worker; @@ -64,6 +65,17 @@ class Worker { std::shared_ptr tq_; }; +// TODO(davisjam): Who should keep track of time, and for what? +// At what level does the user want to monitor TP performance? +// Presumably they want this info via APIs in NodeThreadpool. +// Tracking it on a per-Task basis might be overkill. But on the other hand +// this would permit users to dynamically identify slower and faster tasks for us. +// Which would be cool. +// If we track this in TaskState, then Task knows about it, and can tell TaskQueue about it, +// which can propagate to Threadpool, which can propagate to NodeThreadpool. + +// TODO(davisjam): I don't like all of the 'friend class XXX' I introduced to make the time APIs compile. + // This is basically a struct class TaskDetails { public: @@ -92,7 +104,7 @@ class TaskDetails { TaskOrigin origin; TaskType type; TaskSize size; - int priority; // Larger numbers signal higher priority.i + int priority; // Larger numbers signal higher priority. // Does nothing in this class. bool cancelable; // If true, by some yet-to-be-determined mechanism we can // cancel this Task *while* it is scheduled. @@ -100,15 +112,16 @@ class TaskDetails { // Each TaskState is shared by a Task and its Post()'er. // A TaskState is a two-way communication channel: -// - The threadpool updates its State -// - The Post'er can try to Cancel it -// -// TODO(davisjam): Could add tracking of how long -// it spent in QUEUED, ASSIGNED, COMPLETED states, -// and what its total lifetime was. +// - The threadpool updates its State and TimeInX. +// - The Post'er can: +// - try to Cancel it +// - monitor how long it spends in the QUEUED and ASSIGNED states. class TaskState { - // My friends can call TryUpdateState. + // My friends may TryUpdateState, update my time, etc. friend class Task; + friend class TaskQueue; + friend class Worker; + friend class TaskSummary; public: enum State { @@ -127,6 +140,11 @@ class TaskState { // Attempt to cancel the associated Task. bool Cancel(); + // Time in nanoseconds. + uint64_t TimeInQueue() const; + uint64_t TimeInRun() const; + uint64_t TimeInThreadpool() const; + protected: // Synchronization. 
Mutex lock_; @@ -141,6 +159,20 @@ class TaskState { bool ValidStateTransition(State old_state, State new_state); State state_; + + void MarkEnteredQueue(); + void MarkExitedQueue(); + void MarkEnteredRun(); + void MarkExitedRun(); + + private: + uint64_t time_in_queue_; + uint64_t time_in_run_; + + uint64_t time_entered_queue_; + uint64_t time_exited_queue_; + uint64_t time_entered_run_; + uint64_t time_exited_run_; }; // Abstract notion of a Task. @@ -149,6 +181,11 @@ class TaskState { // - libuv async work // - User work from the N-API class Task { + // For access to task_state_'s time tracking. + friend class TaskQueue; + friend class Worker; + friend class TaskSummary; + public: // Subclasses should set details_ in their constructor. Task(); @@ -168,6 +205,15 @@ class Task { std::shared_ptr task_state_; }; +class TaskSummary { + public: + TaskSummary(Task* completed_task); + + TaskDetails details_; + uint64_t time_in_queue_; + uint64_t time_in_run_; +}; + // Shim that we plug into the libuv "pluggable TP" interface. // // Like WorkerThreadsTaskRunner, this routes libuv requests to the @@ -217,7 +263,7 @@ class TaskQueue { std::unique_ptr BlockingPop(); // Workers should call this after completing a Pop'd Task. - void NotifyOfCompletion(); + void NotifyOfCompletion(std::unique_ptr completed_task); // Block until there are no Tasks pending or scheduled. void BlockingDrain(); @@ -228,6 +274,8 @@ class TaskQueue { int Length() const; + std::vector> const& GetTaskSummaries() const; + private: // Synchronization. Mutex lock_; @@ -240,6 +288,7 @@ class TaskQueue { std::queue> queue_; int outstanding_tasks_; // Number of Tasks in non-COMPLETED states. bool stopped_; + std::vector> task_summaries_; // For statistics tracking. }; // A threadpool works on asynchronous Tasks. @@ -268,6 +317,8 @@ class Threadpool { // Attributes int NWorkers() const; + std::vector> const& GetTaskSummaries() const; + protected: void Initialize(); @@ -309,6 +360,7 @@ class NodeThreadpool { int GoodCPUThreadpoolSize(); private: + // For default implementation. std::shared_ptr tp_; }; From 076ccca06c710e65b81195443063e2aa0277e8d0 Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Tue, 11 Sep 2018 11:31:39 -0400 Subject: [PATCH 21/31] PTP: Refactor to DRY in PartitionedNodeThreadpool Make PartitionedNodeThreadpool a virtual class, with subclasses implementing a ChooseThreadpool API so that they can share a Post() implementation defined by PartitionedNodeThreadpool. 
--- src/node_threadpool.cc | 49 ++++++++++++++++++++++++------------------ src/node_threadpool.h | 18 +++++++++++----- 2 files changed, 41 insertions(+), 26 deletions(-) diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc index 1dc955d8830e8f..0acfdf13a9de3e 100644 --- a/src/node_threadpool.cc +++ b/src/node_threadpool.cc @@ -118,11 +118,18 @@ PartitionedNodeThreadpool::~PartitionedNodeThreadpool() { LOG("TP %lu taskSummaries:\n", i); const std::vector> &summaries = tp->GetTaskSummaries(); for (const std::unique_ptr &summary : summaries) { - LOG(" origin %d type %d queue_time %lu run_time %lu\n", summary->details_.origin, summary->details_.type, summary->time_in_queue_, summary->time_in_run_); + LOG(" TP %lu origin %d type %d queue_time %lu run_time %lu\n", i, summary->details_.origin, summary->details_.type, summary->time_in_queue_, summary->time_in_run_); } } } +std::shared_ptr PartitionedNodeThreadpool::Post(std::unique_ptr task) { + int tp = ChooseThreadpool(task.get()); + CHECK_GE(tp, 0); + CHECK_LT(tp, tps_.size()); + return tps_[tp]->Post(std::move(task)); +} + void PartitionedNodeThreadpool::BlockingDrain() { for (auto &tp : tps_) { tp->BlockingDrain(); @@ -188,14 +195,14 @@ ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool( ByTaskOriginPartitionedNodeThreadpool::~ByTaskOriginPartitionedNodeThreadpool() { } -std::shared_ptr ByTaskOriginPartitionedNodeThreadpool::Post(std::unique_ptr task) { +int ByTaskOriginPartitionedNodeThreadpool::ChooseThreadpool(Task* task) const { switch (task->details_.origin) { case TaskDetails::V8: - LOG("ByTaskOriginPartitionedNodeThreadpool::Post: V8\n"); - return tps_[V8_TP_IX]->Post(std::move(task)); + LOG("ByTaskOriginPartitionedNodeThreadpool::ChooseThreadpool: V8\n"); + return V8_TP_IX; default: - LOG("ByTaskOriginPartitionedNodeThreadpool::Post: LIBUV\n"); - return tps_[LIBUV_TP_IX]->Post(std::move(task)); + LOG("ByTaskOriginPartitionedNodeThreadpool::ChooseThreadpool: LIBUV\n"); + return LIBUV_TP_IX; } } @@ -242,15 +249,15 @@ ByTaskTypePartitionedNodeThreadpool::ByTaskTypePartitionedNodeThreadpool( ByTaskTypePartitionedNodeThreadpool::~ByTaskTypePartitionedNodeThreadpool() { } -std::shared_ptr ByTaskTypePartitionedNodeThreadpool::Post(std::unique_ptr task) { +int ByTaskTypePartitionedNodeThreadpool::ChooseThreadpool(Task* task) const { switch (task->details_.type) { case TaskDetails::CPU: case TaskDetails::MEMORY: - LOG("ByTaskTypePartitionedNodeThreadpool::Post: CPU\n"); - return tps_[CPU_TP_IX]->Post(std::move(task)); + LOG("ByTaskTypePartitionedNodeThreadpool::ChooseThreadpool: CPU\n"); + return CPU_TP_IX; default: - LOG("ByTaskTypePartitionedNodeThreadpool::Post: IO\n"); - return tps_[IO_TP_IX]->Post(std::move(task)); + LOG("ByTaskTypePartitionedNodeThreadpool::ChooseThreadpool: IO\n"); + return IO_TP_IX; } } @@ -312,23 +319,23 @@ ByTaskOriginAndTypePartitionedNodeThreadpool::ByTaskOriginAndTypePartitionedNode ByTaskOriginAndTypePartitionedNodeThreadpool::~ByTaskOriginAndTypePartitionedNodeThreadpool() { } -std::shared_ptr ByTaskOriginAndTypePartitionedNodeThreadpool::Post(std::unique_ptr task) { +int ByTaskOriginAndTypePartitionedNodeThreadpool::ChooseThreadpool(Task* task) const { if (task->details_.origin == TaskDetails::V8) { - LOG("ByTaskOriginAndTypePartitionedNodeThreadpool::Post: V8\n"); - return tps_[V8_TP_IX]->Post(std::move(task)); + LOG("ByTaskOriginAndTypePartitionedNodeThreadpool::ChooseThreadpool: V8\n"); + return V8_TP_IX; } else if (task->details_.origin == TaskDetails::LIBUV) { switch 
(task->details_.type) { case TaskDetails::CPU: case TaskDetails::MEMORY: - LOG("ByTaskTypePartitionedNodeThreadpool::Post: CPU\n"); - return tps_[LIBUV_CPU_TP_IX]->Post(std::move(task)); + LOG("ByTaskTypePartitionedNodeThreadpool::ChooseThreadpool: CPU\n"); + return LIBUV_CPU_TP_IX; default: - LOG("ByTaskOriginAndTypePartitionedNodeThreadpool::Post: I/O\n"); - return tps_[LIBUV_IO_TP_IX]->Post(std::move(task)); + LOG("ByTaskOriginAndTypePartitionedNodeThreadpool::ChooseThreadpool: I/O\n"); + return LIBUV_IO_TP_IX; } } else { - LOG("ByTaskOriginAndTypePartitionedNodeThreadpool::Post: Unexpected origin %d. Using libuv I/O pool\n", task->details_.origin); - return tps_[LIBUV_IO_TP_IX]->Post(std::move(task)); + LOG("ByTaskOriginAndTypePartitionedNodeThreadpool::ChooseThreadpool: Unexpected origin %d. Using libuv I/O pool\n", task->details_.origin); + return LIBUV_IO_TP_IX; } } @@ -427,7 +434,7 @@ TaskSummary::TaskSummary(Task* completed_task) { * TaskState ***************/ -TaskState::TaskState() : state_(INITIAL) +TaskState::TaskState() : lock_(), state_(INITIAL) , time_in_queue_(0), time_in_run_(0) , time_entered_queue_(0), time_exited_queue_(0) , time_entered_run_(0), time_exited_run_(0) { diff --git a/src/node_threadpool.h b/src/node_threadpool.h index f20db48d87b1f5..630c208ef3a3a6 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -255,6 +255,7 @@ class TaskQueue { TaskQueue(); // Return true if Push succeeds, else false. + // Thread-safe. bool Push(std::unique_ptr task); // Non-blocking Pop. Returns nullptr if queue is empty. @@ -304,9 +305,14 @@ class Threadpool { // Waits for queue to drain. ~Threadpool(); + // Thread-safe. // Returns a TaskState by which caller can track the progress of the Task. // Caller can also use the TaskState to cancel the Task. // Returns nullptr on failure. + // TODO(davisjam): It should not return nullptr on failure. + // Then the task would be destroyed! + // Since the underlying queues should not be Stop'd until the Threadpool d'tor, + // I think it's reasonable that Post will *never* fail. std::shared_ptr Post(std::unique_ptr task); // Block until there are no tasks pending or scheduled in the TP. void BlockingDrain(); @@ -374,7 +380,9 @@ class PartitionedNodeThreadpool : public NodeThreadpool { // Waits for queue to drain. ~PartitionedNodeThreadpool(); - virtual std::shared_ptr Post(std::unique_ptr task) =0; + virtual std::shared_ptr Post(std::unique_ptr task); + // Sub-class can use our Post, but needs to tell us which TP to use. + virtual int ChooseThreadpool(Task* task) const =0; virtual void BlockingDrain() override; virtual int QueueLength() const override; @@ -382,7 +390,7 @@ class PartitionedNodeThreadpool : public NodeThreadpool { virtual int NWorkers() const override; protected: - // Permits sub-classes to compute tp_sizes as needed. + // Sub-classes should call this after computing tp_sizes in their c'tors. void Initialize(const std::vector& tp_sizes); std::vector> tps_; }; @@ -397,7 +405,7 @@ class ByTaskOriginPartitionedNodeThreadpool : public PartitionedNodeThreadpool { // Waits for queue to drain. ~ByTaskOriginPartitionedNodeThreadpool(); - virtual std::shared_ptr Post(std::unique_ptr task) override; + int ChooseThreadpool(Task* task) const; private: int V8_TP_IX; @@ -414,7 +422,7 @@ class ByTaskTypePartitionedNodeThreadpool : public PartitionedNodeThreadpool { // Waits for queue to drain. 
   ~ByTaskTypePartitionedNodeThreadpool();
 
-  virtual std::shared_ptr<TaskState> Post(std::unique_ptr<Task> task) override;
+  int ChooseThreadpool(Task* task) const;
 
  private:
   int CPU_TP_IX;
@@ -431,7 +439,7 @@ class ByTaskOriginAndTypePartitionedNodeThrea
   // Waits for queue to drain.
   ~ByTaskOriginAndTypePartitionedNodeThreadpool();
 
-  virtual std::shared_ptr<TaskState> Post(std::unique_ptr<Task> task) override;
+  int ChooseThreadpool(Task* task) const;
 
  private:
   int V8_TP_IX;

From ec5e624d2d2381402c14201bb109e3a81f657202 Mon Sep 17 00:00:00 2001
From: Jamie Davis
Date: Tue, 11 Sep 2018 15:13:32 -0400
Subject: [PATCH 22/31] PTP: Profiling: Sample TaskQueue lengths

Sample TaskQueue lengths every X Push/Pop operations.
This lets us track dlength/dt.

Sign of derivative    Meaning
-------------------------------------------------------------
0                     Produce and consume rates are equal.
                      Queue size is static.
-                     Length is shrinking. Consuming faster than we can produce.
+                     Length is growing. Producing faster than we can consume.

Initial conditions:
Since the TaskQueue begins and ends empty, the length initially grows and
finally shrinks.

Analyzing the queue behavior:
The behavior in between program start/end is what we care about.

Case: The NodeThreadpool queue initially grows, then settles back down to
zero. This implies the NodeThreadpool is used only during a startup phase,
so tuning it won't help the performance of long-lived applications.
Possible example: This may be what Node.js servers that don't use the
threadpool look like. The V8 threadpool optimizes all the JS during
start-up, and during steady-state the threadpool goes relatively unused.
I'm not sure whether the V8 GC uses the threadpool; if it does, there may
be intermittent uses of the threadpool throughout the application
lifecycle.

Case: If the NodeThreadpool queue length is generally constant, then
tuning it may not help performance.
- We are doing work as quickly as it comes in.
- However, if the work itself is the bottleneck, then performing the work
  more quickly will help the overall performance.
Possible example: This may be what Node.js servers that use the
threadpool during request processing look like.

Case: If the NodeThreadpool queue length is sinusoidal
(grows-shrinks-grows-shrinks), then the application may have different
stages/phases that rely on the threadpool. Tuning the threadpool may be
beneficial in these cases. Lowering the amplitude of the sinusoid
(i.e. "rising to the challenge" of the influx of work) may decrease the
total cost of the corresponding application phase.
Possible example: This may be what batch-processing applications look
like. When handling a batch, you have a bunch of similar-looking tasks.
If these tasks go to the threadpool then you'll get a sinusoidal queue
length function. 'npm install' might have a profile like this, if it is
implemented as recursive calls to install the dependencies of each
dependency. But maybe it forks a child for each dependency, in which case
the union of the threadpool lengths will be sinusoidal, but each
individual pool will have a simple concave-down U-shape as it queues the
necessary work, completes it, and exits.
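To make the dlength/dt idea concrete, here is a minimal illustrative
sketch (an editorial aside, not part of this patch; Sample and
SummarizeDerivative are hypothetical names, with Sample standing in for
the QueueLengthSample records added below). It classifies successive
(time, length) samples by the sign of the discrete derivative:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Sample { uint64_t time; int length; };  // Stand-in for QueueLengthSample.

    // Count the sign of dlength across successive samples. The time field is
    // kept so intervals could also be weighted by dt if desired.
    void SummarizeDerivative(const std::vector<Sample>& samples) {
      int shrinking = 0, constant = 0, growing = 0;
      for (size_t i = 1; i < samples.size(); i++) {
        int dlength = samples[i].length - samples[i - 1].length;
        if (dlength < 0) shrinking++;
        else if (dlength > 0) growing++;
        else constant++;
      }
      // Mostly-static intervals suggest tuning won't help; a long-lived mix of
      // growing and shrinking intervals suggests the sinusoidal case.
      printf("shrinking %d static %d growing %d\n", shrinking, constant, growing);
    }

    int main() {
      std::vector<Sample> samples{{0, 0}, {10, 3}, {20, 3}, {30, 1}};
      SummarizeDerivative(samples);  // Prints: shrinking 1 static 1 growing 1
      return 0;
    }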
--- src/node_threadpool.cc | 65 ++++++++++++++++++++++++++++------ src/node_threadpool.h | 32 +++++++++++++++-- test/cctest/test_threadpool.cc | 3 ++ 3 files changed, 87 insertions(+), 13 deletions(-) diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc index 0acfdf13a9de3e..0f256a2f96a4f6 100644 --- a/src/node_threadpool.cc +++ b/src/node_threadpool.cc @@ -46,7 +46,7 @@ NodeThreadpool::NodeThreadpool(int threadpool_size) { LOG("NodeThreadpool::NodeThreadpool: threadpool_size %d\n", threadpool_size); CHECK_GT(threadpool_size, 0); - tp_ = std::make_shared(threadpool_size); + tp_ = std::make_shared(threadpool_size, 0); } int NodeThreadpool::GoodCPUThreadpoolSize(void) { @@ -100,7 +100,7 @@ void PartitionedNodeThreadpool::Initialize(const std::vector& tp_sizes) { int i = 0; for (auto size : tp_sizes) { LOG("PartitionedNodeThreadpool::Initialize: tp %d: %d threads\n", i, size); - std::shared_ptr tp = std::make_shared(size); + std::shared_ptr tp = std::make_shared(size, i); tps_.push_back(tp); i++; } @@ -115,10 +115,24 @@ PartitionedNodeThreadpool::~PartitionedNodeThreadpool() { for (size_t i = 0; i < tps_.size(); i++) { auto &tp = tps_[i]; - LOG("TP %lu taskSummaries:\n", i); + LOG("Report on TP %d\n", tp->Id()); + + const std::vector> &lengths = tp->GetQueueLengths(); + LOG(" TP %d: Lengths at the %lu update intervals:\n", tp->Id(), lengths.size()); + + if (lengths.size()) { + // Print time relative to the first entry. + uint64_t prev_time = lengths[0]->time_; + for (const std::unique_ptr &length : lengths) { + LOG(" TP %d length %d time-step %lu\n", tp->Id(), length->length_, length->time_ - prev_time); + prev_time = length->time_; + } + } + const std::vector> &summaries = tp->GetTaskSummaries(); + LOG(" TP %d: Task summaries for the %lu tasks:\n", tp->Id(), summaries.size()); for (const std::unique_ptr &summary : summaries) { - LOG(" TP %lu origin %d type %d queue_time %lu run_time %lu\n", i, summary->details_.origin, summary->details_.type, summary->time_in_queue_, summary->time_in_run_); + LOG(" TP %d origin %d type %d queue_time %lu run_time %lu\n", tp->Id(), summary->details_.origin, summary->details_.type, summary->time_in_queue_, summary->time_in_run_); } } } @@ -669,10 +683,12 @@ int LibuvExecutor::uv_executor_cancel(uv_executor_t* executor, * TaskQueue ***************/ -TaskQueue::TaskQueue() - : lock_() +TaskQueue::TaskQueue(int id) + : id_(id), lock_() , task_available_(), tasks_drained_() - , queue_(), outstanding_tasks_(0), stopped_(false), task_summaries_() { + , queue_(), outstanding_tasks_(0), stopped_(false) + , length_(0), n_changes_since_last_length_sample_(0), length_report_freq_(10) + , task_summaries_(), queue_lengths_() { } bool TaskQueue::Push(std::unique_ptr task) { @@ -689,12 +705,30 @@ bool TaskQueue::Push(std::unique_ptr task) { CHECK(task_state == TaskState::QUEUED || task_state == TaskState::CANCELLED); queue_.push(std::move(task)); + UpdateLength(true); outstanding_tasks_++; task_available_.Signal(scoped_lock); return true; } +void TaskQueue::UpdateLength(bool grew) { + if (grew) { + length_++; + } else { + length_--; + } + CHECK_GE(length_, 0); + + n_changes_since_last_length_sample_++; + if (n_changes_since_last_length_sample_ == length_report_freq_) { + queue_lengths_.push_back( + std::unique_ptr( + new QueueLengthSample(length_, uv_hrtime()))); + n_changes_since_last_length_sample_ = 0; + } +} + std::unique_ptr TaskQueue::Pop() { Mutex::ScopedLock scoped_lock(lock_); @@ -706,6 +740,7 @@ std::unique_ptr TaskQueue::Pop() { 
task->task_state_->MarkExitedQueue(); queue_.pop(); + UpdateLength(false); return task; } @@ -724,6 +759,7 @@ std::unique_ptr TaskQueue::BlockingPop() { task->task_state_->MarkExitedQueue(); queue_.pop(); + UpdateLength(false); return task; } @@ -755,20 +791,25 @@ void TaskQueue::Stop() { int TaskQueue::Length() const { Mutex::ScopedLock scoped_lock(lock_); - return queue_.size(); + CHECK_EQ(queue_.size(), length_); + return length_; } std::vector> const& TaskQueue::GetTaskSummaries() const { return task_summaries_; } +std::vector> const& TaskQueue::GetQueueLengths() const { + return queue_lengths_; +} + /************** * Threadpool ***************/ -Threadpool::Threadpool(int threadpool_size) { +Threadpool::Threadpool(int threadpool_size, int id) : id_(id) { CHECK_GT(threadpool_size, 0); - task_queue_ = std::make_shared(); + task_queue_ = std::make_shared(id); worker_group_ = std::unique_ptr( new WorkerGroup(threadpool_size, task_queue_)); } @@ -807,5 +848,9 @@ std::vector> const& Threadpool::GetTaskSummaries() return task_queue_->GetTaskSummaries(); } +std::vector> const& Threadpool::GetQueueLengths() const { + return task_queue_->GetQueueLengths(); +} + } // namespace threadpool } // namespace node diff --git a/src/node_threadpool.h b/src/node_threadpool.h index 630c208ef3a3a6..02158bc73de66d 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -241,6 +241,15 @@ class LibuvExecutor { // instance of LibuvExecutor. }; +class QueueLengthSample { + public: + QueueLengthSample(int length, uint64_t time) + : length_(length), time_(time) {} + + int length_; + uint64_t time_; +}; + // Abstract notion of a queue of Tasks. // The default implementation is a FIFO queue. // Subclass to experiment, e.g.: @@ -252,7 +261,7 @@ class LibuvExecutor { // Users should check the state of Tasks they Pop. class TaskQueue { public: - TaskQueue(); + TaskQueue(int id =-1); // Return true if Push succeeds, else false. // Thread-safe. @@ -276,8 +285,14 @@ class TaskQueue { int Length() const; std::vector> const& GetTaskSummaries() const; + std::vector> const& GetQueueLengths() const; private: + // Caller must hold lock_. + void UpdateLength(bool grew); + + int id_; + // Synchronization. Mutex lock_; // Signal'd when there is at least one task in the queue. @@ -289,7 +304,13 @@ class TaskQueue { std::queue> queue_; int outstanding_tasks_; // Number of Tasks in non-COMPLETED states. bool stopped_; - std::vector> task_summaries_; // For statistics tracking. + + // For statistics tracking. + int length_; + int n_changes_since_last_length_sample_; + int length_report_freq_; + std::vector> task_summaries_; + std::vector> queue_lengths_; }; // A threadpool works on asynchronous Tasks. @@ -301,10 +322,12 @@ class TaskQueue { // - Elastic workers (scale up and down) class Threadpool { public: - explicit Threadpool(int threadpool_size); + explicit Threadpool(int threadpool_size, int id = -1); // Waits for queue to drain. ~Threadpool(); + int Id() const { return id_; } + // Thread-safe. // Returns a TaskState by which caller can track the progress of the Task. // Caller can also use the TaskState to cancel the Task. 
@@ -324,11 +347,14 @@ class Threadpool { int NWorkers() const; std::vector> const& GetTaskSummaries() const; + std::vector> const& GetQueueLengths() const; protected: void Initialize(); private: + int id_; + std::shared_ptr task_queue_; std::unique_ptr worker_group_; }; diff --git a/test/cctest/test_threadpool.cc b/test/cctest/test_threadpool.cc index 984a74bf62ea0c..dcc33daabe57b1 100644 --- a/test/cctest/test_threadpool.cc +++ b/test/cctest/test_threadpool.cc @@ -80,6 +80,7 @@ TEST_F(ThreadpoolTest, TaskQueueEndToEnd) { testTaskDestroyedCount = 0; // Push + fprintf(stderr, "TaskQueueEndToEnd: Push\n"); EXPECT_EQ(tq.Length(), 0); for (int i = 0; i < nTasks; i++) { auto task_state = std::make_shared(); @@ -90,6 +91,7 @@ TEST_F(ThreadpoolTest, TaskQueueEndToEnd) { EXPECT_EQ(tq.Length(), nTasks); // Successful Pop, BlockingPop + fprintf(stderr, "TaskQueueEndToEnd: Pop\n"); for (int i = 0; i < nTasks; i++) { std::unique_ptr task; if (i % 2) @@ -106,6 +108,7 @@ TEST_F(ThreadpoolTest, TaskQueueEndToEnd) { EXPECT_EQ(tq.Length(), 0); // Stop works + fprintf(stderr, "TaskQueueEndToEnd: Push after Stop\n"); tq.Stop(); EXPECT_EQ(tq.Push(std::unique_ptr(new FastTestTask())), false); } From 2547f526e872b133a940bca50f2193ac6450b60d Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Tue, 11 Sep 2018 15:37:18 -0400 Subject: [PATCH 23/31] PTP: Add UnpartitionedPartitionedNodeThreadpool Using UnpartitionedPartitionedNodeThreadpool we can get the nice statistics reporting from PartitionedNodeThreadpool while having the same behavior as the default NodeThreadpool. --- src/node.cc | 3 ++- src/node_threadpool.cc | 32 ++++++++++++++++++++++++++++++++ src/node_threadpool.h | 15 +++++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/src/node.cc b/src/node.cc index c65d57cbe01516..6da46332c6c905 100644 --- a/src/node.cc +++ b/src/node.cc @@ -289,7 +289,8 @@ static struct { // What kind of threadpool is desired? 
   char *tp_type = getenv("NODE_THREADPOOL_TYPE");
   if (!tp_type || strcmp(tp_type, "SHARED") == 0) {
-    tp_ = std::make_shared(-1);
+    std::vector sizes{-1};
+    tp_ = std::make_shared(sizes);
   } else if (strcmp(tp_type, "SPLIT_BY_ORIGIN") == 0) {
     std::vector sizes{-1, -1};
     tp_ = std::make_shared(sizes);
diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc
index 0f256a2f96a4f6..2b11d0f41d9db5 100644
--- a/src/node_threadpool.cc
+++ b/src/node_threadpool.cc
@@ -166,6 +166,38 @@ int PartitionedNodeThreadpool::NWorkers() const {
   return sum;
 }

+/**************
+ * UnpartitionedPartitionedNodeThreadpool
+ ***************/
+
+UnpartitionedPartitionedNodeThreadpool::UnpartitionedPartitionedNodeThreadpool(
+  std::vector tp_sizes) : ONLY_TP_IX(0) {
+  CHECK_EQ(tp_sizes.size(), 1);
+
+  // TP size
+  if (tp_sizes[ONLY_TP_IX] <= 0) {
+    char buf[32];
+    size_t buf_size = sizeof(buf);
+    if (uv_os_getenv("UV_THREADPOOL_SIZE", buf, &buf_size) == 0) {
+      tp_sizes[ONLY_TP_IX] = atoi(buf);
+    }
+  }
+  if (tp_sizes[ONLY_TP_IX] <= 0) {
+    tp_sizes[ONLY_TP_IX] = 4;  // libuv default
+  }
+  LOG("UnpartitionedPartitionedNodeThreadpool::UnpartitionedPartitionedNodeThreadpool: only tp size %d\n", tp_sizes[ONLY_TP_IX]);
+  CHECK_GT(tp_sizes[ONLY_TP_IX], 0);
+
+  Initialize(tp_sizes);
+}
+
+UnpartitionedPartitionedNodeThreadpool::~UnpartitionedPartitionedNodeThreadpool() {
+}
+
+int UnpartitionedPartitionedNodeThreadpool::ChooseThreadpool(Task* task) const {
+  return ONLY_TP_IX;
+}
+
 /**************
  * ByTaskOriginPartitionedNodeThreadpool
  ***************/
diff --git a/src/node_threadpool.h b/src/node_threadpool.h
index 02158bc73de66d..4b4a3d9e177468 100644
--- a/src/node_threadpool.h
+++ b/src/node_threadpool.h
@@ -421,6 +421,21 @@ class PartitionedNodeThreadpool : public NodeThreadpool {
   std::vector> tps_;
 };

+// This is the same as a NodeThreadpool, but by inheriting from PartitionedNodeThreadpool
+// we get to benefit from its built-in monitoring.
+class UnpartitionedPartitionedNodeThreadpool : public PartitionedNodeThreadpool {
+ public:
+  // tp_sizes[0] defines the only pool. Reads UV_THREADPOOL_SIZE, defaults to 4.
+  explicit UnpartitionedPartitionedNodeThreadpool(std::vector tp_sizes);
+  // Waits for queue to drain.
+  ~UnpartitionedPartitionedNodeThreadpool();
+
+  int ChooseThreadpool(Task* task) const;
+
+ private:
+  int ONLY_TP_IX;
+};
+
 // Splits based on task origin: V8 or libuv
 class ByTaskOriginPartitionedNodeThreadpool : public PartitionedNodeThreadpool {
  public:

From 61b4cd3c80717164d1d697332312c1c8733f3e2c Mon Sep 17 00:00:00 2001
From: Jamie Davis
Date: Tue, 11 Sep 2018 16:05:04 -0400
Subject: [PATCH 24/31] PTP: Visualize threadpool behavior

This tool handles the output of PrintStats. It produces graphs of TP queue
lengths sampled over time. It also prints a summary of task counts grouped
by origin and type.

In the plot of pool queue lengths, it includes a per-TP plot of "# CPU tasks"
in the queue. When running with NODE_THREADPOOL_TYPE=SPLIT_BY_ORIGIN, this
lets us visualize the extent to which the libuv TP is working on both CPU
and I/O.
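For concreteness, the machine-parseable records look like the following. The format lines match the LOG_TO_FILE calls in this patch; the sample values here are invented for illustration:

    TP key format: TP,name,size
    TP key: 0,V8 TP,4
    TP key: 1,Libuv TP,4
    QueueLengths data format: TP,queue-length,n_cpu,n_io,time
    QueueLengths data: 1,5,2,3,1537112000000
    TaskSummary data format: TP,task-origin,task-type,queue_time,run_time
    TaskSummary data: 1,1,0,10500,250000

The tool greps these lines out of the log (written to /tmp/node-<pid>-PartitionedNodeThreadpool-result.log), loads them into pandas data frames, and plots per-TP queue lengths over time.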
--- src/node_threadpool.cc | 87 ++++++++++++--- src/node_threadpool.h | 22 +++- tools/visualize-threadpool-behavior.py | 146 +++++++++++++++++++++++++ 3 files changed, 233 insertions(+), 22 deletions(-) create mode 100755 tools/visualize-threadpool-behavior.py diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc index 2b11d0f41d9db5..0aa9e50ba37543 100644 --- a/src/node_threadpool.cc +++ b/src/node_threadpool.cc @@ -6,6 +6,10 @@ #include "util.h" #include +#include +#include +#include + // TODO(davisjam): DO NOT MERGE. Only for debugging. // TODO(davisjam): There must be a better way to do this. @@ -13,9 +17,10 @@ //#undef DEBUG_LOG #ifdef DEBUG_LOG -#include -#define LOG(...) fprintf(stderr, __VA_ARGS__) +#define LOG_TO_FILE(fd, ...) fprintf(fd, __VA_ARGS__) +#define LOG(...) LOG_TO_FILE(stderr, __VA_ARGS__) #else +#define LOG_TO_FILE(...) (void) 0 #define LOG(...) (void) 0 #endif @@ -113,6 +118,23 @@ PartitionedNodeThreadpool::~PartitionedNodeThreadpool() { tp->BlockingDrain(); } + // TODO(davisjam) Let's hope the application didn't make more than one of these :-D. + char logFile[128]; + snprintf(logFile, sizeof(logFile), "/tmp/node-%d-PartitionedNodeThreadpool-result.log", getpid()); + FILE* fd = fopen(logFile, "w"); + CHECK(fd); + + // Emit records in machine-parseable format as well as human-readable. + // We want to be able to easily grep for and extract lines into data frames + // so we can slice and dice quickly. + LOG_TO_FILE(fd, "TP key format: TP,name,size\n"); + for (auto pair : tp_labels_) { + LOG_TO_FILE(fd, "TP key: %d,%s,%d\n", pair.first, tp_labels_[pair.first].c_str(), tp_sizes_[pair.first]); + } + + LOG_TO_FILE(fd, "QueueLengths data format: TP,queue-length,n_cpu,n_io,time\n"); + LOG_TO_FILE(fd, "TaskSummary data format: TP,task-origin,task-type,queue_time,run_time\n"); + for (size_t i = 0; i < tps_.size(); i++) { auto &tp = tps_[i]; LOG("Report on TP %d\n", tp->Id()); @@ -121,11 +143,9 @@ PartitionedNodeThreadpool::~PartitionedNodeThreadpool() { LOG(" TP %d: Lengths at the %lu update intervals:\n", tp->Id(), lengths.size()); if (lengths.size()) { - // Print time relative to the first entry. 
- uint64_t prev_time = lengths[0]->time_; for (const std::unique_ptr &length : lengths) { - LOG(" TP %d length %d time-step %lu\n", tp->Id(), length->length_, length->time_ - prev_time); - prev_time = length->time_; + LOG(" TP %d length %d n-cpu %d n-io %d time %lu\n", tp->Id(), length->length_, length->n_cpu_, length->n_io_, length->time_); + LOG_TO_FILE(fd, "QueueLengths data: %d,%d,%d,%d,%lu\n", tp->Id(), length->length_, length->n_cpu_, length->n_io_, length->time_); } } @@ -133,6 +153,7 @@ PartitionedNodeThreadpool::~PartitionedNodeThreadpool() { LOG(" TP %d: Task summaries for the %lu tasks:\n", tp->Id(), summaries.size()); for (const std::unique_ptr &summary : summaries) { LOG(" TP %d origin %d type %d queue_time %lu run_time %lu\n", tp->Id(), summary->details_.origin, summary->details_.type, summary->time_in_queue_, summary->time_in_run_); + LOG_TO_FILE(fd, "TaskSummary data: %d,%d,%d,%lu,%lu\n", tp->Id(), summary->details_.origin, summary->details_.type, summary->time_in_queue_, summary->time_in_run_); } } } @@ -189,6 +210,9 @@ UnpartitionedPartitionedNodeThreadpool::UnpartitionedPartitionedNodeThreadpool( CHECK_GT(tp_sizes[ONLY_TP_IX], 0); Initialize(tp_sizes); + + tp_labels_[ONLY_TP_IX] = "Universal TP"; + tp_sizes_[ONLY_TP_IX] = tp_sizes[ONLY_TP_IX]; } UnpartitionedPartitionedNodeThreadpool::~UnpartitionedPartitionedNodeThreadpool() { @@ -236,6 +260,11 @@ ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool( CHECK_GT(tp_sizes[LIBUV_TP_IX], 0); Initialize(tp_sizes); + + tp_labels_[V8_TP_IX] = "V8 TP"; + tp_sizes_[V8_TP_IX] = tp_sizes[V8_TP_IX]; + tp_labels_[LIBUV_TP_IX] = "Libuv TP"; + tp_sizes_[LIBUV_TP_IX] = tp_sizes[LIBUV_TP_IX]; } ByTaskOriginPartitionedNodeThreadpool::~ByTaskOriginPartitionedNodeThreadpool() { @@ -290,6 +319,11 @@ ByTaskTypePartitionedNodeThreadpool::ByTaskTypePartitionedNodeThreadpool( CHECK_GT(tp_sizes[IO_TP_IX], 0); Initialize(tp_sizes); + + tp_labels_[CPU_TP_IX] = "CPU TP"; + tp_sizes_[CPU_TP_IX] = tp_sizes[CPU_TP_IX]; + tp_labels_[IO_TP_IX] = "IO TP"; + tp_sizes_[IO_TP_IX] = tp_sizes[IO_TP_IX]; } ByTaskTypePartitionedNodeThreadpool::~ByTaskTypePartitionedNodeThreadpool() { @@ -299,6 +333,7 @@ int ByTaskTypePartitionedNodeThreadpool::ChooseThreadpool(Task* task) const { switch (task->details_.type) { case TaskDetails::CPU: case TaskDetails::MEMORY: + case TaskDetails::TASK_TYPE_UNKNOWN: LOG("ByTaskTypePartitionedNodeThreadpool::ChooseThreadpool: CPU\n"); return CPU_TP_IX; default: @@ -360,6 +395,13 @@ ByTaskOriginAndTypePartitionedNodeThreadpool::ByTaskOriginAndTypePartitionedNode CHECK_GT(tp_sizes[LIBUV_IO_TP_IX], 0); Initialize(tp_sizes); + + tp_labels_[V8_TP_IX] = "V8 TP"; + tp_sizes_[V8_TP_IX] = tp_sizes[V8_TP_IX]; + tp_labels_[LIBUV_CPU_TP_IX] = "Libuv CPU TP"; + tp_sizes_[LIBUV_CPU_TP_IX] = tp_sizes[LIBUV_CPU_TP_IX]; + tp_labels_[LIBUV_IO_TP_IX] = "Libuv IO TP"; + tp_sizes_[LIBUV_IO_TP_IX] = tp_sizes[LIBUV_IO_TP_IX]; } ByTaskOriginAndTypePartitionedNodeThreadpool::~ByTaskOriginAndTypePartitionedNodeThreadpool() { @@ -373,6 +415,7 @@ int ByTaskOriginAndTypePartitionedNodeThreadpool::ChooseThreadpool(Task* task) c switch (task->details_.type) { case TaskDetails::CPU: case TaskDetails::MEMORY: + case TaskDetails::TASK_TYPE_UNKNOWN: LOG("ByTaskTypePartitionedNodeThreadpool::ChooseThreadpool: CPU\n"); return LIBUV_CPU_TP_IX; default: @@ -719,7 +762,7 @@ TaskQueue::TaskQueue(int id) : id_(id), lock_() , task_available_(), tasks_drained_() , queue_(), outstanding_tasks_(0), stopped_(false) - , length_(0), 
n_changes_since_last_length_sample_(0), length_report_freq_(10) + , n_cpu_in_queue_(0), n_io_in_queue_(0), n_changes_since_last_length_sample_(0), length_report_freq_(10) , task_summaries_(), queue_lengths_() { } @@ -736,27 +779,36 @@ bool TaskQueue::Push(std::unique_ptr task) { TaskState::State task_state = task->TryUpdateState(TaskState::QUEUED); CHECK(task_state == TaskState::QUEUED || task_state == TaskState::CANCELLED); + UpdateLength(task.get(), true); queue_.push(std::move(task)); - UpdateLength(true); outstanding_tasks_++; task_available_.Signal(scoped_lock); return true; } -void TaskQueue::UpdateLength(bool grew) { +void TaskQueue::UpdateLength(Task* task, bool grew) { + int *counter = nullptr; + if (task->details_.type == TaskDetails::CPU + || task->details_.type == TaskDetails::MEMORY + || task->details_.type == TaskDetails::TASK_TYPE_UNKNOWN) { + counter = &n_cpu_in_queue_; + } else { + counter = &n_io_in_queue_; + } + if (grew) { - length_++; + (*counter)++; } else { - length_--; + (*counter)--; } - CHECK_GE(length_, 0); + CHECK_GE(*counter, 0); n_changes_since_last_length_sample_++; if (n_changes_since_last_length_sample_ == length_report_freq_) { queue_lengths_.push_back( std::unique_ptr( - new QueueLengthSample(length_, uv_hrtime()))); + new QueueLengthSample(n_cpu_in_queue_, n_io_in_queue_, uv_hrtime()))); n_changes_since_last_length_sample_ = 0; } } @@ -772,7 +824,7 @@ std::unique_ptr TaskQueue::Pop() { task->task_state_->MarkExitedQueue(); queue_.pop(); - UpdateLength(false); + UpdateLength(task.get(), false); return task; } @@ -791,7 +843,7 @@ std::unique_ptr TaskQueue::BlockingPop() { task->task_state_->MarkExitedQueue(); queue_.pop(); - UpdateLength(false); + UpdateLength(task.get(), false); return task; } @@ -823,8 +875,9 @@ void TaskQueue::Stop() { int TaskQueue::Length() const { Mutex::ScopedLock scoped_lock(lock_); - CHECK_EQ(queue_.size(), length_); - return length_; + int length = n_cpu_in_queue_ + n_io_in_queue_; + CHECK_EQ(queue_.size(), length); + return length; } std::vector> const& TaskQueue::GetTaskSummaries() const { diff --git a/src/node_threadpool.h b/src/node_threadpool.h index 4b4a3d9e177468..b88f57c9ba5e5d 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -5,6 +5,7 @@ #include #include +#include #include #include "node.h" @@ -243,11 +244,15 @@ class LibuvExecutor { class QueueLengthSample { public: - QueueLengthSample(int length, uint64_t time) - : length_(length), time_(time) {} + QueueLengthSample(int n_cpu, int n_io, uint64_t time) + : time_(time), length_(n_cpu + n_io), n_cpu_(n_cpu), n_io_(n_io) { } - int length_; uint64_t time_; + + int length_; + // Length = sum of these + int n_cpu_; + int n_io_; }; // Abstract notion of a queue of Tasks. @@ -289,7 +294,7 @@ class TaskQueue { private: // Caller must hold lock_. - void UpdateLength(bool grew); + void UpdateLength(Task* task, bool grew); int id_; @@ -306,7 +311,9 @@ class TaskQueue { bool stopped_; // For statistics tracking. - int length_; + int n_cpu_in_queue_; + int n_io_in_queue_; + int n_changes_since_last_length_sample_; int length_report_freq_; std::vector> task_summaries_; @@ -419,6 +426,11 @@ class PartitionedNodeThreadpool : public NodeThreadpool { // Sub-classes should call this after computing tp_sizes in their c'tors. void Initialize(const std::vector& tp_sizes); std::vector> tps_; + + // Sub-classes should populate these. 
+  // Helps with statistics reporting
+  std::map tp_labels_;
+  std::map tp_sizes_;
 };

 // This is the same as a NodeThreadpool, but by inheriting from PartitionedNodeThreadpool
diff --git a/tools/visualize-threadpool-behavior.py b/tools/visualize-threadpool-behavior.py
new file mode 100755
index 00000000000000..7abb2193c271e4
--- /dev/null
+++ b/tools/visualize-threadpool-behavior.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python
+# Author: Jamie Davis
+# Description: Visualize threadpool behavior.
+# Relies on the statistics dumped by NodeThreadpool::PrintStats.
+
+import matplotlib.pyplot as plt
+import json
+from collections import defaultdict
+import matplotlib.ticker
+import argparse
+import re
+import os
+
+import pandas as pd
+import numpy as np
+
+import StringIO
+
+# Parse args
+parser = argparse.ArgumentParser(description='Visualize Node.js threadpool behavior')
+parser.add_argument('--log-file', '-f', help='Log file containing LOG statements emitted by src/node_threadpool.cc during a Node.js application run.', required = True)
+parser.add_argument('--vis-dir', '-d', help='Where to save the visualizations?', required = False)
+
+args = parser.parse_args()
+
+# Parsed args
+logFile = args.log_file
+if args.vis_dir:
+  visDir = args.vis_dir
+else:
+  visDir = '.'
+
+# Extract CSV lines from log file
+queueLengths_header = ''
+queueLengths_data = []
+taskSummaries_header = ''
+taskSummaries_data = []
+# And map from TP ID to label
+tpIDToLabel = {}
+taskOriginIDToLabel = {}
+taskTypeIDToLabel = {}
+for line in open(logFile, 'r'):
+  # Skip lines that cannot contain CSV data
+  if ',' not in line:
+    continue
+
+  m = re.match('^TP key: (\d+),\s*(.+)\s*,(\d+)$', line)
+  if m:
+    tpID = int(m.group(1))
+    label = m.group(2) + " (size {})".format(m.group(3))
+    tpIDToLabel[tpID] = label
+    print "{} -> {}".format(tpID, label)
+
+  m = re.match('^QueueLengths data format: (.+)$', line)
+  if m:
+    queueLengths_header = m.group(1)
+
+  m = re.match('^QueueLengths data: (.+)$', line)
+  if m:
+    queueLengths_data.append(m.group(1))
+
+  m = re.match('^TaskSummary data format: (.+)$', line)
+  if m:
+    taskSummaries_header = m.group(1)
+
+  m = re.match('^TaskSummary data: (.+)$', line)
+  if m:
+    taskSummaries_data.append(m.group(1))
+
+# Write these to a StringIO so we can pandas.read_csv easily
+queueLengths = [queueLengths_header]
+queueLengths = queueLengths + queueLengths_data
+
+taskSummaries = [taskSummaries_header]
+taskSummaries = taskSummaries + taskSummaries_data
+
+print "Got {} queueLengths".format(len(queueLengths))
+print "Got {} taskSummaries".format(len(taskSummaries))
+queueLengths_sio = StringIO.StringIO('\n'.join(queueLengths))
+taskSummaries_sio = StringIO.StringIO('\n'.join(taskSummaries))
+
+# Read as data frames
+df_ql = pd.read_csv(queueLengths_sio)
+df_ts = pd.read_csv(taskSummaries_sio)
+
+print "First queue lengths data frame"
+print df_ql.head(1)
+print "First task summaries data frame"
+print df_ts.head(1)
+
+# Align the queue length samples.
+# Normalize queue length sample times to zero -- the smallest value across all df_ql.time values.
+print "Normalizing QL times" +minQLTime = df_ql['time'].min() +minTSTime = df_ts['time-at-completion'].min() +minObservedTime = min(minQLTime, minTSTime) +df_ql['time'] = df_ql['time'] - minObservedTime +df_ts['time-at-completion'] = df_ts['time-at-completion'] - minObservedTime + +# Describe the distribution of task types in each pool +print "Distribution of task types by origin" +print df_ts.groupby(['task-origin', 'task-type']).size() + +tpIDs = df_ql.TP.unique() +tpIDToColor = { 0: 'red', 1: 'blue', 2: 'green', 3: 'orange', 4: 'yellow' } + +# Plot per-TP queue lengths over time. +if True: + print "TPs: {}".format(tpIDs) + for tp in tpIDs: + print "\n-----------------------" + print "TP {}: {}".format(tp, tpIDToLabel[tp]) + print "-----------------------\n" + df_ql_tp = df_ql.loc[df_ql['TP'] == tp] + print df_ql_tp.describe() + + try: + # Queue length for this tp: line chart + #ax = df_ql_tp.plot(ax=ax, x='time', y='queue-length', color=colors[tp], kind='line', label='TP {}'.format(tp)) + + # Queue length for this tp: scatter + fig, ax = plt.subplots() + df_ql_tp.plot.scatter(ax=ax, x='time', y='queue-length', c='xkcd:{}'.format(tpIDToColor[tp]), s=1, label=tpIDToLabel[tp]) + # Queue length for this tp's CPU tasks: scatter + df_ql_tp.plot.scatter(ax=ax, x='time', y='n_cpu', c='xkcd:light {}'.format(tpIDToColor[tp]), s=1, label=tpIDToLabel[tp] + " CPU tasks") + + ax.set_title('Queue lengths over time') + ax.set_xlabel('Time (ns)') + ax.set_ylabel('Queue length') + fname = os.path.join(visDir, 'tp-{}-queueLengths.png'.format(tp)) + plt.savefig(fname) + print "See {} for plot of TP {} queue lengths".format(fname, tp) + except: + pass + +# Plot per-TP task running times as a histogram. +if True: + tpIDs = df_ql.TP.unique() + print "TPs: {}".format(tpIDs) + for tp in tpIDs: + df_ts_tp = df_ts.loc[df_ts['TP'] == tp] + print "TP {}".format(tp) + print df_ts_tp.describe() + # TODO It would also be nice to emit the count of the different types of tasks. 
+    #df_ts_tp['run_time'].plot.hist(bins=10, title='Histogram of running times for tasks in TP {}'.format(tp))
+    #plt.show()

From 0c407381dd633357f519f0deeb5b5b01e9d978b9 Mon Sep 17 00:00:00 2001
From: Jamie Davis
Date: Sun, 16 Sep 2018 15:29:02 -0400
Subject: [PATCH 25/31] PTP: change env vars for consistency

---
 src/node_threadpool.cc | 6 ++++--
 src/node_threadpool.h  | 7 ++++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc
index 0aa9e50ba37543..185eb8057a1175 100644
--- a/src/node_threadpool.cc
+++ b/src/node_threadpool.cc
@@ -251,6 +251,8 @@ ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool(
     size_t buf_size = sizeof(buf);
     if (uv_os_getenv("UV_THREADPOOL_SIZE", buf, &buf_size) == 0) {
       tp_sizes[LIBUV_TP_IX] = atoi(buf);
+    } else if (uv_os_getenv("NODE_THREADPOOL_UV_TP_SIZE", buf, &buf_size) == 0) {
+      tp_sizes[LIBUV_TP_IX] = atoi(buf);
     }
   }
   if (tp_sizes[LIBUV_TP_IX] <= 0) {
@@ -369,7 +371,7 @@ ByTaskOriginAndTypePartitionedNodeThreadpool::ByTaskOriginAndTypePartitionedNode
   if (tp_sizes[LIBUV_CPU_TP_IX] <= 0) {
     char buf[32];
     size_t buf_size = sizeof(buf);
-    if (uv_os_getenv("NODE_THREADPOOL_UVTP_CPU_TP_SIZE", buf, &buf_size) == 0) {
+    if (uv_os_getenv("NODE_THREADPOOL_UV_CPU_TP_SIZE", buf, &buf_size) == 0) {
       tp_sizes[LIBUV_CPU_TP_IX] = atoi(buf);
     }
   }
@@ -384,7 +386,7 @@ ByTaskOriginAndTypePartitionedNodeThreadpool::ByTaskOriginAndTypePartitionedNode
   if (tp_sizes[LIBUV_IO_TP_IX] <= 0) {
     char buf[32];
     size_t buf_size = sizeof(buf);
-    if (uv_os_getenv("NODE_THREADPOOL_UVTP_IO_TP_SIZE", buf, &buf_size) == 0) {
+    if (uv_os_getenv("NODE_THREADPOOL_UV_IO_TP_SIZE", buf, &buf_size) == 0) {
       tp_sizes[LIBUV_IO_TP_IX] = atoi(buf);
     }
   }
diff --git a/src/node_threadpool.h b/src/node_threadpool.h
index b88f57c9ba5e5d..500390eaca84cd 100644
--- a/src/node_threadpool.h
+++ b/src/node_threadpool.h
@@ -453,7 +453,7 @@ class ByTaskOriginPartitionedNodeThreadpool : public PartitionedNodeThreadpool {
  public:
   // tp_sizes[0] is V8, tp_sizes[1] is libuv
   // tp_sizes[0] -1: reads NODE_THREADPOOL_V8_TP_SIZE, or guesses based on # cores
-  // tp_sizes[1] -1: reads UV_THREADPOOL_SIZE, defaults to 4
+  // tp_sizes[1] -1: reads UV_THREADPOOL_SIZE or NODE_THREADPOOL_UV_TP_SIZE, defaults to 4
   explicit ByTaskOriginPartitionedNodeThreadpool(std::vector tp_sizes);
   // Waits for queue to drain.
   ~ByTaskOriginPartitionedNodeThreadpool();
@@ -486,8 +486,9 @@ class ByTaskTypePartitionedNodeThreadpool : public PartitionedNodeThreadpool {
  public:
   // tp_sizes[0] is V8, tp_sizes[1] is libuv-CPU, tp_sizes[2] is libuv-I/O
-  // tp_sizes[1] -1: reads NODE_THREADPOOL_UVTP_CPU_TP_SIZE, or guesses based on # cores
-  // tp_sizes[2] -1: reads NODE_THREADPOOL_UVTP_IO_TP_SIZE, or guesses based on # cores
+  // tp_sizes[0] -1: reads NODE_THREADPOOL_V8_TP_SIZE, or guesses based on # cores
+  // tp_sizes[1] -1: reads NODE_THREADPOOL_UV_CPU_TP_SIZE, or guesses based on # cores
+  // tp_sizes[2] -1: reads NODE_THREADPOOL_UV_IO_TP_SIZE, or guesses based on # cores
   explicit ByTaskOriginAndTypePartitionedNodeThreadpool(std::vector tp_sizes);
   // Waits for queue to drain.
  ~ByTaskOriginAndTypePartitionedNodeThreadpool();

From 003c147065f144de0e758e5bc83b567b1b146ea8 Mon Sep 17 00:00:00 2001
From: Jamie Davis
Date: Sun, 16 Sep 2018 15:29:45 -0400
Subject: [PATCH 26/31] PTP: dump stats in either [Signal]Exit or Start

Problem: Apparently some uses of node go through Exit without fully cleaning
up, so printing stats in the PartitionedNodeThreadpool d'tor was not giving
us stats. This was true, for example, for 'npm install'.

Solution: Call DrainAndPrintStats in both node.cc::Start and node.cc::Exit,
and let only the first call take effect if both run.

This should not be merged. Just performance profiling.
And dumping stats in SignalExit is unsafe.

---
 src/node.cc            | 19 +++++++++++++++++++
 src/node_threadpool.cc | 10 +++++++++-
 src/node_threadpool.h  |  5 +++++
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/src/node.cc b/src/node.cc
index 6da46332c6c905..c4bdc8292a4d63 100644
--- a/src/node.cc
+++ b/src/node.cc
@@ -306,10 +306,20 @@ static struct {
     libuv_executor_ = std::unique_ptr(
         new threadpool::LibuvExecutor(tp_));
+    printed_stats = false;
+  }
+
+  void DrainAndPrintStats(void) {
+    if (!printed_stats) {
+      tp_->BlockingDrain();
+      tp_->PrintStats();
+      printed_stats = true;
+    }
   }

   std::shared_ptr tp_;
   std::unique_ptr libuv_executor_;
+  bool printed_stats;
 } node_threadpool;

 static struct {
@@ -1224,6 +1234,9 @@ static void Exit(const FunctionCallbackInfo& args) {
   Environment* env = Environment::GetCurrent(args);
   WaitForInspectorDisconnect(env);
   v8_platform.StopTracingAgent();
+
+  node_threadpool.DrainAndPrintStats();
+
   env->Exit(args[0]->Int32Value());
 }

@@ -2213,6 +2226,9 @@ void SetupProcessObject(Environment* env,

 void SignalExit(int signo) {
+  // Yeah, yeah, this is not signal safe.
+  node_threadpool.DrainAndPrintStats();
+
   uv_tty_reset_mode();
   v8_platform.StopTracingAgent();
 #ifdef __FreeBSD__
@@ -3377,6 +3393,7 @@ int Start(int argc, char** argv) {
   // Replace the default V8 platform with our implementation.
   // Use our threadpool.
   v8_platform.Initialize(node_threadpool.tp_);
+  V8::Initialize();
   performance::performance_v8_start = PERFORMANCE_NOW();
   v8_initialized = true;
@@ -3394,6 +3411,8 @@ int Start(int argc, char** argv) {
   // will never be fully cleaned up.
   v8_platform.Dispose();

+  node_threadpool.DrainAndPrintStats();
+
   return exit_code;
 }

diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc
index 185eb8057a1175..e48e1492b49196 100644
--- a/src/node_threadpool.cc
+++ b/src/node_threadpool.cc
@@ -88,6 +88,9 @@ int NodeThreadpool::NWorkers() const {
   return tp_->NWorkers();
 }

+void NodeThreadpool::PrintStats() const {
+}
+
 /**************
  * PartitionedNodeThreadpool
  ***************/
@@ -112,12 +115,17 @@ void PartitionedNodeThreadpool::Initialize(const std::vector& tp_sizes) {
 }

 PartitionedNodeThreadpool::~PartitionedNodeThreadpool() {
+  LOG("PartitionedNodeThreadpool::~PartitionedNodeThreadpool: Goodbye\n");
+  fflush(stderr);
+
   // If we just return, the destructors of the tp's will drain them.
   // But if we want to report meaningful statistics we must drain them first.
   for (auto &tp : tps_) {
     tp->BlockingDrain();
   }
+}

+void PartitionedNodeThreadpool::PrintStats(void) const {
   // TODO(davisjam) Let's hope the application didn't make more than one of these :-D.
   char logFile[128];
   snprintf(logFile, sizeof(logFile), "/tmp/node-%d-PartitionedNodeThreadpool-result.log", getpid());
   FILE* fd = fopen(logFile, "w");
   CHECK(fd);

   // Emit records in machine-parseable format as well as human-readable.
   // We want to be able to easily grep for and extract lines into data frames
LOG_TO_FILE(fd, "TP key format: TP,name,size\n"); for (auto pair : tp_labels_) { - LOG_TO_FILE(fd, "TP key: %d,%s,%d\n", pair.first, tp_labels_[pair.first].c_str(), tp_sizes_[pair.first]); + LOG_TO_FILE(fd, "TP key: %d,%s,%d\n", pair.first, tp_labels_.at(pair.first).c_str(), tp_sizes_.at(pair.first)); } LOG_TO_FILE(fd, "QueueLengths data format: TP,queue-length,n_cpu,n_io,time\n"); diff --git a/src/node_threadpool.h b/src/node_threadpool.h index 500390eaca84cd..661e6524818f51 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -395,6 +395,9 @@ class NodeThreadpool { // Attributes virtual int NWorkers() const; + // Does nothing. Subclasses can implement. + virtual void PrintStats() const; + protected: int GoodCPUThreadpoolSize(); @@ -422,6 +425,8 @@ class PartitionedNodeThreadpool : public NodeThreadpool { virtual int NWorkers() const override; + virtual void PrintStats() const override; + protected: // Sub-classes should call this after computing tp_sizes in their c'tors. void Initialize(const std::vector& tp_sizes); From c289dcd8081220184471f8086a237525a8ae5093 Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Mon, 17 Sep 2018 13:05:39 -0400 Subject: [PATCH 27/31] PTP: PrintStats tweaks for easy parsing --- src/node_threadpool.cc | 16 +++++++++--- src/node_threadpool.h | 59 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 68 insertions(+), 7 deletions(-) diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc index e48e1492b49196..6631df45e36a87 100644 --- a/src/node_threadpool.cc +++ b/src/node_threadpool.cc @@ -141,7 +141,7 @@ void PartitionedNodeThreadpool::PrintStats(void) const { } LOG_TO_FILE(fd, "QueueLengths data format: TP,queue-length,n_cpu,n_io,time\n"); - LOG_TO_FILE(fd, "TaskSummary data format: TP,task-origin,task-type,queue_time,run_time\n"); + LOG_TO_FILE(fd, "TaskSummary data format: TP,task-origin,task-type,queue-duration,run-duration,time-at-completion\n"); for (size_t i = 0; i < tps_.size(); i++) { auto &tp = tps_[i]; @@ -160,8 +160,10 @@ void PartitionedNodeThreadpool::PrintStats(void) const { const std::vector> &summaries = tp->GetTaskSummaries(); LOG(" TP %d: Task summaries for the %lu tasks:\n", tp->Id(), summaries.size()); for (const std::unique_ptr &summary : summaries) { - LOG(" TP %d origin %d type %d queue_time %lu run_time %lu\n", tp->Id(), summary->details_.origin, summary->details_.type, summary->time_in_queue_, summary->time_in_run_); - LOG_TO_FILE(fd, "TaskSummary data: %d,%d,%d,%lu,%lu\n", tp->Id(), summary->details_.origin, summary->details_.type, summary->time_in_queue_, summary->time_in_run_); + LOG(" TP %d origin %s type %s queue-duration %lu run-duration %lu time-at-completion %lu\n", + tp->Id(), TaskDetails::AsString(summary->details_.origin).c_str(), TaskDetails::AsString(summary->details_.type).c_str(), summary->time_in_queue_, summary->time_in_run_, summary->time_at_completion_); + LOG_TO_FILE(fd, "TaskSummary data: %d,%s,%s,%lu,%lu,%lu\n", + tp->Id(), TaskDetails::AsString(summary->details_.origin).c_str(), TaskDetails::AsString(summary->details_.type).c_str(), summary->time_in_queue_, summary->time_in_run_, summary->time_at_completion_); } } } @@ -169,7 +171,7 @@ void PartitionedNodeThreadpool::PrintStats(void) const { std::shared_ptr PartitionedNodeThreadpool::Post(std::unique_ptr task) { int tp = ChooseThreadpool(task.get()); CHECK_GE(tp, 0); - CHECK_LT(tp, tps_.size()); + CHECK_LT(tp, (int) tps_.size()); return tps_[tp]->Post(std::move(task)); } @@ -527,6 +529,7 @@ TaskSummary::TaskSummary(Task* completed_task) 
{ details_ = completed_task->details_; time_in_queue_ = completed_task->task_state_->TimeInQueue(); time_in_run_ = completed_task->task_state_->TimeInRun(); + time_at_completion_ = completed_task->task_state_->TimeAtCompletion(); } /************** @@ -561,6 +564,11 @@ uint64_t TaskState::TimeInRun() const { return time_in_run_; } +uint64_t TaskState::TimeAtCompletion() const { + Mutex::ScopedLock scoped_lock(lock_); + return time_exited_run_; +} + uint64_t TaskState::TimeInThreadpool() const { Mutex::ScopedLock scoped_lock(lock_); return time_in_queue_ + time_in_run_; diff --git a/src/node_threadpool.h b/src/node_threadpool.h index 661e6524818f51..6ec32e2c995d09 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -80,6 +80,8 @@ class Worker { // This is basically a struct class TaskDetails { public: + // Handy types + enum TaskOrigin { V8 , LIBUV @@ -87,6 +89,21 @@ class TaskDetails { , TASK_ORIGIN_UNKNOWN }; + static std::string AsString(TaskOrigin to) { + switch (to) { + case V8: + return "V8"; + case LIBUV: + return "LIBUV"; + case USER: + return "USER"; + case TASK_ORIGIN_UNKNOWN: + return "UNKNOWN"; + default: + return "UNKNOWN"; + } + } + enum TaskType { FS , DNS @@ -96,12 +113,46 @@ class TaskDetails { , TASK_TYPE_UNKNOWN }; + static std::string AsString(TaskType tt) { + switch (tt) { + case FS: + return "FS"; + case DNS: + return "DNS"; + case IO: + return "IO"; + case MEMORY: + return "MEMORY"; + case CPU: + return "CPU"; + case TASK_TYPE_UNKNOWN: + return "UNKNOWN"; + default: + return "UNKNOWN"; + } + } + enum TaskSize { SMALL , LARGE , TASK_SIZE_UNKNOWN }; + static std::string AsString(TaskSize ts) { + switch (ts) { + case SMALL: + return "SMALL"; + case LARGE: + return "LARGE"; + case TASK_SIZE_UNKNOWN: + return "UNKNOWN"; + default: + return "UNKNOWN"; + } + } + + // Members + TaskOrigin origin; TaskType type; TaskSize size; @@ -142,9 +193,10 @@ class TaskState { bool Cancel(); // Time in nanoseconds. - uint64_t TimeInQueue() const; - uint64_t TimeInRun() const; - uint64_t TimeInThreadpool() const; + uint64_t TimeInQueue() const; // Duration + uint64_t TimeInRun() const; // Duration + uint64_t TimeInThreadpool() const; // Duration + uint64_t TimeAtCompletion() const; // Timestamp from uv_hrtime protected: // Synchronization. @@ -213,6 +265,7 @@ class TaskSummary { TaskDetails details_; uint64_t time_in_queue_; uint64_t time_in_run_; + uint64_t time_at_completion_; }; // Shim that we plug into the libuv "pluggable TP" interface. From c98c8d63a9be2b8f3406b371f4ca5dcdd0a9ac64 Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Sun, 30 Sep 2018 12:10:13 -0400 Subject: [PATCH 28/31] PTP: address changes in the libuv PR --- src/node_threadpool.cc | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc index 6631df45e36a87..8759ff24168861 100644 --- a/src/node_threadpool.cc +++ b/src/node_threadpool.cc @@ -701,7 +701,7 @@ class LibuvTask : public Task { req_->reserved[0] = nullptr; // Inform libuv. 
- libuv_executor_->GetExecutor()->done(req_); + uv_executor_return_work(req_); } void Run() { @@ -717,8 +717,6 @@ class LibuvTask : public Task { LibuvExecutor::LibuvExecutor(std::shared_ptr tp) : tp_(tp) { - executor_.init = uv_executor_init; - executor_.destroy = nullptr; executor_.submit = uv_executor_submit; executor_.cancel = uv_executor_cancel; executor_.data = this; @@ -732,11 +730,6 @@ bool LibuvExecutor::Cancel(std::shared_ptr task_state) { return task_state->Cancel(); } -void LibuvExecutor::uv_executor_init(uv_executor_t* executor) { - // Already initialized. - // TODO(davisjam): I don't think we need this API in libuv. Nor destroy. -} - void LibuvExecutor::uv_executor_submit(uv_executor_t* executor, uv_work_t* req, const uv_work_options_t* opts) { From 64ce6e1046ddf2b1fbe61179e75946706ea73779 Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Sun, 30 Sep 2018 12:00:48 -0400 Subject: [PATCH 29/31] REMOVE ME: PTP: include libuv changes so people can try prototype This commit should be reverted before merging the PR. The Node.js PR should follow a separate PR bumping the libuv version to one with my libuv PR. --- deps/uv/CMakeLists.txt | 5 +- deps/uv/CONTRIBUTING.md | 2 +- deps/uv/Makefile.am | 5 +- deps/uv/docs/src/threadpool.rst | 158 ++++++- deps/uv/include/uv.h | 57 ++- deps/uv/include/uv/threadpool.h | 3 +- deps/uv/include/uv/version.h | 6 +- deps/uv/src/executor.c | 232 ++++++++++ deps/uv/src/threadpool.c | 415 +++++++++--------- deps/uv/src/unix/fs.c | 160 ++++++- deps/uv/src/unix/getaddrinfo.c | 46 +- deps/uv/src/unix/getnameinfo.c | 41 +- deps/uv/src/unix/loop.c | 2 +- deps/uv/src/uv-common.h | 46 +- deps/uv/src/win/core.c | 2 +- deps/uv/src/win/fs.c | 165 ++++++- deps/uv/src/win/getaddrinfo.c | 43 +- deps/uv/src/win/getnameinfo.c | 37 +- deps/uv/test/runner.c | 2 +- ...adpool-cancel.c => test-executor-cancel.c} | 28 +- deps/uv/test/test-executor.c | 323 ++++++++++++++ deps/uv/test/test-fork.c | 4 +- deps/uv/test/test-list.h | 40 +- deps/uv/test/test-thread.c | 2 +- deps/uv/test/test-threadpool.c | 76 ---- deps/uv/test/test.gyp | 4 +- deps/uv/uv.gyp | 1 + 27 files changed, 1497 insertions(+), 408 deletions(-) create mode 100644 deps/uv/src/executor.c rename deps/uv/test/{test-threadpool-cancel.c => test-executor-cancel.c} (95%) create mode 100644 deps/uv/test/test-executor.c delete mode 100644 deps/uv/test/test-threadpool.c diff --git a/deps/uv/CMakeLists.txt b/deps/uv/CMakeLists.txt index 4f13efc8e6ef2b..bb75674a9f993c 100644 --- a/deps/uv/CMakeLists.txt +++ b/deps/uv/CMakeLists.txt @@ -12,6 +12,7 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") endif() set(uv_sources + src/executor.c src/fs-poll.c src/inet.c src/threadpool.c @@ -139,8 +140,8 @@ set(uv_test_sources test/test-tcp-writealot.c test/test-thread-equal.c test/test-thread.c - test/test-threadpool-cancel.c - test/test-threadpool.c + test/test-executor-cancel.c + test/test-executor.c test/test-timer-again.c test/test-timer-from-check.c test/test-timer.c diff --git a/deps/uv/CONTRIBUTING.md b/deps/uv/CONTRIBUTING.md index f22e124e3b23b9..02b9f6b109dbd1 100644 --- a/deps/uv/CONTRIBUTING.md +++ b/deps/uv/CONTRIBUTING.md @@ -142,7 +142,7 @@ Bug fixes and features should come with tests. Add your tests in the If you add a new test file, it needs to be registered in three places: - `CMakeLists.txt`: add the file's name to the `uv_test_sources` list. - `Makefile.am`: add the file's name to the `test_run_tests_SOURCES` list. -- `uv.gyp`: add the file's name to the `sources` list in the `run-tests` target. 
+- `test/test.gyp`: add the file's name to the `sources` list in the `run-tests` target.

 Look at other tests to see how they should be structured (license boilerplate,
 the way entry points are declared, etc.).
diff --git a/deps/uv/Makefile.am b/deps/uv/Makefile.am
index a217faab3cea25..299978796f834b 100644
--- a/deps/uv/Makefile.am
+++ b/deps/uv/Makefile.am
@@ -31,6 +31,7 @@ libuv_la_SOURCES = src/fs-poll.c \
                    src/heap-inl.h \
                    src/inet.c \
                    src/queue.h \
+                   src/executor.c \
                    src/threadpool.c \
                    src/timer.c \
                    src/uv-data-getter-setters.c \
@@ -265,8 +266,8 @@ test_run_tests_SOURCES = test/blackhole-server.c \
                          test/test-tcp-write-queue-order.c \
                          test/test-thread-equal.c \
                          test/test-thread.c \
-                         test/test-threadpool-cancel.c \
-                         test/test-threadpool.c \
+                         test/test-executor-cancel.c \
+                         test/test-executor.c \
                          test/test-timer-again.c \
                          test/test-timer-from-check.c \
                          test/test-timer.c \
diff --git a/deps/uv/docs/src/threadpool.rst b/deps/uv/docs/src/threadpool.rst
index 93bd236d35e9f5..2c30a281e7143a 100644
--- a/deps/uv/docs/src/threadpool.rst
+++ b/deps/uv/docs/src/threadpool.rst
@@ -1,25 +1,48 @@
 .. _threadpool:

-Thread pool work scheduling
+Threadpool
 ===========================

-libuv provides a threadpool which can be used to run user code and get notified
-in the loop thread. This thread pool is internally used to run all file system
-operations, as well as getaddrinfo and getnameinfo requests.
+libuv provides the notion of an executor for asynchronous work.
+An executor runs work off of the loop thread and delivers a notification
+to the event loop when the work is finished.

-Its default size is 4, but it can be changed at startup time by setting the
-``UV_THREADPOOL_SIZE`` environment variable to any value (the absolute maximum
-is 128).
+Users can submit work directly to the executor via
+:c:func:`uv_executor_queue_work`.

-The threadpool is global and shared across all event loops. When a particular
-function makes use of the threadpool (i.e. when using :c:func:`uv_queue_work`)
+libuv will also convert certain asynchronous requests into work for the
+executor when appropriate OS-level facilities are unavailable.
+This includes asynchronous file system operations and DNS requests
+(getaddrinfo, getnameinfo).
+All such internally-generated work is submitted to the public
+executor API.
+
+libuv offers a default executor called the threadpool.
+It maintains a pool of worker threads that consume pending work in
+a FIFO manner.
+By default there are 4 threads in this pool.
+The size can be controlled by setting the ``UV_THREADPOOL_SIZE`` environment
+variable to the desired number of threads.
+
+libuv also permits users to replace the default executor with their own
+implementation via :c:func:`uv_replace_executor` (a sketch follows below).
+Users may thus define a more sophisticated executor if desired,
+e.g. handling I/O and CPU in different pools.
+
+.. note::
+    The default executor cannot be overridden after any work is queued.
+
+The executor is global and shared across all event loops.
+
+When a function makes use of the default executor (i.e. when using :c:func:`uv_queue_work`)
 libuv preallocates and initializes the maximum number of threads allowed by
 ``UV_THREADPOOL_SIZE``. This causes a relatively minor memory overhead
 (~1MB for 128 threads) but increases the performance of threading at runtime.
+The maximum size of the default libuv executor's threadpool is 128.
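To make the replacement flow concrete, here is a minimal sketch of a custom executor. It is illustrative only: my_submit runs work inline on the submitting thread, whereas a real executor would dispatch the request to its own worker threads. Only the API described in this document (uv_executor_t, uv_replace_executor, uv_executor_return_work, uv_work_t.work_cb) is used; all other names are invented.

    /* Toy executor sketch: runs each request inline, then hands it back.
     * A real executor would queue req to worker threads instead. */
    static void my_submit(uv_executor_t* executor,
                          uv_work_t* req,
                          const uv_work_options_t* opts) {
      req->work_cb(req);             /* NB: blocks the submitter. */
      uv_executor_return_work(req);  /* after_work_cb then runs on the loop thread. */
    }

    static uv_executor_t my_executor;  /* static storage: zeroed, as required */

    int use_my_executor(void) {
      my_executor.submit = my_submit;
      my_executor.cancel = NULL;  /* uv_cancel will return UV_ENOSYS */
      return uv_replace_executor(&my_executor);  /* must precede any queued work */
    }
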
 .. note::
-    Note that even though a global thread pool which is shared across all events
+    Even though a global thread pool which is shared across all event
     loops is used, the functions are not thread safe.

@@ -32,36 +55,135 @@ Data types
 ----------

 .. c:type:: void (*uv_work_cb)(uv_work_t* req)

-    Callback passed to :c:func:`uv_queue_work` which will be run on the thread
-    pool.
+    Callback passed to :c:func:`uv_queue_work` which will be run on
+    the executor.

 .. c:type:: void (*uv_after_work_cb)(uv_work_t* req, int status)

     Callback passed to :c:func:`uv_queue_work` which will be called on the loop
-    thread after the work on the threadpool has been completed. If the work
-    was cancelled using :c:func:`uv_cancel` `status` will be ``UV_ECANCELED``.
+    thread after the work on the executor has been completed or cancelled.
+    If the work was cancelled using :c:func:`uv_cancel`,
+    then `status` will be ``UV_ECANCELED``.
+
+.. c:type:: uv_executor_t
+
+    Executor type. Use when overriding the default threadpool.
+    Zero out objects of this type before use.
+
+.. c:type:: void (*uv_executor_submit_func)(uv_executor_t* executor, uv_work_t* req, uv_work_options_t* opts)
+
+    Called when the executor should handle this request.
+
+.. c:type:: int (*uv_executor_cancel_func)(uv_executor_t* executor, uv_work_t* req)
+
+    Called when someone wants to cancel a previously submitted request.
+    Return ``UV_EBUSY`` if you cannot cancel it.
+
+.. seealso:: :c:func:`uv_cancel`.
+
+.. c:function:: int uv_replace_executor(uv_executor_t* executor)
+
+    Replace the default libuv executor with this user-defined one.
+    Must be called before any work is submitted to the default libuv executor.
+    Returns 0 on success.
+
+.. c:type:: uv_work_options_t
+
+    Options for guiding the executor in its policy decisions.

 Public members
 ^^^^^^^^^^^^^^

+.. c:member:: void* uv_work_t.executor_data
+
+    Space for arbitrary data. libuv does not use this field.
+    This is intended for use by an executor implementation.
+
 .. c:member:: uv_loop_t* uv_work_t.loop

     Loop that started this request and where completion will be reported.
     Readonly.

+.. c:member:: uv_work_cb uv_work_t.work_cb
+
+    The executor should invoke this, though not on the event loop thread.
+
 .. seealso:: The :c:type:`uv_req_t` members also apply.

+.. c:member:: uv_executor_submit_func uv_executor_t.submit
+
+    Must be non-NULL.
+
+.. c:member:: uv_executor_cancel_func uv_executor_t.cancel
+
+    Can be NULL.
+    If NULL, calls to :c:func:`uv_cancel` will return ``UV_ENOSYS``.
+
+.. c:member:: void * uv_executor_t.data
+
+    Space for user-defined arbitrary data. libuv does not use this field.
+
+.. c:member:: uv_work_type uv_work_options_t.type
+
+    Type of request. Readonly.
+
+    ::
+
+        typedef enum {
+          UV_WORK_UNKNOWN = 0,
+          UV_WORK_FS,
+          UV_WORK_DNS,
+          UV_WORK_USER_IO,
+          UV_WORK_USER_CPU,
+          UV_WORK_PRIVATE,
+          UV_WORK_MAX = 255
+        } uv_work_type;
+
+.. c:member:: int uv_work_options_t.priority
+
+    Suggested for use by user-defined executors.
+    Has no effect on libuv's default executor.
+
+.. c:member:: int uv_work_options_t.cancelable
+
+    Boolean.
+    If non-zero, it is safe to abort this work while it is being handled
+    by a thread (e.g. by pthread_cancel'ing the thread on which it is running).
+    In addition, work that has not yet been assigned to a thread can be
+    cancelled.
+
+.. c:member:: void * uv_work_options_t.data
+
+    Space for user-defined arbitrary data. libuv does not use this field.
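As a usage sketch (not part of the reference text): queueing cancelable, CPU-bound user work with options might look like this. The callbacks and names are invented for illustration; the entry points themselves are documented in the API section that follows.

    /* Sketch: submit CPU-bound, cancelable work with options. */
    static void crunch(uv_work_t* req) {
      /* Runs off the loop thread. */
    }

    static void after_crunch(uv_work_t* req, int status) {
      /* Runs on the loop thread; status is UV_ECANCELED if cancelled. */
    }

    static uv_work_t req;

    int queue_crunch(uv_loop_t* loop) {
      uv_work_options_t opts;
      opts.type = UV_WORK_USER_CPU;
      opts.priority = 0;     /* advisory; ignored by the default executor */
      opts.cancelable = 1;
      opts.data = NULL;
      /* opts may live on the stack: it need not outlive this call. */
      return uv_executor_queue_work(loop, &req, &opts, crunch, after_crunch);
    }

API
---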
.. c:function:: int uv_queue_work(uv_loop_t* loop, uv_work_t* req, uv_work_cb work_cb, uv_after_work_cb after_work_cb)

-    Initializes a work request which will run the given `work_cb` in a thread
-    from the threadpool. Once `work_cb` is completed, `after_work_cb` will be
-    called on the loop thread.
+    Calls :c:func:`uv_executor_queue_work` with default options
+    (type ``UV_WORK_UNKNOWN``).
+
+.. c:function:: int uv_executor_queue_work(uv_loop_t* loop, uv_work_t* req, uv_work_options_t* opts, uv_work_cb work_cb, uv_after_work_cb after_work_cb)
+
+    Submits a work request with options to the executor.
+    The executor will run the given `work_cb` off of the loop thread.
+    Once `work_cb` is completed, `after_work_cb` will be
+    called on `loop`'s thread.
+
+    The `opts` guide the executor in its policy decisions.
+
+    `req` must remain valid until `after_work_cb` is executed.
+    `opts` need not remain valid once `uv_executor_queue_work` returns.

     This request can be cancelled with :c:func:`uv_cancel`.

-.. seealso:: The :c:type:`uv_req_t` API functions also apply.
+.. c:function:: void uv_executor_return_work(uv_work_t* req)
+
+    An executor should invoke this function once it finishes with a request.
+    The effect is to return control over the `req` to libuv.
+
+    This function is thread safe. <-- TODO This seems desirable so the executor workers don't have to centralize returns through the event loop, but thread safety requires locking loop->wq_mutex. I'm having trouble imagining how this could lead to deadlock in a "reasonable" executor implementation, but wanted to discuss.
+
+.. seealso:: The :c:type:`uv_req_t` API functions also apply
+   to a :c:type:`uv_work_t`.
diff --git a/deps/uv/include/uv.h b/deps/uv/include/uv.h
index 717c2e570b9eb9..ac6813c1df7ca0 100644
--- a/deps/uv/include/uv.h
+++ b/deps/uv/include/uv.h
@@ -198,6 +198,16 @@ typedef enum {
   UV_REQ_TYPE_MAX
 } uv_req_type;

+/* TODO Use a UV_WORK_TYPE_MAP? Not sure it's worthwhile. */
+typedef enum {
+  UV_WORK_UNKNOWN = 0,
+  UV_WORK_FS,
+  UV_WORK_DNS,
+  UV_WORK_USER_IO,
+  UV_WORK_USER_CPU,
+  UV_WORK_PRIVATE,
+  UV_WORK_MAX = 255
+} uv_work_type;

 /* Handle types. */
 typedef struct uv_loop_s uv_loop_t;
@@ -229,6 +239,10 @@ typedef struct uv_udp_send_s uv_udp_send_t;
 typedef struct uv_fs_s uv_fs_t;
 typedef struct uv_work_s uv_work_t;

+/* Executor. */
+typedef struct uv_work_options_s uv_work_options_t;
+typedef struct uv_executor_s uv_executor_t;
+
 /* None of the above. */
 typedef struct uv_cpu_info_s uv_cpu_info_t;
 typedef struct uv_interface_address_s uv_interface_address_t;
@@ -321,6 +335,11 @@ typedef void (*uv_getnameinfo_cb)(uv_getnameinfo_t* req,
                                   int status,
                                   const char* hostname,
                                   const char* service);
+typedef void (*uv_executor_submit_func)(uv_executor_t* executor,
+                                        uv_work_t* req,
+                                        const uv_work_options_t* opts);
+typedef int (*uv_executor_cancel_func)(uv_executor_t* executor,
+                                       uv_work_t* req);

 typedef struct {
   long tv_sec;
@@ -381,8 +400,8 @@ UV_EXTERN char* uv_err_name_r(int err, char* buf, size_t buflen);
   void* data; \
   /* read-only */ \
   uv_req_type type; \
-  /* private */ \
-  void* reserved[6]; \
+  void* executor_data; \
+  void* reserved[5]; \
   UV_REQ_PRIVATE_FIELDS \

 /* Abstract base class of all requests. */
@@ -999,13 +1018,45 @@ struct uv_work_s {
   UV_WORK_PRIVATE_FIELDS
 };

+/*
+ * Executor.
+ */
+
+struct uv_work_options_s {
+  /* public */
+  uv_work_type type;
+  int priority;
+  int cancelable;
+  void* data;
+};
+
+struct uv_executor_s {
+  /* public */
+  /* Defined by embedder.
*/ + uv_executor_submit_func submit; + uv_executor_cancel_func cancel; + void* data; + + void* reserved[4]; +}; + +UV_EXTERN int uv_replace_executor(uv_executor_t* executor); + +/* Deprecated. */ UV_EXTERN int uv_queue_work(uv_loop_t* loop, uv_work_t* req, uv_work_cb work_cb, uv_after_work_cb after_work_cb); +UV_EXTERN int uv_executor_queue_work(uv_loop_t* loop, + uv_work_t* req, + uv_work_options_t* opts, + uv_work_cb work_cb, + uv_after_work_cb after_work_cb); + UV_EXTERN int uv_cancel(uv_req_t* req); +UV_EXTERN void uv_executor_return_work(uv_work_t* req); struct uv_cpu_times_s { uint64_t user; @@ -1603,6 +1654,8 @@ UV_EXTERN void uv_loop_set_data(uv_loop_t*, void* data); #undef UV_SIGNAL_PRIVATE_FIELDS #undef UV_LOOP_PRIVATE_FIELDS #undef UV_LOOP_PRIVATE_PLATFORM_FIELDS +#undef UV_WORK_OPTIONS_PRIVATE_FIELDS +#undef UV_EXECUTOR_PRIVATE_FIELDS #undef UV__ERR #ifdef __cplusplus diff --git a/deps/uv/include/uv/threadpool.h b/deps/uv/include/uv/threadpool.h index 9708ebdd5301a9..81bbd97da690b4 100644 --- a/deps/uv/include/uv/threadpool.h +++ b/deps/uv/include/uv/threadpool.h @@ -31,7 +31,8 @@ struct uv__work { void (*work)(struct uv__work *w); void (*done)(struct uv__work *w, int status); struct uv_loop_s* loop; - void* wq[2]; + void* wq[2]; /* This is used by the executor API + * to queue completed work on the event loop. */ }; #endif /* UV_THREADPOOL_H_ */ diff --git a/deps/uv/include/uv/version.h b/deps/uv/include/uv/version.h index 30e1d5a6f92bf8..805cd3ba89e4fc 100644 --- a/deps/uv/include/uv/version.h +++ b/deps/uv/include/uv/version.h @@ -32,9 +32,9 @@ #define UV_VERSION_MAJOR 1 #define UV_VERSION_MINOR 23 -#define UV_VERSION_PATCH 0 -#define UV_VERSION_IS_RELEASE 1 -#define UV_VERSION_SUFFIX "" +#define UV_VERSION_PATCH 1 +#define UV_VERSION_IS_RELEASE 0 +#define UV_VERSION_SUFFIX "dev" #define UV_VERSION_HEX ((UV_VERSION_MAJOR << 16) | \ (UV_VERSION_MINOR << 8) | \ diff --git a/deps/uv/src/executor.c b/deps/uv/src/executor.c new file mode 100644 index 00000000000000..6873c8252978d6 --- /dev/null +++ b/deps/uv/src/executor.c @@ -0,0 +1,232 @@ +/* Copyright libuv project contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "uv-common.h" + +#if !defined(_WIN32) +# include "unix/internal.h" +#endif + +#include /* abort() */ + +#include /* Debug */ + +static uv_executor_t* executor = NULL; + +static uv_once_t once = UV_ONCE_INIT; +static volatile int initialized = 0; /* Protected by once in + uv_executor_queue_work, but not in uv_replace_executor. */ + +uv_executor_t* uv__executor(void) { + return executor; +} + +/* Executor has finished this request. */ +void uv_executor_return_work(uv_work_t* req) { + uv_mutex_lock(&req->loop->wq_mutex); + + /* Place in the associated loop's queue. + * NB We are re-purposing req->work_req.wq here. + * This field is also used by the default executor, but + * after this call the executor will no longer touch it. */ + QUEUE_INSERT_TAIL(&req->loop->wq, &req->work_req.wq); + + /* Signal to loop that there's a pending task. */ + uv_async_send(&req->loop->wq_async); + + uv_mutex_unlock(&req->loop->wq_mutex); +} + +/* CB: Event loop is handling completed requests. */ +void uv__executor_work_done(uv_async_t* handle) { + struct uv__work* w; + uv_work_t* req; + uv_loop_t* loop; + QUEUE* q; + QUEUE wq; + int err; + + /* Grab this loop's completed work. */ + loop = container_of(handle, uv_loop_t, wq_async); + uv_mutex_lock(&loop->wq_mutex); + QUEUE_MOVE(&loop->wq, &wq); + uv_mutex_unlock(&loop->wq_mutex); + + /* Handle each uv__work on wq. */ + while (!QUEUE_EMPTY(&wq)) { + q = QUEUE_HEAD(&wq); + QUEUE_REMOVE(q); + + w = container_of(q, struct uv__work, wq); + req = container_of(w, uv_work_t, work_req); + err = (req->work_cb == uv__executor_work_cancelled) ? UV_ECANCELED : 0; + + uv__req_unregister(req->loop, req); + + if (req->after_work_cb != NULL) + req->after_work_cb(req, err); + } +} + +int uv_replace_executor(uv_executor_t* _executor) { + /* Reject if no longer safe to replace. */ + if (initialized) + return UV_EINVAL; + + /* Check validity of _executor. */ + if (_executor == NULL) + return UV_EINVAL; + if (_executor->submit == NULL) + return UV_EINVAL; + + /* Replace our executor. */ + executor = _executor; + + return 0; +} + +static void uv__executor_init(void) { + int rc; + + /* Assign executor to default if none was set. */ + if (executor == NULL) { + rc = uv_replace_executor(uv__default_executor()); + assert(!rc); + } + + /* Once initialized, it is no longer safe to replace. */ + initialized = 1; +} + +int uv_executor_queue_work(uv_loop_t* loop, + uv_work_t* req, + uv_work_options_t* opts, + uv_work_cb work_cb, + uv_after_work_cb after_work_cb) { + char work_type[32]; + /* Initialize the executor once. */ + uv_once(&once, uv__executor_init); + + /* Check validity. */ + if (loop == NULL || req == NULL || work_cb == NULL) + return UV_EINVAL; + + /* Register req on loop. */ + LOG_1("uv_executor_queue_work: req %p\n", (void*) req); + uv__req_init(loop, req, UV_WORK); + req->loop = loop; + req->work_cb = work_cb; + req->after_work_cb = after_work_cb; + + /* TODO Just some logging. 
*/ + if (opts) { + switch(opts->type) { + case UV_WORK_UNKNOWN: + sprintf(work_type, "%s", "UV_WORK_UNKNOWN"); + break; + case UV_WORK_FS: + sprintf(work_type, "%s", "UV_WORK_FS"); + break; + case UV_WORK_DNS: + sprintf(work_type, "%s", "UV_WORK_DNS"); + break; + case UV_WORK_USER_IO: + sprintf(work_type, "%s", "UV_WORK_USER_IO"); + break; + case UV_WORK_USER_CPU: + sprintf(work_type, "%s", "UV_WORK_USER_CPU"); + break; + case UV_WORK_PRIVATE: + sprintf(work_type, "%s", "UV_WORK_PRIVATE"); + break; + default: + sprintf(work_type, "%s", "UNKNOWN"); + break; + } + LOG_2("uv_executor_queue_work: type %d: %s\n", opts->type, work_type); + } + else + LOG_0("uv_executor_queue_work: no options provided\n"); + + /* Submit to the executor. */ + executor->submit(executor, req, opts); + + return 0; +} + +int uv_queue_work(uv_loop_t* loop, + uv_work_t* req, + uv_work_cb work_cb, + uv_after_work_cb after_work_cb) { + uv_work_options_t options; + options.type = UV_WORK_UNKNOWN; + options.priority = -1; + options.cancelable = 0; + options.data = NULL; + return uv_executor_queue_work(loop, req, &options, work_cb, after_work_cb); +} + +static int uv__cancel_ask_executor(uv_work_t* work) { + int r; + + r = UV_ENOSYS; + LOG_0("Trying to call cancel\n"); + if (executor->cancel != NULL) { + LOG_0("Calling cancel!\n"); + r = executor->cancel(executor, work); + if (r == 0) + work->work_cb = uv__executor_work_cancelled; + } + + return r; +} + +int uv_cancel(uv_req_t* req) { + uv_work_t* work; + int r; + + LOG_1("uv_cancel: req %p\n", (void*) req); + + r = UV_EINVAL; + switch (req->type) { + case UV_FS: + case UV_GETADDRINFO: + case UV_GETNAMEINFO: + /* These internal users prepare and submit requests to the executor. */ + work = req->executor_data; + r = uv__cancel_ask_executor(work); + break; + case UV_WORK: + /* This is a direct request to the executor. */ + work = (uv_work_t*) req; + r = uv__cancel_ask_executor(work); + break; + default: + return UV_EINVAL; + } + + return r; +} + +/* This is just a magic, it should never be called. */ +void uv__executor_work_cancelled(uv_work_t* work) { + abort(); +} diff --git a/deps/uv/src/threadpool.c b/deps/uv/src/threadpool.c index 413d1c204c2660..0769597dc757ed 100644 --- a/deps/uv/src/threadpool.c +++ b/deps/uv/src/threadpool.c @@ -29,290 +29,281 @@ #define MAX_THREADPOOL_SIZE 128 -static uv_once_t once = UV_ONCE_INIT; -static uv_cond_t cond; -static uv_mutex_t mutex; -static unsigned int idle_threads; -static unsigned int nthreads; -static uv_thread_t* threads; -static uv_thread_t default_threads[4]; -static QUEUE exit_message; -static QUEUE wq; - - -static void uv__cancelled(struct uv__work* w) { - abort(); +/* executor */ +uv_once_t init_default_executor_once = UV_ONCE_INIT; +static uv_executor_t default_executor; + +/* default_executor.data */ +uv_once_t start_workers_once = UV_ONCE_INIT; +static struct default_executor_fields { + uv_once_t init; + uv_cond_t cond; + uv_mutex_t mutex; + unsigned int idle_workers; + unsigned int nworkers; + uv_thread_t* workers; + uv_thread_t default_workers[4]; + QUEUE exit_message; + QUEUE wq; + int used; +} _fields; + +/* For worker initialization. */ +static struct worker_arg { + uv_executor_t* executor; + uv_sem_t* ready; +} worker_arg; + +/* Helpers for the default executor implementation. */ + +/* Post item q to the TP queue. + * Caller must hold fields->lock. 
*/ +static void post(struct default_executor_fields* fields, QUEUE* q) { + QUEUE_INSERT_TAIL(&fields->wq, q); + if (0 < fields->idle_workers) + uv_cond_signal(&fields->cond); } - -/* To avoid deadlock with uv_cancel() it's crucial that the worker - * never holds the global mutex and the loop-local mutex at the same time. - */ +/* This is the entry point for each worker in the threadpool. + * arg is a worker_arg*. */ static void worker(void* arg) { + struct worker_arg* warg; + uv_executor_t* executor; struct uv__work* w; + uv_work_t* req; QUEUE* q; + struct default_executor_fields* fields; + + /* Extract fields from warg. */ + warg = arg; + executor = warg->executor; + assert(executor != NULL); + fields = executor->data; + assert(fields != NULL); - uv_sem_post((uv_sem_t*) arg); + /* Signal we're ready. */ + uv_sem_post(warg->ready); arg = NULL; + warg = NULL; for (;;) { - uv_mutex_lock(&mutex); + /* Get the next work. */ + uv_mutex_lock(&fields->mutex); - while (QUEUE_EMPTY(&wq)) { - idle_threads += 1; - uv_cond_wait(&cond, &mutex); - idle_threads -= 1; + while (QUEUE_EMPTY(&fields->wq)) { + fields->idle_workers += 1; + uv_cond_wait(&fields->cond, &fields->mutex); + fields->idle_workers -= 1; } - q = QUEUE_HEAD(&wq); + q = QUEUE_HEAD(&fields->wq); - if (q == &exit_message) - uv_cond_signal(&cond); + if (q == &fields->exit_message) { + /* Wake up another thread. */ + uv_cond_signal(&fields->cond); + } else { QUEUE_REMOVE(q); QUEUE_INIT(q); /* Signal uv_cancel() that the work req is executing. */ } - uv_mutex_unlock(&mutex); + uv_mutex_unlock(&fields->mutex); - if (q == &exit_message) + /* Are we done? */ + if (q == &fields->exit_message) break; w = QUEUE_DATA(q, struct uv__work, wq); - w->work(w); - - uv_mutex_lock(&w->loop->wq_mutex); - w->work = NULL; /* Signal uv_cancel() that the work req is done - executing. */ - QUEUE_INSERT_TAIL(&w->loop->wq, &w->wq); - uv_async_send(&w->loop->wq_async); - uv_mutex_unlock(&w->loop->wq_mutex); - } -} - - -static void post(QUEUE* q) { - uv_mutex_lock(&mutex); - QUEUE_INSERT_TAIL(&wq, q); - if (idle_threads > 0) - uv_cond_signal(&cond); - uv_mutex_unlock(&mutex); -} - - -#ifndef _WIN32 -UV_DESTRUCTOR(static void cleanup(void)) { - unsigned int i; - - if (nthreads == 0) - return; - - post(&exit_message); + req = container_of(w, uv_work_t, work_req); - for (i = 0; i < nthreads; i++) - if (uv_thread_join(threads + i)) - abort(); + /* Do the work. */ + LOG_1("Worker: running work_cb for req %p\n", req); + req->work_cb(req); + LOG_1("Worker: Done with req %p\n", req); - if (threads != default_threads) - uv__free(threads); + /* Signal uv_cancel() that the work req is done executing. */ + uv_mutex_lock(&fields->mutex); + w->work = NULL; + uv_mutex_unlock(&fields->mutex); - uv_mutex_destroy(&mutex); - uv_cond_destroy(&cond); - - threads = NULL; - nthreads = 0; + /* Tell event loop we finished with this request. */ + uv_executor_return_work(req); + } } -#endif - -static void init_threads(void) { +/* (Initialize _fields and) start the workers. */ +static void start_workers(void) { unsigned int i; const char* val; uv_sem_t sem; + unsigned int n_default_workers; - nthreads = ARRAY_SIZE(default_threads); + /* Initialize various fields members. */ + _fields.used = 1; + + /* How many workers? 
*/ + n_default_workers = ARRAY_SIZE(_fields.default_workers); + _fields.nworkers = n_default_workers; val = getenv("UV_THREADPOOL_SIZE"); if (val != NULL) - nthreads = atoi(val); - if (nthreads == 0) - nthreads = 1; - if (nthreads > MAX_THREADPOOL_SIZE) - nthreads = MAX_THREADPOOL_SIZE; - - threads = default_threads; - if (nthreads > ARRAY_SIZE(default_threads)) { - threads = uv__malloc(nthreads * sizeof(threads[0])); - if (threads == NULL) { - nthreads = ARRAY_SIZE(default_threads); - threads = default_threads; + _fields.nworkers = atoi(val); + if (_fields.nworkers == 0) + _fields.nworkers = 1; + if (_fields.nworkers > MAX_THREADPOOL_SIZE) + _fields.nworkers = MAX_THREADPOOL_SIZE; + + /* Try to use the statically declared workers instead of malloc. */ + _fields.workers = _fields.default_workers; + if (_fields.nworkers > n_default_workers) { + _fields.workers = uv__malloc(_fields.nworkers * sizeof(_fields.workers[0])); + if (_fields.workers == NULL) { + _fields.nworkers = n_default_workers; + _fields.workers = _fields.default_workers; } } - if (uv_cond_init(&cond)) + if (uv_cond_init(&_fields.cond)) abort(); - if (uv_mutex_init(&mutex)) + if (uv_mutex_init(&_fields.mutex)) abort(); - QUEUE_INIT(&wq); + QUEUE_INIT(&_fields.wq); if (uv_sem_init(&sem, 0)) abort(); - for (i = 0; i < nthreads; i++) - if (uv_thread_create(threads + i, worker, &sem)) + /* Start the workers. */ + worker_arg.executor = &default_executor; + worker_arg.ready = &sem; + for (i = 0; i < _fields.nworkers; i++) + if (uv_thread_create(_fields.workers + i, worker, &worker_arg)) abort(); - for (i = 0; i < nthreads; i++) + /* Wait for workers to start. */ + for (i = 0; i < _fields.nworkers; i++) uv_sem_wait(&sem); - uv_sem_destroy(&sem); } - -#ifndef _WIN32 -static void reset_once(void) { - uv_once_t child_once = UV_ONCE_INIT; - memcpy(&once, &child_once, sizeof(child_once)); -} -#endif - - -static void init_once(void) { #ifndef _WIN32 - /* Re-initialize the threadpool after fork. - * Note that this discards the global mutex and condition as well - * as the work queue. - */ - if (pthread_atfork(NULL, NULL, &reset_once)) - abort(); -#endif - init_threads(); -} - - -void uv__work_submit(uv_loop_t* loop, - struct uv__work* w, - void (*work)(struct uv__work* w), - void (*done)(struct uv__work* w, int status)) { - uv_once(&once, init_once); - w->loop = loop; - w->work = work; - w->done = done; - post(&w->wq); -} - +/* cleanup of the default_executor if necessary. 
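+ *
+ * Shutdown sketch: post &_fields.exit_message to the work queue; the worker
+ * that dequeues it leaves it in place and re-signals the condvar (see
+ * worker() above), so every worker eventually sees it and exits, after
+ * which the workers are joined and, if heap-allocated, freed.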
*/ +UV_DESTRUCTOR(static void cleanup(void)) { + unsigned int i; -static int uv__work_cancel(uv_loop_t* loop, uv_req_t* req, struct uv__work* w) { - int cancelled; + if (!_fields.used) + return; - uv_mutex_lock(&mutex); - uv_mutex_lock(&w->loop->wq_mutex); + if (_fields.nworkers == 0) + return; - cancelled = !QUEUE_EMPTY(&w->wq) && w->work != NULL; - if (cancelled) - QUEUE_REMOVE(&w->wq); + uv_mutex_lock(&_fields.mutex); + post(&_fields, &_fields.exit_message); + uv_mutex_unlock(&_fields.mutex); - uv_mutex_unlock(&w->loop->wq_mutex); - uv_mutex_unlock(&mutex); + for (i = 0; i < _fields.nworkers; i++) + if (uv_thread_join(_fields.workers + i)) + abort(); - if (!cancelled) - return UV_EBUSY; + if (_fields.workers != _fields.default_workers) + uv__free(_fields.workers); - w->work = uv__cancelled; - uv_mutex_lock(&loop->wq_mutex); - QUEUE_INSERT_TAIL(&loop->wq, &w->wq); - uv_async_send(&loop->wq_async); - uv_mutex_unlock(&loop->wq_mutex); + uv_mutex_destroy(&_fields.mutex); + uv_cond_destroy(&_fields.cond); - return 0; + _fields.workers = NULL; + _fields.nworkers = 0; } +#endif +/****************************** + * Default libuv threadpool, implemented using the executor API. +*******************************/ -void uv__work_done(uv_async_t* handle) { - struct uv__work* w; - uv_loop_t* loop; - QUEUE* q; - QUEUE wq; - int err; +static void uv__default_executor_submit(uv_executor_t* executor, + uv_work_t* req, + const uv_work_options_t* opts) { + struct default_executor_fields* fields; + struct uv__work* wreq; - loop = container_of(handle, uv_loop_t, wq_async); - uv_mutex_lock(&loop->wq_mutex); - QUEUE_MOVE(&loop->wq, &wq); - uv_mutex_unlock(&loop->wq_mutex); + assert(executor == &default_executor); + /* Make sure we are initialized internally. */ + uv_once(&start_workers_once, start_workers); - while (!QUEUE_EMPTY(&wq)) { - q = QUEUE_HEAD(&wq); - QUEUE_REMOVE(q); + fields = executor->data; + assert(fields != NULL); - w = container_of(q, struct uv__work, wq); - err = (w->work == uv__cancelled) ? UV_ECANCELED : 0; - w->done(w, err); - } -} + /* Put executor-specific data into req->executor_data. */ + wreq = &req->work_req; + req->executor_data = wreq; + /* TODO Don't do this. */ + wreq->work = 0xdeadbeef; /* Non-NULL: "Not yet completed". */ + uv_mutex_lock(&fields->mutex); -static void uv__queue_work(struct uv__work* w) { - uv_work_t* req = container_of(w, uv_work_t, work_req); + /* Add to our queue. */ + post(fields, &wreq->wq); - req->work_cb(req); + uv_mutex_unlock(&fields->mutex); } - -static void uv__queue_done(struct uv__work* w, int err) { - uv_work_t* req; - - req = container_of(w, uv_work_t, work_req); - uv__req_unregister(req->loop, req); - - if (req->after_work_cb == NULL) - return; - - req->after_work_cb(req, err); +static int uv__default_executor_cancel(uv_executor_t* executor, uv_work_t* req) { + struct default_executor_fields* fields; + struct uv__work* wreq; + int assigned; + int already_completed; + int still_on_queue; + int can_cancel; + + assert(executor == &default_executor); + /* Make sure we are initialized internally. */ + uv_once(&start_workers_once, start_workers); + + fields = executor->data; + assert(fields != NULL); + wreq = req->executor_data; + assert(wreq != NULL); + + uv_mutex_lock(&fields->mutex); + + /* Check if we can cancel it. Determine what state req is in. 
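+   * Exactly one of three states holds: still on the work queue (wreq->wq
+   * non-empty, so cancelable), already assigned to a worker (the worker
+   * QUEUE_INIT'd wreq->wq when dequeuing it), or already completed (the
+   * worker set wreq->work to NULL when it finished).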
*/
+  assigned = QUEUE_EMPTY(&wreq->wq);
+  already_completed = (wreq->work == NULL);
+  still_on_queue = !assigned && !already_completed;
+
+  LOG_3("assigned %d already_completed %d still_on_queue %d\n",
+        assigned, already_completed, still_on_queue);
+
+  can_cancel = still_on_queue;
+  if (can_cancel)
+    QUEUE_REMOVE(&wreq->wq);
+
+  uv_mutex_unlock(&fields->mutex);
+
+  LOG_1("uv__default_executor_cancel: can_cancel %d\n", can_cancel);
+  if (can_cancel) {
+    /* We are now done with req. Notify libuv.
+     * The cancellation is not yet complete, but that's OK because
+     * this API must be called by the event loop (single-threaded). */
+    uv_executor_return_work(req);
+    return 0;
+  } else {
+    /* Failed to cancel.
+     * Work is either already done or is still to be executed.
+     * Either way we need not call done here. */
+    return UV_EBUSY;
+  }
 }
-
-int uv_queue_work(uv_loop_t* loop,
-                  uv_work_t* req,
-                  uv_work_cb work_cb,
-                  uv_after_work_cb after_work_cb) {
-  if (work_cb == NULL)
-    return UV_EINVAL;
-
-  uv__req_init(loop, req, UV_WORK);
-  req->loop = loop;
-  req->work_cb = work_cb;
-  req->after_work_cb = after_work_cb;
-  uv__work_submit(loop, &req->work_req, uv__queue_work, uv__queue_done);
-  return 0;
+void uv__default_executor_init(void) {
+  /* TODO Behavior on fork? */
+  bzero(&default_executor, sizeof(default_executor));
+  default_executor.data = &_fields;
+  default_executor.submit = uv__default_executor_submit;
+  default_executor.cancel = uv__default_executor_cancel;
 }
-
-int uv_cancel(uv_req_t* req) {
-  struct uv__work* wreq;
-  uv_loop_t* loop;
-
-  switch (req->type) {
-  case UV_FS:
-    loop = ((uv_fs_t*) req)->loop;
-    wreq = &((uv_fs_t*) req)->work_req;
-    break;
-  case UV_GETADDRINFO:
-    loop = ((uv_getaddrinfo_t*) req)->loop;
-    wreq = &((uv_getaddrinfo_t*) req)->work_req;
-    break;
-  case UV_GETNAMEINFO:
-    loop = ((uv_getnameinfo_t*) req)->loop;
-    wreq = &((uv_getnameinfo_t*) req)->work_req;
-    break;
-  case UV_WORK:
-    loop = ((uv_work_t*) req)->loop;
-    wreq = &((uv_work_t*) req)->work_req;
-    break;
-  default:
-    return UV_EINVAL;
-  }
-
-  return uv__work_cancel(loop, req, wreq);
+uv_executor_t* uv__default_executor(void) {
+  uv_once(&init_default_executor_once, uv__default_executor_init);
+  return &default_executor;
 }
diff --git a/deps/uv/src/unix/fs.c b/deps/uv/src/unix/fs.c
index 652cdfd734ac5b..f47a58907ab81f 100644
--- a/deps/uv/src/unix/fs.c
+++ b/deps/uv/src/unix/fs.c
@@ -71,6 +71,13 @@
   do { \
     if (req == NULL) \
       return UV_EINVAL; \
+    work = NULL; \
+    if (cb != NULL) { \
+      work = uv__malloc(sizeof(*work)); \
+      if (work == NULL) \
+        return UV_ENOMEM; \
+      work->data = req; \
+    } \
     UV_REQ_INIT(req, UV_FS); \
     req->fs_type = UV_FS_ ## subtype; \
     req->result = 0; \
@@ -90,8 +97,11 @@
       req->path = path; \
     } else { \
       req->path = uv__strdup(path); \
-      if (req->path == NULL) \
+      if (req->path == NULL) { \
+        if (work != NULL) \
+          uv__free(work); \
         return UV_ENOMEM; \
+      } \
     } \
   } \
   while (0)
@@ -107,8 +117,11 @@
     path_len = strlen(path) + 1; \
     new_path_len = strlen(new_path) + 1; \
     req->path = uv__malloc(path_len + new_path_len); \
-    if (req->path == NULL) \
+    if (req->path == NULL) { \
+      if (work != NULL) \
+        uv__free(work); \
       return UV_ENOMEM; \
+    } \
     req->new_path = req->path + path_len; \
     memcpy((void*) req->path, path, path_len); \
     memcpy((void*) req->new_path, new_path, new_path_len); \
@@ -119,8 +132,18 @@
 #define POST \
   do { \
     if (cb != NULL) { \
-      uv__req_register(loop, req); \
-      uv__work_submit(loop, &req->work_req, uv__fs_work, uv__fs_done); \
+      LOG_2("fs POST: req %p work %p\n", (void *) req, (void *)
work); \ + uv__req_init(loop, req, UV_FS); \ + req->executor_data = work; /* For uv_cancel. */ \ + options.type = UV_WORK_FS; \ + options.priority = -1; \ + options.cancelable = 0; \ + options.data = NULL; \ + uv_executor_queue_work(loop, \ + work, \ + &options, \ + uv__fs_executor_work, \ + uv__fs_executor_done); \ return 0; \ } \ else { \ @@ -1095,7 +1118,7 @@ static ssize_t uv__fs_buf_iter(uv_fs_t* req, uv__fs_buf_iter_processor process) return total; } - +/* TODO uv__fs_work and done APIs should take req directly. Windows too. */ static void uv__fs_work(struct uv__work* w) { int retry_on_eintr; uv_fs_t* req; @@ -1160,7 +1183,6 @@ static void uv__fs_work(struct uv__work* w) { } } - static void uv__fs_done(struct uv__work* w, int status) { uv_fs_t* req; @@ -1175,12 +1197,23 @@ static void uv__fs_done(struct uv__work* w, int status) { req->cb(req); } +static void uv__fs_executor_work(uv_work_t* req) { + uv__fs_work(&((uv_fs_t*) req->data)->work_req); +} + +static void uv__fs_executor_done(uv_work_t* req, int status) { + uv__fs_done(&((uv_fs_t*) req->data)->work_req, status); + uv__free(req); +} int uv_fs_access(uv_loop_t* loop, uv_fs_t* req, const char* path, int flags, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(ACCESS); PATH; req->flags = flags; @@ -1193,6 +1226,9 @@ int uv_fs_chmod(uv_loop_t* loop, const char* path, int mode, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(CHMOD); PATH; req->mode = mode; @@ -1206,6 +1242,9 @@ int uv_fs_chown(uv_loop_t* loop, uv_uid_t uid, uv_gid_t gid, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(CHOWN); PATH; req->uid = uid; @@ -1215,6 +1254,9 @@ int uv_fs_chown(uv_loop_t* loop, int uv_fs_close(uv_loop_t* loop, uv_fs_t* req, uv_file file, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(CLOSE); req->file = file; POST; @@ -1226,6 +1268,9 @@ int uv_fs_fchmod(uv_loop_t* loop, uv_file file, int mode, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(FCHMOD); req->file = file; req->mode = mode; @@ -1239,6 +1284,9 @@ int uv_fs_fchown(uv_loop_t* loop, uv_uid_t uid, uv_gid_t gid, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(FCHOWN); req->file = file; req->uid = uid; @@ -1253,6 +1301,9 @@ int uv_fs_lchown(uv_loop_t* loop, uv_uid_t uid, uv_gid_t gid, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(LCHOWN); PATH; req->uid = uid; @@ -1262,6 +1313,9 @@ int uv_fs_lchown(uv_loop_t* loop, int uv_fs_fdatasync(uv_loop_t* loop, uv_fs_t* req, uv_file file, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(FDATASYNC); req->file = file; POST; @@ -1269,6 +1323,9 @@ int uv_fs_fdatasync(uv_loop_t* loop, uv_fs_t* req, uv_file file, uv_fs_cb cb) { int uv_fs_fstat(uv_loop_t* loop, uv_fs_t* req, uv_file file, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(FSTAT); req->file = file; POST; @@ -1276,6 +1333,9 @@ int uv_fs_fstat(uv_loop_t* loop, uv_fs_t* req, uv_file file, uv_fs_cb cb) { int uv_fs_fsync(uv_loop_t* loop, uv_fs_t* req, uv_file file, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(FSYNC); req->file = file; POST; @@ -1287,6 +1347,9 @@ int uv_fs_ftruncate(uv_loop_t* loop, uv_file file, int64_t off, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(FTRUNCATE); req->file = file; req->off = off; @@ -1300,6 +1363,9 @@ int uv_fs_futime(uv_loop_t* loop, double atime, double mtime, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t 
options; + INIT(FUTIME); req->file = file; req->atime = atime; @@ -1309,6 +1375,9 @@ int uv_fs_futime(uv_loop_t* loop, int uv_fs_lstat(uv_loop_t* loop, uv_fs_t* req, const char* path, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(LSTAT); PATH; POST; @@ -1320,6 +1389,9 @@ int uv_fs_link(uv_loop_t* loop, const char* path, const char* new_path, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(LINK); PATH2; POST; @@ -1331,6 +1403,9 @@ int uv_fs_mkdir(uv_loop_t* loop, const char* path, int mode, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(MKDIR); PATH; req->mode = mode; @@ -1342,10 +1417,16 @@ int uv_fs_mkdtemp(uv_loop_t* loop, uv_fs_t* req, const char* tpl, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(MKDTEMP); req->path = uv__strdup(tpl); - if (req->path == NULL) + if (req->path == NULL) { + if (work != NULL) + uv__free(work); return UV_ENOMEM; + } POST; } @@ -1356,6 +1437,9 @@ int uv_fs_open(uv_loop_t* loop, int flags, int mode, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(OPEN); PATH; req->flags = flags; @@ -1370,10 +1454,16 @@ int uv_fs_read(uv_loop_t* loop, uv_fs_t* req, unsigned int nbufs, int64_t off, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(READ); - if (bufs == NULL || nbufs == 0) + if (bufs == NULL || nbufs == 0) { + if (work != NULL) + uv__free(work); return UV_EINVAL; + } req->file = file; @@ -1382,8 +1472,11 @@ int uv_fs_read(uv_loop_t* loop, uv_fs_t* req, if (nbufs > ARRAY_SIZE(req->bufsml)) req->bufs = uv__malloc(nbufs * sizeof(*bufs)); - if (req->bufs == NULL) + if (req->bufs == NULL) { + if (work != NULL) + uv__free(work); return UV_ENOMEM; + } memcpy(req->bufs, bufs, nbufs * sizeof(*bufs)); @@ -1397,6 +1490,9 @@ int uv_fs_scandir(uv_loop_t* loop, const char* path, int flags, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(SCANDIR); PATH; req->flags = flags; @@ -1408,6 +1504,9 @@ int uv_fs_readlink(uv_loop_t* loop, uv_fs_t* req, const char* path, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(READLINK); PATH; POST; @@ -1418,6 +1517,9 @@ int uv_fs_realpath(uv_loop_t* loop, uv_fs_t* req, const char * path, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(REALPATH); PATH; POST; @@ -1429,6 +1531,9 @@ int uv_fs_rename(uv_loop_t* loop, const char* path, const char* new_path, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(RENAME); PATH2; POST; @@ -1436,6 +1541,9 @@ int uv_fs_rename(uv_loop_t* loop, int uv_fs_rmdir(uv_loop_t* loop, uv_fs_t* req, const char* path, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(RMDIR); PATH; POST; @@ -1449,6 +1557,9 @@ int uv_fs_sendfile(uv_loop_t* loop, int64_t off, size_t len, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(SENDFILE); req->flags = in_fd; /* hack */ req->file = out_fd; @@ -1459,6 +1570,9 @@ int uv_fs_sendfile(uv_loop_t* loop, int uv_fs_stat(uv_loop_t* loop, uv_fs_t* req, const char* path, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(STAT); PATH; POST; @@ -1471,6 +1585,9 @@ int uv_fs_symlink(uv_loop_t* loop, const char* new_path, int flags, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(SYMLINK); PATH2; req->flags = flags; @@ -1479,6 +1596,9 @@ int uv_fs_symlink(uv_loop_t* loop, int uv_fs_unlink(uv_loop_t* loop, uv_fs_t* req, const char* path, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + 
INIT(UNLINK); PATH; POST; @@ -1491,6 +1611,9 @@ int uv_fs_utime(uv_loop_t* loop, double atime, double mtime, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(UTIME); PATH; req->atime = atime; @@ -1506,11 +1629,16 @@ int uv_fs_write(uv_loop_t* loop, unsigned int nbufs, int64_t off, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(WRITE); - if (bufs == NULL || nbufs == 0) + if (bufs == NULL || nbufs == 0) { + if (work != NULL) + uv__free(work); return UV_EINVAL; - + } req->file = file; req->nbufs = nbufs; @@ -1518,8 +1646,11 @@ int uv_fs_write(uv_loop_t* loop, if (nbufs > ARRAY_SIZE(req->bufsml)) req->bufs = uv__malloc(nbufs * sizeof(*bufs)); - if (req->bufs == NULL) + if (req->bufs == NULL) { + if (work != NULL) + uv__free(work); return UV_ENOMEM; + } memcpy(req->bufs, bufs, nbufs * sizeof(*bufs)); @@ -1562,11 +1693,16 @@ int uv_fs_copyfile(uv_loop_t* loop, const char* new_path, int flags, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(COPYFILE); if (flags & ~(UV_FS_COPYFILE_EXCL | UV_FS_COPYFILE_FICLONE | UV_FS_COPYFILE_FICLONE_FORCE)) { + if (work != NULL) + uv__free(work); return UV_EINVAL; } diff --git a/deps/uv/src/unix/getaddrinfo.c b/deps/uv/src/unix/getaddrinfo.c index 10e8afd75e831b..947a61309b83b5 100644 --- a/deps/uv/src/unix/getaddrinfo.c +++ b/deps/uv/src/unix/getaddrinfo.c @@ -130,17 +130,27 @@ static void uv__getaddrinfo_done(struct uv__work* w, int status) { req->retcode = UV_EAI_CANCELED; } - if (req->cb) + if (req->cb != NULL) req->cb(req, req->retcode, req->addrinfo); } +static void uv__getaddrinfo_executor_work(uv_work_t* req) { + uv__getaddrinfo_work(&((uv_getaddrinfo_t*) req->data)->work_req); +} + +static void uv__getaddrinfo_executor_done(uv_work_t* req, int status) { + uv__getaddrinfo_done(&((uv_getaddrinfo_t*) req->data)->work_req, status); + uv__free(req); +} int uv_getaddrinfo(uv_loop_t* loop, uv_getaddrinfo_t* req, - uv_getaddrinfo_cb cb, + uv_getaddrinfo_cb getaddrinfo_cb, const char* hostname, const char* service, const struct addrinfo* hints) { + uv_work_t* work; + uv_work_options_t options; size_t hostname_len; size_t service_len; size_t hints_len; @@ -153,14 +163,23 @@ int uv_getaddrinfo(uv_loop_t* loop, hostname_len = hostname ? strlen(hostname) + 1 : 0; service_len = service ? strlen(service) + 1 : 0; hints_len = hints ? sizeof(*hints) : 0; - buf = uv__malloc(hostname_len + service_len + hints_len); + buf = uv__malloc(hostname_len + service_len + hints_len); if (buf == NULL) return UV_ENOMEM; + work = NULL; + if (getaddrinfo_cb != NULL) { + work = uv__malloc(sizeof(*work)); + if (work == NULL) { + uv__free(buf); + return UV_ENOMEM; + } + } + uv__req_init(loop, req, UV_GETADDRINFO); req->loop = loop; - req->cb = cb; + req->cb = getaddrinfo_cb; req->addrinfo = NULL; req->hints = NULL; req->service = NULL; @@ -183,13 +202,22 @@ int uv_getaddrinfo(uv_loop_t* loop, if (hostname) req->hostname = memcpy(buf + len, hostname, hostname_len); - if (cb) { - uv__work_submit(loop, - &req->work_req, - uv__getaddrinfo_work, - uv__getaddrinfo_done); + if (getaddrinfo_cb != NULL) { + work->data = req; + req->executor_data = work; /* For uv_cancel. */ + options.type = UV_WORK_DNS; + options.priority = -1; + options.cancelable = 0; + options.data = NULL; + LOG_2("getaddrinfo: req %p work %p\n", req, work); + uv_executor_queue_work(loop, + work, + &options, + uv__getaddrinfo_executor_work, + uv__getaddrinfo_executor_done); return 0; } else { + /* TODO uv__getaddrinfo_work and done APIs should take req directly. 
Windows too. */
     uv__getaddrinfo_work(&req->work_req);
     uv__getaddrinfo_done(&req->work_req, 0);
     return req->retcode;
diff --git a/deps/uv/src/unix/getnameinfo.c b/deps/uv/src/unix/getnameinfo.c
index 9a4367224c7fa6..00cded8895ee79 100644
--- a/deps/uv/src/unix/getnameinfo.c
+++ b/deps/uv/src/unix/getnameinfo.c
@@ -69,10 +69,19 @@ static void uv__getnameinfo_done(struct uv__work* w, int status) {
     service = req->service;
   }
 
-  if (req->getnameinfo_cb)
+  if (req->getnameinfo_cb != NULL)
     req->getnameinfo_cb(req, req->retcode, host, service);
 }
 
+static void uv__getnameinfo_executor_work(uv_work_t* req) {
+  uv__getnameinfo_work(&((uv_getnameinfo_t*) req->data)->work_req);
+}
+
+static void uv__getnameinfo_executor_done(uv_work_t* req, int status) {
+  uv__getnameinfo_done(&((uv_getnameinfo_t*) req->data)->work_req, status);
+  uv__free(req);
+}
+
 /*
  * Entry point for getnameinfo
  * return 0 if a callback will be made
@@ -83,6 +92,9 @@ int uv_getnameinfo(uv_loop_t* loop,
                    uv_getnameinfo_cb getnameinfo_cb,
                    const struct sockaddr* addr,
                    int flags) {
+  uv_work_t* work;
+  uv_work_options_t options;
+
   if (req == NULL || addr == NULL)
     return UV_EINVAL;
 
@@ -98,6 +110,14 @@ int uv_getnameinfo(uv_loop_t* loop,
     return UV_EINVAL;
   }
 
+  work = NULL;
+  if (getnameinfo_cb != NULL) {
+    work = uv__malloc(sizeof(*work));
+    if (work == NULL)
+      return UV_ENOMEM;
+  }
+
+
   uv__req_init(loop, (uv_req_t*)req, UV_GETNAMEINFO);
 
   req->getnameinfo_cb = getnameinfo_cb;
@@ -106,13 +126,22 @@ int uv_getnameinfo(uv_loop_t* loop,
   req->loop = loop;
   req->retcode = 0;
 
-  if (getnameinfo_cb) {
-    uv__work_submit(loop,
-                    &req->work_req,
-                    uv__getnameinfo_work,
-                    uv__getnameinfo_done);
+  if (getnameinfo_cb != NULL) {
+    work->data = req;
+    req->executor_data = work; /* For uv_cancel. */
+    options.type = UV_WORK_DNS;
+    options.priority = -1;
+    options.cancelable = 0;
+    options.data = NULL;
+    LOG_2("getnameinfo: req %p work %p\n", req, work);
+    uv_executor_queue_work(loop,
+                           work,
+                           &options,
+                           uv__getnameinfo_executor_work,
+                           uv__getnameinfo_executor_done);
    return 0;
   } else {
+    /* TODO uv__getnameinfo_work and done APIs should take req directly. Windows too.
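+     * For now the uv__getnameinfo_executor_work/_done trampolines above
+     * bridge the heap-allocated uv_work_t back to the embedded
+     * struct uv__work through req->data.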
*/ uv__getnameinfo_work(&req->work_req); uv__getnameinfo_done(&req->work_req, 0); return req->retcode; diff --git a/deps/uv/src/unix/loop.c b/deps/uv/src/unix/loop.c index c2a03d770f3764..de37e3b6c8211c 100644 --- a/deps/uv/src/unix/loop.c +++ b/deps/uv/src/unix/loop.c @@ -85,7 +85,7 @@ int uv_loop_init(uv_loop_t* loop) { if (err) goto fail_mutex_init; - err = uv_async_init(loop, &loop->wq_async, uv__work_done); + err = uv_async_init(loop, &loop->wq_async, uv__executor_work_done); if (err) goto fail_async_init; diff --git a/deps/uv/src/uv-common.h b/deps/uv/src/uv-common.h index 3289950d009ccd..557c0125336f5d 100644 --- a/deps/uv/src/uv-common.h +++ b/deps/uv/src/uv-common.h @@ -31,6 +31,30 @@ #include #include +#ifdef _DEBUG +# define DEBUG_ONLY(x) x +#else +# define DEBUG_ONLY(x) (void) 0 +#endif + +#undef DEBUG_LOG +#ifdef DEBUG_LOG +#include /* UV_LOG */ +#define LOG_0(fmt) DEBUG_ONLY(fprintf(stderr, fmt)) +#define LOG_1(fmt, a1) DEBUG_ONLY(fprintf(stderr, fmt, a1)) +#define LOG_2(fmt, a1, a2) DEBUG_ONLY(fprintf(stderr, fmt, a1, a2)) +#define LOG_3(fmt, a1, a2, a3) DEBUG_ONLY(fprintf(stderr, fmt, a1, a2, a3)) +#define LOG_4(fmt, a1, a2, a3, a4) DEBUG_ONLY(fprintf(stderr, fmt, a1, a2, a3, a4)) +#define LOG_5(fmt, a1, a2, a3, a4, a5) DEBUG_ONLY(fprintf(stderr, fmt, a1, a2, a3, a4, a5)) +#else +#define LOG_0(fmt) (void) 0 +#define LOG_1(fmt, a1) (void) 0 +#define LOG_2(fmt, a1, a2) (void) 0 +#define LOG_3(fmt, a1, a2, a3) (void) 0 +#define LOG_4(fmt, a1, a2, a3, a4) (void) 0 +#define LOG_5(fmt, a1, a2, a3, a4, a5) (void) 0 +#endif + #if defined(_MSC_VER) && _MSC_VER < 1600 # include "uv/stdint-msvc2008.h" #else @@ -164,12 +188,24 @@ void uv__fs_poll_close(uv_fs_poll_t* handle); int uv__getaddrinfo_translate_error(int sys_err); /* EAI_* error. */ -void uv__work_submit(uv_loop_t* loop, - struct uv__work *w, - void (*work)(struct uv__work *w), - void (*done)(struct uv__work *w, int status)); +enum uv__work_kind { + UV__WORK_CPU, + UV__WORK_FAST_IO, + UV__WORK_SLOW_IO +}; + +/* Get the default executor. */ +uv_executor_t* uv__default_executor(void); + +/* Get current executor. */ +uv_executor_t* uv__executor(void); + +/* Called from event loop when executor has completed work. */ +void uv__executor_work_done(uv_async_t* handle); -void uv__work_done(uv_async_t* handle); +/* If a uv_work_t is successfully uv_cancel'd, its work CB is set + * to this magic value. */ +void uv__executor_work_cancelled(uv_work_t* work); size_t uv__count_bufs(const uv_buf_t bufs[], unsigned int nbufs); diff --git a/deps/uv/src/win/core.c b/deps/uv/src/win/core.c index afdf01e7878913..153c6708aee9c6 100644 --- a/deps/uv/src/win/core.c +++ b/deps/uv/src/win/core.c @@ -276,7 +276,7 @@ int uv_loop_init(uv_loop_t* loop) { if (err) goto fail_mutex_init; - err = uv_async_init(loop, &loop->wq_async, uv__work_done); + err = uv_async_init(loop, &loop->wq_async, uv__executor_work_done); if (err) goto fail_async_init; diff --git a/deps/uv/src/win/fs.c b/deps/uv/src/win/fs.c index 71b6a81a0d5a8a..ca22fcb3d0ebc5 100644 --- a/deps/uv/src/win/fs.c +++ b/deps/uv/src/win/fs.c @@ -47,6 +47,13 @@ do { \ if (req == NULL) \ return UV_EINVAL; \ + work = NULL; \ + if (cb != NULL) { \ + work = uv__malloc(sizeof(*work)); \ + if (work == NULL) \ + return UV_ENOMEM; \ + work->data = req; \ + } \ uv_fs_req_init(loop, req, subtype, cb); \ } \ while (0) @@ -55,7 +62,16 @@ do { \ if (cb != NULL) { \ uv__req_register(loop, req); \ - uv__work_submit(loop, &req->work_req, uv__fs_work, uv__fs_done); \ + req->executor_data = work; /* For uv_cancel. 
*/ \ + options.type = UV_WORK_FS; \ + options.priority = -1; \ + options.cancelable = 0; \ + options.data = NULL; \ + uv_executor_queue_work(loop, \ + work, \ + &options, \ + uv__fs_executor_work, \ + uv__fs_executor_done); \ return 0; \ } else { \ uv__fs_work(&req->work_req); \ @@ -2033,6 +2049,14 @@ static void uv__fs_done(struct uv__work* w, int status) { req->cb(req); } +static void uv__fs_executor_work(uv_work_t* req) { + uv__fs_work(&((uv_fs_t*) req->data)->work_req); +} + +static void uv__fs_executor_done(uv_work_t* req, int status) { + uv__fs_done(&((uv_fs_t*) req->data)->work_req, status); + uv__free(req); +} void uv_fs_req_cleanup(uv_fs_t* req) { if (req == NULL) @@ -2067,10 +2091,14 @@ void uv_fs_req_cleanup(uv_fs_t* req) { int uv_fs_open(uv_loop_t* loop, uv_fs_t* req, const char* path, int flags, int mode, uv_fs_cb cb) { int err; + uv_work_t* work; + uv_work_options_t options; INIT(UV_FS_OPEN); err = fs__capture_path(req, path, NULL, cb != NULL); if (err) { + if (work != NULL) + uv__free(work); return uv_translate_sys_error(err); } @@ -2081,6 +2109,9 @@ int uv_fs_open(uv_loop_t* loop, uv_fs_t* req, const char* path, int flags, int uv_fs_close(uv_loop_t* loop, uv_fs_t* req, uv_file fd, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(UV_FS_CLOSE); req->file.fd = fd; POST; @@ -2094,10 +2125,16 @@ int uv_fs_read(uv_loop_t* loop, unsigned int nbufs, int64_t offset, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(UV_FS_READ); - if (bufs == NULL || nbufs == 0) + if (bufs == NULL || nbufs == 0) { + if (work != NULL) + uv__free(work); return UV_EINVAL; + } req->file.fd = fd; @@ -2106,8 +2143,11 @@ int uv_fs_read(uv_loop_t* loop, if (nbufs > ARRAY_SIZE(req->fs.info.bufsml)) req->fs.info.bufs = uv__malloc(nbufs * sizeof(*bufs)); - if (req->fs.info.bufs == NULL) + if (req->fs.info.bufs == NULL) { + if (work != NULL) + uv__free(work); return UV_ENOMEM; + } memcpy(req->fs.info.bufs, bufs, nbufs * sizeof(*bufs)); @@ -2123,10 +2163,16 @@ int uv_fs_write(uv_loop_t* loop, unsigned int nbufs, int64_t offset, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(UV_FS_WRITE); - if (bufs == NULL || nbufs == 0) + if (bufs == NULL || nbufs == 0) { + if (work != NULL) + uv__free(work); return UV_EINVAL; + } req->file.fd = fd; @@ -2135,8 +2181,11 @@ int uv_fs_write(uv_loop_t* loop, if (nbufs > ARRAY_SIZE(req->fs.info.bufsml)) req->fs.info.bufs = uv__malloc(nbufs * sizeof(*bufs)); - if (req->fs.info.bufs == NULL) + if (req->fs.info.bufs == NULL) { + if (work != NULL) + uv__free(work); return UV_ENOMEM; + } memcpy(req->fs.info.bufs, bufs, nbufs * sizeof(*bufs)); @@ -2148,10 +2197,14 @@ int uv_fs_write(uv_loop_t* loop, int uv_fs_unlink(uv_loop_t* loop, uv_fs_t* req, const char* path, uv_fs_cb cb) { int err; + uv_work_t* work; + uv_work_options_t options; INIT(UV_FS_UNLINK); err = fs__capture_path(req, path, NULL, cb != NULL); if (err) { + if (work != NULL) + uv__free(work); return uv_translate_sys_error(err); } @@ -2162,10 +2215,14 @@ int uv_fs_unlink(uv_loop_t* loop, uv_fs_t* req, const char* path, int uv_fs_mkdir(uv_loop_t* loop, uv_fs_t* req, const char* path, int mode, uv_fs_cb cb) { int err; + uv_work_t* work; + uv_work_options_t options; INIT(UV_FS_MKDIR); err = fs__capture_path(req, path, NULL, cb != NULL); if (err) { + if (work != NULL) + uv__free(work); return uv_translate_sys_error(err); } @@ -2177,10 +2234,14 @@ int uv_fs_mkdir(uv_loop_t* loop, uv_fs_t* req, const char* path, int mode, int uv_fs_mkdtemp(uv_loop_t* loop, uv_fs_t* req, 
const char* tpl, uv_fs_cb cb) {
   int err;
+  uv_work_t* work;
+  uv_work_options_t options;
 
   INIT(UV_FS_MKDTEMP);
   err = fs__capture_path(req, tpl, NULL, TRUE);
-  if (err)
+  if (err) {
+    if (work != NULL)
+      uv__free(work);
     return uv_translate_sys_error(err);
+  }
 
   POST;
@@ -2189,10 +2250,14 @@ int uv_fs_mkdtemp(uv_loop_t* loop, uv_fs_t* req, const char* tpl,
 
 int uv_fs_rmdir(uv_loop_t* loop, uv_fs_t* req, const char* path, uv_fs_cb cb) {
   int err;
+  uv_work_t* work;
+  uv_work_options_t options;
 
   INIT(UV_FS_RMDIR);
   err = fs__capture_path(req, path, NULL, cb != NULL);
   if (err) {
+    if (work != NULL)
+      uv__free(work);
     return uv_translate_sys_error(err);
   }
 
@@ -2203,10 +2268,14 @@ int uv_fs_rmdir(uv_loop_t* loop, uv_fs_t* req, const char* path, uv_fs_cb cb) {
 
 int uv_fs_scandir(uv_loop_t* loop, uv_fs_t* req, const char* path, int flags,
     uv_fs_cb cb) {
   int err;
+  uv_work_t* work;
+  uv_work_options_t options;
 
   INIT(UV_FS_SCANDIR);
   err = fs__capture_path(req, path, NULL, cb != NULL);
   if (err) {
+    if (work != NULL)
+      uv__free(work);
     return uv_translate_sys_error(err);
   }
 
@@ -2218,10 +2287,14 @@ int uv_fs_scandir(uv_loop_t* loop, uv_fs_t* req, const char* path, int flags,
 
 int uv_fs_link(uv_loop_t* loop, uv_fs_t* req, const char* path,
     const char* new_path, uv_fs_cb cb) {
   int err;
+  uv_work_t* work;
+  uv_work_options_t options;
 
   INIT(UV_FS_LINK);
   err = fs__capture_path(req, path, new_path, cb != NULL);
   if (err) {
+    if (work != NULL)
+      uv__free(work);
     return uv_translate_sys_error(err);
   }
 
@@ -2232,10 +2305,14 @@ int uv_fs_link(uv_loop_t* loop, uv_fs_t* req, const char* path,
 
 int uv_fs_symlink(uv_loop_t* loop, uv_fs_t* req, const char* path,
     const char* new_path, int flags, uv_fs_cb cb) {
   int err;
+  uv_work_t* work;
+  uv_work_options_t options;
 
   INIT(UV_FS_SYMLINK);
   err = fs__capture_path(req, path, new_path, cb != NULL);
   if (err) {
+    if (work != NULL)
+      uv__free(work);
     return uv_translate_sys_error(err);
   }
 
@@ -2247,10 +2324,14 @@ int uv_fs_symlink(uv_loop_t* loop, uv_fs_t* req, const char* path,
 
 int uv_fs_readlink(uv_loop_t* loop, uv_fs_t* req, const char* path,
     uv_fs_cb cb) {
   int err;
+  uv_work_t* work;
+  uv_work_options_t options;
 
   INIT(UV_FS_READLINK);
   err = fs__capture_path(req, path, NULL, cb != NULL);
   if (err) {
+    if (work != NULL)
+      uv__free(work);
     return uv_translate_sys_error(err);
   }
 
@@ -2261,15 +2342,21 @@ int uv_fs_readlink(uv_loop_t* loop, uv_fs_t* req, const char* path,
 
 int uv_fs_realpath(uv_loop_t* loop, uv_fs_t* req, const char* path,
     uv_fs_cb cb) {
   int err;
+  uv_work_t* work;
+  uv_work_options_t options;
 
   INIT(UV_FS_REALPATH);
 
   if (!path) {
+    if (work != NULL)
+      uv__free(work);
     return UV_EINVAL;
   }
 
   err = fs__capture_path(req, path, NULL, cb != NULL);
   if (err) {
+    if (work != NULL)
+      uv__free(work);
     return uv_translate_sys_error(err);
   }
 
@@ -2280,10 +2367,14 @@ int uv_fs_realpath(uv_loop_t* loop, uv_fs_t* req, const char* path,
 
 int uv_fs_chown(uv_loop_t* loop, uv_fs_t* req, const char* path, uv_uid_t uid,
     uv_gid_t gid, uv_fs_cb cb) {
   int err;
+  uv_work_t* work;
+  uv_work_options_t options;
 
   INIT(UV_FS_CHOWN);
   err = fs__capture_path(req, path, NULL, cb != NULL);
   if (err) {
+    if (work != NULL)
+      uv__free(work);
     return uv_translate_sys_error(err);
   }
 
@@ -2293,6 +2384,9 @@ int uv_fs_chown(uv_loop_t* loop, uv_fs_t* req, const char* path, uv_uid_t uid,
 
 int uv_fs_fchown(uv_loop_t* loop, uv_fs_t* req, uv_file fd, uv_uid_t uid,
     uv_gid_t gid, uv_fs_cb cb) {
+  uv_work_t* work;
+  uv_work_options_t options;
+
   INIT(UV_FS_FCHOWN);
   POST;
 }
@@ -2301,10 +2395,14 @@ int uv_fs_fchown(uv_loop_t* loop, uv_fs_t* req, uv_file fd, uv_uid_t uid,
 int
uv_fs_lchown(uv_loop_t* loop, uv_fs_t* req, const char* path, uv_uid_t uid, uv_gid_t gid, uv_fs_cb cb) { int err; + uv_work_t* work; + uv_work_options_t options; INIT(UV_FS_LCHOWN); err = fs__capture_path(req, path, NULL, cb != NULL); if (err) { + if (work != NULL) + uv__free(work); return uv_translate_sys_error(err); } POST; @@ -2313,10 +2411,14 @@ int uv_fs_lchown(uv_loop_t* loop, uv_fs_t* req, const char* path, uv_uid_t uid, int uv_fs_stat(uv_loop_t* loop, uv_fs_t* req, const char* path, uv_fs_cb cb) { int err; + uv_work_t* work; + uv_work_options_t options; INIT(UV_FS_STAT); err = fs__capture_path(req, path, NULL, cb != NULL); if (err) { + if (work != NULL) + uv__free(work); return uv_translate_sys_error(err); } @@ -2326,10 +2428,14 @@ int uv_fs_stat(uv_loop_t* loop, uv_fs_t* req, const char* path, uv_fs_cb cb) { int uv_fs_lstat(uv_loop_t* loop, uv_fs_t* req, const char* path, uv_fs_cb cb) { int err; + uv_work_t* work; + uv_work_options_t options; INIT(UV_FS_LSTAT); err = fs__capture_path(req, path, NULL, cb != NULL); if (err) { + if (work != NULL) + uv__free(work); return uv_translate_sys_error(err); } @@ -2338,6 +2444,9 @@ int uv_fs_lstat(uv_loop_t* loop, uv_fs_t* req, const char* path, uv_fs_cb cb) { int uv_fs_fstat(uv_loop_t* loop, uv_fs_t* req, uv_file fd, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(UV_FS_FSTAT); req->file.fd = fd; POST; @@ -2347,10 +2456,14 @@ int uv_fs_fstat(uv_loop_t* loop, uv_fs_t* req, uv_file fd, uv_fs_cb cb) { int uv_fs_rename(uv_loop_t* loop, uv_fs_t* req, const char* path, const char* new_path, uv_fs_cb cb) { int err; + uv_work_t* work; + uv_work_options_t options; INIT(UV_FS_RENAME); err = fs__capture_path(req, path, new_path, cb != NULL); if (err) { + if (work != NULL) + uv__free(work); return uv_translate_sys_error(err); } @@ -2359,6 +2472,9 @@ int uv_fs_rename(uv_loop_t* loop, uv_fs_t* req, const char* path, int uv_fs_fsync(uv_loop_t* loop, uv_fs_t* req, uv_file fd, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(UV_FS_FSYNC); req->file.fd = fd; POST; @@ -2366,6 +2482,9 @@ int uv_fs_fsync(uv_loop_t* loop, uv_fs_t* req, uv_file fd, uv_fs_cb cb) { int uv_fs_fdatasync(uv_loop_t* loop, uv_fs_t* req, uv_file fd, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(UV_FS_FDATASYNC); req->file.fd = fd; POST; @@ -2374,6 +2493,9 @@ int uv_fs_fdatasync(uv_loop_t* loop, uv_fs_t* req, uv_file fd, uv_fs_cb cb) { int uv_fs_ftruncate(uv_loop_t* loop, uv_fs_t* req, uv_file fd, int64_t offset, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(UV_FS_FTRUNCATE); req->file.fd = fd; req->fs.info.offset = offset; @@ -2388,19 +2510,26 @@ int uv_fs_copyfile(uv_loop_t* loop, int flags, uv_fs_cb cb) { int err; + uv_work_t* work; + uv_work_options_t options; INIT(UV_FS_COPYFILE); if (flags & ~(UV_FS_COPYFILE_EXCL | UV_FS_COPYFILE_FICLONE | UV_FS_COPYFILE_FICLONE_FORCE)) { + if (work != NULL) + uv__free(work); return UV_EINVAL; } err = fs__capture_path(req, path, new_path, cb != NULL); - if (err) + if (err) { + if (work != NULL) + uv__free(work); return uv_translate_sys_error(err); + } req->fs.info.file_flags = flags; POST; @@ -2409,6 +2538,9 @@ int uv_fs_copyfile(uv_loop_t* loop, int uv_fs_sendfile(uv_loop_t* loop, uv_fs_t* req, uv_file fd_out, uv_file fd_in, int64_t in_offset, size_t length, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(UV_FS_SENDFILE); req->file.fd = fd_in; req->fs.info.fd_out = fd_out; @@ -2424,11 +2556,16 @@ int uv_fs_access(uv_loop_t* loop, int 
flags, uv_fs_cb cb) { int err; + uv_work_t* work; + uv_work_options_t options; INIT(UV_FS_ACCESS); err = fs__capture_path(req, path, NULL, cb != NULL); - if (err) + if (err) { + if (work != NULL) + uv__free(work); return uv_translate_sys_error(err); + } req->fs.info.mode = flags; POST; @@ -2438,10 +2575,14 @@ int uv_fs_access(uv_loop_t* loop, int uv_fs_chmod(uv_loop_t* loop, uv_fs_t* req, const char* path, int mode, uv_fs_cb cb) { int err; + uv_work_t* work; + uv_work_options_t options; INIT(UV_FS_CHMOD); err = fs__capture_path(req, path, NULL, cb != NULL); if (err) { + if (work != NULL) + uv__free(work); return uv_translate_sys_error(err); } @@ -2452,6 +2593,9 @@ int uv_fs_chmod(uv_loop_t* loop, uv_fs_t* req, const char* path, int mode, int uv_fs_fchmod(uv_loop_t* loop, uv_fs_t* req, uv_file fd, int mode, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(UV_FS_FCHMOD); req->file.fd = fd; req->fs.info.mode = mode; @@ -2462,10 +2606,14 @@ int uv_fs_fchmod(uv_loop_t* loop, uv_fs_t* req, uv_file fd, int mode, int uv_fs_utime(uv_loop_t* loop, uv_fs_t* req, const char* path, double atime, double mtime, uv_fs_cb cb) { int err; + uv_work_t* work; + uv_work_options_t options; INIT(UV_FS_UTIME); err = fs__capture_path(req, path, NULL, cb != NULL); if (err) { + if (work != NULL) + uv__free(work); return uv_translate_sys_error(err); } @@ -2477,6 +2625,9 @@ int uv_fs_utime(uv_loop_t* loop, uv_fs_t* req, const char* path, double atime, int uv_fs_futime(uv_loop_t* loop, uv_fs_t* req, uv_file fd, double atime, double mtime, uv_fs_cb cb) { + uv_work_t* work; + uv_work_options_t options; + INIT(UV_FS_FUTIME); req->file.fd = fd; req->fs.time.atime = atime; diff --git a/deps/uv/src/win/getaddrinfo.c b/deps/uv/src/win/getaddrinfo.c index 063b4937cdad24..ef4d13c521f214 100644 --- a/deps/uv/src/win/getaddrinfo.c +++ b/deps/uv/src/win/getaddrinfo.c @@ -227,7 +227,7 @@ static void uv__getaddrinfo_done(struct uv__work* w, int status) { uv__req_unregister(req->loop, req); /* finally do callback with converted result */ - if (req->getaddrinfo_cb) + if (req->getaddrinfo_cb != NULL) req->getaddrinfo_cb(req, req->retcode, req->addrinfo); } @@ -239,6 +239,14 @@ void uv_freeaddrinfo(struct addrinfo* ai) { uv__free(alloc_ptr); } +static void uv__getaddrinfo_executor_work(uv_work_t* req) { + uv__getaddrinfo_work(&((uv_getaddrinfo_t*) req->data)->work_req); +} + +static void uv__getaddrinfo_executor_done(uv_work_t* req, int status) { + uv__getaddrinfo_done(&((uv_getaddrinfo_t*) req->data)->work_req, status); + uv__free(req); +} /* * Entry point for getaddrinfo @@ -259,6 +267,8 @@ int uv_getaddrinfo(uv_loop_t* loop, const char* node, const char* service, const struct addrinfo* hints) { + uv_work_t* work; + uv_work_options_t options; int nodesize = 0; int servicesize = 0; int hintssize = 0; @@ -275,6 +285,8 @@ int uv_getaddrinfo(uv_loop_t* loop, req->loop = loop; req->retcode = 0; + work = NULL; + /* calculate required memory size for all input values */ if (node != NULL) { nodesize = ALIGNED_SIZE(MultiByteToWideChar(CP_UTF8, 0, node, -1, NULL, 0) * @@ -311,6 +323,13 @@ int uv_getaddrinfo(uv_loop_t* loop, /* save alloc_ptr now so we can free if error */ req->alloc = (void*)alloc_ptr; + if (getaddrinfo_cb != NULL) { + work = uv__malloc(sizeof(*work)); + if (work == NULL) { + err = WSAENOBUFS; + goto error; + } + } /* Convert node string to UTF16 into allocated memory and save pointer in the * request. 
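   * On any later failure the error path below frees req->alloc and, when
   * one was allocated, the heap uv_work_t as well.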
*/ @@ -365,11 +384,19 @@ int uv_getaddrinfo(uv_loop_t* loop, uv__req_register(loop, req); - if (getaddrinfo_cb) { - uv__work_submit(loop, - &req->work_req, - uv__getaddrinfo_work, - uv__getaddrinfo_done); + if (getaddrinfo_cb != NULL) { + /* TODO options should indicate type. */ + work->data = req; + req->executor_data = work; /* For uv_cancel. */ + options.type = UV_WORK_DNS; + options.priority = -1; + options.cancelable = 0; + options.data = NULL; + uv_executor_queue_work(loop, + work, + &options, + uv__getaddrinfo_executor_work, + uv__getaddrinfo_executor_done); return 0; } else { uv__getaddrinfo_work(&req->work_req); @@ -382,6 +409,10 @@ int uv_getaddrinfo(uv_loop_t* loop, uv__free(req->alloc); req->alloc = NULL; } + + if (work != NULL) + uv__free(work); + return uv_translate_sys_error(err); } diff --git a/deps/uv/src/win/getnameinfo.c b/deps/uv/src/win/getnameinfo.c index 71785a9fa65718..aa743ce75d4914 100644 --- a/deps/uv/src/win/getnameinfo.c +++ b/deps/uv/src/win/getnameinfo.c @@ -104,10 +104,18 @@ static void uv__getnameinfo_done(struct uv__work* w, int status) { service = req->service; } - if (req->getnameinfo_cb) + if (req->getnameinfo_cb != NULL) req->getnameinfo_cb(req, req->retcode, host, service); } +static void uv__getnameinfo_executor_work(uv_work_t* req) { + uv__getnameinfo_work(&((uv_getnameinfo_t*) req->data)->work_req); +} + +static void uv__getnameinfo_executor_done(uv_work_t* req, int status) { + uv__getnameinfo_done(&((uv_getnameinfo_t*) req->data)->work_req, status); + uv__free(req); +} /* * Entry point for getnameinfo @@ -119,6 +127,9 @@ int uv_getnameinfo(uv_loop_t* loop, uv_getnameinfo_cb getnameinfo_cb, const struct sockaddr* addr, int flags) { + uv_work_t* work; + uv_work_options_t options; + if (req == NULL || addr == NULL) return UV_EINVAL; @@ -134,6 +145,13 @@ int uv_getnameinfo(uv_loop_t* loop, return UV_EINVAL; } + work = NULL; + if (getnameinfo_cb != NULL) { + work = uv__malloc(sizeof(*work)); + if (work == NULL) + return UV_ENOMEM; + } + UV_REQ_INIT(req, UV_GETNAMEINFO); uv__req_register(loop, req); @@ -142,11 +160,18 @@ int uv_getnameinfo(uv_loop_t* loop, req->loop = loop; req->retcode = 0; - if (getnameinfo_cb) { - uv__work_submit(loop, - &req->work_req, - uv__getnameinfo_work, - uv__getnameinfo_done); + if (getnameinfo_cb != NULL) { + work->data = req; + req->executor_data = work; /* For uv_cancel. 
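+     * (uv_cancel recovers this heap-allocated uv_work_t from
+     * req->executor_data for UV_FS, UV_GETADDRINFO, and UV_GETNAMEINFO
+     * requests before asking the executor to cancel it.)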
*/ + options.type = UV_WORK_DNS; + options.priority = -1; + options.cancelable = 0; + options.data = NULL; + uv_executor_queue_work(loop, + work, + &options, + uv__getnameinfo_executor_work, + uv__getnameinfo_executor_done); return 0; } else { uv__getnameinfo_work(&req->work_req); diff --git a/deps/uv/test/runner.c b/deps/uv/test/runner.c index f017902a04f7c8..88b3a3f929ba4f 100644 --- a/deps/uv/test/runner.c +++ b/deps/uv/test/runner.c @@ -37,7 +37,7 @@ static int compare_task(const void* va, const void* vb) { const char* fmt(double d) { - static char buf[1024]; + static char buf[4096]; static char* p; uint64_t v; diff --git a/deps/uv/test/test-threadpool-cancel.c b/deps/uv/test/test-executor-cancel.c similarity index 95% rename from deps/uv/test/test-threadpool-cancel.c rename to deps/uv/test/test-executor-cancel.c index dd13d8ae4bf1fa..aa370a8f3c504e 100644 --- a/deps/uv/test/test-threadpool-cancel.c +++ b/deps/uv/test/test-executor-cancel.c @@ -55,7 +55,7 @@ static void done_cb(uv_work_t* req, int status) { } -static void saturate_threadpool(void) { +static void saturate_executor(void) { uv_loop_t* loop; char buf[64]; size_t i; @@ -74,7 +74,7 @@ static void saturate_threadpool(void) { } -static void unblock_threadpool(void) { +static void unblock_executor(void) { size_t i; for (i = 0; i < ARRAY_SIZE(pause_reqs); i += 1) @@ -132,7 +132,7 @@ static void timer_cb(uv_timer_t* handle) { } uv_close((uv_handle_t*) &ci->timer_handle, NULL); - unblock_threadpool(); + unblock_executor(); timer_cb_called++; } @@ -143,7 +143,7 @@ static void nop_done_cb(uv_work_t* req, int status) { } -TEST_IMPL(threadpool_cancel_getaddrinfo) { +TEST_IMPL(executor_cancel_getaddrinfo) { uv_getaddrinfo_t reqs[4]; struct cancel_info ci; struct addrinfo hints; @@ -152,7 +152,7 @@ TEST_IMPL(threadpool_cancel_getaddrinfo) { INIT_CANCEL_INFO(&ci, reqs); loop = uv_default_loop(); - saturate_threadpool(); + saturate_executor(); r = uv_getaddrinfo(loop, reqs + 0, getaddrinfo_cb, "fail", NULL, NULL); ASSERT(r == 0); @@ -176,7 +176,7 @@ TEST_IMPL(threadpool_cancel_getaddrinfo) { } -TEST_IMPL(threadpool_cancel_getnameinfo) { +TEST_IMPL(executor_cancel_getnameinfo) { uv_getnameinfo_t reqs[4]; struct sockaddr_in addr4; struct cancel_info ci; @@ -188,7 +188,7 @@ TEST_IMPL(threadpool_cancel_getnameinfo) { INIT_CANCEL_INFO(&ci, reqs); loop = uv_default_loop(); - saturate_threadpool(); + saturate_executor(); r = uv_getnameinfo(loop, reqs + 0, getnameinfo_cb, (const struct sockaddr*)&addr4, 0); ASSERT(r == 0); @@ -212,7 +212,7 @@ TEST_IMPL(threadpool_cancel_getnameinfo) { } -TEST_IMPL(threadpool_cancel_work) { +TEST_IMPL(executor_cancel_work) { struct cancel_info ci; uv_work_t reqs[16]; uv_loop_t* loop; @@ -220,7 +220,7 @@ TEST_IMPL(threadpool_cancel_work) { INIT_CANCEL_INFO(&ci, reqs); loop = uv_default_loop(); - saturate_threadpool(); + saturate_executor(); for (i = 0; i < ARRAY_SIZE(reqs); i++) ASSERT(0 == uv_queue_work(loop, reqs + i, work2_cb, done2_cb)); @@ -236,7 +236,7 @@ TEST_IMPL(threadpool_cancel_work) { } -TEST_IMPL(threadpool_cancel_fs) { +TEST_IMPL(executor_cancel_fs) { struct cancel_info ci; uv_fs_t reqs[26]; uv_loop_t* loop; @@ -245,7 +245,7 @@ TEST_IMPL(threadpool_cancel_fs) { INIT_CANCEL_INFO(&ci, reqs); loop = uv_default_loop(); - saturate_threadpool(); + saturate_executor(); iov = uv_buf_init(NULL, 0); /* Needs to match ARRAY_SIZE(fs_reqs). 
*/ @@ -290,16 +290,16 @@ TEST_IMPL(threadpool_cancel_fs) { } -TEST_IMPL(threadpool_cancel_single) { +TEST_IMPL(executor_cancel_single) { uv_loop_t* loop; uv_work_t req; - saturate_threadpool(); + saturate_executor(); loop = uv_default_loop(); ASSERT(0 == uv_queue_work(loop, &req, (uv_work_cb) abort, nop_done_cb)); ASSERT(0 == uv_cancel((uv_req_t*) &req)); ASSERT(0 == done_cb_called); - unblock_threadpool(); + unblock_executor(); ASSERT(0 == uv_run(loop, UV_RUN_DEFAULT)); ASSERT(1 == done_cb_called); diff --git a/deps/uv/test/test-executor.c b/deps/uv/test/test-executor.c new file mode 100644 index 00000000000000..fe79008d86aeca --- /dev/null +++ b/deps/uv/test/test-executor.c @@ -0,0 +1,323 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "uv.h" +#include "task.h" + +#include "strings.h" /* bzero */ + +static int work_cb_count; +static int after_work_cb_count; +static uv_work_t work_req; +static char data; + + +static void work_cb(uv_work_t* req) { + ASSERT(req == &work_req); + ASSERT(req->data == &data); + work_cb_count++; +} + + +static void after_work_cb(uv_work_t* req, int status) { + ASSERT(status == 0); + ASSERT(req == &work_req); + ASSERT(req->data == &data); + after_work_cb_count++; +} + + +TEST_IMPL(executor_queue_work_simple) { + int r; + + work_req.data = &data; + r = uv_queue_work(uv_default_loop(), &work_req, work_cb, after_work_cb); + ASSERT(r == 0); + uv_run(uv_default_loop(), UV_RUN_DEFAULT); + + ASSERT(work_cb_count == 1); + ASSERT(after_work_cb_count == 1); + + MAKE_VALGRIND_HAPPY(); + return 0; +} + + +TEST_IMPL(executor_queue_work_einval) { + int r; + + work_req.data = &data; + r = uv_queue_work(uv_default_loop(), &work_req, NULL, after_work_cb); + ASSERT(r == UV_EINVAL); + + uv_run(uv_default_loop(), UV_RUN_DEFAULT); + + ASSERT(work_cb_count == 0); + ASSERT(after_work_cb_count == 0); + + MAKE_VALGRIND_HAPPY(); + return 0; +} + +/* Define an toy_executor. + * This is a trivial one-thread producer-consumer setup with a fixed buffer of work. */ +#define TOY_EXECUTOR_MAX_REQUESTS 100 +static uv_executor_t toy_executor; +static struct toy_executor_data { + /* Do not hold while executing work. */ + uv_mutex_t mutex; + + int times_submit_called; + int times_cancel_called; + + unsigned n_completed; + + unsigned head; + unsigned tail; + /* Queue with space for some extras. 
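+   * (executor_replace below queues TOY_EXECUTOR_MAX_REQUESTS requests plus
+   * a few extra -- slow_work and cancel_work -- hence the slack.)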
*/ + uv_work_t* queued_work[TOY_EXECUTOR_MAX_REQUESTS + 10]; + + unsigned no_more_work_coming; + uv_sem_t thread_exiting; + + /* For signaling toy_slow_work. */ + uv_sem_t finish_slow_work; + + uv_thread_t thread; + uv_executor_t *executor; +} toy_executor_data; + +static void worker(void* arg) { + struct toy_executor_data* data; + uv_work_t* work; + int should_break; + + data = arg; + + should_break = 0; + for (;;) { + /* Run until we (1) have no work, and (2) see data->no_more_work_coming. */ + + /* Run pending work. */ + for (;;) { + uv_mutex_lock(&data->mutex); + if (data->head == data->tail) { + uv_mutex_unlock(&data->mutex); + break; + } + + printf("worker: found %d work -- head %d tail %d\n", data->tail - data->head, data->head, data->tail); + ASSERT(0 <= data->head && data->head <= data->tail); + uv_mutex_unlock(&data->mutex); + + /* Handle one. */ + work = data->queued_work[data->head]; + printf("worker: Running work %p\n", work); + work->work_cb(work); + + /* Tell libuv we're done with this work. */ + printf("worker: Telling libuv we're done with %p\n", work); + uv_executor_return_work(work); + + /* Advance. */ + uv_mutex_lock(&data->mutex); + printf("worker: Advancing\n"); + data->head++; + data->n_completed++; + uv_mutex_unlock(&data->mutex); + } + + /* Loop unless (1) no work, and (2) no_more_work_coming. */ + uv_mutex_lock(&data->mutex); + if (data->head == data->tail && data->no_more_work_coming) { + printf("worker exiting\n"); + fflush(stdout); + should_break = 1; + uv_sem_post(&data->thread_exiting); + } + uv_mutex_unlock(&data->mutex); + + if (should_break) + break; + } +} + +static void toy_executor_init(void) { + bzero(&toy_executor, sizeof(toy_executor)); + + toy_executor_data.times_submit_called = 0; + toy_executor_data.times_cancel_called = 0; + toy_executor_data.n_completed = 0; + toy_executor_data.head = 0; + toy_executor_data.tail = 0; + toy_executor_data.no_more_work_coming = 0; + ASSERT(0 == uv_sem_init(&toy_executor_data.finish_slow_work, 0)); + toy_executor_data.executor = &toy_executor; + ASSERT(0 == uv_mutex_init(&toy_executor_data.mutex)); + ASSERT(0 == uv_sem_init(&toy_executor_data.thread_exiting, 0)); + + ASSERT(0 == uv_thread_create(&toy_executor_data.thread, worker, &toy_executor_data)); +} + +static void toy_executor_destroy(uv_executor_t* executor) { + struct toy_executor_data* data; + + data = executor->data; + uv_thread_join(&data->thread); + uv_mutex_destroy(&data->mutex); +} + +static void toy_executor_submit(uv_executor_t* executor, + uv_work_t* req, + const uv_work_options_t* opts) { + struct toy_executor_data* data; + printf("toy_executor_submit: req %p\n", req); + + data = executor->data; + data->times_submit_called++; + + uv_mutex_lock(&data->mutex); + ASSERT(data->tail < ARRAY_SIZE(data->queued_work)); + data->queued_work[data->tail] = req; + data->tail++; + uv_mutex_unlock(&data->mutex); +} + +static int toy_executor_cancel(uv_executor_t* executor, uv_work_t* req) { + struct toy_executor_data* data; + + data = executor->data; + printf("toy_executor_cancel: req %p\n", req); + data->times_cancel_called++; + + return UV_EINVAL; +} + +static void toy_work(uv_work_t* req) { + printf("toy_work: req %p\n", req); + ASSERT(req); +} + +static void toy_slow_work(uv_work_t* req) { + printf("toy_slow_work: req %p\n", req); + ASSERT(req); + uv_sem_wait(&toy_executor_data.finish_slow_work); +} + +TEST_IMPL(executor_replace) { + uv_work_t work[100]; + int n_extra_requests; + uv_work_t slow_work; + uv_work_t cancel_work; + int i; + + n_extra_requests = 0; 
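+
+  /* An executor must provide submit(); cancel and data are optional. A
+   * minimal replacement sketch (illustrative only; the cancel == NULL
+   * case, where uv_cancel returns UV_ENOSYS, is exercised by
+   * executor_replace_nocancel below):
+   *
+   *   uv_executor_t ex;
+   *   bzero(&ex, sizeof(ex));
+   *   ex.submit = toy_executor_submit;
+   *   ASSERT(0 == uv_replace_executor(&ex));
+   */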
+ + /* Replace the builtin executor with our toy_executor. */ + toy_executor_init(); + toy_executor.submit = toy_executor_submit; + toy_executor.cancel = toy_executor_cancel; + toy_executor.data = &toy_executor_data; + ASSERT(0 == uv_replace_executor(&toy_executor)); + + /* Submit work. */ + for (i = 0; i < TOY_EXECUTOR_MAX_REQUESTS; i++) { + printf("Queuing work %p\n", &work[i]); + if (i < TOY_EXECUTOR_MAX_REQUESTS/2) + ASSERT(0 == uv_queue_work(uv_default_loop(), &work[i], toy_work, NULL)); + else + ASSERT(0 == uv_executor_queue_work(uv_default_loop(), &work[i], NULL, toy_work, NULL)); + } + + /* Having queued work, we should no longer be able to replace. */ + ASSERT(0 != uv_replace_executor(&toy_executor)); + + /* Submit a slow request so a subsequent request will be cancelable. */ + n_extra_requests++; + ASSERT(0 == uv_queue_work(uv_default_loop(), &slow_work, toy_slow_work, NULL)); + + /* Submit and try to cancel some work. */ + n_extra_requests++; + ASSERT(0 == uv_queue_work(uv_default_loop(), &cancel_work, toy_work, NULL)); + ASSERT(UV_EINVAL == uv_cancel((uv_req_t *) &cancel_work)); + + /* Let the slow work finish. */ + uv_sem_post(&toy_executor_data.finish_slow_work); + + /* Side channel: tell pool we're done and wait until it finishes. */ + printf("Telling pool we're done\n"); + uv_mutex_lock(&toy_executor_data.mutex); + toy_executor_data.no_more_work_coming = 1; + uv_mutex_unlock(&toy_executor_data.mutex); + + printf("Waiting for pool\n"); + uv_sem_wait(&toy_executor_data.thread_exiting); + + /* Validate. */ + printf("Validating\n"); + ASSERT(TOY_EXECUTOR_MAX_REQUESTS + n_extra_requests == toy_executor_data.times_submit_called); + ASSERT(TOY_EXECUTOR_MAX_REQUESTS + n_extra_requests == toy_executor_data.n_completed); + ASSERT(1 == toy_executor_data.times_cancel_called); + + toy_executor_destroy(&toy_executor); + + MAKE_VALGRIND_HAPPY(); + return 0; +} + +TEST_IMPL(executor_replace_nocancel) { + uv_work_t slow_work; + uv_work_t cancel_work; + + /* Replace the builtin executor with our toy_executor. + * This executor does not implement a cancel CB. + * uv_cancel should fail with UV_ENOSYS. */ + toy_executor_init(); + toy_executor.submit = toy_executor_submit; + toy_executor.cancel = NULL; + toy_executor.data = &toy_executor_data; + ASSERT(0 == uv_replace_executor(&toy_executor)); + + /* Submit a slow request so a subsequent request will be cancelable. */ + ASSERT(0 == uv_queue_work(uv_default_loop(), &slow_work, toy_slow_work, NULL)); + + /* Submit and then try to cancel a slow request. + * With toy_executor.cancel == NULL, should return UV_ENOSYS. */ + ASSERT(0 == uv_queue_work(uv_default_loop(), &cancel_work, toy_work, NULL)); + ASSERT(UV_ENOSYS == uv_cancel((uv_req_t *) &cancel_work)); + + /* Let the slow work finish. */ + uv_sem_post(&toy_executor_data.finish_slow_work); + + /* Side channel: tell pool we're done and wait until it finishes. 
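+   * (worker() exits only once no_more_work_coming is set and its queue is
+   * drained; it posts thread_exiting just before leaving its loop.)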
*/ + printf("Telling pool we're done\n"); + uv_mutex_lock(&toy_executor_data.mutex); + toy_executor_data.no_more_work_coming = 1; + uv_mutex_unlock(&toy_executor_data.mutex); + + printf("Waiting for pool\n"); + uv_sem_wait(&toy_executor_data.thread_exiting); + + toy_executor_destroy(&toy_executor); + + MAKE_VALGRIND_HAPPY(); + return 0; +} diff --git a/deps/uv/test/test-fork.c b/deps/uv/test/test-fork.c index 2a1ddc497a133f..1553cc074cbff0 100644 --- a/deps/uv/test/test-fork.c +++ b/deps/uv/test/test-fork.c @@ -638,8 +638,8 @@ static void assert_run_work(uv_loop_t* const loop) { #ifndef __MVS__ -TEST_IMPL(fork_threadpool_queue_work_simple) { - /* The threadpool works in a child process. */ +TEST_IMPL(fork_executor_queue_work_simple) { + /* The executor works in a child process. */ pid_t child_pid; uv_loop_t loop; diff --git a/deps/uv/test/test-list.h b/deps/uv/test/test-list.h index b501722d4dc83b..0a26e60444de3e 100644 --- a/deps/uv/test/test-list.h +++ b/deps/uv/test/test-list.h @@ -346,14 +346,16 @@ TEST_DECLARE (fs_exclusive_sharing_mode) TEST_DECLARE (fs_open_readonly_acl) TEST_DECLARE (fs_fchmod_archive_readonly) #endif -TEST_DECLARE (threadpool_queue_work_simple) -TEST_DECLARE (threadpool_queue_work_einval) -TEST_DECLARE (threadpool_multiple_event_loops) -TEST_DECLARE (threadpool_cancel_getaddrinfo) -TEST_DECLARE (threadpool_cancel_getnameinfo) -TEST_DECLARE (threadpool_cancel_work) -TEST_DECLARE (threadpool_cancel_fs) -TEST_DECLARE (threadpool_cancel_single) +TEST_DECLARE (executor_queue_work_simple) +TEST_DECLARE (executor_queue_work_einval) +TEST_DECLARE (executor_multiple_event_loops) +TEST_DECLARE (executor_cancel_getaddrinfo) +TEST_DECLARE (executor_cancel_getnameinfo) +TEST_DECLARE (executor_cancel_work) +TEST_DECLARE (executor_cancel_fs) +TEST_DECLARE (executor_cancel_single) +TEST_DECLARE (executor_replace) +TEST_DECLARE (executor_replace_nocancel) TEST_DECLARE (thread_local_storage) TEST_DECLARE (thread_stack_size) TEST_DECLARE (thread_mutex) @@ -432,7 +434,7 @@ TEST_DECLARE (fork_fs_events_child) TEST_DECLARE (fork_fs_events_child_dir) TEST_DECLARE (fork_fs_events_file_parent_child) #ifndef __MVS__ -TEST_DECLARE (fork_threadpool_queue_work_simple) +TEST_DECLARE (fork_executor_queue_work_simple) #endif #endif @@ -894,14 +896,16 @@ TASK_LIST_START #endif TEST_ENTRY (get_osfhandle_valid_handle) TEST_ENTRY (open_osfhandle_valid_handle) - TEST_ENTRY (threadpool_queue_work_simple) - TEST_ENTRY (threadpool_queue_work_einval) - TEST_ENTRY (threadpool_multiple_event_loops) - TEST_ENTRY (threadpool_cancel_getaddrinfo) - TEST_ENTRY (threadpool_cancel_getnameinfo) - TEST_ENTRY (threadpool_cancel_work) - TEST_ENTRY (threadpool_cancel_fs) - TEST_ENTRY (threadpool_cancel_single) + TEST_ENTRY (executor_queue_work_simple) + TEST_ENTRY (executor_queue_work_einval) + TEST_ENTRY (executor_multiple_event_loops) + TEST_ENTRY (executor_cancel_getaddrinfo) + TEST_ENTRY (executor_cancel_getnameinfo) + TEST_ENTRY (executor_cancel_work) + TEST_ENTRY (executor_cancel_fs) + TEST_ENTRY (executor_cancel_single) + TEST_ENTRY (executor_replace) + TEST_ENTRY (executor_replace_nocancel) TEST_ENTRY (thread_local_storage) TEST_ENTRY (thread_stack_size) TEST_ENTRY (thread_mutex) @@ -930,7 +934,7 @@ TASK_LIST_START TEST_ENTRY (fork_fs_events_child_dir) TEST_ENTRY (fork_fs_events_file_parent_child) #ifndef __MVS__ - TEST_ENTRY (fork_threadpool_queue_work_simple) + TEST_ENTRY (fork_executor_queue_work_simple) #endif #endif diff --git a/deps/uv/test/test-thread.c b/deps/uv/test/test-thread.c index 
955c9f2f1be149..57d3d578813651 100644 --- a/deps/uv/test/test-thread.c +++ b/deps/uv/test/test-thread.c @@ -158,7 +158,7 @@ TEST_IMPL(thread_create) { /* Hilariously bad test name. Run a lot of tasks in the thread pool and verify * that each "finished" callback is run in its originating thread. */ -TEST_IMPL(threadpool_multiple_event_loops) { +TEST_IMPL(executor_multiple_event_loops) { struct test_thread threads[8]; size_t i; int r; diff --git a/deps/uv/test/test-threadpool.c b/deps/uv/test/test-threadpool.c deleted file mode 100644 index e3d17d7546f66b..00000000000000 --- a/deps/uv/test/test-threadpool.c +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "uv.h" -#include "task.h" - -static int work_cb_count; -static int after_work_cb_count; -static uv_work_t work_req; -static char data; - - -static void work_cb(uv_work_t* req) { - ASSERT(req == &work_req); - ASSERT(req->data == &data); - work_cb_count++; -} - - -static void after_work_cb(uv_work_t* req, int status) { - ASSERT(status == 0); - ASSERT(req == &work_req); - ASSERT(req->data == &data); - after_work_cb_count++; -} - - -TEST_IMPL(threadpool_queue_work_simple) { - int r; - - work_req.data = &data; - r = uv_queue_work(uv_default_loop(), &work_req, work_cb, after_work_cb); - ASSERT(r == 0); - uv_run(uv_default_loop(), UV_RUN_DEFAULT); - - ASSERT(work_cb_count == 1); - ASSERT(after_work_cb_count == 1); - - MAKE_VALGRIND_HAPPY(); - return 0; -} - - -TEST_IMPL(threadpool_queue_work_einval) { - int r; - - work_req.data = &data; - r = uv_queue_work(uv_default_loop(), &work_req, NULL, after_work_cb); - ASSERT(r == UV_EINVAL); - - uv_run(uv_default_loop(), UV_RUN_DEFAULT); - - ASSERT(work_cb_count == 0); - ASSERT(after_work_cb_count == 0); - - MAKE_VALGRIND_HAPPY(); - return 0; -} diff --git a/deps/uv/test/test.gyp b/deps/uv/test/test.gyp index 855eda1c50003a..1208d51e445823 100644 --- a/deps/uv/test/test.gyp +++ b/deps/uv/test/test.gyp @@ -120,8 +120,8 @@ 'test-tcp-oob.c', 'test-tcp-read-stop.c', 'test-tcp-write-queue-order.c', - 'test-threadpool.c', - 'test-threadpool-cancel.c', + 'test-executor.c', + 'test-executor-cancel.c', 'test-thread-equal.c', 'test-tmpdir.c', 'test-mutexes.c', diff --git a/deps/uv/uv.gyp b/deps/uv/uv.gyp index 37dcb3604f4c72..b5c2a598e590b1 100644 --- a/deps/uv/uv.gyp +++ b/deps/uv/uv.gyp @@ -72,6 +72,7 @@ 'src/heap-inl.h', 'src/inet.c', 'src/queue.h', + 'src/executor.c', 'src/threadpool.c', 'src/timer.c', 'src/uv-data-getter-setters.c', From 883d7fc8896f3036d20d8ad306ee145730182f68 Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Sun, 30 Sep 2018 12:43:40 -0400 Subject: [PATCH 30/31] PTP: make linter happy --- src/node.cc | 14 +-- src/node_platform.h | 3 +- src/node_threadpool.cc | 152 ++++++++++++++++++++------------- src/node_threadpool.h | 100 +++++++++++++--------- test/cctest/test_threadpool.cc | 5 +- 5 files changed, 168 insertions(+), 106 deletions(-) diff --git a/src/node.cc b/src/node.cc index c4bdc8292a4d63..e24d21269ccf06 100644 --- a/src/node.cc +++ b/src/node.cc @@ -287,19 +287,23 @@ class NodeTraceStateObserver : static struct { void Initialize(void) { // What kind of threadpool is desired? 
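    // Illustration (hypothetical invocation; these env vars are the ones
    // read here and in the PartitionedNodeThreadpool constructors):
    //   NODE_THREADPOOL_TYPE=SPLIT_BY_ORIGIN_AND_TYPE \
    //   NODE_THREADPOOL_V8_TP_SIZE=4 \
    //   NODE_THREADPOOL_UV_CPU_TP_SIZE=4 \
    //   NODE_THREADPOOL_UV_IO_TP_SIZE=16 node app.js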
- char *tp_type = getenv("NODE_THREADPOOL_TYPE"); + char* tp_type = getenv("NODE_THREADPOOL_TYPE"); if (!tp_type || strcmp(tp_type, "SHARED") == 0) { std::vector sizes{-1}; - tp_ = std::make_shared(sizes); + tp_ = std::make_shared< + threadpool::UnpartitionedPartitionedNodeThreadpool>(sizes); } else if (strcmp(tp_type, "SPLIT_BY_ORIGIN") == 0) { std::vector sizes{-1, -1}; - tp_ = std::make_shared(sizes); + tp_ = std::make_shared< + threadpool::ByTaskOriginPartitionedNodeThreadpool>(sizes); } else if (strcmp(tp_type, "SPLIT_BY_TYPE") == 0) { std::vector sizes{-1, -1}; - tp_ = std::make_shared(sizes); + tp_ = std::make_shared< + threadpool::ByTaskTypePartitionedNodeThreadpool>(sizes); } else if (strcmp(tp_type, "SPLIT_BY_ORIGIN_AND_TYPE") == 0) { std::vector sizes{-1, -1, -1}; - tp_ = std::make_shared(sizes); + tp_ = std::make_shared< + threadpool::ByTaskOriginAndTypePartitionedNodeThreadpool>(sizes); } else { CHECK(0); } diff --git a/src/node_platform.h b/src/node_platform.h index e464ca70cb5bea..1dc963c15cf7d0 100644 --- a/src/node_platform.h +++ b/src/node_platform.h @@ -100,7 +100,8 @@ class PerIsolatePlatformData : // API is modeled on v8::TaskRunner. class WorkerThreadsTaskRunner { public: - explicit WorkerThreadsTaskRunner(std::shared_ptr tp); + explicit WorkerThreadsTaskRunner( + std::shared_ptr tp); // Add task to queue for eventual Run() void PostTask(std::unique_ptr task); diff --git a/src/node_threadpool.cc b/src/node_threadpool.cc index 8759ff24168861..81ad0e49b30f73 100644 --- a/src/node_threadpool.cc +++ b/src/node_threadpool.cc @@ -14,7 +14,7 @@ // TODO(davisjam): DO NOT MERGE. Only for debugging. // TODO(davisjam): There must be a better way to do this. #define DEBUG_LOG 1 -//#undef DEBUG_LOG +// #undef DEBUG_LOG #ifdef DEBUG_LOG #define LOG_TO_FILE(fd, ...) fprintf(fd, __VA_ARGS__) @@ -96,18 +96,19 @@ void NodeThreadpool::PrintStats() const { ***************/ PartitionedNodeThreadpool::PartitionedNodeThreadpool() { - LOG("PartitionedNodeThreadpool::PartitionedNodeThreadpool: default constructor\n"); + LOG("PNT::PNT: default constructor\n"); } -PartitionedNodeThreadpool::PartitionedNodeThreadpool(std::vector tp_sizes) { - LOG("PartitionedNodeThreadpool::PartitionedNodeThreadpool: vector constructor\n"); +PartitionedNodeThreadpool::PartitionedNodeThreadpool +(std::vector tp_sizes) { + LOG("PNT::PNT: vector constructor\n"); Initialize(tp_sizes); } void PartitionedNodeThreadpool::Initialize(const std::vector& tp_sizes) { int i = 0; for (auto size : tp_sizes) { - LOG("PartitionedNodeThreadpool::Initialize: tp %d: %d threads\n", i, size); + LOG("PNT::Initialize: tp %d: %d threads\n", i, size); std::shared_ptr tp = std::make_shared(size, i); tps_.push_back(tp); i++; @@ -115,7 +116,7 @@ void PartitionedNodeThreadpool::Initialize(const std::vector& tp_sizes) { } PartitionedNodeThreadpool::~PartitionedNodeThreadpool() { - LOG("PartitionedNodeThreadpool::~PartitionedNodeThreadpool: Goodbye\n"); + LOG("PNT::~PartitionedNodeThreadpool: Goodbye\n"); fflush(stderr); // If we just return, the destructors of the tp's will drain them. @@ -126,9 +127,11 @@ PartitionedNodeThreadpool::~PartitionedNodeThreadpool() { } void PartitionedNodeThreadpool::PrintStats(void) const { - // TODO(davisjam) Let's hope the application didn't make more than one of these :-D. + // TODO(davisjam) Let's hope the application didn't + // make more than one of these :-D. 
char logFile[128]; - snprintf(logFile, sizeof(logFile), "/tmp/node-%d-PartitionedNodeThreadpool-result.log", getpid()); + snprintf(logFile, sizeof(logFile), + "/tmp/node-%d-PartitionedNodeThreadpool-result.log", getpid()); FILE* fd = fopen(logFile, "w"); CHECK(fd); @@ -137,38 +140,58 @@ void PartitionedNodeThreadpool::PrintStats(void) const { // so we can slice and dice quickly. LOG_TO_FILE(fd, "TP key format: TP,name,size\n"); for (auto pair : tp_labels_) { - LOG_TO_FILE(fd, "TP key: %d,%s,%d\n", pair.first, tp_labels_.at(pair.first).c_str(), tp_sizes_.at(pair.first)); + LOG_TO_FILE(fd, "TP key: %d,%s,%d\n", + pair.first, tp_labels_.at(pair.first).c_str(), tp_sizes_.at(pair.first)); } - LOG_TO_FILE(fd, "QueueLengths data format: TP,queue-length,n_cpu,n_io,time\n"); - LOG_TO_FILE(fd, "TaskSummary data format: TP,task-origin,task-type,queue-duration,run-duration,time-at-completion\n"); + LOG_TO_FILE(fd, + "QueueLengths data format: TP,queue-length,n_cpu,n_io,time\n"); + LOG_TO_FILE(fd, "TaskSummary data format: TP,task-origin,task-type," + "queue-duration,run-duration,time-at-completion\n"); for (size_t i = 0; i < tps_.size(); i++) { auto &tp = tps_[i]; LOG("Report on TP %d\n", tp->Id()); - const std::vector> &lengths = tp->GetQueueLengths(); - LOG(" TP %d: Lengths at the %lu update intervals:\n", tp->Id(), lengths.size()); + const std::vector> &lengths = + tp->GetQueueLengths(); + LOG(" TP %d: Lengths at the %lu update intervals:\n", + tp->Id(), lengths.size()); if (lengths.size()) { for (const std::unique_ptr &length : lengths) { - LOG(" TP %d length %d n-cpu %d n-io %d time %lu\n", tp->Id(), length->length_, length->n_cpu_, length->n_io_, length->time_); - LOG_TO_FILE(fd, "QueueLengths data: %d,%d,%d,%d,%lu\n", tp->Id(), length->length_, length->n_cpu_, length->n_io_, length->time_); + LOG(" TP %d length %d n-cpu %d n-io %d time %lu\n", + tp->Id(), length->length_, length->n_cpu_, + length->n_io_, length->time_); + LOG_TO_FILE(fd, "QueueLengths data: %d,%d,%d,%d,%lu\n", + tp->Id(), length->length_, + length->n_cpu_, length->n_io_, + length->time_); } } - const std::vector> &summaries = tp->GetTaskSummaries(); - LOG(" TP %d: Task summaries for the %lu tasks:\n", tp->Id(), summaries.size()); + const std::vector> &summaries = + tp->GetTaskSummaries(); + LOG(" TP %d: Task summaries for the %lu tasks:\n", + tp->Id(), summaries.size()); for (const std::unique_ptr &summary : summaries) { - LOG(" TP %d origin %s type %s queue-duration %lu run-duration %lu time-at-completion %lu\n", - tp->Id(), TaskDetails::AsString(summary->details_.origin).c_str(), TaskDetails::AsString(summary->details_.type).c_str(), summary->time_in_queue_, summary->time_in_run_, summary->time_at_completion_); + LOG(" TP %d origin %s type %s queue-duration %lu" + "run-duration %lu time-at-completion %lu\n", + tp->Id(), TaskDetails::AsString(summary->details_.origin).c_str(), + TaskDetails::AsString(summary->details_.type).c_str(), + summary->time_in_queue_, summary->time_in_run_, + summary->time_at_completion_); LOG_TO_FILE(fd, "TaskSummary data: %d,%s,%s,%lu,%lu,%lu\n", - tp->Id(), TaskDetails::AsString(summary->details_.origin).c_str(), TaskDetails::AsString(summary->details_.type).c_str(), summary->time_in_queue_, summary->time_in_run_, summary->time_at_completion_); + tp->Id(), TaskDetails::AsString(summary->details_.origin).c_str(), + TaskDetails::AsString(summary->details_.type).c_str(), + summary->time_in_queue_, summary->time_in_run_, + summary->time_at_completion_); } } } -std::shared_ptr 
PartitionedNodeThreadpool::Post(std::unique_ptr task) { +std::shared_ptr +PartitionedNodeThreadpool::Post(std::unique_ptr task) { int tp = ChooseThreadpool(task.get()); CHECK_GE(tp, 0); CHECK_LT(tp, (int) tps_.size()); @@ -216,7 +239,7 @@ UnpartitionedPartitionedNodeThreadpool::UnpartitionedPartitionedNodeThreadpool( if (tp_sizes[ONLY_TP_IX] <= 0) { tp_sizes[ONLY_TP_IX] = 4; // libuv default } - LOG("UnpartitionedPartitionedNodeThreadpool::UnpartitionedPartitionedNodeThreadpool: only tp size %d\n", tp_sizes[ONLY_TP_IX]); + LOG("UPNT::UPNT: only tp size %d\n", tp_sizes[ONLY_TP_IX]); CHECK_GT(tp_sizes[ONLY_TP_IX], 0); Initialize(tp_sizes); @@ -225,7 +248,8 @@ UnpartitionedPartitionedNodeThreadpool::UnpartitionedPartitionedNodeThreadpool( tp_sizes_[ONLY_TP_IX] = tp_sizes[ONLY_TP_IX]; } -UnpartitionedPartitionedNodeThreadpool::~UnpartitionedPartitionedNodeThreadpool() { +UnpartitionedPartitionedNodeThreadpool:: +~UnpartitionedPartitionedNodeThreadpool() { } int UnpartitionedPartitionedNodeThreadpool::ChooseThreadpool(Task* task) const { @@ -252,7 +276,7 @@ ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool( // No/bad env var, so take a guess. tp_sizes[V8_TP_IX] = GoodCPUThreadpoolSize(); } - LOG("ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool: v8 tp size %d\n", tp_sizes[V8_TP_IX]); + LOG("BTOPNT::BTOPNT: v8 tp size %d\n", tp_sizes[V8_TP_IX]); CHECK_GT(tp_sizes[V8_TP_IX], 0); // LIBUV TP size @@ -261,14 +285,15 @@ ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool( size_t buf_size = sizeof(buf); if (uv_os_getenv("UV_THREADPOOL_SIZE", buf, &buf_size) == 0) { tp_sizes[LIBUV_TP_IX] = atoi(buf); - } else if (uv_os_getenv("NODE_THREADPOOL_UV_TP_SIZE", buf, &buf_size) == 0) { + } else if (uv_os_getenv("NODE_THREADPOOL_UV_TP_SIZE", buf, &buf_size) + == 0) { tp_sizes[LIBUV_TP_IX] = atoi(buf); } } if (tp_sizes[LIBUV_TP_IX] <= 0) { tp_sizes[LIBUV_TP_IX] = 1 * tp_sizes[V8_TP_IX]; } - LOG("ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool: libuv tp size %d\n", tp_sizes[LIBUV_TP_IX]); + LOG("BTOPNT::BTOPNT: libuv tp size %d\n", tp_sizes[LIBUV_TP_IX]); CHECK_GT(tp_sizes[LIBUV_TP_IX], 0); Initialize(tp_sizes); @@ -279,16 +304,17 @@ ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool( tp_sizes_[LIBUV_TP_IX] = tp_sizes[LIBUV_TP_IX]; } -ByTaskOriginPartitionedNodeThreadpool::~ByTaskOriginPartitionedNodeThreadpool() { +ByTaskOriginPartitionedNodeThreadpool:: +~ByTaskOriginPartitionedNodeThreadpool() { } int ByTaskOriginPartitionedNodeThreadpool::ChooseThreadpool(Task* task) const { switch (task->details_.origin) { case TaskDetails::V8: - LOG("ByTaskOriginPartitionedNodeThreadpool::ChooseThreadpool: V8\n"); + LOG("BTOPNT::ChooseThreadpool: V8\n"); return V8_TP_IX; default: - LOG("ByTaskOriginPartitionedNodeThreadpool::ChooseThreadpool: LIBUV\n"); + LOG("BTOPNT::ChooseThreadpool: LIBUV\n"); return LIBUV_TP_IX; } } @@ -313,7 +339,7 @@ ByTaskTypePartitionedNodeThreadpool::ByTaskTypePartitionedNodeThreadpool( // No/bad env var, so take a guess. 
tp_sizes[CPU_TP_IX] = GoodCPUThreadpoolSize(); } - LOG("ByTaskTypePartitionedNodeThreadpool::ByTaskTypePartitionedNodeThreadpool: cpu_pool_size %d\n", tp_sizes[CPU_TP_IX]); + LOG("BTTPNT::BTTPNT: cpu_pool_size %d\n", tp_sizes[CPU_TP_IX]); CHECK_GT(tp_sizes[CPU_TP_IX], 0); // IO TP size @@ -327,7 +353,7 @@ ByTaskTypePartitionedNodeThreadpool::ByTaskTypePartitionedNodeThreadpool( if (tp_sizes[IO_TP_IX] < 0) { tp_sizes[IO_TP_IX] = 1 * tp_sizes[CPU_TP_IX]; } - LOG("ByTaskTypePartitionedNodeThreadpool::ByTaskTypePartitionedNodeThreadpool: io_pool_size %d\n", tp_sizes[IO_TP_IX]); + LOG("BTTPNT::BTTPNT: io_pool_size %d\n", tp_sizes[IO_TP_IX]); CHECK_GT(tp_sizes[IO_TP_IX], 0); Initialize(tp_sizes); @@ -346,10 +372,10 @@ int ByTaskTypePartitionedNodeThreadpool::ChooseThreadpool(Task* task) const { case TaskDetails::CPU: case TaskDetails::MEMORY: case TaskDetails::TASK_TYPE_UNKNOWN: - LOG("ByTaskTypePartitionedNodeThreadpool::ChooseThreadpool: CPU\n"); + LOG("BTTPNT::ChooseThreadpool: CPU\n"); return CPU_TP_IX; default: - LOG("ByTaskTypePartitionedNodeThreadpool::ChooseThreadpool: IO\n"); + LOG("BTTPNT::ChooseThreadpool: IO\n"); return IO_TP_IX; } } @@ -358,8 +384,10 @@ int ByTaskTypePartitionedNodeThreadpool::ChooseThreadpool(Task* task) const { * ByTaskOriginAndTypePartitionedNodeThreadpool ***************/ -ByTaskOriginAndTypePartitionedNodeThreadpool::ByTaskOriginAndTypePartitionedNodeThreadpool( - std::vector tp_sizes) : V8_TP_IX(0), LIBUV_CPU_TP_IX(1), LIBUV_IO_TP_IX(2) { +ByTaskOriginAndTypePartitionedNodeThreadpool:: +ByTaskOriginAndTypePartitionedNodeThreadpool( + std::vector tp_sizes) + : V8_TP_IX(0), LIBUV_CPU_TP_IX(1), LIBUV_IO_TP_IX(2) { CHECK_EQ(tp_sizes.size(), 3); // V8 TP size @@ -374,7 +402,7 @@ ByTaskOriginAndTypePartitionedNodeThreadpool::ByTaskOriginAndTypePartitionedNode // No/bad env var, so take a guess. tp_sizes[V8_TP_IX] = GoodCPUThreadpoolSize(); } - LOG("ByTaskOriginPartitionedNodeThreadpool::ByTaskOriginPartitionedNodeThreadpool: v8 tp size %d\n", tp_sizes[V8_TP_IX]); + LOG("BTOATPNT::BTOATPNT: v8 tp size %d\n", tp_sizes[V8_TP_IX]); CHECK_GT(tp_sizes[V8_TP_IX], 0); // LIBUV-CPU TP size @@ -389,7 +417,8 @@ ByTaskOriginAndTypePartitionedNodeThreadpool::ByTaskOriginAndTypePartitionedNode // No/bad env var, so take a guess. 
tp_sizes[LIBUV_CPU_TP_IX] = GoodCPUThreadpoolSize(); } - LOG("ByTaskOriginAndTypePartitionedNodeThreadpool::ByTaskOriginAndTypePartitionedNodeThreadpool: libuv cpu pool size %d\n", tp_sizes[LIBUV_CPU_TP_IX]); + LOG("BTOATPNT::BTOATPNT: libuv cpu pool size %d\n", + tp_sizes[LIBUV_CPU_TP_IX]); CHECK_GT(tp_sizes[LIBUV_CPU_TP_IX], 0); // IO TP size @@ -403,7 +432,7 @@ ByTaskOriginAndTypePartitionedNodeThreadpool::ByTaskOriginAndTypePartitionedNode if (tp_sizes[LIBUV_IO_TP_IX] < 0) { tp_sizes[LIBUV_IO_TP_IX] = 1 * tp_sizes[LIBUV_CPU_TP_IX]; } - LOG("ByTaskOriginAndTypePartitionedNodeThreadpool::ByTaskOriginAndTypePartitionedNodeThreadpool: libuv io pool size %d\n", tp_sizes[LIBUV_IO_TP_IX]); + LOG("BTOATPNT::BTOATPNT: libuv io pool size %d\n", tp_sizes[LIBUV_IO_TP_IX]); CHECK_GT(tp_sizes[LIBUV_IO_TP_IX], 0); Initialize(tp_sizes); @@ -416,26 +445,30 @@ ByTaskOriginAndTypePartitionedNodeThreadpool::ByTaskOriginAndTypePartitionedNode tp_sizes_[LIBUV_IO_TP_IX] = tp_sizes[LIBUV_IO_TP_IX]; } -ByTaskOriginAndTypePartitionedNodeThreadpool::~ByTaskOriginAndTypePartitionedNodeThreadpool() { +ByTaskOriginAndTypePartitionedNodeThreadpool:: +~ByTaskOriginAndTypePartitionedNodeThreadpool() { } -int ByTaskOriginAndTypePartitionedNodeThreadpool::ChooseThreadpool(Task* task) const { +int +ByTaskOriginAndTypePartitionedNodeThreadpool::ChooseThreadpool(Task* task) +const { if (task->details_.origin == TaskDetails::V8) { - LOG("ByTaskOriginAndTypePartitionedNodeThreadpool::ChooseThreadpool: V8\n"); + LOG("BTOATPNT::ChooseThreadpool: V8\n"); return V8_TP_IX; } else if (task->details_.origin == TaskDetails::LIBUV) { switch (task->details_.type) { case TaskDetails::CPU: case TaskDetails::MEMORY: case TaskDetails::TASK_TYPE_UNKNOWN: - LOG("ByTaskTypePartitionedNodeThreadpool::ChooseThreadpool: CPU\n"); + LOG("BTOATPNT::ChooseThreadpool: CPU\n"); return LIBUV_CPU_TP_IX; default: - LOG("ByTaskOriginAndTypePartitionedNodeThreadpool::ChooseThreadpool: I/O\n"); + LOG("BTOATPNT::ChooseThreadpool:\ I/O\n"); return LIBUV_IO_TP_IX; } } else { - LOG("ByTaskOriginAndTypePartitionedNodeThreadpool::ChooseThreadpool: Unexpected origin %d. Using libuv I/O pool\n", task->details_.origin); + LOG("BTOATPNT::ChooseThreadpool: Unexpected origin %d --> libuv I/O pool\n", + task->details_.origin); return LIBUV_IO_TP_IX; } } @@ -445,7 +478,7 @@ int ByTaskOriginAndTypePartitionedNodeThreadpool::ChooseThreadpool(Task* task) c ***************/ WorkerGroup::WorkerGroup(int n_workers, std::shared_ptr tq) - : workers_() { + : workers_() { for (int i = 0; i < n_workers; i++) { std::unique_ptr worker(new Worker(tq)); worker->Start(); @@ -584,7 +617,8 @@ TaskState::State TaskState::TryUpdateState(TaskState::State new_state) { return state_; } -bool TaskState::ValidStateTransition(TaskState::State old_state, TaskState::State new_state) { +bool TaskState:: +ValidStateTransition(TaskState::State old_state, TaskState::State new_state) { // Normal flow: INITIAL -> QUEUED -> ASSIGNED -> COMPLETED. // Also: non-terminal state -> CANCELLED -> COMPLETED. switch (old_state) { @@ -639,10 +673,10 @@ class LibuvTask; // Internal LibuvExecutor mechanism to enable uv_cancel. // Preserves task_state so smart pointers knows not to delete it. 
class LibuvTaskData { - friend class LibuvExecutor; + friend class LibuvExecutor; public: - LibuvTaskData(std::shared_ptr state) : state_(state) { + explicit LibuvTaskData(std::shared_ptr state) : state_(state) { } private: @@ -709,7 +743,6 @@ class LibuvTask : public Task { req_->work_cb(req_); } - protected: private: LibuvExecutor* libuv_executor_; uv_work_t* req_; @@ -773,7 +806,8 @@ TaskQueue::TaskQueue(int id) : id_(id), lock_() , task_available_(), tasks_drained_() , queue_(), outstanding_tasks_(0), stopped_(false) - , n_cpu_in_queue_(0), n_io_in_queue_(0), n_changes_since_last_length_sample_(0), length_report_freq_(10) + , n_cpu_in_queue_(0), n_io_in_queue_(0) + , n_changes_since_last_length_sample_(0), length_report_freq_(10) , task_summaries_(), queue_lengths_() { } @@ -799,10 +833,10 @@ bool TaskQueue::Push(std::unique_ptr task) { } void TaskQueue::UpdateLength(Task* task, bool grew) { - int *counter = nullptr; + int* counter = nullptr; if (task->details_.type == TaskDetails::CPU - || task->details_.type == TaskDetails::MEMORY - || task->details_.type == TaskDetails::TASK_TYPE_UNKNOWN) { + || task->details_.type == TaskDetails::MEMORY + || task->details_.type == TaskDetails::TASK_TYPE_UNKNOWN) { counter = &n_cpu_in_queue_; } else { counter = &n_io_in_queue_; @@ -891,11 +925,13 @@ int TaskQueue::Length() const { return length; } -std::vector> const& TaskQueue::GetTaskSummaries() const { +std::vector> const& +TaskQueue::GetTaskSummaries() const { return task_summaries_; } -std::vector> const& TaskQueue::GetQueueLengths() const { +std::vector> const& +TaskQueue::GetQueueLengths() const { return queue_lengths_; } @@ -940,11 +976,13 @@ int Threadpool::NWorkers() const { return worker_group_->Size(); } -std::vector> const& Threadpool::GetTaskSummaries() const { +std::vector> const& +Threadpool::GetTaskSummaries() const { return task_queue_->GetTaskSummaries(); } -std::vector> const& Threadpool::GetQueueLengths() const { +std::vector> const& +Threadpool::GetQueueLengths() const { return task_queue_->GetQueueLengths(); } diff --git a/src/node_threadpool.h b/src/node_threadpool.h index 6ec32e2c995d09..63e5d79604a18c 100644 --- a/src/node_threadpool.h +++ b/src/node_threadpool.h @@ -51,7 +51,7 @@ class WorkerGroup { // - cancellation (a la Davis et al. 2018's Manager-Worker-Hangman approach) class Worker { public: - Worker(std::shared_ptr tq); + explicit Worker(std::shared_ptr tq); // Starts a thread and returns control to the caller. void Start(); @@ -70,12 +70,14 @@ class Worker { // At what level does the user want to monitor TP performance? // Presumably they want this info via APIs in NodeThreadpool. // Tracking it on a per-Task basis might be overkill. But on the other hand -// this would permit users to dynamically identify slower and faster tasks for us. -// Which would be cool. -// If we track this in TaskState, then Task knows about it, and can tell TaskQueue about it, +// this would permit users to dynamically identify slower and faster tasks +// for us. Which would be cool. +// If we track this in TaskState, then Task knows about it, and +// can tell TaskQueue about it, // which can propagate to Threadpool, which can propagate to NodeThreadpool. -// TODO(davisjam): I don't like all of the 'friend class XXX' I introduced to make the time APIs compile. +// TODO(davisjam): I don't like all of the 'friend class XXX' +// I introduced to make the time APIs compile. 
// This is basically a struct class TaskDetails { @@ -169,18 +171,18 @@ class TaskDetails { // - try to Cancel it // - monitor how long it spends in the QUEUED and ASSIGNED states. class TaskState { - // My friends may TryUpdateState, update my time, etc. - friend class Task; - friend class TaskQueue; - friend class Worker; - friend class TaskSummary; + // My friends may TryUpdateState, update my time, etc. + friend class Task; + friend class TaskQueue; + friend class Worker; + friend class TaskSummary; public: enum State { INITIAL , QUEUED , ASSIGNED - , COMPLETED // Terminal state + , COMPLETED // Terminal state , CANCELLED }; @@ -235,9 +237,9 @@ class TaskState { // - User work from the N-API class Task { // For access to task_state_'s time tracking. - friend class TaskQueue; - friend class Worker; - friend class TaskSummary; + friend class TaskQueue; + friend class Worker; + friend class TaskSummary; public: // Subclasses should set details_ in their constructor. @@ -260,7 +262,7 @@ class Task { class TaskSummary { public: - TaskSummary(Task* completed_task); + explicit TaskSummary(Task* completed_task); TaskDetails details_; uint64_t time_in_queue_; @@ -298,8 +300,8 @@ class LibuvExecutor { class QueueLengthSample { public: QueueLengthSample(int n_cpu, int n_io, uint64_t time) - : time_(time), length_(n_cpu + n_io), n_cpu_(n_cpu), n_io_(n_io) { } - + : time_(time), length_(n_cpu + n_io), n_cpu_(n_cpu), n_io_(n_io) { } + uint64_t time_; int length_; @@ -319,7 +321,7 @@ class QueueLengthSample { // Users should check the state of Tasks they Pop. class TaskQueue { public: - TaskQueue(int id =-1); + explicit TaskQueue(int id =-1); // Return true if Push succeeds, else false. // Thread-safe. @@ -342,8 +344,10 @@ class TaskQueue { int Length() const; - std::vector> const& GetTaskSummaries() const; - std::vector> const& GetQueueLengths() const; + std::vector> const& + GetTaskSummaries() const; + std::vector> const& + GetQueueLengths() const; private: // Caller must hold lock_. @@ -394,7 +398,8 @@ class Threadpool { // Returns nullptr on failure. // TODO(davisjam): It should not return nullptr on failure. // Then the task would be destroyed! - // Since the underlying queues should not be Stop'd until the Threadpool d'tor, + // Since the underlying queues should not be Stop'd + // until the Threadpool d'tor, // I think it's reasonable that Post will *never* fail. std::shared_ptr Post(std::unique_ptr task); // Block until there are no tasks pending or scheduled in the TP. @@ -407,7 +412,8 @@ class Threadpool { int NWorkers() const; std::vector> const& GetTaskSummaries() const; - std::vector> const& GetQueueLengths() const; + std::vector> const& GetQueueLengths() + const; protected: void Initialize(); @@ -424,8 +430,8 @@ class Threadpool { class NodeThreadpool { public: // TODO(davisjam): Is this OK? It permits sub-classing. - // But maybe we should take an interface approach and have all of these virtual - // methods be pure virtual? + // But maybe we should take an interface approach and + // have all of these virtual methods be pure virtual? NodeThreadpool(); // If threadpool_size <= 0: // - checks UV_THREADPOOL_SIZE to determine threadpool_size @@ -471,14 +477,14 @@ class PartitionedNodeThreadpool : public NodeThreadpool { virtual std::shared_ptr Post(std::unique_ptr task); // Sub-class can use our Post, but needs to tell us which TP to use. 
- virtual int ChooseThreadpool(Task* task) const =0; - virtual void BlockingDrain() override; + virtual int ChooseThreadpool(Task* task) const = 0; + void BlockingDrain() override; - virtual int QueueLength() const override; + int QueueLength() const override; - virtual int NWorkers() const override; + int NWorkers() const override; - virtual void PrintStats() const override; + void PrintStats() const override; protected: // Sub-classes should call this after computing tp_sizes in their c'tors. @@ -491,9 +497,11 @@ class PartitionedNodeThreadpool : public NodeThreadpool { std::map tp_sizes_; }; -// This is the same as a NodeThreadpool, but by inheriting from PartitionedNodeThreadpool +// This is the same as a NodeThreadpool, +// but by inheriting from PartitionedNodeThreadpool // we get to benefit from its built-in monitoring. -class UnpartitionedPartitionedNodeThreadpool : public PartitionedNodeThreadpool { +class UnpartitionedPartitionedNodeThreadpool +: public PartitionedNodeThreadpool { public: // tp_sizes[0] defines the only pool. Reads UV_THREADPOOL_SIZE, defaults to 4. explicit UnpartitionedPartitionedNodeThreadpool(std::vector tp_sizes); @@ -501,7 +509,7 @@ class UnpartitionedPartitionedNodeThreadpool : public PartitionedNodeThreadpool ~UnpartitionedPartitionedNodeThreadpool(); int ChooseThreadpool(Task* task) const; - + private: int ONLY_TP_IX; }; @@ -510,8 +518,11 @@ class UnpartitionedPartitionedNodeThreadpool : public PartitionedNodeThreadpool class ByTaskOriginPartitionedNodeThreadpool : public PartitionedNodeThreadpool { public: // tp_sizes[0] is V8, tp_sizes[1] is libuv - // tp_sizes[0] -1: reads NODE_THREADPOOL_V8_TP_SIZE, or guesses based on # cores - // tp_sizes[1] -1: reads UV_THREADPOOL_SIZE or NODE_THREADPOOL_UV_THREADPOOL_SIZE defaults to 4 + // tp_sizes[0] -1: reads NODE_THREADPOOL_V8_TP_SIZE, + // or guesses based on # cores + // tp_sizes[1] -1: reads UV_THREADPOOL_SIZE or + // NODE_THREADPOOL_UV_THREADPOOL_SIZE + // defaults to 4 explicit ByTaskOriginPartitionedNodeThreadpool(std::vector tp_sizes); // Waits for queue to drain. ~ByTaskOriginPartitionedNodeThreadpool(); @@ -527,8 +538,10 @@ class ByTaskOriginPartitionedNodeThreadpool : public PartitionedNodeThreadpool { class ByTaskTypePartitionedNodeThreadpool : public PartitionedNodeThreadpool { public: // tp_sizes[0] is CPU, tp_sizes[1] is I/O - // tp_sizes[0] -1: reads NODE_THREADPOOL_CPU_TP_SIZE, or guesses based on # cores - // tp_sizes[1] -1: reads NODE_THREADPOOL_IO_TP_SIZE, or guesses based on # cores + // tp_sizes[0] -1: reads NODE_THREADPOOL_CPU_TP_SIZE, + // or guesses based on # cores + // tp_sizes[1] -1: reads NODE_THREADPOOL_IO_TP_SIZE, + // or guesses based on # cores explicit ByTaskTypePartitionedNodeThreadpool(std::vector tp_sizes); // Waits for queue to drain. 
~ByTaskTypePartitionedNodeThreadpool(); @@ -541,13 +554,18 @@ class ByTaskTypePartitionedNodeThreadpool : public PartitionedNodeThreadpool { }; // Splits based on task origin and type: V8 or libuv-{CPU or I/O} -class ByTaskOriginAndTypePartitionedNodeThreadpool : public PartitionedNodeThreadpool { +class ByTaskOriginAndTypePartitionedNodeThreadpool +: public PartitionedNodeThreadpool { public: // tp_sizes[0] is V8, tp_sizes[1] is libuv-CPU, tp_sizes[2] is libuv-I/O - // tp_sizes[0] -1: reads NODE_THREADPOOL_V8_TP_SIZE, or guesses based on # cores - // tp_sizes[1] -1: reads NODE_THREADPOOL_UV_CPU_TP_SIZE, or guesses based on # cores - // tp_sizes[2] -1: reads NODE_THREADPOOL_UV_IO_TP_SIZE, or guesses based on # cores - explicit ByTaskOriginAndTypePartitionedNodeThreadpool(std::vector tp_sizes); + // tp_sizes[0] -1: reads NODE_THREADPOOL_V8_TP_SIZE, or + // guesses based on # cores + // tp_sizes[1] -1: reads NODE_THREADPOOL_UV_CPU_TP_SIZE, or + // guesses based on # cores + // tp_sizes[2] -1: reads NODE_THREADPOOL_UV_IO_TP_SIZE, or + // guesses based on # cores + explicit ByTaskOriginAndTypePartitionedNodeThreadpool( + std::vector tp_sizes); // Waits for queue to drain. ~ByTaskOriginAndTypePartitionedNodeThreadpool(); diff --git a/test/cctest/test_threadpool.cc b/test/cctest/test_threadpool.cc index dcc33daabe57b1..0edd3a840a006e 100644 --- a/test/cctest/test_threadpool.cc +++ b/test/cctest/test_threadpool.cc @@ -67,7 +67,8 @@ class SlowTestTask : public node::threadpool::Task { void Run() { testTaskRunCount++; - for (int i = 0; i < 10000000; i++); + for (int i = 0; i < 10000000; i++) + continue; } }; @@ -168,7 +169,7 @@ TEST_F(ThreadpoolTest, WorkerGroupWorksWithTaskQueue) { // Once we create the WorkerGroup, it should empty tq. WorkerGroup wg(4, tq); tq->Stop(); - } // wg leaves scope + } // wg leaves scope // wg destructor should drain tq EXPECT_EQ(tq->Length(), 0); From 83e42144ec32c348eca357e7b19ecec6ee6a74ab Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Sun, 30 Sep 2018 12:50:21 -0400 Subject: [PATCH 31/31] REMOVE ME: PTP: include libuv changes so people can try prototype Updates the libuv PR, which I rebased on v1.x (~v1.23.1). 
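For reviewers trying the prototype, the embedder-facing flow exercised by
test-executor.c looks roughly like the sketch below. The submit/cancel
prototypes are assumptions inferred from the toy_executor test; the
authoritative definitions live in include/uv.h in this patch. The ordering
constraint the tests assert also applies: uv_replace_executor() succeeds only
before the first request is queued, and a NULL cancel callback makes
uv_cancel() return UV_ENOSYS.

#include "uv.h"

static uv_executor_t my_executor;

/* Assumed callback shapes (see include/uv.h for the real prototypes). */
static void my_submit(uv_executor_t* executor,
                      uv_work_t* req,
                      const uv_work_options_t* opts) {
  /* Hand req to your own workers; run req->work_cb(req) on one of them,
   * then signal completion back to libuv (completion hook elided here). */
}

static int my_cancel(uv_executor_t* executor, uv_work_t* req) {
  return UV_EBUSY;  /* e.g. refuse because req is already running */
}

static void work_cb(uv_work_t* req) { /* CPU- or I/O-bound work */ }
static void after_work_cb(uv_work_t* req, int status) {}

int main(void) {
  uv_work_t req1;
  uv_work_t req2;

  my_executor.submit = my_submit;
  my_executor.cancel = my_cancel;  /* NULL => uv_cancel() returns UV_ENOSYS */
  my_executor.data = NULL;

  /* Succeeds only while no work has been queued yet; the tests assert a
   * non-zero return once requests exist. */
  if (uv_replace_executor(&my_executor) != 0)
    return 1;

  /* The classic API is routed to my_executor... */
  uv_queue_work(uv_default_loop(), &req1, work_cb, after_work_cb);
  /* ...and the new entry point additionally takes a uv_work_options_t*
   * (NULL here, as in the toy_executor test). */
  uv_executor_queue_work(uv_default_loop(), &req2, NULL,
                         work_cb, after_work_cb);

  return uv_run(uv_default_loop(), UV_RUN_DEFAULT);
}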
--- deps/uv/AUTHORS | 5 + deps/uv/ChangeLog | 46 ++++ deps/uv/MAINTAINERS.md | 1 + deps/uv/README.md | 79 +++++- deps/uv/android-configure | 23 -- deps/uv/configure.ac | 2 +- deps/uv/docs/src/misc.rst | 4 + deps/uv/docs/src/signal.rst | 10 +- deps/uv/docs/src/stream.rst | 2 +- deps/uv/docs/src/tcp.rst | 4 +- deps/uv/docs/src/tty.rst | 11 +- deps/uv/include/uv/version.h | 2 +- deps/uv/src/unix/bsd-ifaddrs.c | 5 +- deps/uv/src/unix/cygwin.c | 2 +- deps/uv/src/unix/fs.c | 195 ++++++------- deps/uv/src/unix/ibmi.c | 3 +- deps/uv/src/unix/linux-core.c | 93 +++---- deps/uv/src/unix/linux-syscalls.c | 133 --------- deps/uv/src/unix/linux-syscalls.h | 34 --- deps/uv/src/unix/os390-syscalls.h | 4 - deps/uv/src/unix/os390.c | 15 +- deps/uv/src/unix/pipe.c | 20 +- deps/uv/src/unix/stream.c | 1 + deps/uv/src/unix/tty.c | 37 ++- deps/uv/src/win/core.c | 57 +++- deps/uv/src/win/tty.c | 13 +- deps/uv/src/win/udp.c | 4 +- deps/uv/src/win/winapi.c | 13 + deps/uv/src/win/winapi.h | 11 + deps/uv/test/test-condvar.c | 2 +- deps/uv/test/test-fork.c | 6 +- deps/uv/test/test-fs.c | 258 +++++++++++++++++- deps/uv/test/test-handle-fileno.c | 6 +- deps/uv/test/test-list.h | 6 + .../test/test-pipe-close-stdout-read-stdin.c | 3 +- deps/uv/test/test-process-priority.c | 4 +- deps/uv/test/test-spawn.c | 10 + deps/uv/test/test-tty.c | 80 +++++- 38 files changed, 763 insertions(+), 441 deletions(-) delete mode 100755 deps/uv/android-configure diff --git a/deps/uv/AUTHORS b/deps/uv/AUTHORS index 0ba7c067681477..65048007663a03 100644 --- a/deps/uv/AUTHORS +++ b/deps/uv/AUTHORS @@ -346,3 +346,8 @@ Paolo Greppi Shelley Vohr Ujjwal Sharma Michał Kozakiewicz +Emil Bay +Jeremiah Senkpiel +Andy Zhang +dmabupt +Ryan Liptak diff --git a/deps/uv/ChangeLog b/deps/uv/ChangeLog index b57cea4baf150a..d01b06f08aaefc 100644 --- a/deps/uv/ChangeLog +++ b/deps/uv/ChangeLog @@ -1,3 +1,49 @@ +2018.09.22, Version 1.23.1 (Stable), d2282b3d67821dc53c907c2155fa8c5c6ce25180 + +Changes since version 1.23.0: + +* unix,win: limit concurrent DNS calls to nthreads/2 (Anna Henningsen) + +* doc: add addaleax to maintainers (Anna Henningsen) + +* doc: add missing slash in stream.rst (Emil Bay) + +* unix,fs: use utimes & friends for uv_fs_utime (Jeremiah Senkpiel) + +* unix,fs: remove linux fallback from utimesat() (Jeremiah Senkpiel) + +* unix,fs: remove uv__utimesat() syscall fallback (Jeremiah Senkpiel) + +* doc: fix argument name in tcp.rts (Emil Bay) + +* doc: notes on running tests, benchmarks, tools (Jamie Davis) + +* linux: remove epoll syscall wrappers (Ben Noordhuis) + +* linux: drop code path for epoll_pwait-less kernels (Ben Noordhuis) + +* Partially revert "win,code: remove GetQueuedCompletionStatus-based poller" + (Jameson Nash) + +* build: add compile for android arm64/x86/x86-64 (Andy Zhang) + +* doc: clarify that some remarks apply to windows (Bert Belder) + +* test: fix compiler warnings (Jamie Davis) + +* ibmi: return 0 from uv_resident_set_memory() (dmabupt) + +* win: fix uv_udp_recv_start() error translation (Ryan Liptak) + +* win,doc: improve uv_os_setpriority() documentation (Bartosz Sosnowski) + +* test: increase upper bound in condvar_5 (Jamie Davis) + +* win,tty: remove deadcode (Jameson Nash) + +* stream: autodetect direction (Jameson Nash) + + 2018.08.18, Version 1.23.0 (Stable), 7ebb26225f2eaae6db22f4ef34ce76fa16ff89ec Changes since version 1.22.0: diff --git a/deps/uv/MAINTAINERS.md b/deps/uv/MAINTAINERS.md index 889ee4988c482c..543dc3cda7bce2 100644 --- a/deps/uv/MAINTAINERS.md +++ b/deps/uv/MAINTAINERS.md @@ -3,6 +3,7 @@ 
libuv is currently managed by the following individuals: +* **Anna Henningsen** ([@addaleax](https://github.com/addaleax)) * **Bartosz Sosnowski** ([@bzoz](https://github.com/bzoz)) * **Ben Noordhuis** ([@bnoordhuis](https://github.com/bnoordhuis)) - GPG key: D77B 1E34 243F BAF0 5F8E 9CC3 4F55 C8C8 46AB 89B9 (pubkey-bnoordhuis) diff --git a/deps/uv/README.md b/deps/uv/README.md index cb9e26c1e03a7a..b24b722612edf3 100644 --- a/deps/uv/README.md +++ b/deps/uv/README.md @@ -282,8 +282,31 @@ Make sure that you specify the architecture you wish to build for in the Run: +For arm + +```bash +$ source ./android-configure-arm NDK_PATH gyp [API_LEVEL] +$ make -C out +``` + +or for arm64 + +```bash +$ source ./android-configure-arm64 NDK_PATH gyp [API_LEVEL] +$ make -C out +``` + +or for x86 + +```bash +$ source ./android-configure-x86 NDK_PATH gyp [API_LEVEL] +$ make -C out +``` + +or for x86_64 + ```bash -$ source ./android-configure NDK_PATH gyp [API_LEVEL] +$ source ./android-configure-x86_64 NDK_PATH gyp [API_LEVEL] $ make -C out ``` @@ -310,14 +333,66 @@ $ ninja -C out/Release ### Running tests -Run: +#### Build + +Build (includes tests): ```bash $ ./gyp_uv.py -f make $ make -C out +``` + +#### Run all tests + +```bash $ ./out/Debug/run-tests ``` +#### Run one test + +The list of all tests is in `test/test-list.h`. + +This invocation will cause the `run-tests` driver to fork and execute `TEST_NAME` in a child process: + +```bash +$ ./out/Debug/run-tests TEST_NAME +``` + +This invocation will cause the `run-tests` driver to execute the test within the `run-tests` process: + +```bash +$ ./out/Debug/run-tests TEST_NAME TEST_NAME +``` + +#### Debugging tools + +When running the test from within the `run-tests` process (`run-tests TEST_NAME TEST_NAME`), tools like gdb and valgrind work normally. +When running the test from a child of the `run-tests` process (`run-tests TEST_NAME`), use these tools in a fork-aware manner. + +##### Fork-aware gdb + +Use the [follow-fork-mode](https://sourceware.org/gdb/onlinedocs/gdb/Forks.html) setting: + +``` +$ gdb --args out/Debug/run-tests TEST_NAME + +(gdb) set follow-fork-mode child +... +``` + +##### Fork-aware valgrind + +Use the `--trace-children=yes` parameter: + +```bash +$ valgrind --trace-children=yes -v --tool=memcheck --leak-check=full --track-origins=yes --leak-resolution=high --show-reachable=yes --log-file=memcheck.log out/Debug/run-tests TEST_NAME +``` + +### Running benchmarks + +See the section on running tests. +The benchmark driver is `out/Debug/run-benchmarks` and the benchmarks are listed in `test/benchmark-list.h`. + ## Supported Platforms Check the [SUPPORTED_PLATFORMS file](SUPPORTED_PLATFORMS.md). 
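A short follow-up to the "Running benchmarks" section above, which names the
driver but shows no invocation. Assuming `run-benchmarks` accepts a benchmark
name the same way `run-tests` accepts a test name (the available names are
listed in `test/benchmark-list.h`):

```bash
# Run the full suite, then a single benchmark by name
# (interface assumed to mirror run-tests).
$ ./out/Debug/run-benchmarks
$ ./out/Debug/run-benchmarks BENCHMARK_NAME
```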
diff --git a/deps/uv/android-configure b/deps/uv/android-configure deleted file mode 100755 index b5c11cd40c6873..00000000000000 --- a/deps/uv/android-configure +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -export TOOLCHAIN=$PWD/android-toolchain -mkdir -p $TOOLCHAIN -API=${3:-24} -$1/build/tools/make-standalone-toolchain.sh \ - --toolchain=arm-linux-androideabi-4.9 \ - --arch=arm \ - --install-dir=$TOOLCHAIN \ - --platform=android-$API \ - --force -export PATH=$TOOLCHAIN/bin:$PATH -export AR=arm-linux-androideabi-ar -export CC=arm-linux-androideabi-gcc -export CXX=arm-linux-androideabi-g++ -export LINK=arm-linux-androideabi-g++ -export PLATFORM=android -export CFLAGS="-D__ANDROID_API__=$API" - -if [[ $2 == 'gyp' ]] - then - ./gyp_uv.py -Dtarget_arch=arm -DOS=android -f make-android -fi diff --git a/deps/uv/configure.ac b/deps/uv/configure.ac index 6e084fd04d9a7d..ce307b1d70677c 100644 --- a/deps/uv/configure.ac +++ b/deps/uv/configure.ac @@ -13,7 +13,7 @@ # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. AC_PREREQ(2.57) -AC_INIT([libuv], [1.23.0], [https://github.com/libuv/libuv/issues]) +AC_INIT([libuv], [1.23.1], [https://github.com/libuv/libuv/issues]) AC_CONFIG_MACRO_DIR([m4]) m4_include([m4/libuv-extra-automake-flags.m4]) m4_include([m4/as_case.m4]) diff --git a/deps/uv/docs/src/misc.rst b/deps/uv/docs/src/misc.rst index 529d588c5d4bd1..cf4a7895cd147b 100644 --- a/deps/uv/docs/src/misc.rst +++ b/deps/uv/docs/src/misc.rst @@ -544,4 +544,8 @@ API process priority, the result will equal one of the `UV_PRIORITY` constants, and not necessarily the exact value of `priority`. + .. note:: + On Windows, setting `PRIORITY_HIGHEST` will only work for elevated user, + for others it will be silently reduced to `PRIORITY_HIGH`. + .. versionadded:: 1.23.0 diff --git a/deps/uv/docs/src/signal.rst b/deps/uv/docs/src/signal.rst index 24354e4f7c1329..f52b64706ab890 100644 --- a/deps/uv/docs/src/signal.rst +++ b/deps/uv/docs/src/signal.rst @@ -17,12 +17,12 @@ Reception of some signals is emulated on Windows: program is given approximately 10 seconds to perform cleanup. After that Windows will unconditionally terminate it. -Watchers for other signals can be successfully created, but these signals -are never received. These signals are: `SIGILL`, `SIGABRT`, `SIGFPE`, `SIGSEGV`, -`SIGTERM` and `SIGKILL.` +* Watchers for other signals can be successfully created, but these signals + are never received. These signals are: `SIGILL`, `SIGABRT`, `SIGFPE`, `SIGSEGV`, + `SIGTERM` and `SIGKILL.` -Calls to raise() or abort() to programmatically raise a signal are -not detected by libuv; these will not trigger a signal watcher. +* Calls to raise() or abort() to programmatically raise a signal are + not detected by libuv; these will not trigger a signal watcher. .. note:: On Linux SIGRT0 and SIGRT1 (signals 32 and 33) are used by the NPTL pthreads library to diff --git a/deps/uv/docs/src/stream.rst b/deps/uv/docs/src/stream.rst index 9ec23622512519..6a704367b1b361 100644 --- a/deps/uv/docs/src/stream.rst +++ b/deps/uv/docs/src/stream.rst @@ -45,7 +45,7 @@ Data types `nread` might be 0, which does *not* indicate an error or EOF. This is equivalent to ``EAGAIN`` or ``EWOULDBLOCK`` under ``read(2)``. - The callee is responsible for stopping closing the stream when an error happens + The callee is responsible for stopping/closing the stream when an error happens by calling :c:func:`uv_read_stop` or :c:func:`uv_close`. Trying to read from the stream again is undefined. 
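The stream.rst contract above is easier to see in code. A minimal read
callback honoring it might look like the following sketch (the paired
alloc_cb, assumed here to malloc() buf->base, and all handle setup are
elided):

#include <stdio.h>
#include <stdlib.h>
#include "uv.h"

static void on_read(uv_stream_t* stream, ssize_t nread, const uv_buf_t* buf) {
  if (nread > 0) {
    /* nread bytes of buf->base are valid; consume them here. */
  } else if (nread < 0) {
    /* Error or EOF: the callee must stop and/or close the stream itself. */
    if (nread != UV_EOF)
      fprintf(stderr, "read error: %s\n", uv_strerror((int) nread));
    uv_read_stop(stream);
    uv_close((uv_handle_t*) stream, NULL);
  }
  /* nread == 0 is the EAGAIN/EWOULDBLOCK case: neither error nor EOF. */

  if (buf->base != NULL)
    free(buf->base);  /* came from our alloc_cb (malloc assumed) */
}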
diff --git a/deps/uv/docs/src/tcp.rst b/deps/uv/docs/src/tcp.rst index e761b460d0e636..d20a6362af94d5 100644 --- a/deps/uv/docs/src/tcp.rst +++ b/deps/uv/docs/src/tcp.rst @@ -86,13 +86,13 @@ API .. c:function:: int uv_tcp_getsockname(const uv_tcp_t* handle, struct sockaddr* name, int* namelen) - Get the current address to which the handle is bound. `addr` must point to + Get the current address to which the handle is bound. `name` must point to a valid and big enough chunk of memory, ``struct sockaddr_storage`` is recommended for IPv4 and IPv6 support. .. c:function:: int uv_tcp_getpeername(const uv_tcp_t* handle, struct sockaddr* name, int* namelen) - Get the address of the peer connected to the handle. `addr` must point to + Get the address of the peer connected to the handle. `name` must point to a valid and big enough chunk of memory, ``struct sockaddr_storage`` is recommended for IPv4 and IPv6 support. diff --git a/deps/uv/docs/src/tty.rst b/deps/uv/docs/src/tty.rst index 01a0585287affc..9889a0a0b6465b 100644 --- a/deps/uv/docs/src/tty.rst +++ b/deps/uv/docs/src/tty.rst @@ -46,7 +46,7 @@ N/A API --- -.. c:function:: int uv_tty_init(uv_loop_t* loop, uv_tty_t* handle, uv_file fd, int readable) +.. c:function:: int uv_tty_init(uv_loop_t* loop, uv_tty_t* handle, uv_file fd, int unused) Initialize a new TTY stream with the given file descriptor. Usually the file descriptor will be: @@ -55,9 +55,6 @@ API * 1 = stdout * 2 = stderr - `readable`, specifies if you plan on calling :c:func:`uv_read_start` with - this stream. stdin is readable, stdout is not. - On Unix this function will determine the path of the fd of the terminal using :man:`ttyname_r(3)`, open it, and use it if the passed file descriptor refers to a TTY. This lets libuv put the tty in non-blocking mode without @@ -67,8 +64,10 @@ API ioctl TIOCGPTN or TIOCPTYGNAME, for instance OpenBSD and Solaris. .. note:: - If reopening the TTY fails, libuv falls back to blocking writes for - non-readable TTY streams. + If reopening the TTY fails, libuv falls back to blocking writes. + + .. versionchanged:: 1.23.1: the `readable` parameter is now unused and ignored. + The correct value will now be auto-detected from the kernel. .. versionchanged:: 1.9.0: the path of the TTY is determined by :man:`ttyname_r(3)`. 
In earlier versions libuv opened diff --git a/deps/uv/include/uv/version.h b/deps/uv/include/uv/version.h index 805cd3ba89e4fc..002d91c92012b7 100644 --- a/deps/uv/include/uv/version.h +++ b/deps/uv/include/uv/version.h @@ -32,7 +32,7 @@ #define UV_VERSION_MAJOR 1 #define UV_VERSION_MINOR 23 -#define UV_VERSION_PATCH 1 +#define UV_VERSION_PATCH 2 #define UV_VERSION_IS_RELEASE 0 #define UV_VERSION_SUFFIX "dev" diff --git a/deps/uv/src/unix/bsd-ifaddrs.c b/deps/uv/src/unix/bsd-ifaddrs.c index 0d0215448640a4..9825b1c4db4865 100644 --- a/deps/uv/src/unix/bsd-ifaddrs.c +++ b/deps/uv/src/unix/bsd-ifaddrs.c @@ -119,16 +119,13 @@ int uv_interface_addresses(uv_interface_address_t** addresses, int* count) { continue; address = *addresses; + memset(address->phys_addr, 0, sizeof(address->phys_addr)); for (i = 0; i < *count; i++) { if (strcmp(address->name, ent->ifa_name) == 0) { -#if defined(__CYGWIN__) || defined(__MSYS__) - memset(address->phys_addr, 0, sizeof(address->phys_addr)); -#else struct sockaddr_dl* sa_addr; sa_addr = (struct sockaddr_dl*)(ent->ifa_addr); memcpy(address->phys_addr, LLADDR(sa_addr), sizeof(address->phys_addr)); -#endif } address++; } diff --git a/deps/uv/src/unix/cygwin.c b/deps/uv/src/unix/cygwin.c index 9fe4093ef46fb0..9da20e203aa238 100644 --- a/deps/uv/src/unix/cygwin.c +++ b/deps/uv/src/unix/cygwin.c @@ -38,7 +38,7 @@ int uv_uptime(double* uptime) { int uv_resident_set_memory(size_t* rss) { /* FIXME: read /proc/meminfo? */ *rss = 0; - return UV_ENOSYS; + return 0; } int uv_cpu_info(uv_cpu_info_t** cpu_infos, int* count) { diff --git a/deps/uv/src/unix/fs.c b/deps/uv/src/unix/fs.c index f47a58907ab81f..e3851a5de782cd 100644 --- a/deps/uv/src/unix/fs.c +++ b/deps/uv/src/unix/fs.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #if defined(__DragonFly__) || \ @@ -67,6 +66,10 @@ # define FICLONE _IOW(0x94, 9, int) #endif +#if defined(_AIX) && !defined(_AIX71) +# include +#endif + #define INIT(subtype) \ do { \ if (req == NULL) \ @@ -188,59 +191,17 @@ static ssize_t uv__fs_fdatasync(uv_fs_t* req) { static ssize_t uv__fs_futime(uv_fs_t* req) { -#if defined(__linux__) +#if defined(__linux__) \ + || defined(_AIX71) /* utimesat() has nanosecond resolution but we stick to microseconds * for the sake of consistency with other platforms. */ - static int no_utimesat; struct timespec ts[2]; - struct timeval tv[2]; - char path[sizeof("/proc/self/fd/") + 3 * sizeof(int)]; - int r; - - if (no_utimesat) - goto skip; - ts[0].tv_sec = req->atime; ts[0].tv_nsec = (uint64_t)(req->atime * 1000000) % 1000000 * 1000; ts[1].tv_sec = req->mtime; ts[1].tv_nsec = (uint64_t)(req->mtime * 1000000) % 1000000 * 1000; - - r = uv__utimesat(req->file, NULL, ts, 0); - if (r == 0) - return r; - - if (errno != ENOSYS) - return r; - - no_utimesat = 1; - -skip: - - tv[0].tv_sec = req->atime; - tv[0].tv_usec = (uint64_t)(req->atime * 1000000) % 1000000; - tv[1].tv_sec = req->mtime; - tv[1].tv_usec = (uint64_t)(req->mtime * 1000000) % 1000000; - snprintf(path, sizeof(path), "/proc/self/fd/%d", (int) req->file); - - r = utimes(path, tv); - if (r == 0) - return r; - - switch (errno) { - case ENOENT: - if (fcntl(req->file, F_GETFL) == -1 && errno == EBADF) - break; - /* Fall through. 
*/ - - case EACCES: - case ENOTDIR: - errno = ENOSYS; - break; - } - - return r; - + return futimens(req->file, ts); #elif defined(__APPLE__) \ || defined(__DragonFly__) \ || defined(__FreeBSD__) \ @@ -258,13 +219,6 @@ static ssize_t uv__fs_futime(uv_fs_t* req) { # else return futimes(req->file, tv); # endif -#elif defined(_AIX71) - struct timespec ts[2]; - ts[0].tv_sec = req->atime; - ts[0].tv_nsec = (uint64_t)(req->atime * 1000000) % 1000000 * 1000; - ts[1].tv_sec = req->mtime; - ts[1].tv_nsec = (uint64_t)(req->mtime * 1000000) % 1000000 * 1000; - return futimens(req->file, ts); #elif defined(__MVS__) attrib_t atr; memset(&atr, 0, sizeof(atr)); @@ -327,17 +281,25 @@ static ssize_t uv__fs_read(uv_fs_t* req) { #if defined(__linux__) static int no_preadv; #endif + unsigned int iovmax; ssize_t result; #if defined(_AIX) struct stat buf; - if(fstat(req->file, &buf)) - return -1; - if(S_ISDIR(buf.st_mode)) { + result = fstat(req->file, &buf); + if (result) + goto done; + if (S_ISDIR(buf.st_mode)) { errno = EISDIR; - return -1; + result -1; + goto done; } #endif /* defined(_AIX) */ + + iovmax = uv__getiovmax(); + if (req->nbufs > iovmax) + req->nbufs = iovmax; + if (req->off < 0) { if (req->nbufs == 1) result = read(req->file, req->bufs[0].base, req->bufs[0].len); @@ -356,25 +318,7 @@ static ssize_t uv__fs_read(uv_fs_t* req) { if (no_preadv) retry: # endif { - off_t nread; - size_t index; - - nread = 0; - index = 0; - result = 1; - do { - if (req->bufs[index].len > 0) { - result = pread(req->file, - req->bufs[index].base, - req->bufs[index].len, - req->off + nread); - if (result > 0) - nread += result; - } - index++; - } while (index < req->nbufs && result > 0); - if (nread > 0) - result = nread; + result = pread(req->file, req->bufs[0].base, req->bufs[0].len, req->off); } # if defined(__linux__) else { @@ -392,6 +336,13 @@ static ssize_t uv__fs_read(uv_fs_t* req) { } done: + /* Early cleanup of bufs allocation, since we're done with it. */ + if (req->bufs != req->bufsml) + uv__free(req->bufs); + + req->bufs = NULL; + req->nbufs = 0; + return result; } @@ -721,10 +672,48 @@ static ssize_t uv__fs_sendfile(uv_fs_t* req) { static ssize_t uv__fs_utime(uv_fs_t* req) { +#if defined(__linux__) \ + || defined(_AIX71) \ + || defined(__sun) + /* utimesat() has nanosecond resolution but we stick to microseconds + * for the sake of consistency with other platforms. 
+ */ + struct timespec ts[2]; + ts[0].tv_sec = req->atime; + ts[0].tv_nsec = (uint64_t)(req->atime * 1000000) % 1000000 * 1000; + ts[1].tv_sec = req->mtime; + ts[1].tv_nsec = (uint64_t)(req->mtime * 1000000) % 1000000 * 1000; + return utimensat(AT_FDCWD, req->path, ts, 0); +#elif defined(__APPLE__) \ + || defined(__DragonFly__) \ + || defined(__FreeBSD__) \ + || defined(__FreeBSD_kernel__) \ + || defined(__NetBSD__) \ + || defined(__OpenBSD__) + struct timeval tv[2]; + tv[0].tv_sec = req->atime; + tv[0].tv_usec = (uint64_t)(req->atime * 1000000) % 1000000; + tv[1].tv_sec = req->mtime; + tv[1].tv_usec = (uint64_t)(req->mtime * 1000000) % 1000000; + return utimes(req->path, tv); +#elif defined(_AIX) \ + && !defined(_AIX71) struct utimbuf buf; buf.actime = req->atime; buf.modtime = req->mtime; - return utime(req->path, &buf); /* TODO use utimes() where available */ + return utime(req->path, &buf); +#elif defined(__MVS__) + attrib_t atr; + memset(&atr, 0, sizeof(atr)); + atr.att_mtimechg = 1; + atr.att_atimechg = 1; + atr.att_mtime = req->mtime; + atr.att_atime = req->atime; + return __lchattr(req->path, &atr, sizeof(atr)); +#else + errno = ENOSYS; + return -1; +#endif } @@ -762,25 +751,7 @@ static ssize_t uv__fs_write(uv_fs_t* req) { if (no_pwritev) retry: # endif { - off_t written; - size_t index; - - written = 0; - index = 0; - r = 0; - do { - if (req->bufs[index].len > 0) { - r = pwrite(req->file, - req->bufs[index].base, - req->bufs[index].len, - req->off + written); - if (r > 0) - written += r; - } - index++; - } while (index < req->nbufs && r >= 0); - if (written > 0) - r = written; + r = pwrite(req->file, req->bufs[0].base, req->bufs[0].len, req->off); } # if defined(__linux__) else { @@ -1072,9 +1043,21 @@ static int uv__fs_fstat(int fd, uv_stat_t *buf) { return ret; } +static size_t uv__fs_buf_offset(uv_buf_t* bufs, size_t size) { + size_t offset; + /* Figure out which bufs are done */ + for (offset = 0; size > 0 && bufs[offset].len <= size; ++offset) + size -= bufs[offset].len; + + /* Fix a partial read/write */ + if (size > 0) { + bufs[offset].base += size; + bufs[offset].len -= size; + } + return offset; +} -typedef ssize_t (*uv__fs_buf_iter_processor)(uv_fs_t* req); -static ssize_t uv__fs_buf_iter(uv_fs_t* req, uv__fs_buf_iter_processor process) { +static ssize_t uv__fs_write_all(uv_fs_t* req) { unsigned int iovmax; unsigned int nbufs; uv_buf_t* bufs; @@ -1091,7 +1074,10 @@ static ssize_t uv__fs_buf_iter(uv_fs_t* req, uv__fs_buf_iter_processor process) if (req->nbufs > iovmax) req->nbufs = iovmax; - result = process(req); + do + result = uv__fs_write(req); + while (result < 0 && errno == EINTR); + if (result <= 0) { if (total == 0) total = result; @@ -1101,14 +1087,12 @@ static ssize_t uv__fs_buf_iter(uv_fs_t* req, uv__fs_buf_iter_processor process) if (req->off >= 0) req->off += result; + req->nbufs = uv__fs_buf_offset(req->bufs, result); req->bufs += req->nbufs; nbufs -= req->nbufs; total += result; } - if (errno == EINTR && total == -1) - return total; - if (bufs != req->bufsml) uv__free(bufs); @@ -1125,7 +1109,8 @@ static void uv__fs_work(struct uv__work* w) { ssize_t r; req = container_of(w, uv_fs_t, work_req); - retry_on_eintr = !(req->fs_type == UV_FS_CLOSE); + retry_on_eintr = !(req->fs_type == UV_FS_CLOSE || + req->fs_type == UV_FS_READ); do { errno = 0; @@ -1154,7 +1139,7 @@ static void uv__fs_work(struct uv__work* w) { X(MKDIR, mkdir(req->path, req->mode)); X(MKDTEMP, uv__fs_mkdtemp(req)); X(OPEN, uv__fs_open(req)); - X(READ, uv__fs_buf_iter(req, uv__fs_read)); + 
X(READ, uv__fs_read(req)); X(SCANDIR, uv__fs_scandir(req)); X(READLINK, uv__fs_readlink(req)); X(REALPATH, uv__fs_realpath(req)); @@ -1165,7 +1150,7 @@ static void uv__fs_work(struct uv__work* w) { X(SYMLINK, symlink(req->path, req->new_path)); X(UNLINK, unlink(req->path)); X(UTIME, uv__fs_utime(req)); - X(WRITE, uv__fs_buf_iter(req, uv__fs_write)); + X(WRITE, uv__fs_write_all(req)); default: abort(); } #undef X diff --git a/deps/uv/src/unix/ibmi.c b/deps/uv/src/unix/ibmi.c index c50a4e76f84119..b1ab549c23f4b9 100644 --- a/deps/uv/src/unix/ibmi.c +++ b/deps/uv/src/unix/ibmi.c @@ -72,7 +72,8 @@ void uv_loadavg(double avg[3]) { int uv_resident_set_memory(size_t* rss) { - return UV_ENOSYS; + *rss = 0; + return 0; } diff --git a/deps/uv/src/unix/linux-core.c b/deps/uv/src/unix/linux-core.c index d09bbcdd6f292b..75362eb76d7f5d 100644 --- a/deps/uv/src/unix/linux-core.c +++ b/deps/uv/src/unix/linux-core.c @@ -20,7 +20,7 @@ /* We lean on the fact that POLL{IN,OUT,ERR,HUP} correspond with their * EPOLL* counterparts. We use the POLL* variants in this file because that - * is what libuv uses elsewhere and it avoids a dependency on . + * is what libuv uses elsewhere. */ #include "uv.h" @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -84,13 +85,13 @@ static unsigned long read_cpufreq(unsigned int cpunum); int uv__platform_loop_init(uv_loop_t* loop) { int fd; - fd = uv__epoll_create1(UV__EPOLL_CLOEXEC); + fd = epoll_create1(EPOLL_CLOEXEC); /* epoll_create1() can fail either because it's not implemented (old kernel) * or because it doesn't understand the EPOLL_CLOEXEC flag. */ if (fd == -1 && (errno == ENOSYS || errno == EINVAL)) { - fd = uv__epoll_create(256); + fd = epoll_create(256); if (fd != -1) uv__cloexec(fd, 1); @@ -134,20 +135,20 @@ void uv__platform_loop_delete(uv_loop_t* loop) { void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) { - struct uv__epoll_event* events; - struct uv__epoll_event dummy; + struct epoll_event* events; + struct epoll_event dummy; uintptr_t i; uintptr_t nfds; assert(loop->watchers != NULL); - events = (struct uv__epoll_event*) loop->watchers[loop->nwatchers]; + events = (struct epoll_event*) loop->watchers[loop->nwatchers]; nfds = (uintptr_t) loop->watchers[loop->nwatchers + 1]; if (events != NULL) /* Invalidate events with same file descriptor */ for (i = 0; i < nfds; i++) - if ((int) events[i].data == fd) - events[i].data = -1; + if (events[i].data.fd == fd) + events[i].data.fd = -1; /* Remove the file descriptor from the epoll. * This avoids a problem where the same file description remains open @@ -160,25 +161,25 @@ void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) { * has the EPOLLWAKEUP flag set generates spurious audit syslog warnings. */ memset(&dummy, 0, sizeof(dummy)); - uv__epoll_ctl(loop->backend_fd, UV__EPOLL_CTL_DEL, fd, &dummy); + epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &dummy); } } int uv__io_check_fd(uv_loop_t* loop, int fd) { - struct uv__epoll_event e; + struct epoll_event e; int rc; e.events = POLLIN; - e.data = -1; + e.data.fd = -1; rc = 0; - if (uv__epoll_ctl(loop->backend_fd, UV__EPOLL_CTL_ADD, fd, &e)) + if (epoll_ctl(loop->backend_fd, EPOLL_CTL_ADD, fd, &e)) if (errno != EEXIST) rc = UV__ERR(errno); if (rc == 0) - if (uv__epoll_ctl(loop->backend_fd, UV__EPOLL_CTL_DEL, fd, &e)) + if (epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &e)) abort(); return rc; @@ -195,16 +196,14 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { * that being the largest value I have seen in the wild (and only once.) 
*/ static const int max_safe_timeout = 1789569; - static int no_epoll_pwait; - static int no_epoll_wait; - struct uv__epoll_event events[1024]; - struct uv__epoll_event* pe; - struct uv__epoll_event e; + struct epoll_event events[1024]; + struct epoll_event* pe; + struct epoll_event e; int real_timeout; QUEUE* q; uv__io_t* w; sigset_t sigset; - uint64_t sigmask; + sigset_t* psigset; uint64_t base; int have_signals; int nevents; @@ -230,35 +229,35 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { assert(w->fd < (int) loop->nwatchers); e.events = w->pevents; - e.data = w->fd; + e.data.fd = w->fd; if (w->events == 0) - op = UV__EPOLL_CTL_ADD; + op = EPOLL_CTL_ADD; else - op = UV__EPOLL_CTL_MOD; + op = EPOLL_CTL_MOD; /* XXX Future optimization: do EPOLL_CTL_MOD lazily if we stop watching * events, skip the syscall and squelch the events after epoll_wait(). */ - if (uv__epoll_ctl(loop->backend_fd, op, w->fd, &e)) { + if (epoll_ctl(loop->backend_fd, op, w->fd, &e)) { if (errno != EEXIST) abort(); - assert(op == UV__EPOLL_CTL_ADD); + assert(op == EPOLL_CTL_ADD); /* We've reactivated a file descriptor that's been watched before. */ - if (uv__epoll_ctl(loop->backend_fd, UV__EPOLL_CTL_MOD, w->fd, &e)) + if (epoll_ctl(loop->backend_fd, EPOLL_CTL_MOD, w->fd, &e)) abort(); } w->events = w->pevents; } - sigmask = 0; + psigset = NULL; if (loop->flags & UV_LOOP_BLOCK_SIGPROF) { sigemptyset(&sigset); sigaddset(&sigset, SIGPROF); - sigmask |= 1 << (SIGPROF - 1); + psigset = &sigset; } assert(timeout >= -1); @@ -273,30 +272,11 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { if (sizeof(int32_t) == sizeof(long) && timeout >= max_safe_timeout) timeout = max_safe_timeout; - if (sigmask != 0 && no_epoll_pwait != 0) - if (pthread_sigmask(SIG_BLOCK, &sigset, NULL)) - abort(); - - if (no_epoll_wait != 0 || (sigmask != 0 && no_epoll_pwait == 0)) { - nfds = uv__epoll_pwait(loop->backend_fd, - events, - ARRAY_SIZE(events), - timeout, - sigmask); - if (nfds == -1 && errno == ENOSYS) - no_epoll_pwait = 1; - } else { - nfds = uv__epoll_wait(loop->backend_fd, - events, - ARRAY_SIZE(events), - timeout); - if (nfds == -1 && errno == ENOSYS) - no_epoll_wait = 1; - } - - if (sigmask != 0 && no_epoll_pwait != 0) - if (pthread_sigmask(SIG_UNBLOCK, &sigset, NULL)) - abort(); + nfds = epoll_pwait(loop->backend_fd, + events, + ARRAY_SIZE(events), + timeout, + psigset); /* Update loop->time unconditionally. It's tempting to skip the update when * timeout == 0 (i.e. non-blocking poll) but there is no guarantee that the @@ -317,12 +297,6 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { } if (nfds == -1) { - if (errno == ENOSYS) { - /* epoll_wait() or epoll_pwait() failed, try the other system call. */ - assert(no_epoll_wait == 0 || no_epoll_pwait == 0); - continue; - } - if (errno != EINTR) abort(); @@ -344,7 +318,7 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { loop->watchers[loop->nwatchers + 1] = (void*) (uintptr_t) nfds; for (i = 0; i < nfds; i++) { pe = events + i; - fd = pe->data; + fd = pe->data.fd; /* Skip invalidated events, see uv__platform_invalidate_fd */ if (fd == -1) @@ -361,7 +335,7 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { * Ignore all errors because we may be racing with another thread * when the file descriptor is closed. 
*/ - uv__epoll_ctl(loop->backend_fd, UV__EPOLL_CTL_DEL, fd, pe); + epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, pe); continue; } @@ -916,6 +890,7 @@ int uv_interface_addresses(uv_interface_address_t** addresses, continue; address = *addresses; + memset(address->phys_addr, 0, sizeof(address->phys_addr)); for (i = 0; i < (*count); i++) { if (strcmp(address->name, ent->ifa_name) == 0) { diff --git a/deps/uv/src/unix/linux-syscalls.c b/deps/uv/src/unix/linux-syscalls.c index 89998ded26b17c..bfd75448793388 100644 --- a/deps/uv/src/unix/linux-syscalls.c +++ b/deps/uv/src/unix/linux-syscalls.c @@ -77,56 +77,6 @@ # endif #endif /* __NR_eventfd2 */ -#ifndef __NR_epoll_create -# if defined(__x86_64__) -# define __NR_epoll_create 213 -# elif defined(__i386__) -# define __NR_epoll_create 254 -# elif defined(__arm__) -# define __NR_epoll_create (UV_SYSCALL_BASE + 250) -# endif -#endif /* __NR_epoll_create */ - -#ifndef __NR_epoll_create1 -# if defined(__x86_64__) -# define __NR_epoll_create1 291 -# elif defined(__i386__) -# define __NR_epoll_create1 329 -# elif defined(__arm__) -# define __NR_epoll_create1 (UV_SYSCALL_BASE + 357) -# endif -#endif /* __NR_epoll_create1 */ - -#ifndef __NR_epoll_ctl -# if defined(__x86_64__) -# define __NR_epoll_ctl 233 /* used to be 214 */ -# elif defined(__i386__) -# define __NR_epoll_ctl 255 -# elif defined(__arm__) -# define __NR_epoll_ctl (UV_SYSCALL_BASE + 251) -# endif -#endif /* __NR_epoll_ctl */ - -#ifndef __NR_epoll_wait -# if defined(__x86_64__) -# define __NR_epoll_wait 232 /* used to be 215 */ -# elif defined(__i386__) -# define __NR_epoll_wait 256 -# elif defined(__arm__) -# define __NR_epoll_wait (UV_SYSCALL_BASE + 252) -# endif -#endif /* __NR_epoll_wait */ - -#ifndef __NR_epoll_pwait -# if defined(__x86_64__) -# define __NR_epoll_pwait 281 -# elif defined(__i386__) -# define __NR_epoll_pwait 319 -# elif defined(__arm__) -# define __NR_epoll_pwait (UV_SYSCALL_BASE + 346) -# endif -#endif /* __NR_epoll_pwait */ - #ifndef __NR_inotify_init # if defined(__x86_64__) # define __NR_inotify_init 253 @@ -285,76 +235,6 @@ int uv__eventfd2(unsigned int count, int flags) { } -int uv__epoll_create(int size) { -#if defined(__NR_epoll_create) - return syscall(__NR_epoll_create, size); -#else - return errno = ENOSYS, -1; -#endif -} - - -int uv__epoll_create1(int flags) { -#if defined(__NR_epoll_create1) - return syscall(__NR_epoll_create1, flags); -#else - return errno = ENOSYS, -1; -#endif -} - - -int uv__epoll_ctl(int epfd, int op, int fd, struct uv__epoll_event* events) { -#if defined(__NR_epoll_ctl) - return syscall(__NR_epoll_ctl, epfd, op, fd, events); -#else - return errno = ENOSYS, -1; -#endif -} - - -int uv__epoll_wait(int epfd, - struct uv__epoll_event* events, - int nevents, - int timeout) { -#if defined(__NR_epoll_wait) - int result; - result = syscall(__NR_epoll_wait, epfd, events, nevents, timeout); -#if MSAN_ACTIVE - if (result > 0) - __msan_unpoison(events, sizeof(events[0]) * result); -#endif - return result; -#else - return errno = ENOSYS, -1; -#endif -} - - -int uv__epoll_pwait(int epfd, - struct uv__epoll_event* events, - int nevents, - int timeout, - uint64_t sigmask) { -#if defined(__NR_epoll_pwait) - int result; - result = syscall(__NR_epoll_pwait, - epfd, - events, - nevents, - timeout, - &sigmask, - sizeof(sigmask)); -#if MSAN_ACTIVE - if (result > 0) - __msan_unpoison(events, sizeof(events[0]) * result); -#endif - return result; -#else - return errno = ENOSYS, -1; -#endif -} - - int uv__inotify_init(void) { #if defined(__NR_inotify_init) 
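  /* (Editor's note, not part of the patch: the epoll wrappers removed
   * above followed the same idiom as the surviving wrappers here. In
   * particular, `return errno = ENOSYS, -1;` uses the comma operator to
   * assign ENOSYS to errno and then return -1, mimicking the error
   * convention of a failing system call.) */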
return syscall(__NR_inotify_init); @@ -431,19 +311,6 @@ int uv__recvmmsg(int fd, } -int uv__utimesat(int dirfd, - const char* path, - const struct timespec times[2], - int flags) -{ -#if defined(__NR_utimensat) - return syscall(__NR_utimensat, dirfd, path, times, flags); -#else - return errno = ENOSYS, -1; -#endif -} - - ssize_t uv__preadv(int fd, const struct iovec *iov, int iovcnt, int64_t offset) { #if defined(__NR_preadv) return syscall(__NR_preadv, fd, iov, iovcnt, (long)offset, (long)(offset >> 32)); diff --git a/deps/uv/src/unix/linux-syscalls.h b/deps/uv/src/unix/linux-syscalls.h index 4c095e9b537996..3dfd329d6c84b5 100644 --- a/deps/uv/src/unix/linux-syscalls.h +++ b/deps/uv/src/unix/linux-syscalls.h @@ -66,12 +66,6 @@ # define UV__SOCK_NONBLOCK UV__O_NONBLOCK #endif -/* epoll flags */ -#define UV__EPOLL_CLOEXEC UV__O_CLOEXEC -#define UV__EPOLL_CTL_ADD 1 -#define UV__EPOLL_CTL_DEL 2 -#define UV__EPOLL_CTL_MOD 3 - /* inotify flags */ #define UV__IN_ACCESS 0x001 #define UV__IN_MODIFY 0x002 @@ -86,18 +80,6 @@ #define UV__IN_DELETE_SELF 0x400 #define UV__IN_MOVE_SELF 0x800 -#if defined(__x86_64__) -struct uv__epoll_event { - uint32_t events; - uint64_t data; -} __attribute__((packed)); -#else -struct uv__epoll_event { - uint32_t events; - uint64_t data; -}; -#endif - struct uv__inotify_event { int32_t wd; uint32_t mask; @@ -113,18 +95,6 @@ struct uv__mmsghdr { int uv__accept4(int fd, struct sockaddr* addr, socklen_t* addrlen, int flags); int uv__eventfd(unsigned int count); -int uv__epoll_create(int size); -int uv__epoll_create1(int flags); -int uv__epoll_ctl(int epfd, int op, int fd, struct uv__epoll_event *ev); -int uv__epoll_wait(int epfd, - struct uv__epoll_event* events, - int nevents, - int timeout); -int uv__epoll_pwait(int epfd, - struct uv__epoll_event* events, - int nevents, - int timeout, - uint64_t sigmask); int uv__eventfd2(unsigned int count, int flags); int uv__inotify_init(void); int uv__inotify_init1(int flags); @@ -140,10 +110,6 @@ int uv__sendmmsg(int fd, struct uv__mmsghdr* mmsg, unsigned int vlen, unsigned int flags); -int uv__utimesat(int dirfd, - const char* path, - const struct timespec times[2], - int flags); ssize_t uv__preadv(int fd, const struct iovec *iov, int iovcnt, int64_t offset); ssize_t uv__pwritev(int fd, const struct iovec *iov, int iovcnt, int64_t offset); int uv__dup3(int oldfd, int newfd, int flags); diff --git a/deps/uv/src/unix/os390-syscalls.h b/deps/uv/src/unix/os390-syscalls.h index 6e34a88cb95d1b..ea599107b30281 100644 --- a/deps/uv/src/unix/os390-syscalls.h +++ b/deps/uv/src/unix/os390-syscalls.h @@ -36,10 +36,6 @@ #define MAX_ITEMS_PER_EPOLL 1024 #define UV__O_CLOEXEC 0x80000 -#define UV__EPOLL_CLOEXEC UV__O_CLOEXEC -#define UV__EPOLL_CTL_ADD EPOLL_CTL_ADD -#define UV__EPOLL_CTL_DEL EPOLL_CTL_DEL -#define UV__EPOLL_CTL_MOD EPOLL_CTL_MOD struct epoll_event { int events; diff --git a/deps/uv/src/unix/os390.c b/deps/uv/src/unix/os390.c index f766b393395ee7..65e9b708303668 100644 --- a/deps/uv/src/unix/os390.c +++ b/deps/uv/src/unix/os390.c @@ -512,7 +512,7 @@ static int uv__interface_addresses_v6(uv_interface_address_t** addresses, /* TODO: Retrieve netmask using SIOCGIFNETMASK ioctl */ address->is_internal = flg.__nif6e_flags & _NIF6E_FLAGS_LOOPBACK ? 1 : 0; - + memset(address->phys_addr, 0, sizeof(address->phys_addr)); address++; } @@ -624,6 +624,7 @@ int uv_interface_addresses(uv_interface_address_t** addresses, int* count) { } address->is_internal = flg.ifr_flags & IFF_LOOPBACK ? 
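  /* (Editor's note, not part of the patch: as in the linux-core.c and
   * os390 v6 hunks above, the memset() added below zero-fills phys_addr
   * before the entry reaches the caller, so no uninitialized heap bytes
   * leak on platforms that never populate a hardware address.) */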
1 : 0; + memset(address->phys_addr, 0, sizeof(address->phys_addr)); address++; } @@ -662,7 +663,7 @@ void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) { /* Remove the file descriptor from the epoll. */ if (loop->ep != NULL) - epoll_ctl(loop->ep, UV__EPOLL_CTL_DEL, fd, &dummy); + epoll_ctl(loop->ep, EPOLL_CTL_DEL, fd, &dummy); } @@ -838,9 +839,9 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { e.fd = w->fd; if (w->events == 0) - op = UV__EPOLL_CTL_ADD; + op = EPOLL_CTL_ADD; else - op = UV__EPOLL_CTL_MOD; + op = EPOLL_CTL_MOD; /* XXX Future optimization: do EPOLL_CTL_MOD lazily if we stop watching * events, skip the syscall and squelch the events after epoll_wait(). @@ -849,10 +850,10 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { if (errno != EEXIST) abort(); - assert(op == UV__EPOLL_CTL_ADD); + assert(op == EPOLL_CTL_ADD); /* We've reactivated a file descriptor that's been watched before. */ - if (epoll_ctl(loop->ep, UV__EPOLL_CTL_MOD, w->fd, &e)) + if (epoll_ctl(loop->ep, EPOLL_CTL_MOD, w->fd, &e)) abort(); } @@ -934,7 +935,7 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { * Ignore all errors because we may be racing with another thread * when the file descriptor is closed. */ - epoll_ctl(loop->ep, UV__EPOLL_CTL_DEL, fd, pe); + epoll_ctl(loop->ep, EPOLL_CTL_DEL, fd, pe); continue; } diff --git a/deps/uv/src/unix/pipe.c b/deps/uv/src/unix/pipe.c index 0718bc81b86f63..e450a30e9c7e0c 100644 --- a/deps/uv/src/unix/pipe.c +++ b/deps/uv/src/unix/pipe.c @@ -132,11 +132,21 @@ void uv__pipe_close(uv_pipe_t* handle) { int uv_pipe_open(uv_pipe_t* handle, uv_file fd) { + int flags; + int mode; int err; + flags = 0; if (uv__fd_exists(handle->loop, fd)) return UV_EEXIST; + do + mode = fcntl(fd, F_GETFL); + while (mode == -1 && errno == EINTR); + + if (mode == -1) + return UV__ERR(errno); /* according to docs, must be EBADF */ + err = uv__nonblock(fd, 1); if (err) return err; @@ -147,9 +157,13 @@ int uv_pipe_open(uv_pipe_t* handle, uv_file fd) { return err; #endif /* defined(__APPLE__) */ - return uv__stream_open((uv_stream_t*)handle, - fd, - UV_HANDLE_READABLE | UV_HANDLE_WRITABLE); + mode &= O_ACCMODE; + if (mode != O_WRONLY) + flags |= UV_HANDLE_READABLE; + if (mode != O_RDONLY) + flags |= UV_HANDLE_WRITABLE; + + return uv__stream_open((uv_stream_t*)handle, fd, flags); } diff --git a/deps/uv/src/unix/stream.c b/deps/uv/src/unix/stream.c index 5a96b66b17bfd5..2e84eeeb82877e 100644 --- a/deps/uv/src/unix/stream.c +++ b/deps/uv/src/unix/stream.c @@ -1676,6 +1676,7 @@ void uv__stream_close(uv_stream_t* handle) { uv__io_close(handle->loop, &handle->io_watcher); uv_read_stop(handle); uv__handle_stop(handle); + handle->flags &= ~(UV_HANDLE_READABLE | UV_HANDLE_WRITABLE); if (handle->io_watcher.fd != -1) { /* Don't close stdio file descriptors. Nothing good comes from it. */ diff --git a/deps/uv/src/unix/tty.c b/deps/uv/src/unix/tty.c index 1b92b5c914ce9e..74d3d75d7615d9 100644 --- a/deps/uv/src/unix/tty.c +++ b/deps/uv/src/unix/tty.c @@ -92,13 +92,15 @@ static int uv__tty_is_slave(const int fd) { return result; } -int uv_tty_init(uv_loop_t* loop, uv_tty_t* tty, int fd, int readable) { +int uv_tty_init(uv_loop_t* loop, uv_tty_t* tty, int fd, int unused) { uv_handle_type type; int flags; int newfd; int r; int saved_flags; + int mode; char path[256]; + (void)unused; /* deprecated parameter is no longer needed */ /* File descriptors that refer to files cannot be monitored with epoll. 
* That restriction also applies to character devices like /dev/random @@ -111,6 +113,15 @@ int uv_tty_init(uv_loop_t* loop, uv_tty_t* tty, int fd, int readable) { flags = 0; newfd = -1; + /* Save the fd flags in case we need to restore them due to an error. */ + do + saved_flags = fcntl(fd, F_GETFL); + while (saved_flags == -1 && errno == EINTR); + + if (saved_flags == -1) + return UV__ERR(errno); + mode = saved_flags & O_ACCMODE; + /* Reopen the file descriptor when it refers to a tty. This lets us put the * tty in non-blocking mode without affecting other processes that share it * with us. @@ -128,13 +139,13 @@ int uv_tty_init(uv_loop_t* loop, uv_tty_t* tty, int fd, int readable) { * slave device. */ if (uv__tty_is_slave(fd) && ttyname_r(fd, path, sizeof(path)) == 0) - r = uv__open_cloexec(path, O_RDWR); + r = uv__open_cloexec(path, mode); else r = -1; if (r < 0) { /* fallback to using blocking writes */ - if (!readable) + if (mode != O_RDONLY) flags |= UV_HANDLE_BLOCKING_WRITES; goto skip; } @@ -154,22 +165,6 @@ int uv_tty_init(uv_loop_t* loop, uv_tty_t* tty, int fd, int readable) { fd = newfd; } -#if defined(__APPLE__) - /* Save the fd flags in case we need to restore them due to an error. */ - do - saved_flags = fcntl(fd, F_GETFL); - while (saved_flags == -1 && errno == EINTR); - - if (saved_flags == -1) { - if (newfd != -1) - uv__close(newfd); - return UV__ERR(errno); - } -#endif - - /* Pacify the compiler. */ - (void) &saved_flags; - skip: uv__stream_init(loop, (uv_stream_t*) tty, UV_TTY); @@ -194,9 +189,9 @@ int uv_tty_init(uv_loop_t* loop, uv_tty_t* tty, int fd, int readable) { } #endif - if (readable) + if (mode != O_WRONLY) flags |= UV_HANDLE_READABLE; - else + if (mode != O_RDONLY) flags |= UV_HANDLE_WRITABLE; uv__stream_open((uv_stream_t*) tty, fd, flags); diff --git a/deps/uv/src/win/core.c b/deps/uv/src/win/core.c index 153c6708aee9c6..c573a4ab5a770d 100644 --- a/deps/uv/src/win/core.c +++ b/deps/uv/src/win/core.c @@ -381,6 +381,57 @@ int uv_backend_timeout(const uv_loop_t* loop) { } +static void uv__poll_wine(uv_loop_t* loop, DWORD timeout) { + DWORD bytes; + ULONG_PTR key; + OVERLAPPED* overlapped; + uv_req_t* req; + int repeat; + uint64_t timeout_time; + + timeout_time = loop->time + timeout; + + for (repeat = 0; ; repeat++) { + GetQueuedCompletionStatus(loop->iocp, + &bytes, + &key, + &overlapped, + timeout); + + if (overlapped) { + /* Package was dequeued */ + req = uv_overlapped_to_req(overlapped); + uv_insert_pending_req(loop, req); + + /* Some time might have passed waiting for I/O, + * so update the loop time here. + */ + uv_update_time(loop); + } else if (GetLastError() != WAIT_TIMEOUT) { + /* Serious error */ + uv_fatal_error(GetLastError(), "GetQueuedCompletionStatus"); + } else if (timeout > 0) { + /* GetQueuedCompletionStatus can occasionally return a little early. + * Make sure that the desired timeout target time is reached. + */ + uv_update_time(loop); + if (timeout_time > loop->time) { + timeout = (DWORD)(timeout_time - loop->time); + /* The first call to GetQueuedCompletionStatus should return very + * close to the target time and the second should reach it, but + * this is not stated in the documentation. To make sure a busy + * loop cannot happen, the timeout is increased exponentially + * starting on the third round. + */ + timeout += repeat ? 
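      /* (Editor's worked example, not part of the patch: repeat is 0 on
       * the first round, so nothing is added; on later rounds the term
       * below contributes 1 << (repeat - 1), i.e. +1 ms on the second
       * round, +2 ms on the third, +4 ms on the fourth, and so on, so a
       * wait that keeps waking early backs off geometrically instead of
       * busy-looping.) */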
(1 << (repeat - 1)) : 0; + continue; + } + } + break; + } +} + + static void uv__poll(uv_loop_t* loop, DWORD timeout) { BOOL success; uv_req_t* req; @@ -473,7 +524,11 @@ int uv_run(uv_loop_t *loop, uv_run_mode mode) { if ((mode == UV_RUN_ONCE && !ran_pending) || mode == UV_RUN_DEFAULT) timeout = uv_backend_timeout(loop); - uv__poll(loop, timeout); + if (pGetQueuedCompletionStatusEx) + uv__poll(loop, timeout); + else + uv__poll_wine(loop, timeout); + uv_check_invoke(loop); uv_process_endgames(loop); diff --git a/deps/uv/src/win/tty.c b/deps/uv/src/win/tty.c index d62aafb7d8c921..dacb8a8269c5bf 100644 --- a/deps/uv/src/win/tty.c +++ b/deps/uv/src/win/tty.c @@ -172,9 +172,12 @@ void uv_console_init(void) { } -int uv_tty_init(uv_loop_t* loop, uv_tty_t* tty, uv_file fd, int readable) { +int uv_tty_init(uv_loop_t* loop, uv_tty_t* tty, uv_file fd, int unused) { + BOOL readable; + DWORD NumberOfEvents; HANDLE handle; CONSOLE_SCREEN_BUFFER_INFO screen_buffer_info; + (void)unused; uv__once_init(); handle = (HANDLE) uv__get_osfhandle(fd); @@ -199,6 +202,7 @@ int uv_tty_init(uv_loop_t* loop, uv_tty_t* tty, uv_file fd, int readable) { fd = -1; } + readable = GetNumberOfConsoleInputEvents(handle, &NumberOfEvents); if (!readable) { /* Obtain the screen buffer info with the output handle. */ if (!GetConsoleScreenBufferInfo(handle, &screen_buffer_info)) { @@ -382,12 +386,6 @@ int uv_tty_set_mode(uv_tty_t* tty, uv_tty_mode_t mode) { } -int uv_is_tty(uv_file file) { - DWORD result; - return GetConsoleMode((HANDLE) _get_osfhandle(file), &result) != 0; -} - - int uv_tty_get_winsize(uv_tty_t* tty, int* width, int* height) { CONSOLE_SCREEN_BUFFER_INFO info; @@ -1035,6 +1033,7 @@ int uv_tty_read_stop(uv_tty_t* handle) { /* Cancel raw read. Write some bullshit event to force the console wait to * return. 
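 *
 * (Editor's note, not part of the patch: the line added below tags the
 * record as a FOCUS_EVENT. A zeroed INPUT_RECORD carries no valid event
 * type, whereas focus events are documented as reserved for system use
 * and are ignored by console readers, so the write wakes the blocked
 * console wait without injecting visible input.)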
*/ memset(&record, 0, sizeof record); + record.EventType = FOCUS_EVENT; if (!WriteConsoleInputW(handle->handle, &record, 1, &written)) { return GetLastError(); } diff --git a/deps/uv/src/win/udp.c b/deps/uv/src/win/udp.c index 402aeea6666b5a..37df849f8faf20 100644 --- a/deps/uv/src/win/udp.c +++ b/deps/uv/src/win/udp.c @@ -366,7 +366,7 @@ int uv__udp_recv_start(uv_udp_t* handle, uv_alloc_cb alloc_cb, int err; if (handle->flags & UV_HANDLE_READING) { - return WSAEALREADY; + return UV_EALREADY; } err = uv_udp_maybe_bind(handle, @@ -374,7 +374,7 @@ int uv__udp_recv_start(uv_udp_t* handle, uv_alloc_cb alloc_cb, sizeof(uv_addr_ip4_any_), 0); if (err) - return err; + return uv_translate_sys_error(err); handle->flags |= UV_HANDLE_READING; INCREASE_ACTIVE_COUNT(loop, handle); diff --git a/deps/uv/src/win/winapi.c b/deps/uv/src/win/winapi.c index 0fd598eacb4503..2c09b448a95c01 100644 --- a/deps/uv/src/win/winapi.c +++ b/deps/uv/src/win/winapi.c @@ -34,6 +34,9 @@ sNtQueryVolumeInformationFile pNtQueryVolumeInformationFile; sNtQueryDirectoryFile pNtQueryDirectoryFile; sNtQuerySystemInformation pNtQuerySystemInformation; +/* Kernel32 function pointers */ +sGetQueuedCompletionStatusEx pGetQueuedCompletionStatusEx; + /* Powrprof.dll function pointer */ sPowerRegisterSuspendResumeNotification pPowerRegisterSuspendResumeNotification; @@ -45,6 +48,7 @@ void uv_winapi_init(void) { HMODULE ntdll_module; HMODULE powrprof_module; HMODULE user32_module; + HMODULE kernel32_module; ntdll_module = GetModuleHandleA("ntdll.dll"); if (ntdll_module == NULL) { @@ -98,6 +102,15 @@ void uv_winapi_init(void) { uv_fatal_error(GetLastError(), "GetProcAddress"); } + kernel32_module = GetModuleHandleA("kernel32.dll"); + if (kernel32_module == NULL) { + uv_fatal_error(GetLastError(), "GetModuleHandleA"); + } + + pGetQueuedCompletionStatusEx = (sGetQueuedCompletionStatusEx) GetProcAddress( + kernel32_module, + "GetQueuedCompletionStatusEx"); + powrprof_module = LoadLibraryA("powrprof.dll"); if (powrprof_module != NULL) { pPowerRegisterSuspendResumeNotification = (sPowerRegisterSuspendResumeNotification) diff --git a/deps/uv/src/win/winapi.h b/deps/uv/src/win/winapi.h index d0fcfd8e7ae021..cfbac52eb1d6f4 100644 --- a/deps/uv/src/win/winapi.h +++ b/deps/uv/src/win/winapi.h @@ -4642,6 +4642,14 @@ typedef NTSTATUS (NTAPI *sNtQueryDirectoryFile) # define ERROR_MUI_FILE_NOT_LOADED 15105 #endif +typedef BOOL (WINAPI *sGetQueuedCompletionStatusEx) + (HANDLE CompletionPort, + LPOVERLAPPED_ENTRY lpCompletionPortEntries, + ULONG ulCount, + PULONG ulNumEntriesRemoved, + DWORD dwMilliseconds, + BOOL fAlertable); + /* from powerbase.h */ #ifndef DEVICE_NOTIFY_CALLBACK # define DEVICE_NOTIFY_CALLBACK 2 @@ -4704,6 +4712,9 @@ extern sNtQueryVolumeInformationFile pNtQueryVolumeInformationFile; extern sNtQueryDirectoryFile pNtQueryDirectoryFile; extern sNtQuerySystemInformation pNtQuerySystemInformation; +/* Kernel32 function pointers */ +extern sGetQueuedCompletionStatusEx pGetQueuedCompletionStatusEx; + /* Powrprof.dll function pointer */ extern sPowerRegisterSuspendResumeNotification pPowerRegisterSuspendResumeNotification; diff --git a/deps/uv/test/test-condvar.c b/deps/uv/test/test-condvar.c index ec60f16403ad0b..50f3c047c00cd2 100644 --- a/deps/uv/test/test-condvar.c +++ b/deps/uv/test/test-condvar.c @@ -259,7 +259,7 @@ TEST_IMPL(condvar_5) { * https://msdn.microsoft.com/en-us/library/ms687069(VS.85).aspx */ elapsed = after - before; ASSERT(0.75 * timeout <= elapsed); /* 1.0 too large for Windows. 
*/ - ASSERT(elapsed <= 1.5 * timeout); /* 1.1 too small for OSX. */ + ASSERT(elapsed <= 5.0 * timeout); /* macOS has reported failures up to 1.75. */ worker_config_destroy(&wc); diff --git a/deps/uv/test/test-fork.c b/deps/uv/test/test-fork.c index 1553cc074cbff0..d974b6706e0d5c 100644 --- a/deps/uv/test/test-fork.c +++ b/deps/uv/test/test-fork.c @@ -283,6 +283,7 @@ TEST_IMPL(fork_signal_to_child_closed) { int sync_pipe[2]; int sync_pipe2[2]; char sync_buf[1]; + int r; fork_signal_cb_called = 0; /* reset */ @@ -326,9 +327,10 @@ TEST_IMPL(fork_signal_to_child_closed) { /* Don't run the loop. Wait for the parent to call us */ printf("Waiting on parent in child\n"); /* Wait for parent. read may fail if the parent tripped an ASSERT - and exited, so this isn't in an ASSERT. + and exited, so this ASSERT is generous. */ - read(sync_pipe2[0], sync_buf, 1); + r = read(sync_pipe2[0], sync_buf, 1); + ASSERT(-1 <= r && r <= 1); ASSERT(0 == fork_signal_cb_called); printf("Exiting child \n"); /* Note that we're deliberately not running the loop diff --git a/deps/uv/test/test-fs.c b/deps/uv/test/test-fs.c index 9c1e8bec205f5d..b4df5e3d1515df 100644 --- a/deps/uv/test/test-fs.c +++ b/deps/uv/test/test-fs.c @@ -26,6 +26,7 @@ #include <string.h> /* memset */ #include #include +#include <limits.h> /* INT_MAX, PATH_MAX, IOV_MAX */ /* FIXME we shouldn't need to branch in this file */ #if defined(__unix__) || defined(__POSIX__) || \ @@ -120,6 +121,31 @@ static char test_buf[] = "test-buffer\n"; static char test_buf2[] = "second-buffer\n"; static uv_buf_t iov; +#ifdef _WIN32 +int uv_test_getiovmax(void) { + return INT32_MAX; /* Emulated by libuv, so no real limit. */ +} +#else +int uv_test_getiovmax(void) { +#if defined(IOV_MAX) + return IOV_MAX; +#elif defined(_SC_IOV_MAX) + static int iovmax = -1; + if (iovmax == -1) { + iovmax = sysconf(_SC_IOV_MAX); + /* On some embedded devices (arm-linux-uclibc based ip camera), + * sysconf(_SC_IOV_MAX) cannot get the correct value. The return + * value is -1 and the errno is EINPROGRESS. Degrade the value to 1. + */ + if (iovmax == -1) iovmax = 1; + } + return iovmax; +#else + return 1024; +#endif +} +#endif + #ifdef _WIN32 /* * This tag and guid have no special meaning, and don't conflict with @@ -2752,19 +2778,44 @@ TEST_IMPL(fs_write_multiple_bufs) { memset(buf, 0, sizeof(buf)); memset(buf2, 0, sizeof(buf2)); + /* Read the strings back to separate buffers. */ + iovs[0] = uv_buf_init(buf, sizeof(test_buf)); + iovs[1] = uv_buf_init(buf2, sizeof(test_buf2)); + ASSERT(lseek(open_req1.result, 0, SEEK_CUR) == 0); + r = uv_fs_read(NULL, &read_req, open_req1.result, iovs, 2, -1, NULL); + ASSERT(r >= 0); + ASSERT(read_req.result == sizeof(test_buf) + sizeof(test_buf2)); + ASSERT(strcmp(buf, test_buf) == 0); + ASSERT(strcmp(buf2, test_buf2) == 0); + uv_fs_req_cleanup(&read_req); + + iov = uv_buf_init(buf, sizeof(buf)); + r = uv_fs_read(NULL, &read_req, open_req1.result, &iov, 1, -1, NULL); + ASSERT(r == 0); + ASSERT(read_req.result == 0); + uv_fs_req_cleanup(&read_req); + /* Read the strings back to separate buffers. */ iovs[0] = uv_buf_init(buf, sizeof(test_buf)); iovs[1] = uv_buf_init(buf2, sizeof(test_buf2)); r = uv_fs_read(NULL, &read_req, open_req1.result, iovs, 2, 0, NULL); ASSERT(r >= 0); - ASSERT(read_req.result >= 0); + if (read_req.result == sizeof(test_buf)) { + /* Infer that preadv is not available.
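 *
 * (Editor's note, not part of the patch: the inference is that a
 * positional scatter read which returns exactly sizeof(test_buf) only
 * filled its first buffer, presumably because the platform lacks
 * preadv() and libuv fell back to reading one buffer per call; the test
 * then fetches the second buffer at an explicit offset instead of
 * failing outright.)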
*/ + uv_fs_req_cleanup(&read_req); + r = uv_fs_read(NULL, &read_req, open_req1.result, &iovs[1], 1, read_req.result, NULL); + ASSERT(r >= 0); + ASSERT(read_req.result == sizeof(test_buf2)); + } else { + ASSERT(read_req.result == sizeof(test_buf) + sizeof(test_buf2)); + } ASSERT(strcmp(buf, test_buf) == 0); ASSERT(strcmp(buf2, test_buf2) == 0); uv_fs_req_cleanup(&read_req); iov = uv_buf_init(buf, sizeof(buf)); r = uv_fs_read(NULL, &read_req, open_req1.result, &iov, 1, - read_req.result, NULL); + sizeof(test_buf) + sizeof(test_buf2), NULL); ASSERT(r == 0); ASSERT(read_req.result == 0); uv_fs_req_cleanup(&read_req); @@ -2783,12 +2834,15 @@ TEST_IMPL(fs_write_multiple_bufs) { TEST_IMPL(fs_write_alotof_bufs) { - const size_t iovcount = 54321; + size_t iovcount; + size_t iovmax; uv_buf_t* iovs; char* buffer; size_t index; int r; + iovcount = 54321; + /* Setup. */ unlink("test_file"); @@ -2796,6 +2850,7 @@ TEST_IMPL(fs_write_alotof_bufs) { iovs = malloc(sizeof(*iovs) * iovcount); ASSERT(iovs != NULL); + iovmax = uv_test_getiovmax(); r = uv_fs_open(NULL, &open_req1, @@ -2829,7 +2884,10 @@ TEST_IMPL(fs_write_alotof_bufs) { iovs[index] = uv_buf_init(buffer + index * sizeof(test_buf), sizeof(test_buf)); - r = uv_fs_read(NULL, &read_req, open_req1.result, iovs, iovcount, 0, NULL); + ASSERT(lseek(open_req1.result, 0, SEEK_SET) == 0); + r = uv_fs_read(NULL, &read_req, open_req1.result, iovs, iovcount, -1, NULL); + if (iovcount > iovmax) + iovcount = iovmax; ASSERT(r >= 0); ASSERT((size_t)read_req.result == sizeof(test_buf) * iovcount); @@ -2841,13 +2899,14 @@ TEST_IMPL(fs_write_alotof_bufs) { uv_fs_req_cleanup(&read_req); free(buffer); + ASSERT(lseek(open_req1.result, write_req.result, SEEK_SET) == write_req.result); iov = uv_buf_init(buf, sizeof(buf)); r = uv_fs_read(NULL, &read_req, open_req1.result, &iov, 1, - read_req.result, + -1, NULL); ASSERT(r == 0); ASSERT(read_req.result == 0); @@ -2868,14 +2927,19 @@ TEST_IMPL(fs_write_alotof_bufs) { TEST_IMPL(fs_write_alotof_bufs_with_offset) { - const size_t iovcount = 54321; + size_t iovcount; + size_t iovmax; uv_buf_t* iovs; char* buffer; size_t index; int r; int64_t offset; - char* filler = "0123456789"; - int filler_len = strlen(filler); + char* filler; + int filler_len; + + filler = "0123456789"; + filler_len = strlen(filler); + iovcount = 54321; /* Setup. */ unlink("test_file"); @@ -2884,6 +2948,7 @@ TEST_IMPL(fs_write_alotof_bufs_with_offset) { iovs = malloc(sizeof(*iovs) * iovcount); ASSERT(iovs != NULL); + iovmax = uv_test_getiovmax(); r = uv_fs_open(NULL, &open_req1, @@ -2927,6 +2992,10 @@ TEST_IMPL(fs_write_alotof_bufs_with_offset) { r = uv_fs_read(NULL, &read_req, open_req1.result, iovs, iovcount, offset, NULL); ASSERT(r >= 0); + if (r == sizeof(test_buf)) + iovcount = 1; /* Infer that preadv is not available. 
*/ + else if (iovcount > iovmax) + iovcount = iovmax; ASSERT((size_t)read_req.result == sizeof(test_buf) * iovcount); for (index = 0; index < iovcount; ++index) @@ -2940,7 +3009,7 @@ TEST_IMPL(fs_write_alotof_bufs_with_offset) { r = uv_fs_stat(NULL, &stat_req, "test_file", NULL); ASSERT(r == 0); ASSERT((int64_t)((uv_stat_t*)stat_req.ptr)->st_size == - offset + (int64_t)(iovcount * sizeof(test_buf))); + offset + (int64_t)write_req.result); uv_fs_req_cleanup(&stat_req); iov = uv_buf_init(buf, sizeof(buf)); @@ -2949,7 +3018,7 @@ TEST_IMPL(fs_write_alotof_bufs_with_offset) { open_req1.result, &iov, 1, - read_req.result + offset, + offset + write_req.result, NULL); ASSERT(r == 0); ASSERT(read_req.result == 0); @@ -2969,6 +3038,175 @@ TEST_IMPL(fs_write_alotof_bufs_with_offset) { } +#ifdef _WIN32 + +TEST_IMPL(fs_partial_read) { + RETURN_SKIP("Test not implemented on Windows."); +} + +TEST_IMPL(fs_partial_write) { + RETURN_SKIP("Test not implemented on Windows."); +} + +#else /* !_WIN32 */ + +struct thread_ctx { + pthread_t pid; + int fd; + char* data; + int size; + int interval; + int doread; +}; + +static void thread_main(void* arg) { + const struct thread_ctx* ctx; + int size; + char* data; + + ctx = (struct thread_ctx*)arg; + size = ctx->size; + data = ctx->data; + + while (size > 0) { + ssize_t result; + int nbytes; + nbytes = size < ctx->interval ? size : ctx->interval; + if (ctx->doread) { + result = write(ctx->fd, data, nbytes); + /* Should not see EINTR (or other errors) */ + ASSERT(result == nbytes); + } else { + result = read(ctx->fd, data, nbytes); + /* Should not see EINTR (or other errors), + * but might get a partial read if we are faster than the writer + */ + ASSERT(result > 0 && result <= nbytes); + } + + pthread_kill(ctx->pid, SIGUSR1); + size -= result; + data += result; + } +} + +static void sig_func(uv_signal_t* handle, int signum) { + uv_signal_stop(handle); +} + +static size_t uv_test_fs_buf_offset(uv_buf_t* bufs, size_t size) { + size_t offset; + /* Figure out which bufs are done */ + for (offset = 0; size > 0 && bufs[offset].len <= size; ++offset) + size -= bufs[offset].len; + + /* Fix a partial read/write */ + if (size > 0) { + bufs[offset].base += size; + bufs[offset].len -= size; + } + return offset; +} + +static void test_fs_partial(int doread) { + struct thread_ctx ctx; + uv_thread_t thread; + uv_signal_t signal; + int pipe_fds[2]; + size_t iovcount; + uv_buf_t* iovs; + char* buffer; + size_t index; + + iovcount = 54321; + + iovs = malloc(sizeof(*iovs) * iovcount); + ASSERT(iovs != NULL); + + ctx.pid = pthread_self(); + ctx.doread = doread; + ctx.interval = 1000; + ctx.size = sizeof(test_buf) * iovcount; + ctx.data = malloc(ctx.size); + ASSERT(ctx.data != NULL); + buffer = malloc(ctx.size); + ASSERT(buffer != NULL); + + for (index = 0; index < iovcount; ++index) + iovs[index] = uv_buf_init(buffer + index * sizeof(test_buf), sizeof(test_buf)); + + loop = uv_default_loop(); + + ASSERT(0 == uv_signal_init(loop, &signal)); + ASSERT(0 == uv_signal_start(&signal, sig_func, SIGUSR1)); + + ASSERT(0 == pipe(pipe_fds)); + + ctx.fd = pipe_fds[doread]; + ASSERT(0 == uv_thread_create(&thread, thread_main, &ctx)); + + if (doread) { + uv_buf_t* read_iovs; + int nread; + read_iovs = iovs; + nread = 0; + while (nread < ctx.size) { + int result; + result = uv_fs_read(loop, &read_req, pipe_fds[0], read_iovs, iovcount, -1, NULL); + if (result > 0) { + size_t read_iovcount; + read_iovcount = uv_test_fs_buf_offset(read_iovs, result); + read_iovs += read_iovcount; + iovcount -= 
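        /* (Editor's worked example, not part of the patch: suppose three
         * 10-byte buffers and a partial result of 25. The loop in
         * uv_test_fs_buf_offset() consumes bufs[0] and bufs[1], leaving
         * size == 5, then advances bufs[2].base by 5, shrinks bufs[2].len
         * to 5, and returns 2, so the retry resumes at the first unread
         * byte.) */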
read_iovcount; + nread += result; + } else { + ASSERT(result == UV_EINTR); + } + uv_fs_req_cleanup(&read_req); + } + } else { + int result; + result = uv_fs_write(loop, &write_req, pipe_fds[1], iovs, iovcount, -1, NULL); + ASSERT(write_req.result == result); + ASSERT(result == ctx.size); + uv_fs_req_cleanup(&write_req); + } + + ASSERT(0 == memcmp(buffer, ctx.data, ctx.size)); + + ASSERT(0 == uv_thread_join(&thread)); + ASSERT(0 == uv_run(loop, UV_RUN_DEFAULT)); + + ASSERT(0 == close(pipe_fds[1])); + uv_close((uv_handle_t*) &signal, NULL); + + { /* Make sure we read everything that we wrote. */ + int result; + result = uv_fs_read(loop, &read_req, pipe_fds[0], iovs, 1, -1, NULL); + ASSERT(result == 0); + uv_fs_req_cleanup(&read_req); + } + ASSERT(0 == close(pipe_fds[0])); + + free(iovs); + free(buffer); + free(ctx.data); + + MAKE_VALGRIND_HAPPY(); +} + +TEST_IMPL(fs_partial_read) { + test_fs_partial(1); + return 0; +} + +TEST_IMPL(fs_partial_write) { + test_fs_partial(0); + return 0; +} + +#endif/* _WIN32 */ + TEST_IMPL(fs_read_write_null_arguments) { int r; diff --git a/deps/uv/test/test-handle-fileno.c b/deps/uv/test/test-handle-fileno.c index 3fe933adebdd87..8a093e2ea46e2c 100644 --- a/deps/uv/test/test-handle-fileno.c +++ b/deps/uv/test/test-handle-fileno.c @@ -27,7 +27,7 @@ static int get_tty_fd(void) { /* Make sure we have an FD that refers to a tty */ #ifdef _WIN32 HANDLE handle; - handle = CreateFileA("conout$", + handle = CreateFileA("conin$", GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, @@ -107,11 +107,15 @@ TEST_IMPL(handle_fileno) { } else { r = uv_tty_init(loop, &tty, tty_fd, 0); ASSERT(r == 0); + ASSERT(uv_is_readable((uv_stream_t*) &tty)); + ASSERT(!uv_is_writable((uv_stream_t*) &tty)); r = uv_fileno((uv_handle_t*) &tty, &fd); ASSERT(r == 0); uv_close((uv_handle_t*) &tty, NULL); r = uv_fileno((uv_handle_t*) &tty, &fd); ASSERT(r == UV_EBADF); + ASSERT(!uv_is_readable((uv_stream_t*) &tty)); + ASSERT(!uv_is_writable((uv_stream_t*) &tty)); } uv_run(loop, UV_RUN_DEFAULT); diff --git a/deps/uv/test/test-list.h b/deps/uv/test/test-list.h index 0a26e60444de3e..07817e382cd5d9 100644 --- a/deps/uv/test/test-list.h +++ b/deps/uv/test/test-list.h @@ -50,6 +50,7 @@ TEST_DECLARE (tty) TEST_DECLARE (tty_raw) TEST_DECLARE (tty_empty_write) TEST_DECLARE (tty_large_write) +TEST_DECLARE (tty_raw_cancel) #endif TEST_DECLARE (tty_file) TEST_DECLARE (tty_pty) @@ -339,6 +340,8 @@ TEST_DECLARE (get_osfhandle_valid_handle) TEST_DECLARE (open_osfhandle_valid_handle) TEST_DECLARE (fs_write_alotof_bufs) TEST_DECLARE (fs_write_alotof_bufs_with_offset) +TEST_DECLARE (fs_partial_read) +TEST_DECLARE (fs_partial_write) TEST_DECLARE (fs_file_pos_after_op_with_offset) TEST_DECLARE (fs_null_req) #ifdef _WIN32 @@ -482,6 +485,7 @@ TASK_LIST_START TEST_ENTRY (tty_raw) TEST_ENTRY (tty_empty_write) TEST_ENTRY (tty_large_write) + TEST_ENTRY (tty_raw_cancel) #endif TEST_ENTRY (tty_file) TEST_ENTRY (tty_pty) @@ -886,6 +890,8 @@ TASK_LIST_START TEST_ENTRY (fs_write_multiple_bufs) TEST_ENTRY (fs_write_alotof_bufs) TEST_ENTRY (fs_write_alotof_bufs_with_offset) + TEST_ENTRY (fs_partial_read) + TEST_ENTRY (fs_partial_write) TEST_ENTRY (fs_read_write_null_arguments) TEST_ENTRY (fs_file_pos_after_op_with_offset) TEST_ENTRY (fs_null_req) diff --git a/deps/uv/test/test-pipe-close-stdout-read-stdin.c b/deps/uv/test/test-pipe-close-stdout-read-stdin.c index 4ab14789a3858b..c8804b0e189249 100644 --- a/deps/uv/test/test-pipe-close-stdout-read-stdin.c +++ b/deps/uv/test/test-pipe-close-stdout-read-stdin.c 
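(Editor's note, not part of the patch: like the test-fork.c hunk above, the
change below consumes read(2)'s return value, which glibc marks with
warn_unused_result. The guard deliberately accepts all three outcomes of a
one-byte read, since the peer may already have exited:

    r = read(fd[0], &buf, 1);   /* -1 on error, 0 on EOF, 1 on success */
    ASSERT(-1 <= r && r <= 1);
)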
@@ -66,7 +66,8 @@ TEST_IMPL(pipe_close_stdout_read_stdin) { */ close(fd[1]); /* block until write end of pipe is closed */ - read(fd[0], &buf, 1); + r = read(fd[0], &buf, 1); + ASSERT(-1 <= r && r <= 1); close(0); r = dup(fd[0]); ASSERT(r != -1); diff --git a/deps/uv/test/test-process-priority.c b/deps/uv/test/test-process-priority.c index ebee6b90afd555..b3d0a85bdd70ec 100644 --- a/deps/uv/test/test-process-priority.c +++ b/deps/uv/test/test-process-priority.c @@ -54,8 +54,10 @@ TEST_IMPL(process_priority) { #ifndef _WIN32 ASSERT(priority == i); #else + /* On Windows, only elevated users can set UV_PRIORITY_HIGHEST. Other + users will silently be set to UV_PRIORITY_HIGH. */ if (i < UV_PRIORITY_HIGH) - ASSERT(priority == UV_PRIORITY_HIGHEST); + ASSERT(priority == UV_PRIORITY_HIGHEST || priority == UV_PRIORITY_HIGH); else if (i < UV_PRIORITY_ABOVE_NORMAL) ASSERT(priority == UV_PRIORITY_HIGH); else if (i < UV_PRIORITY_NORMAL) diff --git a/deps/uv/test/test-spawn.c b/deps/uv/test/test-spawn.c index 1ab6e78807ff5f..4fcd905eed7500 100644 --- a/deps/uv/test/test-spawn.c +++ b/deps/uv/test/test-spawn.c @@ -1733,6 +1733,7 @@ TEST_IMPL(spawn_inherit_streams) { uv_buf_t buf; unsigned int i; int r; + int bidir; uv_write_t write_req; uv_loop_t* loop; @@ -1751,6 +1752,15 @@ TEST_IMPL(spawn_inherit_streams) { ASSERT(uv_pipe_open(&pipe_stdout_child, fds_stdout[1]) == 0); ASSERT(uv_pipe_open(&pipe_stdin_parent, fds_stdin[1]) == 0); ASSERT(uv_pipe_open(&pipe_stdout_parent, fds_stdout[0]) == 0); + ASSERT(uv_is_readable((uv_stream_t*) &pipe_stdin_child)); + ASSERT(uv_is_writable((uv_stream_t*) &pipe_stdout_child)); + ASSERT(uv_is_writable((uv_stream_t*) &pipe_stdin_parent)); + ASSERT(uv_is_readable((uv_stream_t*) &pipe_stdout_parent)); + /* Some systems (SVR4) open a bidirectional pipe, most don't. */ + bidir = uv_is_writable((uv_stream_t*) &pipe_stdin_child); + ASSERT(uv_is_readable((uv_stream_t*) &pipe_stdout_child) == bidir); + ASSERT(uv_is_readable((uv_stream_t*) &pipe_stdin_parent) == bidir); + ASSERT(uv_is_writable((uv_stream_t*) &pipe_stdout_parent) == bidir); child_stdio[0].flags = UV_INHERIT_STREAM; child_stdio[0].data.stream = (uv_stream_t *)&pipe_stdin_child; diff --git a/deps/uv/test/test-tty.c b/deps/uv/test/test-tty.c index 6aaeda8f59619b..979a6ec38d7fcd 100644 --- a/deps/uv/test/test-tty.c +++ b/deps/uv/test/test-tty.c @@ -96,9 +96,13 @@ TEST_IMPL(tty) { r = uv_tty_init(uv_default_loop(), &tty_in, ttyin_fd, 1); /* Readable. */ ASSERT(r == 0); + ASSERT(uv_is_readable((uv_stream_t*) &tty_in)); + ASSERT(!uv_is_writable((uv_stream_t*) &tty_in)); r = uv_tty_init(uv_default_loop(), &tty_out, ttyout_fd, 0); /* Writable. */ ASSERT(r == 0); + ASSERT(!uv_is_readable((uv_stream_t*) &tty_out)); + ASSERT(uv_is_writable((uv_stream_t*) &tty_out)); r = uv_tty_get_winsize(&tty_out, &width, &height); ASSERT(r == 0); @@ -186,6 +190,8 @@ TEST_IMPL(tty_raw) { r = uv_tty_init(uv_default_loop(), &tty_in, ttyin_fd, 1); /* Readable. */ ASSERT(r == 0); + ASSERT(uv_is_readable((uv_stream_t*) &tty_in)); + ASSERT(!uv_is_writable((uv_stream_t*) &tty_in)); r = uv_read_start((uv_stream_t*)&tty_in, tty_raw_alloc, tty_raw_read); ASSERT(r == 0); @@ -242,6 +248,8 @@ TEST_IMPL(tty_empty_write) { r = uv_tty_init(uv_default_loop(), &tty_out, ttyout_fd, 0); /* Writable. 
*/ ASSERT(r == 0); + ASSERT(!uv_is_readable((uv_stream_t*) &tty_out)); + ASSERT(uv_is_writable((uv_stream_t*) &tty_out)); bufs[0].len = 0; bufs[0].base = &dummy[0]; @@ -302,6 +310,41 @@ TEST_IMPL(tty_large_write) { MAKE_VALGRIND_HAPPY(); return 0; } + +TEST_IMPL(tty_raw_cancel) { + int r; + int ttyin_fd; + uv_tty_t tty_in; + uv_loop_t* loop; + HANDLE handle; + + loop = uv_default_loop(); + /* Make sure we have an FD that refers to a tty */ + handle = CreateFileA("conin$", + GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_READ | FILE_SHARE_WRITE, + NULL, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, + NULL); + ASSERT(handle != INVALID_HANDLE_VALUE); + ttyin_fd = _open_osfhandle((intptr_t) handle, 0); + ASSERT(ttyin_fd >= 0); + ASSERT(UV_TTY == uv_guess_handle(ttyin_fd)); + + r = uv_tty_init(uv_default_loop(), &tty_in, ttyin_fd, 1); /* Readable. */ + ASSERT(r == 0); + r = uv_tty_set_mode(&tty_in, UV_TTY_MODE_RAW); + ASSERT(r == 0); + r = uv_read_start((uv_stream_t*)&tty_in, tty_raw_alloc, tty_raw_read); + ASSERT(r == 0); + + r = uv_read_stop((uv_stream_t*) &tty_in); + ASSERT(r == 0); + + MAKE_VALGRIND_HAPPY(); + return 0; +} #endif @@ -309,6 +352,8 @@ TEST_IMPL(tty_file) { #ifndef _WIN32 uv_loop_t loop; uv_tty_t tty; + uv_tty_t tty_ro; + uv_tty_t tty_wo; int fd; ASSERT(0 == uv_loop_init(&loop)); @@ -334,13 +379,40 @@ TEST_IMPL(tty_file) { ASSERT(0 == close(fd)); } - fd = open("/dev/tty", O_RDONLY); + fd = open("/dev/tty", O_RDWR); if (fd != -1) { ASSERT(0 == uv_tty_init(&loop, &tty, fd, 1)); - ASSERT(0 == close(fd)); + ASSERT(0 == close(fd)); /* TODO: it's indeterminate who owns fd now */ + ASSERT(uv_is_readable((uv_stream_t*) &tty)); + ASSERT(uv_is_writable((uv_stream_t*) &tty)); uv_close((uv_handle_t*) &tty, NULL); + ASSERT(!uv_is_readable((uv_stream_t*) &tty)); + ASSERT(!uv_is_writable((uv_stream_t*) &tty)); + } + + fd = open("/dev/tty", O_RDONLY); + if (fd != -1) { + ASSERT(0 == uv_tty_init(&loop, &tty_ro, fd, 1)); + ASSERT(0 == close(fd)); /* TODO: it's indeterminate who owns fd now */ + ASSERT(uv_is_readable((uv_stream_t*) &tty_ro)); + ASSERT(!uv_is_writable((uv_stream_t*) &tty_ro)); + uv_close((uv_handle_t*) &tty_ro, NULL); + ASSERT(!uv_is_readable((uv_stream_t*) &tty_ro)); + ASSERT(!uv_is_writable((uv_stream_t*) &tty_ro)); } + fd = open("/dev/tty", O_WRONLY); + if (fd != -1) { + ASSERT(0 == uv_tty_init(&loop, &tty_wo, fd, 0)); + ASSERT(0 == close(fd)); /* TODO: it's indeterminate who owns fd now */ + ASSERT(!uv_is_readable((uv_stream_t*) &tty_wo)); + ASSERT(uv_is_writable((uv_stream_t*) &tty_wo)); + uv_close((uv_handle_t*) &tty_wo, NULL); + ASSERT(!uv_is_readable((uv_stream_t*) &tty_wo)); + ASSERT(!uv_is_writable((uv_stream_t*) &tty_wo)); + } + + ASSERT(0 == uv_run(&loop, UV_RUN_DEFAULT)); ASSERT(0 == uv_loop_close(&loop)); @@ -370,6 +442,10 @@ TEST_IMPL(tty_pty) { ASSERT(0 == uv_tty_init(&loop, &slave_tty, slave_fd, 0)); ASSERT(0 == uv_tty_init(&loop, &master_tty, master_fd, 0)); + ASSERT(uv_is_readable((uv_stream_t*) &slave_tty)); + ASSERT(uv_is_writable((uv_stream_t*) &slave_tty)); + ASSERT(uv_is_readable((uv_stream_t*) &master_tty)); + ASSERT(uv_is_writable((uv_stream_t*) &master_tty)); /* Check if the file descriptor was reopened. If it is, * UV_HANDLE_BLOCKING_WRITES (value 0x100000) isn't set on flags. */
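/* (Editor's sketch, not part of the patch: the pattern these tty tests
 * exercise. With the `readable` parameter deprecated, a handle's
 * capabilities now follow the descriptor's own open mode, so a minimal
 * check against a POSIX /dev/tty looks like:
 *
 *   int fd = open("/dev/tty", O_WRONLY);
 *   if (fd != -1) {
 *     uv_tty_t tty;
 *     ASSERT(0 == uv_tty_init(loop, &tty, fd, 0));   // last arg ignored
 *     ASSERT(!uv_is_readable((uv_stream_t*) &tty));  // write-only fd
 *     ASSERT(uv_is_writable((uv_stream_t*) &tty));
 *     uv_close((uv_handle_t*) &tty, NULL);
 *   }
 * ) */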