Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lightningd: make shutdown smoother and safer #4897

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lightningd/jsonrpc.c
Original file line number Diff line number Diff line change
Expand Up @@ -972,7 +972,7 @@ static struct io_plan *start_json_stream(struct io_conn *conn,
io_wake(conn);

/* Once the stop_conn conn is drained, we can shut down. */
if (jcon->ld->stop_conn == conn) {
if (jcon->ld->stop_conn == conn && jcon->ld->state == LD_STATE_RUNNING) {
/* Return us to toplevel lightningd.c */
io_break(jcon->ld);
/* We never come back. */
Expand Down
5 changes: 3 additions & 2 deletions lightningd/lightningd.c
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ static struct lightningd *new_lightningd(const tal_t *ctx)
*/
ld->plugins = plugins_new(ld, ld->log_book, ld);
ld->plugins->startup = true;
ld->plugins->shutdown = false;

/*~ This is set when a JSON RPC command comes in to shut us down. */
ld->stop_conn = NULL;
Expand Down Expand Up @@ -1187,10 +1188,10 @@ int main(int argc, char *argv[])

/* We're not going to collect our children. */
remove_sigchild_handler();
shutdown_subdaemons(ld);

/* Tell plugins we're shutting down. */
/* Tell plugins we're shutting down, closes the db for write access. */
shutdown_plugins(ld);
shutdown_subdaemons(ld);

/* Clean up the JSON-RPC. This needs to happen in a DB transaction since
* it might actually be touching the DB in some destructors, e.g.,
Expand Down
45 changes: 23 additions & 22 deletions lightningd/plugin.c
Original file line number Diff line number Diff line change
Expand Up @@ -187,19 +187,18 @@ static void destroy_plugin(struct plugin *p)
if (p->plugin_state == AWAITING_GETMANIFEST_RESPONSE)
check_plugins_manifests(p->plugins);

/* If this was the last one init was waiting for, handle cmd replies */
if (p->plugin_state == AWAITING_INIT_RESPONSE)
check_plugins_initted(p->plugins);

/* If we are shutting down, do not continue to checking if
* the dying plugin is important. */
/* Daemon shutdown overrules plugin's importance; aborts init checks */
if (p->plugins->shutdown) {
/* But return if this was the last plugin! */
if (list_empty(&p->plugins->plugins))
io_break(p->plugins);
io_break(destroy_plugin);
return;
}

/* If this was the last one init was waiting for, handle cmd replies */
if (p->plugin_state == AWAITING_INIT_RESPONSE)
check_plugins_initted(p->plugins);

/* Now check if the dying plugin is important. */
if (p->important) {
log_broken(p->log,
Expand Down Expand Up @@ -2082,46 +2081,48 @@ bool was_plugin_destroyed(struct plugin_destroyed *pd)

static void plugin_shutdown_timeout(struct lightningd *ld)
{
io_break(ld->plugins);
io_break(plugin_shutdown_timeout);
}

void shutdown_plugins(struct lightningd *ld)
{
struct plugin *p, *next;

/* This makes sure we don't complain about important plugins
* vanishing! */
/* Don't complain about important plugins vanishing and
* crash any attempt to write to db. */
ld->plugins->shutdown = true;

/* Tell them all to shutdown; if they care. */
list_for_each_safe(&ld->plugins->plugins, p, next, list) {
/* Kill immediately, deletes self from list. */
if (!notify_plugin_shutdown(ld, p))
if (p->plugin_state != INIT_COMPLETE || !notify_plugin_shutdown(ld, p))
tal_free(p);
}

/* If anyone was interested in shutdown, give them time. */
if (!list_empty(&ld->plugins->plugins)) {
struct oneshot *t;
struct timers *orig_timers, *timer;

/* 30 seconds should do it. */
t = new_reltimer(ld->timers, ld,
time_from_sec(30),
plugin_shutdown_timeout, ld);
/* 30 seconds should do it, use a clean timers struct */
orig_timers = ld->timers;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I theory this can break our debug tests is list_del_from() if a timer were to delete itself, but luckily we don't use that.

timer = tal(NULL, struct timers);
timers_init(timer, time_mono());
new_reltimer(timer, timer, time_from_sec(30),
plugin_shutdown_timeout, ld);

io_loop_with_timers(ld);
tal_free(t);
ld->timers = timer;
void *ret = io_loop_with_timers(ld);
assert(ret == plugin_shutdown_timeout || ret == destroy_plugin);
ld->timers = orig_timers;
tal_free(timer);

/* Report and free remaining plugins. */
while (!list_empty(&ld->plugins->plugins)) {
p = list_pop(&ld->plugins->plugins, struct plugin, list);
log_debug(ld->log,
"%s: failed to shutdown, killing.",
"%s: failed to self-terminate in time, killing.",
p->shortname);
tal_free(p);
}
}

/* NULL stops notifications trying to access plugins. */
ld->plugins = tal_free(ld->plugins);
}
2 changes: 1 addition & 1 deletion lightningd/plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ struct plugins {
/* Blacklist of plugins from --disable-plugin */
const char **blacklist;

/* Whether we are shutting down (`plugins_free` is called) */
/* Whether we are shutting down, blocks db write's */
bool shutdown;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be far neater as a enum lightningd_state, but we can patch that afterwards.


/* Index to show what order they were added in */
Expand Down
1 change: 1 addition & 0 deletions lightningd/plugin_hook.c
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ void plugin_hook_db_sync(struct db *db)
size_t i;
size_t num_hooks;

db_check_plugins_not_shutdown(db);
const char **changes = db_changes(db);
num_hooks = tal_count(hook->hooks);
if (num_hooks == 0)
Expand Down
11 changes: 9 additions & 2 deletions tests/plugins/misc_notifications.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#!/usr/bin/env python3
"""Plugin to be used to test miscellaneous notifications.

Only used for 'channel_opened' for now.
"""

from pyln.client import Plugin
from time import sleep
import sys

plugin = Plugin()

Expand All @@ -27,4 +27,11 @@ def channel_state_changed(plugin, channel_state_changed, **kwargs):
plugin.log("channel_state_changed {}".format(channel_state_changed))


@plugin.subscribe("shutdown")
def shutdown(plugin, **kwargs):
plugin.log("delaying shutdown with 5s")
sleep(5)
sys.exit(0)


plugin.run()
26 changes: 26 additions & 0 deletions tests/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -1079,6 +1079,32 @@ def test_htlc_accepted_hook_direct_restart(node_factory, executor):
f1.result()


def test_htlc_accepted_hook_shutdown(node_factory, executor):
"""Hooks of important-plugins are never removed and these plugins are kept
alive until after subdaemons are shutdown.
"""
l1, l2 = node_factory.line_graph(2, opts=[
{'may_reconnect': True, 'log-level': 'info'},
{'may_reconnect': True, 'log-level': 'debug',
'plugin': [os.path.join(os.getcwd(), 'tests/plugins/misc_notifications.py')],
'important-plugin': [os.path.join(os.getcwd(), 'tests/plugins/fail_htlcs.py')]}
])

i1 = l2.rpc.invoice(msatoshi=1000, label="inv1", description="desc")['bolt11']

# fail_htlcs.py makes payment fail
with pytest.raises(RpcError):
l1.rpc.pay(i1)

f_stop = executor.submit(l2.rpc.stop)
l2.daemon.wait_for_log(r'plugin-misc_notifications.py: delaying shutdown with 5s')

# Should still fail htlc while shutting down
with pytest.raises(RpcError):
l1.rpc.pay(i1)
f_stop.result()


@pytest.mark.developer("without DEVELOPER=1, gossip v slow")
def test_htlc_accepted_hook_forward_restart(node_factory, executor):
"""l2 restarts while it is pondering what to do with an HTLC.
Expand Down
6 changes: 6 additions & 0 deletions wallet/db.c
Original file line number Diff line number Diff line change
Expand Up @@ -1266,6 +1266,7 @@ struct db *db_setup(const tal_t *ctx, struct lightningd *ld,
struct db *db = db_open(ctx, ld->wallet_dsn);
bool migrated;
db->log = new_log(db, ld->log_book, NULL, "database");
db->plugins_shutdown = &ld->plugins->shutdown;

db_begin_transaction(db);

Expand Down Expand Up @@ -2340,6 +2341,11 @@ void db_changes_add(struct db_stmt *stmt, const char * expanded)
tal_arr_expand(&db->changes, tal_strdup(db->changes, expanded));
}

void db_check_plugins_not_shutdown(struct db *db)
{
assert(!*db->plugins_shutdown);
}

const char **db_changes(struct db *db)
{
return db->changes;
Expand Down
3 changes: 3 additions & 0 deletions wallet/db.h
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,9 @@ struct db_stmt *db_prepare_v2_(const char *location, struct db *db,
#define db_prepare_v2(db,query) \
db_prepare_v2_(__FILE__ ":" stringify(__LINE__), db, query)

/* Check that plugins are not shutting down when calling db_write hook */
void db_check_plugins_not_shutdown(struct db *db);

/**
* Access pending changes that have been added to the current transaction.
*/
Expand Down
3 changes: 3 additions & 0 deletions wallet/db_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ struct db {
* Used to bump the data_version in the DB.*/
bool dirty;

/* Only needed to check shutdown state of plugins */
const bool *plugins_shutdown;

/* The current DB version we expect to update if changes are
* committed. */
u32 data_version;
Expand Down