diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index b8057c51b205..4ea4b0a26975 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -399,8 +399,6 @@ static void o2net_set_nn_state(struct o2net_node *nn, mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid); mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc); - /* we won't reconnect after our valid conn goes away for - * this hb iteration.. here so it shows up in the logs */ if (was_valid && !valid && err == 0) err = -ENOTCONN; @@ -430,11 +428,6 @@ static void o2net_set_nn_state(struct o2net_node *nn, if (!was_valid && valid) { o2quo_conn_up(o2net_num_from_nn(nn)); - /* this is a bit of a hack. we only try reconnecting - * when heartbeating starts until we get a connection. - * if that connection then dies we don't try reconnecting. - * the only way to start connecting again is to down - * heartbeat and bring it back up. */ cancel_delayed_work(&nn->nn_connect_expired); printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", o2nm_this_node() > sc->sc_node->nd_num ? @@ -457,6 +450,18 @@ static void o2net_set_nn_state(struct o2net_node *nn, delay = 0; mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); + + /* + * Delay the expired work after idle timeout. + * + * We might have lots of failed connection attempts that run + * through here but we only cancel the connect_expired work when + * a connection attempt succeeds. So only the first enqueue of + * the connect_expired work will do anything. The rest will see + * that it's already queued and do nothing. + */ + delay += msecs_to_jiffies(o2net_idle_timeout(NULL)); + queue_delayed_work(o2net_wq, &nn->nn_connect_expired, delay); } /* keep track of the nn's sc ref for the caller */ @@ -1193,6 +1198,7 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) * shut down already */ if (nn->nn_sc == sc) { o2net_sc_reset_idle_timer(sc); + atomic_set(&nn->nn_timeout, 0); o2net_set_nn_state(nn, sc, 1, 0); } spin_unlock(&nn->nn_lock); @@ -1391,6 +1397,7 @@ static void o2net_sc_send_keep_req(struct work_struct *work) static void o2net_idle_timer(unsigned long data) { struct o2net_sock_container *sc = (struct o2net_sock_container *)data; + struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); struct timeval now; do_gettimeofday(&now); @@ -1413,6 +1420,12 @@ static void o2net_idle_timer(unsigned long data) sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); + /* + * Initialize the nn_timeout so that the next connection attempt + * will continue in o2net_start_connect. + */ + atomic_set(&nn->nn_timeout, 1); + o2net_sc_queue_work(sc, &sc->sc_shutdown_work); } @@ -1447,6 +1460,7 @@ static void o2net_start_connect(struct work_struct *work) struct socket *sock = NULL; struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; int ret = 0, stop; + unsigned int timeout; /* if we're greater we initiate tx, otherwise we accept */ if (o2nm_this_node() <= o2net_num_from_nn(nn)) @@ -1466,8 +1480,17 @@ static void o2net_start_connect(struct work_struct *work) } spin_lock(&nn->nn_lock); - /* see if we already have one pending or have given up */ - stop = (nn->nn_sc || nn->nn_persistent_error); + /* + * see if we already have one pending or have given up. + * For nn_timeout, it is set when we close the connection + * because of the idle time out. So it means that we have + * at least connected to that node successfully once, + * now try to connect to it again. + */ + timeout = atomic_read(&nn->nn_timeout); + stop = (nn->nn_sc || + (nn->nn_persistent_error && + (nn->nn_persistent_error != -ENOTCONN || timeout == 0))); spin_unlock(&nn->nn_lock); if (stop) goto out; @@ -1579,6 +1602,7 @@ void o2net_disconnect_node(struct o2nm_node *node) /* don't reconnect until it's heartbeating again */ spin_lock(&nn->nn_lock); + atomic_set(&nn->nn_timeout, 0); o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); spin_unlock(&nn->nn_lock); @@ -1613,17 +1637,12 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1); if (node_num != o2nm_this_node()) { - /* heartbeat doesn't work unless a local node number is - * configured and doing so brings up the o2net_wq, so we can - * use it.. */ - queue_delayed_work(o2net_wq, &nn->nn_connect_expired, - msecs_to_jiffies(o2net_idle_timeout(node))); - /* believe it or not, accept and node hearbeating testing * can succeed for this node before we got here.. so * only use set_nn_state to clear the persistent error * if that hasn't already happened */ spin_lock(&nn->nn_lock); + atomic_set(&nn->nn_timeout, 0); if (nn->nn_persistent_error) o2net_set_nn_state(nn, NULL, 0, 0); spin_unlock(&nn->nn_lock); @@ -1747,6 +1766,7 @@ static int o2net_accept_one(struct socket *sock) new_sock = NULL; spin_lock(&nn->nn_lock); + atomic_set(&nn->nn_timeout, 0); o2net_set_nn_state(nn, sc, 0, 0); spin_unlock(&nn->nn_lock); @@ -1941,6 +1961,7 @@ int o2net_init(void) for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) { struct o2net_node *nn = o2net_nn_from_num(i); + atomic_set(&nn->nn_timeout, 0); spin_lock_init(&nn->nn_lock); INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect); INIT_DELAYED_WORK(&nn->nn_connect_expired, diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index d25b9af28500..b4c5586f46ea 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h @@ -95,6 +95,8 @@ struct o2net_node { unsigned nn_sc_valid:1; /* if this is set tx just returns it */ int nn_persistent_error; + /* It is only set to 1 after the idle time out. */ + atomic_t nn_timeout; /* threads waiting for an sc to arrive wait on the wq for generation * to increase. it is increased when a connecting socket succeeds