mirror of
https://github.com/followmsi/android_kernel_google_msm.git
synced 2024-11-06 23:17:41 +00:00
libceph: fix messenger retry
(cherry picked from commit 5bdca4e076
)
In ancient times, the messenger could both initiate and accept connections.
An artifact if that was data structures to store/process an incoming
ceph_msg_connect request and send an outgoing ceph_msg_connect_reply.
Sadly, the negotiation code was referencing those structures and ignoring
important information (like the peer's connect_seq) from the correct ones.
Among other things, this fixes tight reconnect loops where the server sends
RETRY_SESSION and we (the client) retries with the same connect_seq as last
time. This bug pretty easily triggered by injecting socket failures on the
MDS and running some fs workload like workunits/direct_io/test_sync_io.
Signed-off-by: Sage Weil <sage@inktank.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent
21cbad59b0
commit
49da293c7d
2 changed files with 8 additions and 16 deletions
|
@ -163,16 +163,8 @@ struct ceph_connection {
|
|||
|
||||
/* connection negotiation temps */
|
||||
char in_banner[CEPH_BANNER_MAX_LEN];
|
||||
union {
|
||||
struct { /* outgoing connection */
|
||||
struct ceph_msg_connect out_connect;
|
||||
struct ceph_msg_connect_reply in_reply;
|
||||
};
|
||||
struct { /* incoming */
|
||||
struct ceph_msg_connect in_connect;
|
||||
struct ceph_msg_connect_reply out_reply;
|
||||
};
|
||||
};
|
||||
struct ceph_msg_connect out_connect;
|
||||
struct ceph_msg_connect_reply in_reply;
|
||||
struct ceph_entity_addr actual_peer_addr;
|
||||
|
||||
/* message out temps */
|
||||
|
|
|
@ -1423,7 +1423,7 @@ static int process_connect(struct ceph_connection *con)
|
|||
* dropped messages.
|
||||
*/
|
||||
dout("process_connect got RESET peer seq %u\n",
|
||||
le32_to_cpu(con->in_connect.connect_seq));
|
||||
le32_to_cpu(con->in_reply.connect_seq));
|
||||
pr_err("%s%lld %s connection reset\n",
|
||||
ENTITY_NAME(con->peer_name),
|
||||
ceph_pr_addr(&con->peer_addr.in_addr));
|
||||
|
@ -1450,10 +1450,10 @@ static int process_connect(struct ceph_connection *con)
|
|||
* If we sent a smaller connect_seq than the peer has, try
|
||||
* again with a larger value.
|
||||
*/
|
||||
dout("process_connect got RETRY my seq = %u, peer_seq = %u\n",
|
||||
dout("process_connect got RETRY_SESSION my seq %u, peer %u\n",
|
||||
le32_to_cpu(con->out_connect.connect_seq),
|
||||
le32_to_cpu(con->in_connect.connect_seq));
|
||||
con->connect_seq = le32_to_cpu(con->in_connect.connect_seq);
|
||||
le32_to_cpu(con->in_reply.connect_seq));
|
||||
con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
|
||||
ceph_con_out_kvec_reset(con);
|
||||
ret = prepare_write_connect(con);
|
||||
if (ret < 0)
|
||||
|
@ -1468,9 +1468,9 @@ static int process_connect(struct ceph_connection *con)
|
|||
*/
|
||||
dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n",
|
||||
con->peer_global_seq,
|
||||
le32_to_cpu(con->in_connect.global_seq));
|
||||
le32_to_cpu(con->in_reply.global_seq));
|
||||
get_global_seq(con->msgr,
|
||||
le32_to_cpu(con->in_connect.global_seq));
|
||||
le32_to_cpu(con->in_reply.global_seq));
|
||||
ceph_con_out_kvec_reset(con);
|
||||
ret = prepare_write_connect(con);
|
||||
if (ret < 0)
|
||||
|
|
Loading…
Reference in a new issue