Commit 692d20f576fb26f62c83f80dbf3ea899998391b7

Authored by Sage Weil
1 parent 38815b7802

libceph: retry after authorization failure

If we mark the connection CLOSED we will give up trying to reconnect to
this server instance.  That is appropriate for things like a protocol
version mismatch that won't change until the server is restarted, at which
point we'll get a new addr and reconnect.  An authorization failure like
this is probably due to the server not properly rotating its secret keys,
however, and should be treated as transient so that the normal backoff and
retry behavior kicks in.

Signed-off-by: Sage Weil <sage@newdream.net>

Showing 1 changed file with 0 additions and 2 deletions Inline Diff

net/ceph/messenger.c
1 #include <linux/ceph/ceph_debug.h> 1 #include <linux/ceph/ceph_debug.h>
2 2
3 #include <linux/crc32c.h> 3 #include <linux/crc32c.h>
4 #include <linux/ctype.h> 4 #include <linux/ctype.h>
5 #include <linux/highmem.h> 5 #include <linux/highmem.h>
6 #include <linux/inet.h> 6 #include <linux/inet.h>
7 #include <linux/kthread.h> 7 #include <linux/kthread.h>
8 #include <linux/net.h> 8 #include <linux/net.h>
9 #include <linux/slab.h> 9 #include <linux/slab.h>
10 #include <linux/socket.h> 10 #include <linux/socket.h>
11 #include <linux/string.h> 11 #include <linux/string.h>
12 #include <linux/bio.h> 12 #include <linux/bio.h>
13 #include <linux/blkdev.h> 13 #include <linux/blkdev.h>
14 #include <net/tcp.h> 14 #include <net/tcp.h>
15 15
16 #include <linux/ceph/libceph.h> 16 #include <linux/ceph/libceph.h>
17 #include <linux/ceph/messenger.h> 17 #include <linux/ceph/messenger.h>
18 #include <linux/ceph/decode.h> 18 #include <linux/ceph/decode.h>
19 #include <linux/ceph/pagelist.h> 19 #include <linux/ceph/pagelist.h>
20 20
21 /* 21 /*
22 * Ceph uses the messenger to exchange ceph_msg messages with other 22 * Ceph uses the messenger to exchange ceph_msg messages with other
23 * hosts in the system. The messenger provides ordered and reliable 23 * hosts in the system. The messenger provides ordered and reliable
24 * delivery. We tolerate TCP disconnects by reconnecting (with 24 * delivery. We tolerate TCP disconnects by reconnecting (with
25 * exponential backoff) in the case of a fault (disconnection, bad 25 * exponential backoff) in the case of a fault (disconnection, bad
26 * crc, protocol error). Acks allow sent messages to be discarded by 26 * crc, protocol error). Acks allow sent messages to be discarded by
27 * the sender. 27 * the sender.
28 */ 28 */
29 29
30 /* static tag bytes (protocol control messages) */ 30 /* static tag bytes (protocol control messages) */
31 static char tag_msg = CEPH_MSGR_TAG_MSG; 31 static char tag_msg = CEPH_MSGR_TAG_MSG;
32 static char tag_ack = CEPH_MSGR_TAG_ACK; 32 static char tag_ack = CEPH_MSGR_TAG_ACK;
33 static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; 33 static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;
34 34
35 #ifdef CONFIG_LOCKDEP 35 #ifdef CONFIG_LOCKDEP
36 static struct lock_class_key socket_class; 36 static struct lock_class_key socket_class;
37 #endif 37 #endif
38 38
39 39
40 static void queue_con(struct ceph_connection *con); 40 static void queue_con(struct ceph_connection *con);
41 static void con_work(struct work_struct *); 41 static void con_work(struct work_struct *);
42 static void ceph_fault(struct ceph_connection *con); 42 static void ceph_fault(struct ceph_connection *con);
43 43
44 /* 44 /*
45 * nicely render a sockaddr as a string. 45 * nicely render a sockaddr as a string.
46 */ 46 */
47 #define MAX_ADDR_STR 20 47 #define MAX_ADDR_STR 20
48 #define MAX_ADDR_STR_LEN 60 48 #define MAX_ADDR_STR_LEN 60
49 static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN]; 49 static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN];
50 static DEFINE_SPINLOCK(addr_str_lock); 50 static DEFINE_SPINLOCK(addr_str_lock);
51 static int last_addr_str; 51 static int last_addr_str;
52 52
53 const char *ceph_pr_addr(const struct sockaddr_storage *ss) 53 const char *ceph_pr_addr(const struct sockaddr_storage *ss)
54 { 54 {
55 int i; 55 int i;
56 char *s; 56 char *s;
57 struct sockaddr_in *in4 = (void *)ss; 57 struct sockaddr_in *in4 = (void *)ss;
58 struct sockaddr_in6 *in6 = (void *)ss; 58 struct sockaddr_in6 *in6 = (void *)ss;
59 59
60 spin_lock(&addr_str_lock); 60 spin_lock(&addr_str_lock);
61 i = last_addr_str++; 61 i = last_addr_str++;
62 if (last_addr_str == MAX_ADDR_STR) 62 if (last_addr_str == MAX_ADDR_STR)
63 last_addr_str = 0; 63 last_addr_str = 0;
64 spin_unlock(&addr_str_lock); 64 spin_unlock(&addr_str_lock);
65 s = addr_str[i]; 65 s = addr_str[i];
66 66
67 switch (ss->ss_family) { 67 switch (ss->ss_family) {
68 case AF_INET: 68 case AF_INET:
69 snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%u", &in4->sin_addr, 69 snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%u", &in4->sin_addr,
70 (unsigned int)ntohs(in4->sin_port)); 70 (unsigned int)ntohs(in4->sin_port));
71 break; 71 break;
72 72
73 case AF_INET6: 73 case AF_INET6:
74 snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%u", &in6->sin6_addr, 74 snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%u", &in6->sin6_addr,
75 (unsigned int)ntohs(in6->sin6_port)); 75 (unsigned int)ntohs(in6->sin6_port));
76 break; 76 break;
77 77
78 default: 78 default:
79 sprintf(s, "(unknown sockaddr family %d)", (int)ss->ss_family); 79 sprintf(s, "(unknown sockaddr family %d)", (int)ss->ss_family);
80 } 80 }
81 81
82 return s; 82 return s;
83 } 83 }
84 EXPORT_SYMBOL(ceph_pr_addr); 84 EXPORT_SYMBOL(ceph_pr_addr);
85 85
86 static void encode_my_addr(struct ceph_messenger *msgr) 86 static void encode_my_addr(struct ceph_messenger *msgr)
87 { 87 {
88 memcpy(&msgr->my_enc_addr, &msgr->inst.addr, sizeof(msgr->my_enc_addr)); 88 memcpy(&msgr->my_enc_addr, &msgr->inst.addr, sizeof(msgr->my_enc_addr));
89 ceph_encode_addr(&msgr->my_enc_addr); 89 ceph_encode_addr(&msgr->my_enc_addr);
90 } 90 }
91 91
92 /* 92 /*
93 * work queue for all reading and writing to/from the socket. 93 * work queue for all reading and writing to/from the socket.
94 */ 94 */
95 struct workqueue_struct *ceph_msgr_wq; 95 struct workqueue_struct *ceph_msgr_wq;
96 96
97 int ceph_msgr_init(void) 97 int ceph_msgr_init(void)
98 { 98 {
99 ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0); 99 ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0);
100 if (!ceph_msgr_wq) { 100 if (!ceph_msgr_wq) {
101 pr_err("msgr_init failed to create workqueue\n"); 101 pr_err("msgr_init failed to create workqueue\n");
102 return -ENOMEM; 102 return -ENOMEM;
103 } 103 }
104 return 0; 104 return 0;
105 } 105 }
106 EXPORT_SYMBOL(ceph_msgr_init); 106 EXPORT_SYMBOL(ceph_msgr_init);
107 107
108 void ceph_msgr_exit(void) 108 void ceph_msgr_exit(void)
109 { 109 {
110 destroy_workqueue(ceph_msgr_wq); 110 destroy_workqueue(ceph_msgr_wq);
111 } 111 }
112 EXPORT_SYMBOL(ceph_msgr_exit); 112 EXPORT_SYMBOL(ceph_msgr_exit);
113 113
114 void ceph_msgr_flush(void) 114 void ceph_msgr_flush(void)
115 { 115 {
116 flush_workqueue(ceph_msgr_wq); 116 flush_workqueue(ceph_msgr_wq);
117 } 117 }
118 EXPORT_SYMBOL(ceph_msgr_flush); 118 EXPORT_SYMBOL(ceph_msgr_flush);
119 119
120 120
121 /* 121 /*
122 * socket callback functions 122 * socket callback functions
123 */ 123 */
124 124
125 /* data available on socket, or listen socket received a connect */ 125 /* data available on socket, or listen socket received a connect */
126 static void ceph_data_ready(struct sock *sk, int count_unused) 126 static void ceph_data_ready(struct sock *sk, int count_unused)
127 { 127 {
128 struct ceph_connection *con = 128 struct ceph_connection *con =
129 (struct ceph_connection *)sk->sk_user_data; 129 (struct ceph_connection *)sk->sk_user_data;
130 if (sk->sk_state != TCP_CLOSE_WAIT) { 130 if (sk->sk_state != TCP_CLOSE_WAIT) {
131 dout("ceph_data_ready on %p state = %lu, queueing work\n", 131 dout("ceph_data_ready on %p state = %lu, queueing work\n",
132 con, con->state); 132 con, con->state);
133 queue_con(con); 133 queue_con(con);
134 } 134 }
135 } 135 }
136 136
137 /* socket has buffer space for writing */ 137 /* socket has buffer space for writing */
138 static void ceph_write_space(struct sock *sk) 138 static void ceph_write_space(struct sock *sk)
139 { 139 {
140 struct ceph_connection *con = 140 struct ceph_connection *con =
141 (struct ceph_connection *)sk->sk_user_data; 141 (struct ceph_connection *)sk->sk_user_data;
142 142
143 /* only queue to workqueue if there is data we want to write. */ 143 /* only queue to workqueue if there is data we want to write. */
144 if (test_bit(WRITE_PENDING, &con->state)) { 144 if (test_bit(WRITE_PENDING, &con->state)) {
145 dout("ceph_write_space %p queueing write work\n", con); 145 dout("ceph_write_space %p queueing write work\n", con);
146 queue_con(con); 146 queue_con(con);
147 } else { 147 } else {
148 dout("ceph_write_space %p nothing to write\n", con); 148 dout("ceph_write_space %p nothing to write\n", con);
149 } 149 }
150 150
151 /* since we have our own write_space, clear the SOCK_NOSPACE flag */ 151 /* since we have our own write_space, clear the SOCK_NOSPACE flag */
152 clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 152 clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
153 } 153 }
154 154
155 /* socket's state has changed */ 155 /* socket's state has changed */
156 static void ceph_state_change(struct sock *sk) 156 static void ceph_state_change(struct sock *sk)
157 { 157 {
158 struct ceph_connection *con = 158 struct ceph_connection *con =
159 (struct ceph_connection *)sk->sk_user_data; 159 (struct ceph_connection *)sk->sk_user_data;
160 160
161 dout("ceph_state_change %p state = %lu sk_state = %u\n", 161 dout("ceph_state_change %p state = %lu sk_state = %u\n",
162 con, con->state, sk->sk_state); 162 con, con->state, sk->sk_state);
163 163
164 if (test_bit(CLOSED, &con->state)) 164 if (test_bit(CLOSED, &con->state))
165 return; 165 return;
166 166
167 switch (sk->sk_state) { 167 switch (sk->sk_state) {
168 case TCP_CLOSE: 168 case TCP_CLOSE:
169 dout("ceph_state_change TCP_CLOSE\n"); 169 dout("ceph_state_change TCP_CLOSE\n");
170 case TCP_CLOSE_WAIT: 170 case TCP_CLOSE_WAIT:
171 dout("ceph_state_change TCP_CLOSE_WAIT\n"); 171 dout("ceph_state_change TCP_CLOSE_WAIT\n");
172 if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) { 172 if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) {
173 if (test_bit(CONNECTING, &con->state)) 173 if (test_bit(CONNECTING, &con->state))
174 con->error_msg = "connection failed"; 174 con->error_msg = "connection failed";
175 else 175 else
176 con->error_msg = "socket closed"; 176 con->error_msg = "socket closed";
177 queue_con(con); 177 queue_con(con);
178 } 178 }
179 break; 179 break;
180 case TCP_ESTABLISHED: 180 case TCP_ESTABLISHED:
181 dout("ceph_state_change TCP_ESTABLISHED\n"); 181 dout("ceph_state_change TCP_ESTABLISHED\n");
182 queue_con(con); 182 queue_con(con);
183 break; 183 break;
184 } 184 }
185 } 185 }
186 186
187 /* 187 /*
188 * set up socket callbacks 188 * set up socket callbacks
189 */ 189 */
190 static void set_sock_callbacks(struct socket *sock, 190 static void set_sock_callbacks(struct socket *sock,
191 struct ceph_connection *con) 191 struct ceph_connection *con)
192 { 192 {
193 struct sock *sk = sock->sk; 193 struct sock *sk = sock->sk;
194 sk->sk_user_data = (void *)con; 194 sk->sk_user_data = (void *)con;
195 sk->sk_data_ready = ceph_data_ready; 195 sk->sk_data_ready = ceph_data_ready;
196 sk->sk_write_space = ceph_write_space; 196 sk->sk_write_space = ceph_write_space;
197 sk->sk_state_change = ceph_state_change; 197 sk->sk_state_change = ceph_state_change;
198 } 198 }
199 199
200 200
201 /* 201 /*
202 * socket helpers 202 * socket helpers
203 */ 203 */
204 204
205 /* 205 /*
206 * initiate connection to a remote socket. 206 * initiate connection to a remote socket.
207 */ 207 */
208 static struct socket *ceph_tcp_connect(struct ceph_connection *con) 208 static struct socket *ceph_tcp_connect(struct ceph_connection *con)
209 { 209 {
210 struct sockaddr_storage *paddr = &con->peer_addr.in_addr; 210 struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
211 struct socket *sock; 211 struct socket *sock;
212 int ret; 212 int ret;
213 213
214 BUG_ON(con->sock); 214 BUG_ON(con->sock);
215 ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM, 215 ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM,
216 IPPROTO_TCP, &sock); 216 IPPROTO_TCP, &sock);
217 if (ret) 217 if (ret)
218 return ERR_PTR(ret); 218 return ERR_PTR(ret);
219 con->sock = sock; 219 con->sock = sock;
220 sock->sk->sk_allocation = GFP_NOFS; 220 sock->sk->sk_allocation = GFP_NOFS;
221 221
222 #ifdef CONFIG_LOCKDEP 222 #ifdef CONFIG_LOCKDEP
223 lockdep_set_class(&sock->sk->sk_lock, &socket_class); 223 lockdep_set_class(&sock->sk->sk_lock, &socket_class);
224 #endif 224 #endif
225 225
226 set_sock_callbacks(sock, con); 226 set_sock_callbacks(sock, con);
227 227
228 dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); 228 dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
229 229
230 ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), 230 ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
231 O_NONBLOCK); 231 O_NONBLOCK);
232 if (ret == -EINPROGRESS) { 232 if (ret == -EINPROGRESS) {
233 dout("connect %s EINPROGRESS sk_state = %u\n", 233 dout("connect %s EINPROGRESS sk_state = %u\n",
234 ceph_pr_addr(&con->peer_addr.in_addr), 234 ceph_pr_addr(&con->peer_addr.in_addr),
235 sock->sk->sk_state); 235 sock->sk->sk_state);
236 ret = 0; 236 ret = 0;
237 } 237 }
238 if (ret < 0) { 238 if (ret < 0) {
239 pr_err("connect %s error %d\n", 239 pr_err("connect %s error %d\n",
240 ceph_pr_addr(&con->peer_addr.in_addr), ret); 240 ceph_pr_addr(&con->peer_addr.in_addr), ret);
241 sock_release(sock); 241 sock_release(sock);
242 con->sock = NULL; 242 con->sock = NULL;
243 con->error_msg = "connect error"; 243 con->error_msg = "connect error";
244 } 244 }
245 245
246 if (ret < 0) 246 if (ret < 0)
247 return ERR_PTR(ret); 247 return ERR_PTR(ret);
248 return sock; 248 return sock;
249 } 249 }
250 250
251 static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) 251 static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
252 { 252 {
253 struct kvec iov = {buf, len}; 253 struct kvec iov = {buf, len};
254 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; 254 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
255 int r; 255 int r;
256 256
257 r = kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags); 257 r = kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags);
258 if (r == -EAGAIN) 258 if (r == -EAGAIN)
259 r = 0; 259 r = 0;
260 return r; 260 return r;
261 } 261 }
262 262
263 /* 263 /*
264 * write something. @more is true if caller will be sending more data 264 * write something. @more is true if caller will be sending more data
265 * shortly. 265 * shortly.
266 */ 266 */
267 static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, 267 static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
268 size_t kvlen, size_t len, int more) 268 size_t kvlen, size_t len, int more)
269 { 269 {
270 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; 270 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
271 int r; 271 int r;
272 272
273 if (more) 273 if (more)
274 msg.msg_flags |= MSG_MORE; 274 msg.msg_flags |= MSG_MORE;
275 else 275 else
276 msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ 276 msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */
277 277
278 r = kernel_sendmsg(sock, &msg, iov, kvlen, len); 278 r = kernel_sendmsg(sock, &msg, iov, kvlen, len);
279 if (r == -EAGAIN) 279 if (r == -EAGAIN)
280 r = 0; 280 r = 0;
281 return r; 281 return r;
282 } 282 }
283 283
284 284
285 /* 285 /*
286 * Shutdown/close the socket for the given connection. 286 * Shutdown/close the socket for the given connection.
287 */ 287 */
288 static int con_close_socket(struct ceph_connection *con) 288 static int con_close_socket(struct ceph_connection *con)
289 { 289 {
290 int rc; 290 int rc;
291 291
292 dout("con_close_socket on %p sock %p\n", con, con->sock); 292 dout("con_close_socket on %p sock %p\n", con, con->sock);
293 if (!con->sock) 293 if (!con->sock)
294 return 0; 294 return 0;
295 set_bit(SOCK_CLOSED, &con->state); 295 set_bit(SOCK_CLOSED, &con->state);
296 rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR); 296 rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
297 sock_release(con->sock); 297 sock_release(con->sock);
298 con->sock = NULL; 298 con->sock = NULL;
299 clear_bit(SOCK_CLOSED, &con->state); 299 clear_bit(SOCK_CLOSED, &con->state);
300 return rc; 300 return rc;
301 } 301 }
302 302
303 /* 303 /*
304 * Reset a connection. Discard all incoming and outgoing messages 304 * Reset a connection. Discard all incoming and outgoing messages
305 * and clear *_seq state. 305 * and clear *_seq state.
306 */ 306 */
307 static void ceph_msg_remove(struct ceph_msg *msg) 307 static void ceph_msg_remove(struct ceph_msg *msg)
308 { 308 {
309 list_del_init(&msg->list_head); 309 list_del_init(&msg->list_head);
310 ceph_msg_put(msg); 310 ceph_msg_put(msg);
311 } 311 }
312 static void ceph_msg_remove_list(struct list_head *head) 312 static void ceph_msg_remove_list(struct list_head *head)
313 { 313 {
314 while (!list_empty(head)) { 314 while (!list_empty(head)) {
315 struct ceph_msg *msg = list_first_entry(head, struct ceph_msg, 315 struct ceph_msg *msg = list_first_entry(head, struct ceph_msg,
316 list_head); 316 list_head);
317 ceph_msg_remove(msg); 317 ceph_msg_remove(msg);
318 } 318 }
319 } 319 }
320 320
321 static void reset_connection(struct ceph_connection *con) 321 static void reset_connection(struct ceph_connection *con)
322 { 322 {
323 /* reset connection, out_queue, msg_ and connect_seq */ 323 /* reset connection, out_queue, msg_ and connect_seq */
324 /* discard existing out_queue and msg_seq */ 324 /* discard existing out_queue and msg_seq */
325 ceph_msg_remove_list(&con->out_queue); 325 ceph_msg_remove_list(&con->out_queue);
326 ceph_msg_remove_list(&con->out_sent); 326 ceph_msg_remove_list(&con->out_sent);
327 327
328 if (con->in_msg) { 328 if (con->in_msg) {
329 ceph_msg_put(con->in_msg); 329 ceph_msg_put(con->in_msg);
330 con->in_msg = NULL; 330 con->in_msg = NULL;
331 } 331 }
332 332
333 con->connect_seq = 0; 333 con->connect_seq = 0;
334 con->out_seq = 0; 334 con->out_seq = 0;
335 if (con->out_msg) { 335 if (con->out_msg) {
336 ceph_msg_put(con->out_msg); 336 ceph_msg_put(con->out_msg);
337 con->out_msg = NULL; 337 con->out_msg = NULL;
338 } 338 }
339 con->out_keepalive_pending = false; 339 con->out_keepalive_pending = false;
340 con->in_seq = 0; 340 con->in_seq = 0;
341 con->in_seq_acked = 0; 341 con->in_seq_acked = 0;
342 } 342 }
343 343
344 /* 344 /*
345 * mark a peer down. drop any open connections. 345 * mark a peer down. drop any open connections.
346 */ 346 */
347 void ceph_con_close(struct ceph_connection *con) 347 void ceph_con_close(struct ceph_connection *con)
348 { 348 {
349 dout("con_close %p peer %s\n", con, 349 dout("con_close %p peer %s\n", con,
350 ceph_pr_addr(&con->peer_addr.in_addr)); 350 ceph_pr_addr(&con->peer_addr.in_addr));
351 set_bit(CLOSED, &con->state); /* in case there's queued work */ 351 set_bit(CLOSED, &con->state); /* in case there's queued work */
352 clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ 352 clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */
353 clear_bit(LOSSYTX, &con->state); /* so we retry next connect */ 353 clear_bit(LOSSYTX, &con->state); /* so we retry next connect */
354 clear_bit(KEEPALIVE_PENDING, &con->state); 354 clear_bit(KEEPALIVE_PENDING, &con->state);
355 clear_bit(WRITE_PENDING, &con->state); 355 clear_bit(WRITE_PENDING, &con->state);
356 mutex_lock(&con->mutex); 356 mutex_lock(&con->mutex);
357 reset_connection(con); 357 reset_connection(con);
358 con->peer_global_seq = 0; 358 con->peer_global_seq = 0;
359 cancel_delayed_work(&con->work); 359 cancel_delayed_work(&con->work);
360 mutex_unlock(&con->mutex); 360 mutex_unlock(&con->mutex);
361 queue_con(con); 361 queue_con(con);
362 } 362 }
363 EXPORT_SYMBOL(ceph_con_close); 363 EXPORT_SYMBOL(ceph_con_close);
364 364
365 /* 365 /*
366 * Reopen a closed connection, with a new peer address. 366 * Reopen a closed connection, with a new peer address.
367 */ 367 */
368 void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) 368 void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr)
369 { 369 {
370 dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); 370 dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
371 set_bit(OPENING, &con->state); 371 set_bit(OPENING, &con->state);
372 clear_bit(CLOSED, &con->state); 372 clear_bit(CLOSED, &con->state);
373 memcpy(&con->peer_addr, addr, sizeof(*addr)); 373 memcpy(&con->peer_addr, addr, sizeof(*addr));
374 con->delay = 0; /* reset backoff memory */ 374 con->delay = 0; /* reset backoff memory */
375 queue_con(con); 375 queue_con(con);
376 } 376 }
377 EXPORT_SYMBOL(ceph_con_open); 377 EXPORT_SYMBOL(ceph_con_open);
378 378
379 /* 379 /*
380 * return true if this connection ever successfully opened 380 * return true if this connection ever successfully opened
381 */ 381 */
382 bool ceph_con_opened(struct ceph_connection *con) 382 bool ceph_con_opened(struct ceph_connection *con)
383 { 383 {
384 return con->connect_seq > 0; 384 return con->connect_seq > 0;
385 } 385 }
386 386
387 /* 387 /*
388 * generic get/put 388 * generic get/put
389 */ 389 */
390 struct ceph_connection *ceph_con_get(struct ceph_connection *con) 390 struct ceph_connection *ceph_con_get(struct ceph_connection *con)
391 { 391 {
392 dout("con_get %p nref = %d -> %d\n", con, 392 dout("con_get %p nref = %d -> %d\n", con,
393 atomic_read(&con->nref), atomic_read(&con->nref) + 1); 393 atomic_read(&con->nref), atomic_read(&con->nref) + 1);
394 if (atomic_inc_not_zero(&con->nref)) 394 if (atomic_inc_not_zero(&con->nref))
395 return con; 395 return con;
396 return NULL; 396 return NULL;
397 } 397 }
398 398
399 void ceph_con_put(struct ceph_connection *con) 399 void ceph_con_put(struct ceph_connection *con)
400 { 400 {
401 dout("con_put %p nref = %d -> %d\n", con, 401 dout("con_put %p nref = %d -> %d\n", con,
402 atomic_read(&con->nref), atomic_read(&con->nref) - 1); 402 atomic_read(&con->nref), atomic_read(&con->nref) - 1);
403 BUG_ON(atomic_read(&con->nref) == 0); 403 BUG_ON(atomic_read(&con->nref) == 0);
404 if (atomic_dec_and_test(&con->nref)) { 404 if (atomic_dec_and_test(&con->nref)) {
405 BUG_ON(con->sock); 405 BUG_ON(con->sock);
406 kfree(con); 406 kfree(con);
407 } 407 }
408 } 408 }
409 409
410 /* 410 /*
411 * initialize a new connection. 411 * initialize a new connection.
412 */ 412 */
413 void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con) 413 void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con)
414 { 414 {
415 dout("con_init %p\n", con); 415 dout("con_init %p\n", con);
416 memset(con, 0, sizeof(*con)); 416 memset(con, 0, sizeof(*con));
417 atomic_set(&con->nref, 1); 417 atomic_set(&con->nref, 1);
418 con->msgr = msgr; 418 con->msgr = msgr;
419 mutex_init(&con->mutex); 419 mutex_init(&con->mutex);
420 INIT_LIST_HEAD(&con->out_queue); 420 INIT_LIST_HEAD(&con->out_queue);
421 INIT_LIST_HEAD(&con->out_sent); 421 INIT_LIST_HEAD(&con->out_sent);
422 INIT_DELAYED_WORK(&con->work, con_work); 422 INIT_DELAYED_WORK(&con->work, con_work);
423 } 423 }
424 EXPORT_SYMBOL(ceph_con_init); 424 EXPORT_SYMBOL(ceph_con_init);
425 425
426 426
427 /* 427 /*
428 * We maintain a global counter to order connection attempts. Get 428 * We maintain a global counter to order connection attempts. Get
429 * a unique seq greater than @gt. 429 * a unique seq greater than @gt.
430 */ 430 */
431 static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt) 431 static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
432 { 432 {
433 u32 ret; 433 u32 ret;
434 434
435 spin_lock(&msgr->global_seq_lock); 435 spin_lock(&msgr->global_seq_lock);
436 if (msgr->global_seq < gt) 436 if (msgr->global_seq < gt)
437 msgr->global_seq = gt; 437 msgr->global_seq = gt;
438 ret = ++msgr->global_seq; 438 ret = ++msgr->global_seq;
439 spin_unlock(&msgr->global_seq_lock); 439 spin_unlock(&msgr->global_seq_lock);
440 return ret; 440 return ret;
441 } 441 }
442 442
443 443
444 /* 444 /*
445 * Prepare footer for currently outgoing message, and finish things 445 * Prepare footer for currently outgoing message, and finish things
446 * off. Assumes out_kvec* are already valid.. we just add on to the end. 446 * off. Assumes out_kvec* are already valid.. we just add on to the end.
447 */ 447 */
448 static void prepare_write_message_footer(struct ceph_connection *con, int v) 448 static void prepare_write_message_footer(struct ceph_connection *con, int v)
449 { 449 {
450 struct ceph_msg *m = con->out_msg; 450 struct ceph_msg *m = con->out_msg;
451 451
452 dout("prepare_write_message_footer %p\n", con); 452 dout("prepare_write_message_footer %p\n", con);
453 con->out_kvec_is_msg = true; 453 con->out_kvec_is_msg = true;
454 con->out_kvec[v].iov_base = &m->footer; 454 con->out_kvec[v].iov_base = &m->footer;
455 con->out_kvec[v].iov_len = sizeof(m->footer); 455 con->out_kvec[v].iov_len = sizeof(m->footer);
456 con->out_kvec_bytes += sizeof(m->footer); 456 con->out_kvec_bytes += sizeof(m->footer);
457 con->out_kvec_left++; 457 con->out_kvec_left++;
458 con->out_more = m->more_to_follow; 458 con->out_more = m->more_to_follow;
459 con->out_msg_done = true; 459 con->out_msg_done = true;
460 } 460 }
461 461
462 /* 462 /*
463 * Prepare headers for the next outgoing message. 463 * Prepare headers for the next outgoing message.
464 */ 464 */
465 static void prepare_write_message(struct ceph_connection *con) 465 static void prepare_write_message(struct ceph_connection *con)
466 { 466 {
467 struct ceph_msg *m; 467 struct ceph_msg *m;
468 int v = 0; 468 int v = 0;
469 469
470 con->out_kvec_bytes = 0; 470 con->out_kvec_bytes = 0;
471 con->out_kvec_is_msg = true; 471 con->out_kvec_is_msg = true;
472 con->out_msg_done = false; 472 con->out_msg_done = false;
473 473
474 /* Sneak an ack in there first? If we can get it into the same 474 /* Sneak an ack in there first? If we can get it into the same
475 * TCP packet that's a good thing. */ 475 * TCP packet that's a good thing. */
476 if (con->in_seq > con->in_seq_acked) { 476 if (con->in_seq > con->in_seq_acked) {
477 con->in_seq_acked = con->in_seq; 477 con->in_seq_acked = con->in_seq;
478 con->out_kvec[v].iov_base = &tag_ack; 478 con->out_kvec[v].iov_base = &tag_ack;
479 con->out_kvec[v++].iov_len = 1; 479 con->out_kvec[v++].iov_len = 1;
480 con->out_temp_ack = cpu_to_le64(con->in_seq_acked); 480 con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
481 con->out_kvec[v].iov_base = &con->out_temp_ack; 481 con->out_kvec[v].iov_base = &con->out_temp_ack;
482 con->out_kvec[v++].iov_len = sizeof(con->out_temp_ack); 482 con->out_kvec[v++].iov_len = sizeof(con->out_temp_ack);
483 con->out_kvec_bytes = 1 + sizeof(con->out_temp_ack); 483 con->out_kvec_bytes = 1 + sizeof(con->out_temp_ack);
484 } 484 }
485 485
486 m = list_first_entry(&con->out_queue, 486 m = list_first_entry(&con->out_queue,
487 struct ceph_msg, list_head); 487 struct ceph_msg, list_head);
488 con->out_msg = m; 488 con->out_msg = m;
489 if (test_bit(LOSSYTX, &con->state)) { 489 if (test_bit(LOSSYTX, &con->state)) {
490 list_del_init(&m->list_head); 490 list_del_init(&m->list_head);
491 } else { 491 } else {
492 /* put message on sent list */ 492 /* put message on sent list */
493 ceph_msg_get(m); 493 ceph_msg_get(m);
494 list_move_tail(&m->list_head, &con->out_sent); 494 list_move_tail(&m->list_head, &con->out_sent);
495 } 495 }
496 496
497 /* 497 /*
498 * only assign outgoing seq # if we haven't sent this message 498 * only assign outgoing seq # if we haven't sent this message
499 * yet. if it is requeued, resend with it's original seq. 499 * yet. if it is requeued, resend with it's original seq.
500 */ 500 */
501 if (m->needs_out_seq) { 501 if (m->needs_out_seq) {
502 m->hdr.seq = cpu_to_le64(++con->out_seq); 502 m->hdr.seq = cpu_to_le64(++con->out_seq);
503 m->needs_out_seq = false; 503 m->needs_out_seq = false;
504 } 504 }
505 505
506 dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", 506 dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n",
507 m, con->out_seq, le16_to_cpu(m->hdr.type), 507 m, con->out_seq, le16_to_cpu(m->hdr.type),
508 le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len), 508 le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len),
509 le32_to_cpu(m->hdr.data_len), 509 le32_to_cpu(m->hdr.data_len),
510 m->nr_pages); 510 m->nr_pages);
511 BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); 511 BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);
512 512
513 /* tag + hdr + front + middle */ 513 /* tag + hdr + front + middle */
514 con->out_kvec[v].iov_base = &tag_msg; 514 con->out_kvec[v].iov_base = &tag_msg;
515 con->out_kvec[v++].iov_len = 1; 515 con->out_kvec[v++].iov_len = 1;
516 con->out_kvec[v].iov_base = &m->hdr; 516 con->out_kvec[v].iov_base = &m->hdr;
517 con->out_kvec[v++].iov_len = sizeof(m->hdr); 517 con->out_kvec[v++].iov_len = sizeof(m->hdr);
518 con->out_kvec[v++] = m->front; 518 con->out_kvec[v++] = m->front;
519 if (m->middle) 519 if (m->middle)
520 con->out_kvec[v++] = m->middle->vec; 520 con->out_kvec[v++] = m->middle->vec;
521 con->out_kvec_left = v; 521 con->out_kvec_left = v;
522 con->out_kvec_bytes += 1 + sizeof(m->hdr) + m->front.iov_len + 522 con->out_kvec_bytes += 1 + sizeof(m->hdr) + m->front.iov_len +
523 (m->middle ? m->middle->vec.iov_len : 0); 523 (m->middle ? m->middle->vec.iov_len : 0);
524 con->out_kvec_cur = con->out_kvec; 524 con->out_kvec_cur = con->out_kvec;
525 525
526 /* fill in crc (except data pages), footer */ 526 /* fill in crc (except data pages), footer */
527 con->out_msg->hdr.crc = 527 con->out_msg->hdr.crc =
528 cpu_to_le32(crc32c(0, (void *)&m->hdr, 528 cpu_to_le32(crc32c(0, (void *)&m->hdr,
529 sizeof(m->hdr) - sizeof(m->hdr.crc))); 529 sizeof(m->hdr) - sizeof(m->hdr.crc)));
530 con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE; 530 con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE;
531 con->out_msg->footer.front_crc = 531 con->out_msg->footer.front_crc =
532 cpu_to_le32(crc32c(0, m->front.iov_base, m->front.iov_len)); 532 cpu_to_le32(crc32c(0, m->front.iov_base, m->front.iov_len));
533 if (m->middle) 533 if (m->middle)
534 con->out_msg->footer.middle_crc = 534 con->out_msg->footer.middle_crc =
535 cpu_to_le32(crc32c(0, m->middle->vec.iov_base, 535 cpu_to_le32(crc32c(0, m->middle->vec.iov_base,
536 m->middle->vec.iov_len)); 536 m->middle->vec.iov_len));
537 else 537 else
538 con->out_msg->footer.middle_crc = 0; 538 con->out_msg->footer.middle_crc = 0;
539 con->out_msg->footer.data_crc = 0; 539 con->out_msg->footer.data_crc = 0;
540 dout("prepare_write_message front_crc %u data_crc %u\n", 540 dout("prepare_write_message front_crc %u data_crc %u\n",
541 le32_to_cpu(con->out_msg->footer.front_crc), 541 le32_to_cpu(con->out_msg->footer.front_crc),
542 le32_to_cpu(con->out_msg->footer.middle_crc)); 542 le32_to_cpu(con->out_msg->footer.middle_crc));
543 543
544 /* is there a data payload? */ 544 /* is there a data payload? */
545 if (le32_to_cpu(m->hdr.data_len) > 0) { 545 if (le32_to_cpu(m->hdr.data_len) > 0) {
546 /* initialize page iterator */ 546 /* initialize page iterator */
547 con->out_msg_pos.page = 0; 547 con->out_msg_pos.page = 0;
548 if (m->pages) 548 if (m->pages)
549 con->out_msg_pos.page_pos = m->page_alignment; 549 con->out_msg_pos.page_pos = m->page_alignment;
550 else 550 else
551 con->out_msg_pos.page_pos = 0; 551 con->out_msg_pos.page_pos = 0;
552 con->out_msg_pos.data_pos = 0; 552 con->out_msg_pos.data_pos = 0;
553 con->out_msg_pos.did_page_crc = 0; 553 con->out_msg_pos.did_page_crc = 0;
554 con->out_more = 1; /* data + footer will follow */ 554 con->out_more = 1; /* data + footer will follow */
555 } else { 555 } else {
556 /* no, queue up footer too and be done */ 556 /* no, queue up footer too and be done */
557 prepare_write_message_footer(con, v); 557 prepare_write_message_footer(con, v);
558 } 558 }
559 559
560 set_bit(WRITE_PENDING, &con->state); 560 set_bit(WRITE_PENDING, &con->state);
561 } 561 }
562 562
563 /* 563 /*
564 * Prepare an ack. 564 * Prepare an ack.
565 */ 565 */
566 static void prepare_write_ack(struct ceph_connection *con) 566 static void prepare_write_ack(struct ceph_connection *con)
567 { 567 {
568 dout("prepare_write_ack %p %llu -> %llu\n", con, 568 dout("prepare_write_ack %p %llu -> %llu\n", con,
569 con->in_seq_acked, con->in_seq); 569 con->in_seq_acked, con->in_seq);
570 con->in_seq_acked = con->in_seq; 570 con->in_seq_acked = con->in_seq;
571 571
572 con->out_kvec[0].iov_base = &tag_ack; 572 con->out_kvec[0].iov_base = &tag_ack;
573 con->out_kvec[0].iov_len = 1; 573 con->out_kvec[0].iov_len = 1;
574 con->out_temp_ack = cpu_to_le64(con->in_seq_acked); 574 con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
575 con->out_kvec[1].iov_base = &con->out_temp_ack; 575 con->out_kvec[1].iov_base = &con->out_temp_ack;
576 con->out_kvec[1].iov_len = sizeof(con->out_temp_ack); 576 con->out_kvec[1].iov_len = sizeof(con->out_temp_ack);
577 con->out_kvec_left = 2; 577 con->out_kvec_left = 2;
578 con->out_kvec_bytes = 1 + sizeof(con->out_temp_ack); 578 con->out_kvec_bytes = 1 + sizeof(con->out_temp_ack);
579 con->out_kvec_cur = con->out_kvec; 579 con->out_kvec_cur = con->out_kvec;
580 con->out_more = 1; /* more will follow.. eventually.. */ 580 con->out_more = 1; /* more will follow.. eventually.. */
581 set_bit(WRITE_PENDING, &con->state); 581 set_bit(WRITE_PENDING, &con->state);
582 } 582 }
583 583
584 /* 584 /*
585 * Prepare to write keepalive byte. 585 * Prepare to write keepalive byte.
586 */ 586 */
587 static void prepare_write_keepalive(struct ceph_connection *con) 587 static void prepare_write_keepalive(struct ceph_connection *con)
588 { 588 {
589 dout("prepare_write_keepalive %p\n", con); 589 dout("prepare_write_keepalive %p\n", con);
590 con->out_kvec[0].iov_base = &tag_keepalive; 590 con->out_kvec[0].iov_base = &tag_keepalive;
591 con->out_kvec[0].iov_len = 1; 591 con->out_kvec[0].iov_len = 1;
592 con->out_kvec_left = 1; 592 con->out_kvec_left = 1;
593 con->out_kvec_bytes = 1; 593 con->out_kvec_bytes = 1;
594 con->out_kvec_cur = con->out_kvec; 594 con->out_kvec_cur = con->out_kvec;
595 set_bit(WRITE_PENDING, &con->state); 595 set_bit(WRITE_PENDING, &con->state);
596 } 596 }
597 597
598 /* 598 /*
599 * Connection negotiation. 599 * Connection negotiation.
600 */ 600 */
601 601
602 static void prepare_connect_authorizer(struct ceph_connection *con) 602 static void prepare_connect_authorizer(struct ceph_connection *con)
603 { 603 {
604 void *auth_buf; 604 void *auth_buf;
605 int auth_len = 0; 605 int auth_len = 0;
606 int auth_protocol = 0; 606 int auth_protocol = 0;
607 607
608 mutex_unlock(&con->mutex); 608 mutex_unlock(&con->mutex);
609 if (con->ops->get_authorizer) 609 if (con->ops->get_authorizer)
610 con->ops->get_authorizer(con, &auth_buf, &auth_len, 610 con->ops->get_authorizer(con, &auth_buf, &auth_len,
611 &auth_protocol, &con->auth_reply_buf, 611 &auth_protocol, &con->auth_reply_buf,
612 &con->auth_reply_buf_len, 612 &con->auth_reply_buf_len,
613 con->auth_retry); 613 con->auth_retry);
614 mutex_lock(&con->mutex); 614 mutex_lock(&con->mutex);
615 615
616 con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol); 616 con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol);
617 con->out_connect.authorizer_len = cpu_to_le32(auth_len); 617 con->out_connect.authorizer_len = cpu_to_le32(auth_len);
618 618
619 con->out_kvec[con->out_kvec_left].iov_base = auth_buf; 619 con->out_kvec[con->out_kvec_left].iov_base = auth_buf;
620 con->out_kvec[con->out_kvec_left].iov_len = auth_len; 620 con->out_kvec[con->out_kvec_left].iov_len = auth_len;
621 con->out_kvec_left++; 621 con->out_kvec_left++;
622 con->out_kvec_bytes += auth_len; 622 con->out_kvec_bytes += auth_len;
623 } 623 }
624 624
625 /* 625 /*
626 * We connected to a peer and are saying hello. 626 * We connected to a peer and are saying hello.
627 */ 627 */
628 static void prepare_write_banner(struct ceph_messenger *msgr, 628 static void prepare_write_banner(struct ceph_messenger *msgr,
629 struct ceph_connection *con) 629 struct ceph_connection *con)
630 { 630 {
631 int len = strlen(CEPH_BANNER); 631 int len = strlen(CEPH_BANNER);
632 632
633 con->out_kvec[0].iov_base = CEPH_BANNER; 633 con->out_kvec[0].iov_base = CEPH_BANNER;
634 con->out_kvec[0].iov_len = len; 634 con->out_kvec[0].iov_len = len;
635 con->out_kvec[1].iov_base = &msgr->my_enc_addr; 635 con->out_kvec[1].iov_base = &msgr->my_enc_addr;
636 con->out_kvec[1].iov_len = sizeof(msgr->my_enc_addr); 636 con->out_kvec[1].iov_len = sizeof(msgr->my_enc_addr);
637 con->out_kvec_left = 2; 637 con->out_kvec_left = 2;
638 con->out_kvec_bytes = len + sizeof(msgr->my_enc_addr); 638 con->out_kvec_bytes = len + sizeof(msgr->my_enc_addr);
639 con->out_kvec_cur = con->out_kvec; 639 con->out_kvec_cur = con->out_kvec;
640 con->out_more = 0; 640 con->out_more = 0;
641 set_bit(WRITE_PENDING, &con->state); 641 set_bit(WRITE_PENDING, &con->state);
642 } 642 }
643 643
644 static void prepare_write_connect(struct ceph_messenger *msgr, 644 static void prepare_write_connect(struct ceph_messenger *msgr,
645 struct ceph_connection *con, 645 struct ceph_connection *con,
646 int after_banner) 646 int after_banner)
647 { 647 {
648 unsigned global_seq = get_global_seq(con->msgr, 0); 648 unsigned global_seq = get_global_seq(con->msgr, 0);
649 int proto; 649 int proto;
650 650
651 switch (con->peer_name.type) { 651 switch (con->peer_name.type) {
652 case CEPH_ENTITY_TYPE_MON: 652 case CEPH_ENTITY_TYPE_MON:
653 proto = CEPH_MONC_PROTOCOL; 653 proto = CEPH_MONC_PROTOCOL;
654 break; 654 break;
655 case CEPH_ENTITY_TYPE_OSD: 655 case CEPH_ENTITY_TYPE_OSD:
656 proto = CEPH_OSDC_PROTOCOL; 656 proto = CEPH_OSDC_PROTOCOL;
657 break; 657 break;
658 case CEPH_ENTITY_TYPE_MDS: 658 case CEPH_ENTITY_TYPE_MDS:
659 proto = CEPH_MDSC_PROTOCOL; 659 proto = CEPH_MDSC_PROTOCOL;
660 break; 660 break;
661 default: 661 default:
662 BUG(); 662 BUG();
663 } 663 }
664 664
665 dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, 665 dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
666 con->connect_seq, global_seq, proto); 666 con->connect_seq, global_seq, proto);
667 667
668 con->out_connect.features = cpu_to_le64(msgr->supported_features); 668 con->out_connect.features = cpu_to_le64(msgr->supported_features);
669 con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); 669 con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
670 con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); 670 con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
671 con->out_connect.global_seq = cpu_to_le32(global_seq); 671 con->out_connect.global_seq = cpu_to_le32(global_seq);
672 con->out_connect.protocol_version = cpu_to_le32(proto); 672 con->out_connect.protocol_version = cpu_to_le32(proto);
673 con->out_connect.flags = 0; 673 con->out_connect.flags = 0;
674 674
675 if (!after_banner) { 675 if (!after_banner) {
676 con->out_kvec_left = 0; 676 con->out_kvec_left = 0;
677 con->out_kvec_bytes = 0; 677 con->out_kvec_bytes = 0;
678 } 678 }
679 con->out_kvec[con->out_kvec_left].iov_base = &con->out_connect; 679 con->out_kvec[con->out_kvec_left].iov_base = &con->out_connect;
680 con->out_kvec[con->out_kvec_left].iov_len = sizeof(con->out_connect); 680 con->out_kvec[con->out_kvec_left].iov_len = sizeof(con->out_connect);
681 con->out_kvec_left++; 681 con->out_kvec_left++;
682 con->out_kvec_bytes += sizeof(con->out_connect); 682 con->out_kvec_bytes += sizeof(con->out_connect);
683 con->out_kvec_cur = con->out_kvec; 683 con->out_kvec_cur = con->out_kvec;
684 con->out_more = 0; 684 con->out_more = 0;
685 set_bit(WRITE_PENDING, &con->state); 685 set_bit(WRITE_PENDING, &con->state);
686 686
687 prepare_connect_authorizer(con); 687 prepare_connect_authorizer(con);
688 } 688 }
689 689
690 690
691 /* 691 /*
692 * write as much of pending kvecs to the socket as we can. 692 * write as much of pending kvecs to the socket as we can.
693 * 1 -> done 693 * 1 -> done
694 * 0 -> socket full, but more to do 694 * 0 -> socket full, but more to do
695 * <0 -> error 695 * <0 -> error
696 */ 696 */
static int write_partial_kvec(struct ceph_connection *con)
{
	int ret;

	dout("write_partial_kvec %p %d left\n", con, con->out_kvec_bytes);
	while (con->out_kvec_bytes > 0) {
		/* try to push everything that remains in one sendmsg */
		ret = ceph_tcp_sendmsg(con->sock, con->out_kvec_cur,
				       con->out_kvec_left, con->out_kvec_bytes,
				       con->out_more);
		if (ret <= 0)
			goto out;	/* 0: socket full; <0: error */
		con->out_kvec_bytes -= ret;
		if (con->out_kvec_bytes == 0)
			break;            /* done */
		/*
		 * Partial send: consume fully-sent kvecs, then advance
		 * base/len within the first partially-sent one.
		 * (ret > 0 here, so the signed/unsigned compare with
		 * iov_len is safe.)
		 */
		while (ret > 0) {
			if (ret >= con->out_kvec_cur->iov_len) {
				ret -= con->out_kvec_cur->iov_len;
				con->out_kvec_cur++;
				con->out_kvec_left--;
			} else {
				con->out_kvec_cur->iov_len -= ret;
				con->out_kvec_cur->iov_base += ret;
				ret = 0;
				break;
			}
		}
	}
	/* everything queued has been written */
	con->out_kvec_left = 0;
	con->out_kvec_is_msg = false;
	ret = 1;
out:
	dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con,
	     con->out_kvec_bytes, con->out_kvec_left, ret);
	return ret; /* done! */
}
732 732
733 #ifdef CONFIG_BLOCK 733 #ifdef CONFIG_BLOCK
734 static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg) 734 static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
735 { 735 {
736 if (!bio) { 736 if (!bio) {
737 *iter = NULL; 737 *iter = NULL;
738 *seg = 0; 738 *seg = 0;
739 return; 739 return;
740 } 740 }
741 *iter = bio; 741 *iter = bio;
742 *seg = bio->bi_idx; 742 *seg = bio->bi_idx;
743 } 743 }
744 744
745 static void iter_bio_next(struct bio **bio_iter, int *seg) 745 static void iter_bio_next(struct bio **bio_iter, int *seg)
746 { 746 {
747 if (*bio_iter == NULL) 747 if (*bio_iter == NULL)
748 return; 748 return;
749 749
750 BUG_ON(*seg >= (*bio_iter)->bi_vcnt); 750 BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
751 751
752 (*seg)++; 752 (*seg)++;
753 if (*seg == (*bio_iter)->bi_vcnt) 753 if (*seg == (*bio_iter)->bi_vcnt)
754 init_bio_iter((*bio_iter)->bi_next, bio_iter, seg); 754 init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
755 } 755 }
756 #endif 756 #endif
757 757
758 /* 758 /*
759 * Write as much message data payload as we can. If we finish, queue 759 * Write as much message data payload as we can. If we finish, queue
760 * up the footer. 760 * up the footer.
761 * 1 -> done, footer is now queued in out_kvec[]. 761 * 1 -> done, footer is now queued in out_kvec[].
762 * 0 -> socket full, but more to do 762 * 0 -> socket full, but more to do
763 * <0 -> error 763 * <0 -> error
764 */ 764 */
static int write_partial_msg_pages(struct ceph_connection *con)
{
	struct ceph_msg *msg = con->out_msg;
	unsigned data_len = le32_to_cpu(msg->hdr.data_len);
	size_t len;
	/*
	 * NOTE(review): 'crc' is copied straight from msgr->nocrc; the name
	 * suggests the sense may be inverted relative to "compute data CRC"
	 * — confirm nocrc semantics at the messenger level.
	 */
	int crc = con->msgr->nocrc;
	int ret;
	int total_max_write;
	int in_trail = 0;	/* set once we enter the trail pagelist */
	size_t trail_len = (msg->trail ? msg->trail->length : 0);

	dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n",
	     con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages,
	     con->out_msg_pos.page_pos);

#ifdef CONFIG_BLOCK
	/* lazily start the bio iterator the first time we get here */
	if (msg->bio && !msg->bio_iter)
		init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
#endif

	while (data_len > con->out_msg_pos.data_pos) {
		struct page *page = NULL;
		void *kaddr = NULL;
		int max_write = PAGE_SIZE;
		int page_shift = 0;

		total_max_write = data_len - trail_len -
			con->out_msg_pos.data_pos;

		/*
		 * if we are calculating the data crc (the default), we need
		 * to map the page.  if our pages[] has been revoked, use the
		 * zero page.
		 */

		/* have we reached the trail part of the data? */
		if (con->out_msg_pos.data_pos >= data_len - trail_len) {
			in_trail = 1;

			total_max_write = data_len - con->out_msg_pos.data_pos;

			page = list_first_entry(&msg->trail->head,
						struct page, lru);
			if (crc)
				kaddr = kmap(page);
			max_write = PAGE_SIZE;
		} else if (msg->pages) {
			/* plain page vector */
			page = msg->pages[con->out_msg_pos.page];
			if (crc)
				kaddr = kmap(page);
		} else if (msg->pagelist) {
			/* pagelist: always send the list head, which is
			 * rotated to the tail below once fully written */
			page = list_first_entry(&msg->pagelist->head,
						struct page, lru);
			if (crc)
				kaddr = kmap(page);
#ifdef CONFIG_BLOCK
		} else if (msg->bio) {
			struct bio_vec *bv;

			bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg);
			page = bv->bv_page;
			page_shift = bv->bv_offset;
			if (crc)
				kaddr = kmap(page) + page_shift;
			max_write = bv->bv_len;
#endif
		} else {
			/* pages were revoked; send zeros instead */
			page = con->msgr->zero_page;
			if (crc)
				kaddr = page_address(con->msgr->zero_page);
		}
		len = min_t(int, max_write - con->out_msg_pos.page_pos,
			    total_max_write);

		/* fold this page's contribution into the data crc once */
		if (crc && !con->out_msg_pos.did_page_crc) {
			void *base = kaddr + con->out_msg_pos.page_pos;
			u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc);

			BUG_ON(kaddr == NULL);
			con->out_msg->footer.data_crc =
				cpu_to_le32(crc32c(tmpcrc, base, len));
			con->out_msg_pos.did_page_crc = 1;
		}
		ret = kernel_sendpage(con->sock, page,
				      con->out_msg_pos.page_pos + page_shift,
				      len,
				      MSG_DONTWAIT | MSG_NOSIGNAL |
				      MSG_MORE);

		/* only pages we kmap'ed above need kunmap (zero page used
		 * page_address) */
		if (crc &&
		    (msg->pages || msg->pagelist || msg->bio || in_trail))
			kunmap(page);

		if (ret == -EAGAIN)
			ret = 0;	/* socket full: report "more to do" */
		if (ret <= 0)
			goto out;

		con->out_msg_pos.data_pos += ret;
		con->out_msg_pos.page_pos += ret;
		if (ret == len) {
			/* finished this page; step to the next one */
			con->out_msg_pos.page_pos = 0;
			con->out_msg_pos.page++;
			con->out_msg_pos.did_page_crc = 0;
			if (in_trail)
				list_move_tail(&page->lru,
					       &msg->trail->head);
			else if (msg->pagelist)
				list_move_tail(&page->lru,
					       &msg->pagelist->head);
#ifdef CONFIG_BLOCK
			else if (msg->bio)
				iter_bio_next(&msg->bio_iter, &msg->bio_seg);
#endif
		}
	}

	dout("write_partial_msg_pages %p msg %p done\n", con, msg);

	/* prepare and queue up footer, too */
	if (!crc)
		con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
	con->out_kvec_bytes = 0;
	con->out_kvec_left = 0;
	con->out_kvec_cur = con->out_kvec;
	prepare_write_message_footer(con, 0);
	ret = 1;
out:
	return ret;
}
895 895
896 /* 896 /*
897 * write some zeros 897 * write some zeros
898 */ 898 */
899 static int write_partial_skip(struct ceph_connection *con) 899 static int write_partial_skip(struct ceph_connection *con)
900 { 900 {
901 int ret; 901 int ret;
902 902
903 while (con->out_skip > 0) { 903 while (con->out_skip > 0) {
904 struct kvec iov = { 904 struct kvec iov = {
905 .iov_base = page_address(con->msgr->zero_page), 905 .iov_base = page_address(con->msgr->zero_page),
906 .iov_len = min(con->out_skip, (int)PAGE_CACHE_SIZE) 906 .iov_len = min(con->out_skip, (int)PAGE_CACHE_SIZE)
907 }; 907 };
908 908
909 ret = ceph_tcp_sendmsg(con->sock, &iov, 1, iov.iov_len, 1); 909 ret = ceph_tcp_sendmsg(con->sock, &iov, 1, iov.iov_len, 1);
910 if (ret <= 0) 910 if (ret <= 0)
911 goto out; 911 goto out;
912 con->out_skip -= ret; 912 con->out_skip -= ret;
913 } 913 }
914 ret = 1; 914 ret = 1;
915 out: 915 out:
916 return ret; 916 return ret;
917 } 917 }
918 918
919 /* 919 /*
920 * Prepare to read connection handshake, or an ack. 920 * Prepare to read connection handshake, or an ack.
921 */ 921 */
static void prepare_read_banner(struct ceph_connection *con)
{
	/* reset read position; subsequent reads fill the banner from 0 */
	dout("prepare_read_banner %p\n", con);
	con->in_base_pos = 0;
}
927 927
static void prepare_read_connect(struct ceph_connection *con)
{
	/* reset read position for the incoming connect reply */
	dout("prepare_read_connect %p\n", con);
	con->in_base_pos = 0;
}
933 933
static void prepare_read_ack(struct ceph_connection *con)
{
	/* reset read position for the incoming ack payload */
	dout("prepare_read_ack %p\n", con);
	con->in_base_pos = 0;
}
939 939
static void prepare_read_tag(struct ceph_connection *con)
{
	/* reset read position and default the next tag to READY */
	dout("prepare_read_tag %p\n", con);
	con->in_base_pos = 0;
	con->in_tag = CEPH_MSGR_TAG_READY;
}
946 946
947 /* 947 /*
948 * Prepare to read a message. 948 * Prepare to read a message.
949 */ 949 */
static int prepare_read_message(struct ceph_connection *con)
{
	dout("prepare_read_message %p\n", con);
	BUG_ON(con->in_msg != NULL);	/* previous message must be consumed */
	/* reset read position and the per-section crc accumulators */
	con->in_base_pos = 0;
	con->in_front_crc = con->in_middle_crc = con->in_data_crc = 0;
	return 0;	/* always succeeds */
}
958 958
959 959
960 static int read_partial(struct ceph_connection *con, 960 static int read_partial(struct ceph_connection *con,
961 int *to, int size, void *object) 961 int *to, int size, void *object)
962 { 962 {
963 *to += size; 963 *to += size;
964 while (con->in_base_pos < *to) { 964 while (con->in_base_pos < *to) {
965 int left = *to - con->in_base_pos; 965 int left = *to - con->in_base_pos;
966 int have = size - left; 966 int have = size - left;
967 int ret = ceph_tcp_recvmsg(con->sock, object + have, left); 967 int ret = ceph_tcp_recvmsg(con->sock, object + have, left);
968 if (ret <= 0) 968 if (ret <= 0)
969 return ret; 969 return ret;
970 con->in_base_pos += ret; 970 con->in_base_pos += ret;
971 } 971 }
972 return 1; 972 return 1;
973 } 973 }
974 974
975 975
976 /* 976 /*
977 * Read all or part of the connect-side handshake on a new connection 977 * Read all or part of the connect-side handshake on a new connection
978 */ 978 */
979 static int read_partial_banner(struct ceph_connection *con) 979 static int read_partial_banner(struct ceph_connection *con)
980 { 980 {
981 int ret, to = 0; 981 int ret, to = 0;
982 982
983 dout("read_partial_banner %p at %d\n", con, con->in_base_pos); 983 dout("read_partial_banner %p at %d\n", con, con->in_base_pos);
984 984
985 /* peer's banner */ 985 /* peer's banner */
986 ret = read_partial(con, &to, strlen(CEPH_BANNER), con->in_banner); 986 ret = read_partial(con, &to, strlen(CEPH_BANNER), con->in_banner);
987 if (ret <= 0) 987 if (ret <= 0)
988 goto out; 988 goto out;
989 ret = read_partial(con, &to, sizeof(con->actual_peer_addr), 989 ret = read_partial(con, &to, sizeof(con->actual_peer_addr),
990 &con->actual_peer_addr); 990 &con->actual_peer_addr);
991 if (ret <= 0) 991 if (ret <= 0)
992 goto out; 992 goto out;
993 ret = read_partial(con, &to, sizeof(con->peer_addr_for_me), 993 ret = read_partial(con, &to, sizeof(con->peer_addr_for_me),
994 &con->peer_addr_for_me); 994 &con->peer_addr_for_me);
995 if (ret <= 0) 995 if (ret <= 0)
996 goto out; 996 goto out;
997 out: 997 out:
998 return ret; 998 return ret;
999 } 999 }
1000 1000
1001 static int read_partial_connect(struct ceph_connection *con) 1001 static int read_partial_connect(struct ceph_connection *con)
1002 { 1002 {
1003 int ret, to = 0; 1003 int ret, to = 0;
1004 1004
1005 dout("read_partial_connect %p at %d\n", con, con->in_base_pos); 1005 dout("read_partial_connect %p at %d\n", con, con->in_base_pos);
1006 1006
1007 ret = read_partial(con, &to, sizeof(con->in_reply), &con->in_reply); 1007 ret = read_partial(con, &to, sizeof(con->in_reply), &con->in_reply);
1008 if (ret <= 0) 1008 if (ret <= 0)
1009 goto out; 1009 goto out;
1010 ret = read_partial(con, &to, le32_to_cpu(con->in_reply.authorizer_len), 1010 ret = read_partial(con, &to, le32_to_cpu(con->in_reply.authorizer_len),
1011 con->auth_reply_buf); 1011 con->auth_reply_buf);
1012 if (ret <= 0) 1012 if (ret <= 0)
1013 goto out; 1013 goto out;
1014 1014
1015 dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n", 1015 dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n",
1016 con, (int)con->in_reply.tag, 1016 con, (int)con->in_reply.tag,
1017 le32_to_cpu(con->in_reply.connect_seq), 1017 le32_to_cpu(con->in_reply.connect_seq),
1018 le32_to_cpu(con->in_reply.global_seq)); 1018 le32_to_cpu(con->in_reply.global_seq));
1019 out: 1019 out:
1020 return ret; 1020 return ret;
1021 1021
1022 } 1022 }
1023 1023
1024 /* 1024 /*
1025 * Verify the hello banner looks okay. 1025 * Verify the hello banner looks okay.
1026 */ 1026 */
1027 static int verify_hello(struct ceph_connection *con) 1027 static int verify_hello(struct ceph_connection *con)
1028 { 1028 {
1029 if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { 1029 if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) {
1030 pr_err("connect to %s got bad banner\n", 1030 pr_err("connect to %s got bad banner\n",
1031 ceph_pr_addr(&con->peer_addr.in_addr)); 1031 ceph_pr_addr(&con->peer_addr.in_addr));
1032 con->error_msg = "protocol error, bad banner"; 1032 con->error_msg = "protocol error, bad banner";
1033 return -1; 1033 return -1;
1034 } 1034 }
1035 return 0; 1035 return 0;
1036 } 1036 }
1037 1037
1038 static bool addr_is_blank(struct sockaddr_storage *ss) 1038 static bool addr_is_blank(struct sockaddr_storage *ss)
1039 { 1039 {
1040 switch (ss->ss_family) { 1040 switch (ss->ss_family) {
1041 case AF_INET: 1041 case AF_INET:
1042 return ((struct sockaddr_in *)ss)->sin_addr.s_addr == 0; 1042 return ((struct sockaddr_in *)ss)->sin_addr.s_addr == 0;
1043 case AF_INET6: 1043 case AF_INET6:
1044 return 1044 return
1045 ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[0] == 0 && 1045 ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[0] == 0 &&
1046 ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[1] == 0 && 1046 ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[1] == 0 &&
1047 ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[2] == 0 && 1047 ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[2] == 0 &&
1048 ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[3] == 0; 1048 ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[3] == 0;
1049 } 1049 }
1050 return false; 1050 return false;
1051 } 1051 }
1052 1052
static int addr_port(struct sockaddr_storage *ss)
{
	/* return the port in host byte order; 0 for unknown families */
	if (ss->ss_family == AF_INET)
		return ntohs(((struct sockaddr_in *)ss)->sin_port);
	if (ss->ss_family == AF_INET6)
		return ntohs(((struct sockaddr_in6 *)ss)->sin6_port);
	return 0;
}
1063 1063
static void addr_set_port(struct sockaddr_storage *ss, int p)
{
	/*
	 * Store port @p (host order) into the family-appropriate field.
	 * Fix: the original switch had no breaks, so the AF_INET case fell
	 * through and also wrote sin6_port (only harmless because the two
	 * port fields happen to share an offset).  Add the missing breaks
	 * so each family writes only its own field.
	 */
	switch (ss->ss_family) {
	case AF_INET:
		((struct sockaddr_in *)ss)->sin_port = htons(p);
		break;
	case AF_INET6:
		((struct sockaddr_in6 *)ss)->sin6_port = htons(p);
		break;
	}
}
1073 1073
1074 /* 1074 /*
1075 * Parse an ip[:port] list into an addr array. Use the default 1075 * Parse an ip[:port] list into an addr array. Use the default
1076 * monitor port if a port isn't specified. 1076 * monitor port if a port isn't specified.
1077 */ 1077 */
1078 int ceph_parse_ips(const char *c, const char *end, 1078 int ceph_parse_ips(const char *c, const char *end,
1079 struct ceph_entity_addr *addr, 1079 struct ceph_entity_addr *addr,
1080 int max_count, int *count) 1080 int max_count, int *count)
1081 { 1081 {
1082 int i; 1082 int i;
1083 const char *p = c; 1083 const char *p = c;
1084 1084
1085 dout("parse_ips on '%.*s'\n", (int)(end-c), c); 1085 dout("parse_ips on '%.*s'\n", (int)(end-c), c);
1086 for (i = 0; i < max_count; i++) { 1086 for (i = 0; i < max_count; i++) {
1087 const char *ipend; 1087 const char *ipend;
1088 struct sockaddr_storage *ss = &addr[i].in_addr; 1088 struct sockaddr_storage *ss = &addr[i].in_addr;
1089 struct sockaddr_in *in4 = (void *)ss; 1089 struct sockaddr_in *in4 = (void *)ss;
1090 struct sockaddr_in6 *in6 = (void *)ss; 1090 struct sockaddr_in6 *in6 = (void *)ss;
1091 int port; 1091 int port;
1092 char delim = ','; 1092 char delim = ',';
1093 1093
1094 if (*p == '[') { 1094 if (*p == '[') {
1095 delim = ']'; 1095 delim = ']';
1096 p++; 1096 p++;
1097 } 1097 }
1098 1098
1099 memset(ss, 0, sizeof(*ss)); 1099 memset(ss, 0, sizeof(*ss));
1100 if (in4_pton(p, end - p, (u8 *)&in4->sin_addr.s_addr, 1100 if (in4_pton(p, end - p, (u8 *)&in4->sin_addr.s_addr,
1101 delim, &ipend)) 1101 delim, &ipend))
1102 ss->ss_family = AF_INET; 1102 ss->ss_family = AF_INET;
1103 else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr, 1103 else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr,
1104 delim, &ipend)) 1104 delim, &ipend))
1105 ss->ss_family = AF_INET6; 1105 ss->ss_family = AF_INET6;
1106 else 1106 else
1107 goto bad; 1107 goto bad;
1108 p = ipend; 1108 p = ipend;
1109 1109
1110 if (delim == ']') { 1110 if (delim == ']') {
1111 if (*p != ']') { 1111 if (*p != ']') {
1112 dout("missing matching ']'\n"); 1112 dout("missing matching ']'\n");
1113 goto bad; 1113 goto bad;
1114 } 1114 }
1115 p++; 1115 p++;
1116 } 1116 }
1117 1117
1118 /* port? */ 1118 /* port? */
1119 if (p < end && *p == ':') { 1119 if (p < end && *p == ':') {
1120 port = 0; 1120 port = 0;
1121 p++; 1121 p++;
1122 while (p < end && *p >= '0' && *p <= '9') { 1122 while (p < end && *p >= '0' && *p <= '9') {
1123 port = (port * 10) + (*p - '0'); 1123 port = (port * 10) + (*p - '0');
1124 p++; 1124 p++;
1125 } 1125 }
1126 if (port > 65535 || port == 0) 1126 if (port > 65535 || port == 0)
1127 goto bad; 1127 goto bad;
1128 } else { 1128 } else {
1129 port = CEPH_MON_PORT; 1129 port = CEPH_MON_PORT;
1130 } 1130 }
1131 1131
1132 addr_set_port(ss, port); 1132 addr_set_port(ss, port);
1133 1133
1134 dout("parse_ips got %s\n", ceph_pr_addr(ss)); 1134 dout("parse_ips got %s\n", ceph_pr_addr(ss));
1135 1135
1136 if (p == end) 1136 if (p == end)
1137 break; 1137 break;
1138 if (*p != ',') 1138 if (*p != ',')
1139 goto bad; 1139 goto bad;
1140 p++; 1140 p++;
1141 } 1141 }
1142 1142
1143 if (p != end) 1143 if (p != end)
1144 goto bad; 1144 goto bad;
1145 1145
1146 if (count) 1146 if (count)
1147 *count = i + 1; 1147 *count = i + 1;
1148 return 0; 1148 return 0;
1149 1149
1150 bad: 1150 bad:
1151 pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); 1151 pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c);
1152 return -EINVAL; 1152 return -EINVAL;
1153 } 1153 }
1154 EXPORT_SYMBOL(ceph_parse_ips); 1154 EXPORT_SYMBOL(ceph_parse_ips);
1155 1155
/*
 * Process the banner exchanged right after the TCP connection was
 * established: verify the hello string, decode the peer's advertised
 * address and the address the peer saw us connect from, confirm we
 * reached the peer we intended, and possibly learn our own
 * externally-visible address.
 *
 * Called with con->mutex held.  Returns 0 on success and moves the
 * connection into NEGOTIATING; returns -1 on failure (con->error_msg
 * may be set).
 */
static int process_banner(struct ceph_connection *con)
{
	dout("process_banner on %p\n", con);

	if (verify_hello(con) < 0)
		return -1;

	/* wire format -> host representation */
	ceph_decode_addr(&con->actual_peer_addr);
	ceph_decode_addr(&con->peer_addr_for_me);

	/*
	 * Make sure the other end is who we wanted.  note that the other
	 * end may not yet know their ip address, so if it's 0.0.0.0, give
	 * them the benefit of the doubt.
	 */
	if (memcmp(&con->peer_addr, &con->actual_peer_addr,
		   sizeof(con->peer_addr)) != 0 &&
	    !(addr_is_blank(&con->actual_peer_addr.in_addr) &&
	      con->actual_peer_addr.nonce == con->peer_addr.nonce)) {
		pr_warning("wrong peer, want %s/%d, got %s/%d\n",
			   ceph_pr_addr(&con->peer_addr.in_addr),
			   (int)le32_to_cpu(con->peer_addr.nonce),
			   ceph_pr_addr(&con->actual_peer_addr.in_addr),
			   (int)le32_to_cpu(con->actual_peer_addr.nonce));
		con->error_msg = "wrong peer at address";
		return -1;
	}

	/*
	 * did we learn our address?
	 */
	if (addr_is_blank(&con->msgr->inst.addr.in_addr)) {
		int port = addr_port(&con->msgr->inst.addr.in_addr);

		/* adopt the address the peer saw us at, but keep our port */
		memcpy(&con->msgr->inst.addr.in_addr,
		       &con->peer_addr_for_me.in_addr,
		       sizeof(con->peer_addr_for_me.in_addr));
		addr_set_port(&con->msgr->inst.addr.in_addr, port);
		encode_my_addr(con->msgr);
		dout("process_banner learned my addr is %s\n",
		     ceph_pr_addr(&con->msgr->inst.addr.in_addr));
	}

	set_bit(NEGOTIATING, &con->state);
	prepare_read_connect(con);
	return 0;
}
1203 1203
/*
 * Give up on this server instance after an unrecoverable protocol-level
 * mismatch: reset the connection state and mark it CLOSED so no further
 * retries happen against this instance.
 *
 * Called with con->mutex held; the mutex is dropped across the optional
 * bad_proto callback, which is invoked without it held.
 */
static void fail_protocol(struct ceph_connection *con)
{
	reset_connection(con);
	set_bit(CLOSED, &con->state);  /* in case there's queued work */

	mutex_unlock(&con->mutex);
	if (con->ops->bad_proto)
		con->ops->bad_proto(con);
	mutex_lock(&con->mutex);
}
1214 1214
/*
 * Process the server's reply to our connect request.  Called with
 * con->mutex held.
 *
 * Returns 0 when the handshake should continue (or has completed, on
 * TAG_READY), or -1 on error with con->error_msg set.  Fatal protocol
 * mismatches additionally go through fail_protocol() so we stop
 * retrying this server instance; transient failures simply return -1
 * so the normal backoff/retry machinery kicks in.
 */
static int process_connect(struct ceph_connection *con)
{
	u64 sup_feat = con->msgr->supported_features;
	u64 req_feat = con->msgr->required_features;
	u64 server_feat = le64_to_cpu(con->in_reply.features);

	dout("process_connect on %p tag %d\n", con, (int)con->in_tag);

	switch (con->in_reply.tag) {
	case CEPH_MSGR_TAG_FEATURES:
		/* server requires features we don't support: fatal */
		pr_err("%s%lld %s feature set mismatch,"
		       " my %llx < server's %llx, missing %llx\n",
		       ENTITY_NAME(con->peer_name),
		       ceph_pr_addr(&con->peer_addr.in_addr),
		       sup_feat, server_feat, server_feat & ~sup_feat);
		con->error_msg = "missing required protocol features";
		fail_protocol(con);
		return -1;

	case CEPH_MSGR_TAG_BADPROTOVER:
		/* protocol version mismatch: fatal until server restarts */
		pr_err("%s%lld %s protocol version mismatch,"
		       " my %d != server's %d\n",
		       ENTITY_NAME(con->peer_name),
		       ceph_pr_addr(&con->peer_addr.in_addr),
		       le32_to_cpu(con->out_connect.protocol_version),
		       le32_to_cpu(con->in_reply.protocol_version));
		con->error_msg = "protocol version mismatch";
		fail_protocol(con);
		return -1;

	case CEPH_MSGR_TAG_BADAUTHORIZER:
		con->auth_retry++;
		dout("process_connect %p got BADAUTHORIZER attempt %d\n", con,
		     con->auth_retry);
		if (con->auth_retry == 2) {
			/*
			 * Second failure with a fresh authorizer: give up on
			 * this attempt.  Note we do NOT mark the connection
			 * CLOSED -- authorization failure is treated as
			 * transient (e.g. server key rotation issues), so the
			 * normal backoff and retry behavior applies.
			 */
			con->error_msg = "connect authorization failure";
			return -1;
		}
		/* retry once with a freshly generated authorizer */
		con->auth_retry = 1;
		prepare_write_connect(con->msgr, con, 0);
		prepare_read_connect(con);
		break;

	case CEPH_MSGR_TAG_RESETSESSION:
		/*
		 * If we connected with a large connect_seq but the peer
		 * has no record of a session with us (no connection, or
		 * connect_seq == 0), they will send RESETSESION to indicate
		 * that they must have reset their session, and may have
		 * dropped messages.
		 */
		dout("process_connect got RESET peer seq %u\n",
		     le32_to_cpu(con->in_connect.connect_seq));
		pr_err("%s%lld %s connection reset\n",
		       ENTITY_NAME(con->peer_name),
		       ceph_pr_addr(&con->peer_addr.in_addr));
		reset_connection(con);
		prepare_write_connect(con->msgr, con, 0);
		prepare_read_connect(con);

		/* Tell ceph about it. */
		mutex_unlock(&con->mutex);
		pr_info("reset on %s%lld\n", ENTITY_NAME(con->peer_name));
		if (con->ops->peer_reset)
			con->ops->peer_reset(con);
		mutex_lock(&con->mutex);
		break;

	case CEPH_MSGR_TAG_RETRY_SESSION:
		/*
		 * If we sent a smaller connect_seq than the peer has, try
		 * again with a larger value.
		 */
		dout("process_connect got RETRY my seq = %u, peer_seq = %u\n",
		     le32_to_cpu(con->out_connect.connect_seq),
		     le32_to_cpu(con->in_connect.connect_seq));
		con->connect_seq = le32_to_cpu(con->in_connect.connect_seq);
		prepare_write_connect(con->msgr, con, 0);
		prepare_read_connect(con);
		break;

	case CEPH_MSGR_TAG_RETRY_GLOBAL:
		/*
		 * If we sent a smaller global_seq than the peer has, try
		 * again with a larger value.
		 */
		dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n",
		     con->peer_global_seq,
		     le32_to_cpu(con->in_connect.global_seq));
		get_global_seq(con->msgr,
			       le32_to_cpu(con->in_connect.global_seq));
		prepare_write_connect(con->msgr, con, 0);
		prepare_read_connect(con);
		break;

	case CEPH_MSGR_TAG_READY:
		/* handshake complete; verify required features first */
		if (req_feat & ~server_feat) {
			pr_err("%s%lld %s protocol feature mismatch,"
			       " my required %llx > server's %llx, need %llx\n",
			       ENTITY_NAME(con->peer_name),
			       ceph_pr_addr(&con->peer_addr.in_addr),
			       req_feat, server_feat, req_feat & ~server_feat);
			con->error_msg = "missing required protocol features";
			fail_protocol(con);
			return -1;
		}
		clear_bit(CONNECTING, &con->state);
		con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
		con->connect_seq++;
		con->peer_features = server_feat;
		dout("process_connect got READY gseq %d cseq %d (%d)\n",
		     con->peer_global_seq,
		     le32_to_cpu(con->in_reply.connect_seq),
		     con->connect_seq);
		WARN_ON(con->connect_seq !=
			le32_to_cpu(con->in_reply.connect_seq));

		if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
			set_bit(LOSSYTX, &con->state);

		prepare_read_tag(con);
		break;

	case CEPH_MSGR_TAG_WAIT:
		/*
		 * If there is a connection race (we are opening
		 * connections to each other), one of us may just have
		 * to WAIT.  This shouldn't happen if we are the
		 * client.
		 */
		pr_err("process_connect peer connecting WAIT\n");
		/* fall through */

	default:
		pr_err("connect protocol error, will retry\n");
		con->error_msg = "protocol error, garbage tag during connect";
		return -1;
	}
	return 0;
}
1356 1354
1357 1355
/*
 * read (part of) an ack
 *
 * Accumulates the raw ack bytes into con->in_temp_ack and returns
 * whatever read_partial() returns (<= 0 means incomplete or error).
 */
static int read_partial_ack(struct ceph_connection *con)
{
	int to = 0;

	return read_partial(con, &to, sizeof(con->in_temp_ack),
			    &con->in_temp_ack);
}
1368 1366
1369 1367
1370 /* 1368 /*
1371 * We can finally discard anything that's been acked. 1369 * We can finally discard anything that's been acked.
1372 */ 1370 */
1373 static void process_ack(struct ceph_connection *con) 1371 static void process_ack(struct ceph_connection *con)
1374 { 1372 {
1375 struct ceph_msg *m; 1373 struct ceph_msg *m;
1376 u64 ack = le64_to_cpu(con->in_temp_ack); 1374 u64 ack = le64_to_cpu(con->in_temp_ack);
1377 u64 seq; 1375 u64 seq;
1378 1376
1379 while (!list_empty(&con->out_sent)) { 1377 while (!list_empty(&con->out_sent)) {
1380 m = list_first_entry(&con->out_sent, struct ceph_msg, 1378 m = list_first_entry(&con->out_sent, struct ceph_msg,
1381 list_head); 1379 list_head);
1382 seq = le64_to_cpu(m->hdr.seq); 1380 seq = le64_to_cpu(m->hdr.seq);
1383 if (seq > ack) 1381 if (seq > ack)
1384 break; 1382 break;
1385 dout("got ack for seq %llu type %d at %p\n", seq, 1383 dout("got ack for seq %llu type %d at %p\n", seq,
1386 le16_to_cpu(m->hdr.type), m); 1384 le16_to_cpu(m->hdr.type), m);
1387 ceph_msg_remove(m); 1385 ceph_msg_remove(m);
1388 } 1386 }
1389 prepare_read_tag(con); 1387 prepare_read_tag(con);
1390 } 1388 }
1391 1389
1392 1390
1393 1391
1394 1392
/*
 * Read (part of) one section of a message (front or middle) into the
 * supplied kvec, resuming at section->iov_len, until sec_len bytes have
 * arrived.  Computes the section's crc32c into *crc when the final byte
 * lands.
 *
 * Returns 1 once the section is complete, or the (<= 0) result of
 * ceph_tcp_recvmsg() if the read stalled or failed.
 */
static int read_partial_message_section(struct ceph_connection *con,
					struct kvec *section,
					unsigned int sec_len, u32 *crc)
{
	int ret, left;

	BUG_ON(!section);

	while (section->iov_len < sec_len) {
		BUG_ON(section->iov_base == NULL);
		left = sec_len - section->iov_len;
		ret = ceph_tcp_recvmsg(con->sock, (char *)section->iov_base +
				       section->iov_len, left);
		if (ret <= 0)
			return ret;
		section->iov_len += ret;
		/* whole section received: checksum it in one pass */
		if (section->iov_len == sec_len)
			*crc = crc32c(0, section->iov_base,
				      section->iov_len);
	}

	return 1;
}
1418 1416
1419 static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, 1417 static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
1420 struct ceph_msg_header *hdr, 1418 struct ceph_msg_header *hdr,
1421 int *skip); 1419 int *skip);
1422 1420
1423 1421
/*
 * Read (part of) the data payload into the message's page vector,
 * resuming from con->in_msg_pos.  Folds received bytes into the running
 * data crc when datacrc is set, and advances to the next page when the
 * current one fills.
 *
 * Returns the number of bytes received this call, or the (<= 0) result
 * of ceph_tcp_recvmsg() on stall/error.
 */
static int read_partial_message_pages(struct ceph_connection *con,
				      struct page **pages,
				      unsigned data_len, int datacrc)
{
	void *p;
	int ret;
	int left;

	/* no more than what remains in the message and in this page */
	left = min((int)(data_len - con->in_msg_pos.data_pos),
		   (int)(PAGE_SIZE - con->in_msg_pos.page_pos));
	/* (page) data */
	BUG_ON(pages == NULL);
	p = kmap(pages[con->in_msg_pos.page]);
	ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos,
			       left);
	if (ret > 0 && datacrc)
		con->in_data_crc =
			crc32c(con->in_data_crc,
			       p + con->in_msg_pos.page_pos, ret);
	kunmap(pages[con->in_msg_pos.page]);
	if (ret <= 0)
		return ret;
	con->in_msg_pos.data_pos += ret;
	con->in_msg_pos.page_pos += ret;
	if (con->in_msg_pos.page_pos == PAGE_SIZE) {
		con->in_msg_pos.page_pos = 0;
		con->in_msg_pos.page++;
	}

	return ret;
}
1455 1453
#ifdef CONFIG_BLOCK
/*
 * Read (part of) the data payload into the message's bio chain,
 * resuming from con->in_msg_pos within the current bio_vec.  Folds
 * received bytes into the running data crc when datacrc is set, and
 * steps the bio iterator forward when the current segment fills.
 *
 * Returns the number of bytes received this call, or <= 0 on
 * stall/error (including a PTR_ERR from the bio_vec lookup).
 */
static int read_partial_message_bio(struct ceph_connection *con,
				    struct bio **bio_iter, int *bio_seg,
				    unsigned data_len, int datacrc)
{
	struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg);
	void *p;
	int ret, left;

	if (IS_ERR(bv))
		return PTR_ERR(bv);

	/* no more than what remains in the message and in this segment */
	left = min((int)(data_len - con->in_msg_pos.data_pos),
		   (int)(bv->bv_len - con->in_msg_pos.page_pos));

	p = kmap(bv->bv_page) + bv->bv_offset;

	ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos,
			       left);
	if (ret > 0 && datacrc)
		con->in_data_crc =
			crc32c(con->in_data_crc,
			       p + con->in_msg_pos.page_pos, ret);
	kunmap(bv->bv_page);
	if (ret <= 0)
		return ret;
	con->in_msg_pos.data_pos += ret;
	con->in_msg_pos.page_pos += ret;
	if (con->in_msg_pos.page_pos == bv->bv_len) {
		con->in_msg_pos.page_pos = 0;
		iter_bio_next(bio_iter, bio_seg);
	}

	return ret;
}
#endif
1492 1490
1493 /* 1491 /*
1494 * read (part of) a message. 1492 * read (part of) a message.
1495 */ 1493 */
1496 static int read_partial_message(struct ceph_connection *con) 1494 static int read_partial_message(struct ceph_connection *con)
1497 { 1495 {
1498 struct ceph_msg *m = con->in_msg; 1496 struct ceph_msg *m = con->in_msg;
1499 int ret; 1497 int ret;
1500 int to, left; 1498 int to, left;
1501 unsigned front_len, middle_len, data_len; 1499 unsigned front_len, middle_len, data_len;
1502 int datacrc = con->msgr->nocrc; 1500 int datacrc = con->msgr->nocrc;
1503 int skip; 1501 int skip;
1504 u64 seq; 1502 u64 seq;
1505 1503
1506 dout("read_partial_message con %p msg %p\n", con, m); 1504 dout("read_partial_message con %p msg %p\n", con, m);
1507 1505
1508 /* header */ 1506 /* header */
1509 while (con->in_base_pos < sizeof(con->in_hdr)) { 1507 while (con->in_base_pos < sizeof(con->in_hdr)) {
1510 left = sizeof(con->in_hdr) - con->in_base_pos; 1508 left = sizeof(con->in_hdr) - con->in_base_pos;
1511 ret = ceph_tcp_recvmsg(con->sock, 1509 ret = ceph_tcp_recvmsg(con->sock,
1512 (char *)&con->in_hdr + con->in_base_pos, 1510 (char *)&con->in_hdr + con->in_base_pos,
1513 left); 1511 left);
1514 if (ret <= 0) 1512 if (ret <= 0)
1515 return ret; 1513 return ret;
1516 con->in_base_pos += ret; 1514 con->in_base_pos += ret;
1517 if (con->in_base_pos == sizeof(con->in_hdr)) { 1515 if (con->in_base_pos == sizeof(con->in_hdr)) {
1518 u32 crc = crc32c(0, (void *)&con->in_hdr, 1516 u32 crc = crc32c(0, (void *)&con->in_hdr,
1519 sizeof(con->in_hdr) - sizeof(con->in_hdr.crc)); 1517 sizeof(con->in_hdr) - sizeof(con->in_hdr.crc));
1520 if (crc != le32_to_cpu(con->in_hdr.crc)) { 1518 if (crc != le32_to_cpu(con->in_hdr.crc)) {
1521 pr_err("read_partial_message bad hdr " 1519 pr_err("read_partial_message bad hdr "
1522 " crc %u != expected %u\n", 1520 " crc %u != expected %u\n",
1523 crc, con->in_hdr.crc); 1521 crc, con->in_hdr.crc);
1524 return -EBADMSG; 1522 return -EBADMSG;
1525 } 1523 }
1526 } 1524 }
1527 } 1525 }
1528 front_len = le32_to_cpu(con->in_hdr.front_len); 1526 front_len = le32_to_cpu(con->in_hdr.front_len);
1529 if (front_len > CEPH_MSG_MAX_FRONT_LEN) 1527 if (front_len > CEPH_MSG_MAX_FRONT_LEN)
1530 return -EIO; 1528 return -EIO;
1531 middle_len = le32_to_cpu(con->in_hdr.middle_len); 1529 middle_len = le32_to_cpu(con->in_hdr.middle_len);
1532 if (middle_len > CEPH_MSG_MAX_DATA_LEN) 1530 if (middle_len > CEPH_MSG_MAX_DATA_LEN)
1533 return -EIO; 1531 return -EIO;
1534 data_len = le32_to_cpu(con->in_hdr.data_len); 1532 data_len = le32_to_cpu(con->in_hdr.data_len);
1535 if (data_len > CEPH_MSG_MAX_DATA_LEN) 1533 if (data_len > CEPH_MSG_MAX_DATA_LEN)
1536 return -EIO; 1534 return -EIO;
1537 1535
1538 /* verify seq# */ 1536 /* verify seq# */
1539 seq = le64_to_cpu(con->in_hdr.seq); 1537 seq = le64_to_cpu(con->in_hdr.seq);
1540 if ((s64)seq - (s64)con->in_seq < 1) { 1538 if ((s64)seq - (s64)con->in_seq < 1) {
1541 pr_info("skipping %s%lld %s seq %lld expected %lld\n", 1539 pr_info("skipping %s%lld %s seq %lld expected %lld\n",
1542 ENTITY_NAME(con->peer_name), 1540 ENTITY_NAME(con->peer_name),
1543 ceph_pr_addr(&con->peer_addr.in_addr), 1541 ceph_pr_addr(&con->peer_addr.in_addr),
1544 seq, con->in_seq + 1); 1542 seq, con->in_seq + 1);
1545 con->in_base_pos = -front_len - middle_len - data_len - 1543 con->in_base_pos = -front_len - middle_len - data_len -
1546 sizeof(m->footer); 1544 sizeof(m->footer);
1547 con->in_tag = CEPH_MSGR_TAG_READY; 1545 con->in_tag = CEPH_MSGR_TAG_READY;
1548 return 0; 1546 return 0;
1549 } else if ((s64)seq - (s64)con->in_seq > 1) { 1547 } else if ((s64)seq - (s64)con->in_seq > 1) {
1550 pr_err("read_partial_message bad seq %lld expected %lld\n", 1548 pr_err("read_partial_message bad seq %lld expected %lld\n",
1551 seq, con->in_seq + 1); 1549 seq, con->in_seq + 1);
1552 con->error_msg = "bad message sequence # for incoming message"; 1550 con->error_msg = "bad message sequence # for incoming message";
1553 return -EBADMSG; 1551 return -EBADMSG;
1554 } 1552 }
1555 1553
1556 /* allocate message? */ 1554 /* allocate message? */
1557 if (!con->in_msg) { 1555 if (!con->in_msg) {
1558 dout("got hdr type %d front %d data %d\n", con->in_hdr.type, 1556 dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
1559 con->in_hdr.front_len, con->in_hdr.data_len); 1557 con->in_hdr.front_len, con->in_hdr.data_len);
1560 skip = 0; 1558 skip = 0;
1561 con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); 1559 con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
1562 if (skip) { 1560 if (skip) {
1563 /* skip this message */ 1561 /* skip this message */
1564 dout("alloc_msg said skip message\n"); 1562 dout("alloc_msg said skip message\n");
1565 BUG_ON(con->in_msg); 1563 BUG_ON(con->in_msg);
1566 con->in_base_pos = -front_len - middle_len - data_len - 1564 con->in_base_pos = -front_len - middle_len - data_len -
1567 sizeof(m->footer); 1565 sizeof(m->footer);
1568 con->in_tag = CEPH_MSGR_TAG_READY; 1566 con->in_tag = CEPH_MSGR_TAG_READY;
1569 con->in_seq++; 1567 con->in_seq++;
1570 return 0; 1568 return 0;
1571 } 1569 }
1572 if (!con->in_msg) { 1570 if (!con->in_msg) {
1573 con->error_msg = 1571 con->error_msg =
1574 "error allocating memory for incoming message"; 1572 "error allocating memory for incoming message";
1575 return -ENOMEM; 1573 return -ENOMEM;
1576 } 1574 }
1577 m = con->in_msg; 1575 m = con->in_msg;
1578 m->front.iov_len = 0; /* haven't read it yet */ 1576 m->front.iov_len = 0; /* haven't read it yet */
1579 if (m->middle) 1577 if (m->middle)
1580 m->middle->vec.iov_len = 0; 1578 m->middle->vec.iov_len = 0;
1581 1579
1582 con->in_msg_pos.page = 0; 1580 con->in_msg_pos.page = 0;
1583 if (m->pages) 1581 if (m->pages)
1584 con->in_msg_pos.page_pos = m->page_alignment; 1582 con->in_msg_pos.page_pos = m->page_alignment;
1585 else 1583 else
1586 con->in_msg_pos.page_pos = 0; 1584 con->in_msg_pos.page_pos = 0;
1587 con->in_msg_pos.data_pos = 0; 1585 con->in_msg_pos.data_pos = 0;
1588 } 1586 }
1589 1587
1590 /* front */ 1588 /* front */
1591 ret = read_partial_message_section(con, &m->front, front_len, 1589 ret = read_partial_message_section(con, &m->front, front_len,
1592 &con->in_front_crc); 1590 &con->in_front_crc);
1593 if (ret <= 0) 1591 if (ret <= 0)
1594 return ret; 1592 return ret;
1595 1593
1596 /* middle */ 1594 /* middle */
1597 if (m->middle) { 1595 if (m->middle) {
1598 ret = read_partial_message_section(con, &m->middle->vec, 1596 ret = read_partial_message_section(con, &m->middle->vec,
1599 middle_len, 1597 middle_len,
1600 &con->in_middle_crc); 1598 &con->in_middle_crc);
1601 if (ret <= 0) 1599 if (ret <= 0)
1602 return ret; 1600 return ret;
1603 } 1601 }
1604 #ifdef CONFIG_BLOCK 1602 #ifdef CONFIG_BLOCK
1605 if (m->bio && !m->bio_iter) 1603 if (m->bio && !m->bio_iter)
1606 init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg); 1604 init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
1607 #endif 1605 #endif
1608 1606
1609 /* (page) data */ 1607 /* (page) data */
1610 while (con->in_msg_pos.data_pos < data_len) { 1608 while (con->in_msg_pos.data_pos < data_len) {
1611 if (m->pages) { 1609 if (m->pages) {
1612 ret = read_partial_message_pages(con, m->pages, 1610 ret = read_partial_message_pages(con, m->pages,
1613 data_len, datacrc); 1611 data_len, datacrc);
1614 if (ret <= 0) 1612 if (ret <= 0)
1615 return ret; 1613 return ret;
1616 #ifdef CONFIG_BLOCK 1614 #ifdef CONFIG_BLOCK
1617 } else if (m->bio) { 1615 } else if (m->bio) {
1618 1616
1619 ret = read_partial_message_bio(con, 1617 ret = read_partial_message_bio(con,
1620 &m->bio_iter, &m->bio_seg, 1618 &m->bio_iter, &m->bio_seg,
1621 data_len, datacrc); 1619 data_len, datacrc);
1622 if (ret <= 0) 1620 if (ret <= 0)
1623 return ret; 1621 return ret;
1624 #endif 1622 #endif
1625 } else { 1623 } else {
1626 BUG_ON(1); 1624 BUG_ON(1);
1627 } 1625 }
1628 } 1626 }
1629 1627
1630 /* footer */ 1628 /* footer */
1631 to = sizeof(m->hdr) + sizeof(m->footer); 1629 to = sizeof(m->hdr) + sizeof(m->footer);
1632 while (con->in_base_pos < to) { 1630 while (con->in_base_pos < to) {
1633 left = to - con->in_base_pos; 1631 left = to - con->in_base_pos;
1634 ret = ceph_tcp_recvmsg(con->sock, (char *)&m->footer + 1632 ret = ceph_tcp_recvmsg(con->sock, (char *)&m->footer +
1635 (con->in_base_pos - sizeof(m->hdr)), 1633 (con->in_base_pos - sizeof(m->hdr)),
1636 left); 1634 left);
1637 if (ret <= 0) 1635 if (ret <= 0)
1638 return ret; 1636 return ret;
1639 con->in_base_pos += ret; 1637 con->in_base_pos += ret;
1640 } 1638 }
1641 dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n", 1639 dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n",
1642 m, front_len, m->footer.front_crc, middle_len, 1640 m, front_len, m->footer.front_crc, middle_len,
1643 m->footer.middle_crc, data_len, m->footer.data_crc); 1641 m->footer.middle_crc, data_len, m->footer.data_crc);
1644 1642
1645 /* crc ok? */ 1643 /* crc ok? */
1646 if (con->in_front_crc != le32_to_cpu(m->footer.front_crc)) { 1644 if (con->in_front_crc != le32_to_cpu(m->footer.front_crc)) {
1647 pr_err("read_partial_message %p front crc %u != exp. %u\n", 1645 pr_err("read_partial_message %p front crc %u != exp. %u\n",
1648 m, con->in_front_crc, m->footer.front_crc); 1646 m, con->in_front_crc, m->footer.front_crc);
1649 return -EBADMSG; 1647 return -EBADMSG;
1650 } 1648 }
1651 if (con->in_middle_crc != le32_to_cpu(m->footer.middle_crc)) { 1649 if (con->in_middle_crc != le32_to_cpu(m->footer.middle_crc)) {
1652 pr_err("read_partial_message %p middle crc %u != exp %u\n", 1650 pr_err("read_partial_message %p middle crc %u != exp %u\n",
1653 m, con->in_middle_crc, m->footer.middle_crc); 1651 m, con->in_middle_crc, m->footer.middle_crc);
1654 return -EBADMSG; 1652 return -EBADMSG;
1655 } 1653 }
1656 if (datacrc && 1654 if (datacrc &&
1657 (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 && 1655 (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 &&
1658 con->in_data_crc != le32_to_cpu(m->footer.data_crc)) { 1656 con->in_data_crc != le32_to_cpu(m->footer.data_crc)) {
1659 pr_err("read_partial_message %p data crc %u != exp. %u\n", m, 1657 pr_err("read_partial_message %p data crc %u != exp. %u\n", m,
1660 con->in_data_crc, le32_to_cpu(m->footer.data_crc)); 1658 con->in_data_crc, le32_to_cpu(m->footer.data_crc));
1661 return -EBADMSG; 1659 return -EBADMSG;
1662 } 1660 }
1663 1661
1664 return 1; /* done! */ 1662 return 1; /* done! */
1665 } 1663 }
1666 1664
/*
 * Process message.  This happens in the worker thread.  The callback should
 * be careful not to do anything that waits on other incoming messages or it
 * may deadlock.
 *
 * Takes ownership of con->in_msg, bumps the incoming sequence number,
 * and dispatches the message to con->ops->dispatch() with con->mutex
 * dropped.
 */
static void process_message(struct ceph_connection *con)
{
	struct ceph_msg *msg;

	/* take ownership of the completed incoming message */
	msg = con->in_msg;
	con->in_msg = NULL;

	/* if first message, set peer_name */
	if (con->peer_name.type == 0)
		con->peer_name = msg->hdr.src;

	con->in_seq++;

	/* dispatch runs without con->mutex held */
	mutex_unlock(&con->mutex);

	dout("===== %p %llu from %s%lld %d=%s len %d+%d (%u %u %u) =====\n",
	     msg, le64_to_cpu(msg->hdr.seq),
	     ENTITY_NAME(msg->hdr.src),
	     le16_to_cpu(msg->hdr.type),
	     ceph_msg_type_name(le16_to_cpu(msg->hdr.type)),
	     le32_to_cpu(msg->hdr.front_len),
	     le32_to_cpu(msg->hdr.data_len),
	     con->in_front_crc, con->in_middle_crc, con->in_data_crc);
	con->ops->dispatch(con, msg);

	mutex_lock(&con->mutex);
	prepare_read_tag(con);
}
1699 1697
1700 1698
/*
 * Write something to the socket.  Called in a worker thread when the
 * socket appears to be writeable and we have something ready to send.
 *
 * Returns 0 when there is nothing left to write, a negative value on
 * error (with con->error_msg set for the fault path), or the positive
 * result of a partial write helper.  Called with con->mutex held.
 */
static int try_write(struct ceph_connection *con)
{
	struct ceph_messenger *msgr = con->msgr;
	int ret = 1;

	dout("try_write start %p state %lu nref %d\n", con, con->state,
	     atomic_read(&con->nref));

more:
	dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);

	/* open the socket first? */
	if (con->sock == NULL) {
		/*
		 * if we were STANDBY and are reconnecting _this_
		 * connection, bump connect_seq now.  Always bump
		 * global_seq.
		 */
		if (test_and_clear_bit(STANDBY, &con->state))
			con->connect_seq++;

		/* queue banner + connect negotiation before any data */
		prepare_write_banner(msgr, con);
		prepare_write_connect(msgr, con, 1);
		prepare_read_banner(con);
		set_bit(CONNECTING, &con->state);
		clear_bit(NEGOTIATING, &con->state);

		BUG_ON(con->in_msg);
		con->in_tag = CEPH_MSGR_TAG_READY;
		dout("try_write initiating connect on %p new state %lu\n",
		     con, con->state);
		con->sock = ceph_tcp_connect(con);
		if (IS_ERR(con->sock)) {
			con->sock = NULL;
			con->error_msg = "connect error";
			ret = -1;
			goto out;
		}
	}

more_kvec:
	/* kvec data queued? */
	if (con->out_skip) {
		/* bytes of a revoked message still owed to the wire */
		ret = write_partial_skip(con);
		if (ret <= 0)
			goto out;
	}
	if (con->out_kvec_left) {
		ret = write_partial_kvec(con);
		if (ret <= 0)
			goto out;
	}

	/* msg pages? */
	if (con->out_msg) {
		if (con->out_msg_done) {
			ceph_msg_put(con->out_msg);
			con->out_msg = NULL;   /* we're done with this one */
			goto do_next;
		}

		ret = write_partial_msg_pages(con);
		if (ret == 1)
			goto more_kvec;  /* we need to send the footer, too! */
		if (ret == 0)
			goto out;
		if (ret < 0) {
			dout("try_write write_partial_msg_pages err %d\n",
			     ret);
			goto out;
		}
	}

do_next:
	/* while still CONNECTING, nothing but the handshake may be sent */
	if (!test_bit(CONNECTING, &con->state)) {
		/* is anything else pending? */
		if (!list_empty(&con->out_queue)) {
			prepare_write_message(con);
			goto more;
		}
		if (con->in_seq > con->in_seq_acked) {
			prepare_write_ack(con);
			goto more;
		}
		if (test_and_clear_bit(KEEPALIVE_PENDING, &con->state)) {
			prepare_write_keepalive(con);
			goto more;
		}
	}

	/* Nothing to do! */
	clear_bit(WRITE_PENDING, &con->state);
	dout("try_write nothing else to write.\n");
	ret = 0;
out:
	dout("try_write done on %p ret %d\n", con, ret);
	return ret;
}
1803 1801
1804 1802
1805 1803
/*
 * Read what we can from the socket.
 *
 * Returns 0 if there is nothing to do (no socket, or STANDBY), a
 * negative value on error (con->error_msg set for the fault path),
 * or whatever a partial-read helper returned.  Called with
 * con->mutex held.
 */
static int try_read(struct ceph_connection *con)
{
	int ret = -1;

	if (!con->sock)
		return 0;

	if (test_bit(STANDBY, &con->state))
		return 0;

	dout("try_read start on %p\n", con);

more:
	dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
	     con->in_base_pos);
	if (test_bit(CONNECTING, &con->state)) {
		/* still handshaking: banner first, then the connect reply */
		if (!test_bit(NEGOTIATING, &con->state)) {
			dout("try_read connecting\n");
			ret = read_partial_banner(con);
			if (ret <= 0)
				goto out;
			ret = process_banner(con);
			if (ret < 0)
				goto out;
		}
		ret = read_partial_connect(con);
		if (ret <= 0)
			goto out;
		ret = process_connect(con);
		if (ret < 0)
			goto out;
		goto more;
	}

	if (con->in_base_pos < 0) {
		/*
		 * skipping + discarding content.
		 *
		 * FIXME: there must be a better way to do this!
		 */
		/* NOTE(review): buf is shared by all connections; that looks
		 * benign only because the contents are discarded — confirm */
		static char buf[1024];
		int skip = min(1024, -con->in_base_pos);
		dout("skipping %d / %d bytes\n", skip, -con->in_base_pos);
		ret = ceph_tcp_recvmsg(con->sock, buf, skip);
		if (ret <= 0)
			goto out;
		con->in_base_pos += ret;
		if (con->in_base_pos)
			goto more;
	}
	if (con->in_tag == CEPH_MSGR_TAG_READY) {
		/*
		 * what's next?
		 */
		ret = ceph_tcp_recvmsg(con->sock, &con->in_tag, 1);
		if (ret <= 0)
			goto out;
		dout("try_read got tag %d\n", (int)con->in_tag);
		switch (con->in_tag) {
		case CEPH_MSGR_TAG_MSG:
			prepare_read_message(con);
			break;
		case CEPH_MSGR_TAG_ACK:
			prepare_read_ack(con);
			break;
		case CEPH_MSGR_TAG_CLOSE:
			set_bit(CLOSED, &con->state);   /* fixme */
			goto out;
		default:
			goto bad_tag;
		}
	}
	if (con->in_tag == CEPH_MSGR_TAG_MSG) {
		ret = read_partial_message(con);
		if (ret <= 0) {
			/* map internal errors to a human-readable reason
			 * for the fault handler */
			switch (ret) {
			case -EBADMSG:
				con->error_msg = "bad crc";
				ret = -EIO;
				break;
			case -EIO:
				con->error_msg = "io error";
				break;
			}
			goto out;
		}
		/* message was skipped/revoked: tag was reset to READY */
		if (con->in_tag == CEPH_MSGR_TAG_READY)
			goto more;
		process_message(con);
		goto more;
	}
	if (con->in_tag == CEPH_MSGR_TAG_ACK) {
		ret = read_partial_ack(con);
		if (ret <= 0)
			goto out;
		process_ack(con);
		goto more;
	}

out:
	dout("try_read done on %p ret %d\n", con, ret);
	return ret;

bad_tag:
	pr_err("try_read bad con->in_tag = %d\n", (int)con->in_tag);
	con->error_msg = "protocol error, garbage tag";
	ret = -1;
	goto out;
}
1918 1916
1919 1917
1920 /* 1918 /*
1921 * Atomically queue work on a connection. Bump @con reference to 1919 * Atomically queue work on a connection. Bump @con reference to
1922 * avoid races with connection teardown. 1920 * avoid races with connection teardown.
1923 */ 1921 */
1924 static void queue_con(struct ceph_connection *con) 1922 static void queue_con(struct ceph_connection *con)
1925 { 1923 {
1926 if (test_bit(DEAD, &con->state)) { 1924 if (test_bit(DEAD, &con->state)) {
1927 dout("queue_con %p ignoring: DEAD\n", 1925 dout("queue_con %p ignoring: DEAD\n",
1928 con); 1926 con);
1929 return; 1927 return;
1930 } 1928 }
1931 1929
1932 if (!con->ops->get(con)) { 1930 if (!con->ops->get(con)) {
1933 dout("queue_con %p ref count 0\n", con); 1931 dout("queue_con %p ref count 0\n", con);
1934 return; 1932 return;
1935 } 1933 }
1936 1934
1937 if (!queue_delayed_work(ceph_msgr_wq, &con->work, 0)) { 1935 if (!queue_delayed_work(ceph_msgr_wq, &con->work, 0)) {
1938 dout("queue_con %p - already queued\n", con); 1936 dout("queue_con %p - already queued\n", con);
1939 con->ops->put(con); 1937 con->ops->put(con);
1940 } else { 1938 } else {
1941 dout("queue_con %p\n", con); 1939 dout("queue_con %p\n", con);
1942 } 1940 }
1943 } 1941 }
1944 1942
/*
 * Do some work on a connection.  Drop a connection ref when we're done.
 *
 * This is the workqueue handler for con->work; the reference it drops
 * is the one taken by queue_con() (or by the delayed requeue in
 * ceph_fault()).
 */
static void con_work(struct work_struct *work)
{
	struct ceph_connection *con = container_of(work, struct ceph_connection,
						   work.work);

	mutex_lock(&con->mutex);

	if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
		dout("con_work CLOSED\n");
		con_close_socket(con);
		goto done;
	}
	if (test_and_clear_bit(OPENING, &con->state)) {
		/* reopen w/ new peer */
		dout("con_work OPENING\n");
		con_close_socket(con);
	}

	/* a closed socket, a read error, or a write error all take the
	 * fault path; ceph_fault() expects to take con->mutex itself,
	 * so drop it first */
	if (test_and_clear_bit(SOCK_CLOSED, &con->state) ||
	    try_read(con) < 0 ||
	    try_write(con) < 0) {
		mutex_unlock(&con->mutex);
		ceph_fault(con);     /* error/fault path */
		goto done_unlocked;
	}

done:
	mutex_unlock(&con->mutex);
done_unlocked:
	con->ops->put(con);
}
1979 1977
1980 1978
/*
 * Generic error/fault handler.  A retry mechanism is used with
 * exponential backoff: transient failures (including authorization
 * failures from servers with stale rotating keys) are retried with
 * an increasing delay rather than closing the connection for good.
 */
static void ceph_fault(struct ceph_connection *con)
{
	pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
	       ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
	dout("fault %p state %lu to peer %s\n",
	     con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));

	/* lossy channels are never retried; just report the fault */
	if (test_bit(LOSSYTX, &con->state)) {
		dout("fault on LOSSYTX channel\n");
		goto out;
	}

	mutex_lock(&con->mutex);
	if (test_bit(CLOSED, &con->state))
		goto out_unlock;

	con_close_socket(con);

	/* drop any partially-read incoming message */
	if (con->in_msg) {
		ceph_msg_put(con->in_msg);
		con->in_msg = NULL;
	}

	/* Requeue anything that hasn't been acked */
	list_splice_init(&con->out_sent, &con->out_queue);

	/* If there are no messages in the queue, place the connection
	 * in a STANDBY state (i.e., don't try to reconnect just yet). */
	if (list_empty(&con->out_queue) && !con->out_keepalive_pending) {
		dout("fault setting STANDBY\n");
		set_bit(STANDBY, &con->state);
	} else {
		/* retry after a delay. */
		if (con->delay == 0)
			con->delay = BASE_DELAY_INTERVAL;
		else if (con->delay < MAX_DELAY_INTERVAL)
			con->delay *= 2;	/* exponential backoff, capped */
		dout("fault queueing %p delay %lu\n", con, con->delay);
		con->ops->get(con);
		/* drop the ref again if the work was already queued */
		if (queue_delayed_work(ceph_msgr_wq, &con->work,
				       round_jiffies_relative(con->delay)) == 0)
			con->ops->put(con);
	}

out_unlock:
	mutex_unlock(&con->mutex);
out:
	/*
	 * in case we faulted due to authentication, invalidate our
	 * current tickets so that we can get new ones.
	 */
	if (con->auth_retry && con->ops->invalidate_authorizer) {
		dout("calling invalidate_authorizer()\n");
		con->ops->invalidate_authorizer(con);
	}

	if (con->ops->fault)
		con->ops->fault(con);
}
2044 2042
2045 2043
2046 2044
/*
 * create a new messenger instance
 *
 * @myaddr: optional local address to bind the instance to; may be NULL.
 * @supported_features / @required_features: feature bits advertised to
 * and demanded from peers during connect negotiation.
 *
 * Returns the new messenger or an ERR_PTR on allocation failure.
 */
struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
					     u32 supported_features,
					     u32 required_features)
{
	struct ceph_messenger *msgr;

	msgr = kzalloc(sizeof(*msgr), GFP_KERNEL);
	if (msgr == NULL)
		return ERR_PTR(-ENOMEM);

	msgr->supported_features = supported_features;
	msgr->required_features = required_features;

	spin_lock_init(&msgr->global_seq_lock);

	/* the zero page is needed if a request is "canceled" while the message
	 * is being written over the socket */
	msgr->zero_page = __page_cache_alloc(GFP_KERNEL | __GFP_ZERO);
	if (!msgr->zero_page) {
		kfree(msgr);
		return ERR_PTR(-ENOMEM);
	}
	/* keep it permanently mapped for use in write paths */
	kmap(msgr->zero_page);

	if (myaddr)
		msgr->inst.addr = *myaddr;

	/* select a random nonce */
	/* note: type is reset *after* copying myaddr, deliberately */
	msgr->inst.addr.type = 0;
	get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
	encode_my_addr(msgr);

	dout("messenger_create %p\n", msgr);
	return msgr;
}
EXPORT_SYMBOL(ceph_messenger_create);
2086 2084
/*
 * Tear down a messenger created by ceph_messenger_create(): unmap and
 * free the zero page, then free the messenger itself.  Callers must
 * ensure no connections still reference @msgr.
 */
void ceph_messenger_destroy(struct ceph_messenger *msgr)
{
	dout("destroy %p\n", msgr);
	kunmap(msgr->zero_page);	/* undo the permanent kmap */
	__free_page(msgr->zero_page);
	kfree(msgr);
	dout("destroyed messenger %p\n", msgr);
}
EXPORT_SYMBOL(ceph_messenger_destroy);
2096 2094
/*
 * Queue up an outgoing message on the given connection.
 *
 * Takes ownership of the caller's reference to @msg: it is either
 * queued (and dropped later when sent) or dropped immediately if the
 * connection is already CLOSED.
 */
void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
{
	if (test_bit(CLOSED, &con->state)) {
		dout("con_send %p closed, dropping %p\n", con, msg);
		ceph_msg_put(msg);
		return;
	}

	/* set src+dst */
	msg->hdr.src = con->msgr->inst.name;

	BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));

	/* seq is assigned later, just before the message hits the wire */
	msg->needs_out_seq = true;

	/* queue */
	mutex_lock(&con->mutex);
	BUG_ON(!list_empty(&msg->list_head));
	list_add_tail(&msg->list_head, &con->out_queue);
	dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg,
	     ENTITY_NAME(con->peer_name), le16_to_cpu(msg->hdr.type),
	     ceph_msg_type_name(le16_to_cpu(msg->hdr.type)),
	     le32_to_cpu(msg->hdr.front_len),
	     le32_to_cpu(msg->hdr.middle_len),
	     le32_to_cpu(msg->hdr.data_len));
	mutex_unlock(&con->mutex);

	/* if there wasn't anything waiting to send before, queue
	 * new work */
	if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
		queue_con(con);
}
EXPORT_SYMBOL(ceph_con_send);
2133 2131
/*
 * Revoke a message that was previously queued for send
 *
 * Drops the queue's reference to @msg if it was still queued, and/or
 * detaches it if it was in the middle of being written (queuing an
 * out_skip so the bytes already committed to kvecs are zero-filled).
 */
void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
{
	mutex_lock(&con->mutex);
	if (!list_empty(&msg->list_head)) {
		dout("con_revoke %p msg %p - was on queue\n", con, msg);
		list_del_init(&msg->list_head);
		ceph_msg_put(msg);
		msg->hdr.seq = 0;	/* so a requeue gets a fresh seq */
	}
	if (con->out_msg == msg) {
		dout("con_revoke %p msg %p - was sending\n", con, msg);
		con->out_msg = NULL;
		if (con->out_kvec_is_msg) {
			/* skip over whatever of this message is still
			 * pending in the kvec array */
			con->out_skip = con->out_kvec_bytes;
			con->out_kvec_is_msg = false;
		}
		ceph_msg_put(msg);
		msg->hdr.seq = 0;
	}
	mutex_unlock(&con->mutex);
}
2158 2156
/*
 * Revoke a message that we may be reading data into
 *
 * If @msg is the message currently being received, drop it and arrange
 * for the remainder of its on-wire bytes to be discarded by making
 * in_base_pos negative (the skip path in try_read() consumes negative
 * in_base_pos before reading the next tag).
 */
void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
{
	mutex_lock(&con->mutex);
	if (con->in_msg && con->in_msg == msg) {
		unsigned front_len = le32_to_cpu(con->in_hdr.front_len);
		unsigned middle_len = le32_to_cpu(con->in_hdr.middle_len);
		unsigned data_len = le32_to_cpu(con->in_hdr.data_len);

		/* skip rest of message */
		dout("con_revoke_pages %p msg %p revoked\n", con, msg);
		/* back up past the entire wire footprint of the message;
		 * whatever was already read has advanced in_base_pos, so
		 * the difference is exactly what remains to discard */
		con->in_base_pos = con->in_base_pos -
				sizeof(struct ceph_msg_header) -
				front_len -
				middle_len -
				data_len -
				sizeof(struct ceph_msg_footer);
		ceph_msg_put(con->in_msg);
		con->in_msg = NULL;
		con->in_tag = CEPH_MSGR_TAG_READY;
		con->in_seq++;
	} else {
		dout("con_revoke_pages %p msg %p pages %p no-op\n",
		     con, con->in_msg, msg);
	}
	mutex_unlock(&con->mutex);
}
2188 2186
2189 /* 2187 /*
2190 * Queue a keepalive byte to ensure the tcp connection is alive. 2188 * Queue a keepalive byte to ensure the tcp connection is alive.
2191 */ 2189 */
2192 void ceph_con_keepalive(struct ceph_connection *con) 2190 void ceph_con_keepalive(struct ceph_connection *con)
2193 { 2191 {
2194 if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && 2192 if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 &&
2195 test_and_set_bit(WRITE_PENDING, &con->state) == 0) 2193 test_and_set_bit(WRITE_PENDING, &con->state) == 0)
2196 queue_con(con); 2194 queue_con(con);
2197 } 2195 }
2198 EXPORT_SYMBOL(ceph_con_keepalive); 2196 EXPORT_SYMBOL(ceph_con_keepalive);
2199 2197
2200 2198
/*
 * construct a new message with given type, size
 * the new msg has a ref count of 1.
 *
 * @type: CEPH_MSG_* wire type.
 * @front_len: size of the "front" (fixed header payload) section.
 * @flags: gfp allocation flags.
 *
 * Returns NULL on allocation failure.
 */
struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
{
	struct ceph_msg *m;

	m = kmalloc(sizeof(*m), flags);
	if (m == NULL)
		goto out;
	kref_init(&m->kref);
	INIT_LIST_HEAD(&m->list_head);

	/* header; tid/seq are assumed to be filled in later by callers
	 * and the send path — TODO confirm */
	m->hdr.tid = 0;
	m->hdr.type = cpu_to_le16(type);
	m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
	m->hdr.version = 0;
	m->hdr.front_len = cpu_to_le32(front_len);
	m->hdr.middle_len = 0;
	m->hdr.data_len = 0;
	m->hdr.data_off = 0;
	m->hdr.reserved = 0;
	m->footer.front_crc = 0;
	m->footer.middle_crc = 0;
	m->footer.data_crc = 0;
	m->footer.flags = 0;
	m->front_max = front_len;
	m->front_is_vmalloc = false;
	m->more_to_follow = false;
	m->pool = NULL;

	/* front */
	if (front_len) {
		if (front_len > PAGE_CACHE_SIZE) {
			/* too big for kmalloc'd contiguous memory to be
			 * worthwhile; fall back to vmalloc */
			m->front.iov_base = __vmalloc(front_len, flags,
						      PAGE_KERNEL);
			m->front_is_vmalloc = true;
		} else {
			m->front.iov_base = kmalloc(front_len, flags);
		}
		if (m->front.iov_base == NULL) {
			pr_err("msg_new can't allocate %d bytes\n",
			       front_len);
			goto out2;
		}
	} else {
		m->front.iov_base = NULL;
	}
	m->front.iov_len = front_len;

	/* middle */
	m->middle = NULL;

	/* data */
	m->nr_pages = 0;
	m->page_alignment = 0;
	m->pages = NULL;
	m->pagelist = NULL;
	m->bio = NULL;
	m->bio_iter = NULL;
	m->bio_seg = 0;
	m->trail = NULL;

	dout("ceph_msg_new %p front %d\n", m, front_len);
	return m;

out2:
	/* drops the ref from kref_init and frees m (and any front) */
	ceph_msg_put(m);
out:
	pr_err("msg_new can't create type %d front %d\n", type, front_len);
	return NULL;
}
EXPORT_SYMBOL(ceph_msg_new);
2275 2273
2276 /* 2274 /*
2277 * Allocate "middle" portion of a message, if it is needed and wasn't 2275 * Allocate "middle" portion of a message, if it is needed and wasn't
2278 * allocated by alloc_msg. This allows us to read a small fixed-size 2276 * allocated by alloc_msg. This allows us to read a small fixed-size
2279 * per-type header in the front and then gracefully fail (i.e., 2277 * per-type header in the front and then gracefully fail (i.e.,
2280 * propagate the error to the caller based on info in the front) when 2278 * propagate the error to the caller based on info in the front) when
2281 * the middle is too large. 2279 * the middle is too large.
2282 */ 2280 */
2283 static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg) 2281 static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg)
2284 { 2282 {
2285 int type = le16_to_cpu(msg->hdr.type); 2283 int type = le16_to_cpu(msg->hdr.type);
2286 int middle_len = le32_to_cpu(msg->hdr.middle_len); 2284 int middle_len = le32_to_cpu(msg->hdr.middle_len);
2287 2285
2288 dout("alloc_middle %p type %d %s middle_len %d\n", msg, type, 2286 dout("alloc_middle %p type %d %s middle_len %d\n", msg, type,
2289 ceph_msg_type_name(type), middle_len); 2287 ceph_msg_type_name(type), middle_len);
2290 BUG_ON(!middle_len); 2288 BUG_ON(!middle_len);
2291 BUG_ON(msg->middle); 2289 BUG_ON(msg->middle);
2292 2290
2293 msg->middle = ceph_buffer_new(middle_len, GFP_NOFS); 2291 msg->middle = ceph_buffer_new(middle_len, GFP_NOFS);
2294 if (!msg->middle) 2292 if (!msg->middle)
2295 return -ENOMEM; 2293 return -ENOMEM;
2296 return 0; 2294 return 0;
2297 } 2295 }
2298 2296
2299 /* 2297 /*
2300 * Generic message allocator, for incoming messages. 2298 * Generic message allocator, for incoming messages.
2301 */ 2299 */
2302 static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, 2300 static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
2303 struct ceph_msg_header *hdr, 2301 struct ceph_msg_header *hdr,
2304 int *skip) 2302 int *skip)
2305 { 2303 {
2306 int type = le16_to_cpu(hdr->type); 2304 int type = le16_to_cpu(hdr->type);
2307 int front_len = le32_to_cpu(hdr->front_len); 2305 int front_len = le32_to_cpu(hdr->front_len);
2308 int middle_len = le32_to_cpu(hdr->middle_len); 2306 int middle_len = le32_to_cpu(hdr->middle_len);
2309 struct ceph_msg *msg = NULL; 2307 struct ceph_msg *msg = NULL;
2310 int ret; 2308 int ret;
2311 2309
2312 if (con->ops->alloc_msg) { 2310 if (con->ops->alloc_msg) {
2313 mutex_unlock(&con->mutex); 2311 mutex_unlock(&con->mutex);
2314 msg = con->ops->alloc_msg(con, hdr, skip); 2312 msg = con->ops->alloc_msg(con, hdr, skip);
2315 mutex_lock(&con->mutex); 2313 mutex_lock(&con->mutex);
2316 if (!msg || *skip) 2314 if (!msg || *skip)
2317 return NULL; 2315 return NULL;
2318 } 2316 }
2319 if (!msg) { 2317 if (!msg) {
2320 *skip = 0; 2318 *skip = 0;
2321 msg = ceph_msg_new(type, front_len, GFP_NOFS); 2319 msg = ceph_msg_new(type, front_len, GFP_NOFS);
2322 if (!msg) { 2320 if (!msg) {
2323 pr_err("unable to allocate msg type %d len %d\n", 2321 pr_err("unable to allocate msg type %d len %d\n",
2324 type, front_len); 2322 type, front_len);
2325 return NULL; 2323 return NULL;
2326 } 2324 }
2327 msg->page_alignment = le16_to_cpu(hdr->data_off); 2325 msg->page_alignment = le16_to_cpu(hdr->data_off);
2328 } 2326 }
2329 memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); 2327 memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
2330 2328
2331 if (middle_len && !msg->middle) { 2329 if (middle_len && !msg->middle) {
2332 ret = ceph_alloc_middle(con, msg); 2330 ret = ceph_alloc_middle(con, msg);
2333 if (ret < 0) { 2331 if (ret < 0) {
2334 ceph_msg_put(msg); 2332 ceph_msg_put(msg);
2335 return NULL; 2333 return NULL;
2336 } 2334 }
2337 } 2335 }
2338 2336
2339 return msg; 2337 return msg;
2340 } 2338 }
2341 2339
2342 2340
2343 /* 2341 /*
2344 * Free a generically kmalloc'd message. 2342 * Free a generically kmalloc'd message.
2345 */ 2343 */
2346 void ceph_msg_kfree(struct ceph_msg *m) 2344 void ceph_msg_kfree(struct ceph_msg *m)
2347 { 2345 {
2348 dout("msg_kfree %p\n", m); 2346 dout("msg_kfree %p\n", m);
2349 if (m->front_is_vmalloc) 2347 if (m->front_is_vmalloc)
2350 vfree(m->front.iov_base); 2348 vfree(m->front.iov_base);
2351 else 2349 else
2352 kfree(m->front.iov_base); 2350 kfree(m->front.iov_base);
2353 kfree(m); 2351 kfree(m);
2354 } 2352 }
2355 2353
2356 /* 2354 /*
2357 * Drop a msg ref. Destroy as needed. 2355 * Drop a msg ref. Destroy as needed.
2358 */ 2356 */
2359 void ceph_msg_last_put(struct kref *kref) 2357 void ceph_msg_last_put(struct kref *kref)
2360 { 2358 {
2361 struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); 2359 struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
2362 2360
2363 dout("ceph_msg_put last one on %p\n", m); 2361 dout("ceph_msg_put last one on %p\n", m);
2364 WARN_ON(!list_empty(&m->list_head)); 2362 WARN_ON(!list_empty(&m->list_head));
2365 2363
2366 /* drop middle, data, if any */ 2364 /* drop middle, data, if any */
2367 if (m->middle) { 2365 if (m->middle) {
2368 ceph_buffer_put(m->middle); 2366 ceph_buffer_put(m->middle);
2369 m->middle = NULL; 2367 m->middle = NULL;
2370 } 2368 }
2371 m->nr_pages = 0; 2369 m->nr_pages = 0;
2372 m->pages = NULL; 2370 m->pages = NULL;
2373 2371
2374 if (m->pagelist) { 2372 if (m->pagelist) {
2375 ceph_pagelist_release(m->pagelist); 2373 ceph_pagelist_release(m->pagelist);
2376 kfree(m->pagelist); 2374 kfree(m->pagelist);
2377 m->pagelist = NULL; 2375 m->pagelist = NULL;
2378 } 2376 }
2379 2377
2380 m->trail = NULL; 2378 m->trail = NULL;
2381 2379
2382 if (m->pool) 2380 if (m->pool)
2383 ceph_msgpool_put(m->pool, m); 2381 ceph_msgpool_put(m->pool, m);
2384 else 2382 else
2385 ceph_msg_kfree(m); 2383 ceph_msg_kfree(m);
2386 } 2384 }
2387 EXPORT_SYMBOL(ceph_msg_last_put); 2385 EXPORT_SYMBOL(ceph_msg_last_put);
2388 2386
2389 void ceph_msg_dump(struct ceph_msg *msg) 2387 void ceph_msg_dump(struct ceph_msg *msg)
2390 { 2388 {
2391 pr_debug("msg_dump %p (front_max %d nr_pages %d)\n", msg, 2389 pr_debug("msg_dump %p (front_max %d nr_pages %d)\n", msg,
2392 msg->front_max, msg->nr_pages); 2390 msg->front_max, msg->nr_pages);
2393 print_hex_dump(KERN_DEBUG, "header: ", 2391 print_hex_dump(KERN_DEBUG, "header: ",
2394 DUMP_PREFIX_OFFSET, 16, 1, 2392 DUMP_PREFIX_OFFSET, 16, 1,
2395 &msg->hdr, sizeof(msg->hdr), true); 2393 &msg->hdr, sizeof(msg->hdr), true);
2396 print_hex_dump(KERN_DEBUG, " front: ", 2394 print_hex_dump(KERN_DEBUG, " front: ",
2397 DUMP_PREFIX_OFFSET, 16, 1, 2395 DUMP_PREFIX_OFFSET, 16, 1,
2398 msg->front.iov_base, msg->front.iov_len, true); 2396 msg->front.iov_base, msg->front.iov_len, true);
2399 if (msg->middle) 2397 if (msg->middle)
2400 print_hex_dump(KERN_DEBUG, "middle: ", 2398 print_hex_dump(KERN_DEBUG, "middle: ",
2401 DUMP_PREFIX_OFFSET, 16, 1, 2399 DUMP_PREFIX_OFFSET, 16, 1,
2402 msg->middle->vec.iov_base, 2400 msg->middle->vec.iov_base,
2403 msg->middle->vec.iov_len, true); 2401 msg->middle->vec.iov_len, true);
2404 print_hex_dump(KERN_DEBUG, "footer: ", 2402 print_hex_dump(KERN_DEBUG, "footer: ",
2405 DUMP_PREFIX_OFFSET, 16, 1, 2403 DUMP_PREFIX_OFFSET, 16, 1,
2406 &msg->footer, sizeof(msg->footer), true); 2404 &msg->footer, sizeof(msg->footer), true);
2407 } 2405 }
2408 EXPORT_SYMBOL(ceph_msg_dump); 2406 EXPORT_SYMBOL(ceph_msg_dump);
2409 2407