Commit 692d20f576fb26f62c83f80dbf3ea899998391b7
1 parent
38815b7802
libceph: retry after authorization failure
If we mark the connection CLOSED we will give up trying to reconnect to this server instance. That is appropriate for things like a protocol version mismatch that won't change until the server is restarted, at which point we'll get a new addr and reconnect. An authorization failure like this is probably due to the server not properly rotating its secret keys, however, and should be treated as transient so that the normal backoff and retry behavior kicks in. Signed-off-by: Sage Weil <sage@newdream.net>
Showing 1 changed file with 0 additions and 2 deletions Inline Diff
net/ceph/messenger.c
1 | #include <linux/ceph/ceph_debug.h> | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/crc32c.h> | 3 | #include <linux/crc32c.h> |
4 | #include <linux/ctype.h> | 4 | #include <linux/ctype.h> |
5 | #include <linux/highmem.h> | 5 | #include <linux/highmem.h> |
6 | #include <linux/inet.h> | 6 | #include <linux/inet.h> |
7 | #include <linux/kthread.h> | 7 | #include <linux/kthread.h> |
8 | #include <linux/net.h> | 8 | #include <linux/net.h> |
9 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
10 | #include <linux/socket.h> | 10 | #include <linux/socket.h> |
11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
12 | #include <linux/bio.h> | 12 | #include <linux/bio.h> |
13 | #include <linux/blkdev.h> | 13 | #include <linux/blkdev.h> |
14 | #include <net/tcp.h> | 14 | #include <net/tcp.h> |
15 | 15 | ||
16 | #include <linux/ceph/libceph.h> | 16 | #include <linux/ceph/libceph.h> |
17 | #include <linux/ceph/messenger.h> | 17 | #include <linux/ceph/messenger.h> |
18 | #include <linux/ceph/decode.h> | 18 | #include <linux/ceph/decode.h> |
19 | #include <linux/ceph/pagelist.h> | 19 | #include <linux/ceph/pagelist.h> |
20 | 20 | ||
21 | /* | 21 | /* |
22 | * Ceph uses the messenger to exchange ceph_msg messages with other | 22 | * Ceph uses the messenger to exchange ceph_msg messages with other |
23 | * hosts in the system. The messenger provides ordered and reliable | 23 | * hosts in the system. The messenger provides ordered and reliable |
24 | * delivery. We tolerate TCP disconnects by reconnecting (with | 24 | * delivery. We tolerate TCP disconnects by reconnecting (with |
25 | * exponential backoff) in the case of a fault (disconnection, bad | 25 | * exponential backoff) in the case of a fault (disconnection, bad |
26 | * crc, protocol error). Acks allow sent messages to be discarded by | 26 | * crc, protocol error). Acks allow sent messages to be discarded by |
27 | * the sender. | 27 | * the sender. |
28 | */ | 28 | */ |
29 | 29 | ||
30 | /* static tag bytes (protocol control messages) */ | 30 | /* static tag bytes (protocol control messages) */ |
31 | static char tag_msg = CEPH_MSGR_TAG_MSG; | 31 | static char tag_msg = CEPH_MSGR_TAG_MSG; |
32 | static char tag_ack = CEPH_MSGR_TAG_ACK; | 32 | static char tag_ack = CEPH_MSGR_TAG_ACK; |
33 | static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; | 33 | static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; |
34 | 34 | ||
35 | #ifdef CONFIG_LOCKDEP | 35 | #ifdef CONFIG_LOCKDEP |
36 | static struct lock_class_key socket_class; | 36 | static struct lock_class_key socket_class; |
37 | #endif | 37 | #endif |
38 | 38 | ||
39 | 39 | ||
40 | static void queue_con(struct ceph_connection *con); | 40 | static void queue_con(struct ceph_connection *con); |
41 | static void con_work(struct work_struct *); | 41 | static void con_work(struct work_struct *); |
42 | static void ceph_fault(struct ceph_connection *con); | 42 | static void ceph_fault(struct ceph_connection *con); |
43 | 43 | ||
44 | /* | 44 | /* |
45 | * nicely render a sockaddr as a string. | 45 | * nicely render a sockaddr as a string. |
46 | */ | 46 | */ |
47 | #define MAX_ADDR_STR 20 | 47 | #define MAX_ADDR_STR 20 |
48 | #define MAX_ADDR_STR_LEN 60 | 48 | #define MAX_ADDR_STR_LEN 60 |
49 | static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN]; | 49 | static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN]; |
50 | static DEFINE_SPINLOCK(addr_str_lock); | 50 | static DEFINE_SPINLOCK(addr_str_lock); |
51 | static int last_addr_str; | 51 | static int last_addr_str; |
52 | 52 | ||
53 | const char *ceph_pr_addr(const struct sockaddr_storage *ss) | 53 | const char *ceph_pr_addr(const struct sockaddr_storage *ss) |
54 | { | 54 | { |
55 | int i; | 55 | int i; |
56 | char *s; | 56 | char *s; |
57 | struct sockaddr_in *in4 = (void *)ss; | 57 | struct sockaddr_in *in4 = (void *)ss; |
58 | struct sockaddr_in6 *in6 = (void *)ss; | 58 | struct sockaddr_in6 *in6 = (void *)ss; |
59 | 59 | ||
60 | spin_lock(&addr_str_lock); | 60 | spin_lock(&addr_str_lock); |
61 | i = last_addr_str++; | 61 | i = last_addr_str++; |
62 | if (last_addr_str == MAX_ADDR_STR) | 62 | if (last_addr_str == MAX_ADDR_STR) |
63 | last_addr_str = 0; | 63 | last_addr_str = 0; |
64 | spin_unlock(&addr_str_lock); | 64 | spin_unlock(&addr_str_lock); |
65 | s = addr_str[i]; | 65 | s = addr_str[i]; |
66 | 66 | ||
67 | switch (ss->ss_family) { | 67 | switch (ss->ss_family) { |
68 | case AF_INET: | 68 | case AF_INET: |
69 | snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%u", &in4->sin_addr, | 69 | snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%u", &in4->sin_addr, |
70 | (unsigned int)ntohs(in4->sin_port)); | 70 | (unsigned int)ntohs(in4->sin_port)); |
71 | break; | 71 | break; |
72 | 72 | ||
73 | case AF_INET6: | 73 | case AF_INET6: |
74 | snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%u", &in6->sin6_addr, | 74 | snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%u", &in6->sin6_addr, |
75 | (unsigned int)ntohs(in6->sin6_port)); | 75 | (unsigned int)ntohs(in6->sin6_port)); |
76 | break; | 76 | break; |
77 | 77 | ||
78 | default: | 78 | default: |
79 | sprintf(s, "(unknown sockaddr family %d)", (int)ss->ss_family); | 79 | sprintf(s, "(unknown sockaddr family %d)", (int)ss->ss_family); |
80 | } | 80 | } |
81 | 81 | ||
82 | return s; | 82 | return s; |
83 | } | 83 | } |
84 | EXPORT_SYMBOL(ceph_pr_addr); | 84 | EXPORT_SYMBOL(ceph_pr_addr); |
85 | 85 | ||
/*
 * Cache a wire-encoded copy (via ceph_encode_addr()) of the messenger's
 * own address in my_enc_addr so it can be reused without re-encoding.
 */
static void encode_my_addr(struct ceph_messenger *msgr)
{
	memcpy(&msgr->my_enc_addr, &msgr->inst.addr, sizeof(msgr->my_enc_addr));
	ceph_encode_addr(&msgr->my_enc_addr);
}
91 | 91 | ||
92 | /* | 92 | /* |
93 | * work queue for all reading and writing to/from the socket. | 93 | * work queue for all reading and writing to/from the socket. |
94 | */ | 94 | */ |
95 | struct workqueue_struct *ceph_msgr_wq; | 95 | struct workqueue_struct *ceph_msgr_wq; |
96 | 96 | ||
97 | int ceph_msgr_init(void) | 97 | int ceph_msgr_init(void) |
98 | { | 98 | { |
99 | ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0); | 99 | ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0); |
100 | if (!ceph_msgr_wq) { | 100 | if (!ceph_msgr_wq) { |
101 | pr_err("msgr_init failed to create workqueue\n"); | 101 | pr_err("msgr_init failed to create workqueue\n"); |
102 | return -ENOMEM; | 102 | return -ENOMEM; |
103 | } | 103 | } |
104 | return 0; | 104 | return 0; |
105 | } | 105 | } |
106 | EXPORT_SYMBOL(ceph_msgr_init); | 106 | EXPORT_SYMBOL(ceph_msgr_init); |
107 | 107 | ||
/* Tear down the global messenger workqueue created by ceph_msgr_init(). */
void ceph_msgr_exit(void)
{
	destroy_workqueue(ceph_msgr_wq);
}
EXPORT_SYMBOL(ceph_msgr_exit);
113 | 113 | ||
/* Wait for all currently queued messenger work to finish executing. */
void ceph_msgr_flush(void)
{
	flush_workqueue(ceph_msgr_wq);
}
EXPORT_SYMBOL(ceph_msgr_flush);
119 | 119 | ||
120 | 120 | ||
121 | /* | 121 | /* |
122 | * socket callback functions | 122 | * socket callback functions |
123 | */ | 123 | */ |
124 | 124 | ||
125 | /* data available on socket, or listen socket received a connect */ | 125 | /* data available on socket, or listen socket received a connect */ |
126 | static void ceph_data_ready(struct sock *sk, int count_unused) | 126 | static void ceph_data_ready(struct sock *sk, int count_unused) |
127 | { | 127 | { |
128 | struct ceph_connection *con = | 128 | struct ceph_connection *con = |
129 | (struct ceph_connection *)sk->sk_user_data; | 129 | (struct ceph_connection *)sk->sk_user_data; |
130 | if (sk->sk_state != TCP_CLOSE_WAIT) { | 130 | if (sk->sk_state != TCP_CLOSE_WAIT) { |
131 | dout("ceph_data_ready on %p state = %lu, queueing work\n", | 131 | dout("ceph_data_ready on %p state = %lu, queueing work\n", |
132 | con, con->state); | 132 | con, con->state); |
133 | queue_con(con); | 133 | queue_con(con); |
134 | } | 134 | } |
135 | } | 135 | } |
136 | 136 | ||
137 | /* socket has buffer space for writing */ | 137 | /* socket has buffer space for writing */ |
138 | static void ceph_write_space(struct sock *sk) | 138 | static void ceph_write_space(struct sock *sk) |
139 | { | 139 | { |
140 | struct ceph_connection *con = | 140 | struct ceph_connection *con = |
141 | (struct ceph_connection *)sk->sk_user_data; | 141 | (struct ceph_connection *)sk->sk_user_data; |
142 | 142 | ||
143 | /* only queue to workqueue if there is data we want to write. */ | 143 | /* only queue to workqueue if there is data we want to write. */ |
144 | if (test_bit(WRITE_PENDING, &con->state)) { | 144 | if (test_bit(WRITE_PENDING, &con->state)) { |
145 | dout("ceph_write_space %p queueing write work\n", con); | 145 | dout("ceph_write_space %p queueing write work\n", con); |
146 | queue_con(con); | 146 | queue_con(con); |
147 | } else { | 147 | } else { |
148 | dout("ceph_write_space %p nothing to write\n", con); | 148 | dout("ceph_write_space %p nothing to write\n", con); |
149 | } | 149 | } |
150 | 150 | ||
151 | /* since we have our own write_space, clear the SOCK_NOSPACE flag */ | 151 | /* since we have our own write_space, clear the SOCK_NOSPACE flag */ |
152 | clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | 152 | clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
153 | } | 153 | } |
154 | 154 | ||
155 | /* socket's state has changed */ | 155 | /* socket's state has changed */ |
156 | static void ceph_state_change(struct sock *sk) | 156 | static void ceph_state_change(struct sock *sk) |
157 | { | 157 | { |
158 | struct ceph_connection *con = | 158 | struct ceph_connection *con = |
159 | (struct ceph_connection *)sk->sk_user_data; | 159 | (struct ceph_connection *)sk->sk_user_data; |
160 | 160 | ||
161 | dout("ceph_state_change %p state = %lu sk_state = %u\n", | 161 | dout("ceph_state_change %p state = %lu sk_state = %u\n", |
162 | con, con->state, sk->sk_state); | 162 | con, con->state, sk->sk_state); |
163 | 163 | ||
164 | if (test_bit(CLOSED, &con->state)) | 164 | if (test_bit(CLOSED, &con->state)) |
165 | return; | 165 | return; |
166 | 166 | ||
167 | switch (sk->sk_state) { | 167 | switch (sk->sk_state) { |
168 | case TCP_CLOSE: | 168 | case TCP_CLOSE: |
169 | dout("ceph_state_change TCP_CLOSE\n"); | 169 | dout("ceph_state_change TCP_CLOSE\n"); |
170 | case TCP_CLOSE_WAIT: | 170 | case TCP_CLOSE_WAIT: |
171 | dout("ceph_state_change TCP_CLOSE_WAIT\n"); | 171 | dout("ceph_state_change TCP_CLOSE_WAIT\n"); |
172 | if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) { | 172 | if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) { |
173 | if (test_bit(CONNECTING, &con->state)) | 173 | if (test_bit(CONNECTING, &con->state)) |
174 | con->error_msg = "connection failed"; | 174 | con->error_msg = "connection failed"; |
175 | else | 175 | else |
176 | con->error_msg = "socket closed"; | 176 | con->error_msg = "socket closed"; |
177 | queue_con(con); | 177 | queue_con(con); |
178 | } | 178 | } |
179 | break; | 179 | break; |
180 | case TCP_ESTABLISHED: | 180 | case TCP_ESTABLISHED: |
181 | dout("ceph_state_change TCP_ESTABLISHED\n"); | 181 | dout("ceph_state_change TCP_ESTABLISHED\n"); |
182 | queue_con(con); | 182 | queue_con(con); |
183 | break; | 183 | break; |
184 | } | 184 | } |
185 | } | 185 | } |
186 | 186 | ||
/*
 * set up socket callbacks
 *
 * Points sk_user_data back at @con so ceph_data_ready(),
 * ceph_write_space() and ceph_state_change() can recover the
 * connection from the bare struct sock they are handed.
 */
static void set_sock_callbacks(struct socket *sock,
			       struct ceph_connection *con)
{
	struct sock *sk = sock->sk;
	sk->sk_user_data = (void *)con;
	sk->sk_data_ready = ceph_data_ready;
	sk->sk_write_space = ceph_write_space;
	sk->sk_state_change = ceph_state_change;
}
199 | 199 | ||
200 | 200 | ||
201 | /* | 201 | /* |
202 | * socket helpers | 202 | * socket helpers |
203 | */ | 203 | */ |
204 | 204 | ||
205 | /* | 205 | /* |
206 | * initiate connection to a remote socket. | 206 | * initiate connection to a remote socket. |
207 | */ | 207 | */ |
208 | static struct socket *ceph_tcp_connect(struct ceph_connection *con) | 208 | static struct socket *ceph_tcp_connect(struct ceph_connection *con) |
209 | { | 209 | { |
210 | struct sockaddr_storage *paddr = &con->peer_addr.in_addr; | 210 | struct sockaddr_storage *paddr = &con->peer_addr.in_addr; |
211 | struct socket *sock; | 211 | struct socket *sock; |
212 | int ret; | 212 | int ret; |
213 | 213 | ||
214 | BUG_ON(con->sock); | 214 | BUG_ON(con->sock); |
215 | ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM, | 215 | ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM, |
216 | IPPROTO_TCP, &sock); | 216 | IPPROTO_TCP, &sock); |
217 | if (ret) | 217 | if (ret) |
218 | return ERR_PTR(ret); | 218 | return ERR_PTR(ret); |
219 | con->sock = sock; | 219 | con->sock = sock; |
220 | sock->sk->sk_allocation = GFP_NOFS; | 220 | sock->sk->sk_allocation = GFP_NOFS; |
221 | 221 | ||
222 | #ifdef CONFIG_LOCKDEP | 222 | #ifdef CONFIG_LOCKDEP |
223 | lockdep_set_class(&sock->sk->sk_lock, &socket_class); | 223 | lockdep_set_class(&sock->sk->sk_lock, &socket_class); |
224 | #endif | 224 | #endif |
225 | 225 | ||
226 | set_sock_callbacks(sock, con); | 226 | set_sock_callbacks(sock, con); |
227 | 227 | ||
228 | dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); | 228 | dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); |
229 | 229 | ||
230 | ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), | 230 | ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), |
231 | O_NONBLOCK); | 231 | O_NONBLOCK); |
232 | if (ret == -EINPROGRESS) { | 232 | if (ret == -EINPROGRESS) { |
233 | dout("connect %s EINPROGRESS sk_state = %u\n", | 233 | dout("connect %s EINPROGRESS sk_state = %u\n", |
234 | ceph_pr_addr(&con->peer_addr.in_addr), | 234 | ceph_pr_addr(&con->peer_addr.in_addr), |
235 | sock->sk->sk_state); | 235 | sock->sk->sk_state); |
236 | ret = 0; | 236 | ret = 0; |
237 | } | 237 | } |
238 | if (ret < 0) { | 238 | if (ret < 0) { |
239 | pr_err("connect %s error %d\n", | 239 | pr_err("connect %s error %d\n", |
240 | ceph_pr_addr(&con->peer_addr.in_addr), ret); | 240 | ceph_pr_addr(&con->peer_addr.in_addr), ret); |
241 | sock_release(sock); | 241 | sock_release(sock); |
242 | con->sock = NULL; | 242 | con->sock = NULL; |
243 | con->error_msg = "connect error"; | 243 | con->error_msg = "connect error"; |
244 | } | 244 | } |
245 | 245 | ||
246 | if (ret < 0) | 246 | if (ret < 0) |
247 | return ERR_PTR(ret); | 247 | return ERR_PTR(ret); |
248 | return sock; | 248 | return sock; |
249 | } | 249 | } |
250 | 250 | ||
251 | static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) | 251 | static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) |
252 | { | 252 | { |
253 | struct kvec iov = {buf, len}; | 253 | struct kvec iov = {buf, len}; |
254 | struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; | 254 | struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; |
255 | int r; | 255 | int r; |
256 | 256 | ||
257 | r = kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags); | 257 | r = kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags); |
258 | if (r == -EAGAIN) | 258 | if (r == -EAGAIN) |
259 | r = 0; | 259 | r = 0; |
260 | return r; | 260 | return r; |
261 | } | 261 | } |
262 | 262 | ||
263 | /* | 263 | /* |
264 | * write something. @more is true if caller will be sending more data | 264 | * write something. @more is true if caller will be sending more data |
265 | * shortly. | 265 | * shortly. |
266 | */ | 266 | */ |
267 | static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, | 267 | static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, |
268 | size_t kvlen, size_t len, int more) | 268 | size_t kvlen, size_t len, int more) |
269 | { | 269 | { |
270 | struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; | 270 | struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; |
271 | int r; | 271 | int r; |
272 | 272 | ||
273 | if (more) | 273 | if (more) |
274 | msg.msg_flags |= MSG_MORE; | 274 | msg.msg_flags |= MSG_MORE; |
275 | else | 275 | else |
276 | msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ | 276 | msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ |
277 | 277 | ||
278 | r = kernel_sendmsg(sock, &msg, iov, kvlen, len); | 278 | r = kernel_sendmsg(sock, &msg, iov, kvlen, len); |
279 | if (r == -EAGAIN) | 279 | if (r == -EAGAIN) |
280 | r = 0; | 280 | r = 0; |
281 | return r; | 281 | return r; |
282 | } | 282 | } |
283 | 283 | ||
284 | 284 | ||
285 | /* | 285 | /* |
286 | * Shutdown/close the socket for the given connection. | 286 | * Shutdown/close the socket for the given connection. |
287 | */ | 287 | */ |
288 | static int con_close_socket(struct ceph_connection *con) | 288 | static int con_close_socket(struct ceph_connection *con) |
289 | { | 289 | { |
290 | int rc; | 290 | int rc; |
291 | 291 | ||
292 | dout("con_close_socket on %p sock %p\n", con, con->sock); | 292 | dout("con_close_socket on %p sock %p\n", con, con->sock); |
293 | if (!con->sock) | 293 | if (!con->sock) |
294 | return 0; | 294 | return 0; |
295 | set_bit(SOCK_CLOSED, &con->state); | 295 | set_bit(SOCK_CLOSED, &con->state); |
296 | rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR); | 296 | rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR); |
297 | sock_release(con->sock); | 297 | sock_release(con->sock); |
298 | con->sock = NULL; | 298 | con->sock = NULL; |
299 | clear_bit(SOCK_CLOSED, &con->state); | 299 | clear_bit(SOCK_CLOSED, &con->state); |
300 | return rc; | 300 | return rc; |
301 | } | 301 | } |
302 | 302 | ||
303 | /* | 303 | /* |
304 | * Reset a connection. Discard all incoming and outgoing messages | 304 | * Reset a connection. Discard all incoming and outgoing messages |
305 | * and clear *_seq state. | 305 | * and clear *_seq state. |
306 | */ | 306 | */ |
/* Unlink @msg from whatever list it is on and drop the list's reference. */
static void ceph_msg_remove(struct ceph_msg *msg)
{
	list_del_init(&msg->list_head);
	ceph_msg_put(msg);
}
312 | static void ceph_msg_remove_list(struct list_head *head) | 312 | static void ceph_msg_remove_list(struct list_head *head) |
313 | { | 313 | { |
314 | while (!list_empty(head)) { | 314 | while (!list_empty(head)) { |
315 | struct ceph_msg *msg = list_first_entry(head, struct ceph_msg, | 315 | struct ceph_msg *msg = list_first_entry(head, struct ceph_msg, |
316 | list_head); | 316 | list_head); |
317 | ceph_msg_remove(msg); | 317 | ceph_msg_remove(msg); |
318 | } | 318 | } |
319 | } | 319 | } |
320 | 320 | ||
321 | static void reset_connection(struct ceph_connection *con) | 321 | static void reset_connection(struct ceph_connection *con) |
322 | { | 322 | { |
323 | /* reset connection, out_queue, msg_ and connect_seq */ | 323 | /* reset connection, out_queue, msg_ and connect_seq */ |
324 | /* discard existing out_queue and msg_seq */ | 324 | /* discard existing out_queue and msg_seq */ |
325 | ceph_msg_remove_list(&con->out_queue); | 325 | ceph_msg_remove_list(&con->out_queue); |
326 | ceph_msg_remove_list(&con->out_sent); | 326 | ceph_msg_remove_list(&con->out_sent); |
327 | 327 | ||
328 | if (con->in_msg) { | 328 | if (con->in_msg) { |
329 | ceph_msg_put(con->in_msg); | 329 | ceph_msg_put(con->in_msg); |
330 | con->in_msg = NULL; | 330 | con->in_msg = NULL; |
331 | } | 331 | } |
332 | 332 | ||
333 | con->connect_seq = 0; | 333 | con->connect_seq = 0; |
334 | con->out_seq = 0; | 334 | con->out_seq = 0; |
335 | if (con->out_msg) { | 335 | if (con->out_msg) { |
336 | ceph_msg_put(con->out_msg); | 336 | ceph_msg_put(con->out_msg); |
337 | con->out_msg = NULL; | 337 | con->out_msg = NULL; |
338 | } | 338 | } |
339 | con->out_keepalive_pending = false; | 339 | con->out_keepalive_pending = false; |
340 | con->in_seq = 0; | 340 | con->in_seq = 0; |
341 | con->in_seq_acked = 0; | 341 | con->in_seq_acked = 0; |
342 | } | 342 | } |
343 | 343 | ||
/*
 * mark a peer down.  drop any open connections.
 *
 * Flags are adjusted before taking con->mutex; the final queue_con()
 * lets the work function observe the CLOSED state.
 */
void ceph_con_close(struct ceph_connection *con)
{
	dout("con_close %p peer %s\n", con,
	     ceph_pr_addr(&con->peer_addr.in_addr));
	set_bit(CLOSED, &con->state);  /* in case there's queued work */
	clear_bit(STANDBY, &con->state);  /* avoid connect_seq bump */
	clear_bit(LOSSYTX, &con->state);  /* so we retry next connect */
	clear_bit(KEEPALIVE_PENDING, &con->state);
	clear_bit(WRITE_PENDING, &con->state);
	mutex_lock(&con->mutex);
	reset_connection(con);
	con->peer_global_seq = 0;
	cancel_delayed_work(&con->work);
	mutex_unlock(&con->mutex);
	queue_con(con);
}
EXPORT_SYMBOL(ceph_con_close);
364 | 364 | ||
365 | /* | 365 | /* |
366 | * Reopen a closed connection, with a new peer address. | 366 | * Reopen a closed connection, with a new peer address. |
367 | */ | 367 | */ |
368 | void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) | 368 | void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) |
369 | { | 369 | { |
370 | dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); | 370 | dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); |
371 | set_bit(OPENING, &con->state); | 371 | set_bit(OPENING, &con->state); |
372 | clear_bit(CLOSED, &con->state); | 372 | clear_bit(CLOSED, &con->state); |
373 | memcpy(&con->peer_addr, addr, sizeof(*addr)); | 373 | memcpy(&con->peer_addr, addr, sizeof(*addr)); |
374 | con->delay = 0; /* reset backoff memory */ | 374 | con->delay = 0; /* reset backoff memory */ |
375 | queue_con(con); | 375 | queue_con(con); |
376 | } | 376 | } |
377 | EXPORT_SYMBOL(ceph_con_open); | 377 | EXPORT_SYMBOL(ceph_con_open); |
378 | 378 | ||
379 | /* | 379 | /* |
380 | * return true if this connection ever successfully opened | 380 | * return true if this connection ever successfully opened |
381 | */ | 381 | */ |
382 | bool ceph_con_opened(struct ceph_connection *con) | 382 | bool ceph_con_opened(struct ceph_connection *con) |
383 | { | 383 | { |
384 | return con->connect_seq > 0; | 384 | return con->connect_seq > 0; |
385 | } | 385 | } |
386 | 386 | ||
387 | /* | 387 | /* |
388 | * generic get/put | 388 | * generic get/put |
389 | */ | 389 | */ |
390 | struct ceph_connection *ceph_con_get(struct ceph_connection *con) | 390 | struct ceph_connection *ceph_con_get(struct ceph_connection *con) |
391 | { | 391 | { |
392 | dout("con_get %p nref = %d -> %d\n", con, | 392 | dout("con_get %p nref = %d -> %d\n", con, |
393 | atomic_read(&con->nref), atomic_read(&con->nref) + 1); | 393 | atomic_read(&con->nref), atomic_read(&con->nref) + 1); |
394 | if (atomic_inc_not_zero(&con->nref)) | 394 | if (atomic_inc_not_zero(&con->nref)) |
395 | return con; | 395 | return con; |
396 | return NULL; | 396 | return NULL; |
397 | } | 397 | } |
398 | 398 | ||
399 | void ceph_con_put(struct ceph_connection *con) | 399 | void ceph_con_put(struct ceph_connection *con) |
400 | { | 400 | { |
401 | dout("con_put %p nref = %d -> %d\n", con, | 401 | dout("con_put %p nref = %d -> %d\n", con, |
402 | atomic_read(&con->nref), atomic_read(&con->nref) - 1); | 402 | atomic_read(&con->nref), atomic_read(&con->nref) - 1); |
403 | BUG_ON(atomic_read(&con->nref) == 0); | 403 | BUG_ON(atomic_read(&con->nref) == 0); |
404 | if (atomic_dec_and_test(&con->nref)) { | 404 | if (atomic_dec_and_test(&con->nref)) { |
405 | BUG_ON(con->sock); | 405 | BUG_ON(con->sock); |
406 | kfree(con); | 406 | kfree(con); |
407 | } | 407 | } |
408 | } | 408 | } |
409 | 409 | ||
410 | /* | 410 | /* |
411 | * initialize a new connection. | 411 | * initialize a new connection. |
412 | */ | 412 | */ |
413 | void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con) | 413 | void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con) |
414 | { | 414 | { |
415 | dout("con_init %p\n", con); | 415 | dout("con_init %p\n", con); |
416 | memset(con, 0, sizeof(*con)); | 416 | memset(con, 0, sizeof(*con)); |
417 | atomic_set(&con->nref, 1); | 417 | atomic_set(&con->nref, 1); |
418 | con->msgr = msgr; | 418 | con->msgr = msgr; |
419 | mutex_init(&con->mutex); | 419 | mutex_init(&con->mutex); |
420 | INIT_LIST_HEAD(&con->out_queue); | 420 | INIT_LIST_HEAD(&con->out_queue); |
421 | INIT_LIST_HEAD(&con->out_sent); | 421 | INIT_LIST_HEAD(&con->out_sent); |
422 | INIT_DELAYED_WORK(&con->work, con_work); | 422 | INIT_DELAYED_WORK(&con->work, con_work); |
423 | } | 423 | } |
424 | EXPORT_SYMBOL(ceph_con_init); | 424 | EXPORT_SYMBOL(ceph_con_init); |
425 | 425 | ||
426 | 426 | ||
427 | /* | 427 | /* |
428 | * We maintain a global counter to order connection attempts. Get | 428 | * We maintain a global counter to order connection attempts. Get |
429 | * a unique seq greater than @gt. | 429 | * a unique seq greater than @gt. |
430 | */ | 430 | */ |
431 | static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt) | 431 | static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt) |
432 | { | 432 | { |
433 | u32 ret; | 433 | u32 ret; |
434 | 434 | ||
435 | spin_lock(&msgr->global_seq_lock); | 435 | spin_lock(&msgr->global_seq_lock); |
436 | if (msgr->global_seq < gt) | 436 | if (msgr->global_seq < gt) |
437 | msgr->global_seq = gt; | 437 | msgr->global_seq = gt; |
438 | ret = ++msgr->global_seq; | 438 | ret = ++msgr->global_seq; |
439 | spin_unlock(&msgr->global_seq_lock); | 439 | spin_unlock(&msgr->global_seq_lock); |
440 | return ret; | 440 | return ret; |
441 | } | 441 | } |
442 | 442 | ||
443 | 443 | ||
/*
 * Prepare footer for currently outgoing message, and finish things
 * off.  Assumes out_kvec* are already valid.. we just add on to the end.
 *
 * @v is the index of the next free out_kvec slot, as maintained by the
 * caller.
 */
static void prepare_write_message_footer(struct ceph_connection *con, int v)
{
	struct ceph_msg *m = con->out_msg;

	dout("prepare_write_message_footer %p\n", con);
	con->out_kvec_is_msg = true;
	con->out_kvec[v].iov_base = &m->footer;
	con->out_kvec[v].iov_len = sizeof(m->footer);
	con->out_kvec_bytes += sizeof(m->footer);
	con->out_kvec_left++;
	con->out_more = m->more_to_follow;
	con->out_msg_done = true;	/* nothing left to queue for this msg */
}
461 | 461 | ||
462 | /* | 462 | /* |
463 | * Prepare headers for the next outgoing message. | 463 | * Prepare headers for the next outgoing message. |
464 | */ | 464 | */ |
465 | static void prepare_write_message(struct ceph_connection *con) | 465 | static void prepare_write_message(struct ceph_connection *con) |
466 | { | 466 | { |
467 | struct ceph_msg *m; | 467 | struct ceph_msg *m; |
468 | int v = 0; | 468 | int v = 0; |
469 | 469 | ||
470 | con->out_kvec_bytes = 0; | 470 | con->out_kvec_bytes = 0; |
471 | con->out_kvec_is_msg = true; | 471 | con->out_kvec_is_msg = true; |
472 | con->out_msg_done = false; | 472 | con->out_msg_done = false; |
473 | 473 | ||
474 | /* Sneak an ack in there first? If we can get it into the same | 474 | /* Sneak an ack in there first? If we can get it into the same |
475 | * TCP packet that's a good thing. */ | 475 | * TCP packet that's a good thing. */ |
476 | if (con->in_seq > con->in_seq_acked) { | 476 | if (con->in_seq > con->in_seq_acked) { |
477 | con->in_seq_acked = con->in_seq; | 477 | con->in_seq_acked = con->in_seq; |
478 | con->out_kvec[v].iov_base = &tag_ack; | 478 | con->out_kvec[v].iov_base = &tag_ack; |
479 | con->out_kvec[v++].iov_len = 1; | 479 | con->out_kvec[v++].iov_len = 1; |
480 | con->out_temp_ack = cpu_to_le64(con->in_seq_acked); | 480 | con->out_temp_ack = cpu_to_le64(con->in_seq_acked); |
481 | con->out_kvec[v].iov_base = &con->out_temp_ack; | 481 | con->out_kvec[v].iov_base = &con->out_temp_ack; |
482 | con->out_kvec[v++].iov_len = sizeof(con->out_temp_ack); | 482 | con->out_kvec[v++].iov_len = sizeof(con->out_temp_ack); |
483 | con->out_kvec_bytes = 1 + sizeof(con->out_temp_ack); | 483 | con->out_kvec_bytes = 1 + sizeof(con->out_temp_ack); |
484 | } | 484 | } |
485 | 485 | ||
486 | m = list_first_entry(&con->out_queue, | 486 | m = list_first_entry(&con->out_queue, |
487 | struct ceph_msg, list_head); | 487 | struct ceph_msg, list_head); |
488 | con->out_msg = m; | 488 | con->out_msg = m; |
489 | if (test_bit(LOSSYTX, &con->state)) { | 489 | if (test_bit(LOSSYTX, &con->state)) { |
490 | list_del_init(&m->list_head); | 490 | list_del_init(&m->list_head); |
491 | } else { | 491 | } else { |
492 | /* put message on sent list */ | 492 | /* put message on sent list */ |
493 | ceph_msg_get(m); | 493 | ceph_msg_get(m); |
494 | list_move_tail(&m->list_head, &con->out_sent); | 494 | list_move_tail(&m->list_head, &con->out_sent); |
495 | } | 495 | } |
496 | 496 | ||
497 | /* | 497 | /* |
498 | * only assign outgoing seq # if we haven't sent this message | 498 | * only assign outgoing seq # if we haven't sent this message |
499 | * yet. if it is requeued, resend with it's original seq. | 499 | * yet. if it is requeued, resend with it's original seq. |
500 | */ | 500 | */ |
501 | if (m->needs_out_seq) { | 501 | if (m->needs_out_seq) { |
502 | m->hdr.seq = cpu_to_le64(++con->out_seq); | 502 | m->hdr.seq = cpu_to_le64(++con->out_seq); |
503 | m->needs_out_seq = false; | 503 | m->needs_out_seq = false; |
504 | } | 504 | } |
505 | 505 | ||
506 | dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", | 506 | dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", |
507 | m, con->out_seq, le16_to_cpu(m->hdr.type), | 507 | m, con->out_seq, le16_to_cpu(m->hdr.type), |
508 | le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len), | 508 | le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len), |
509 | le32_to_cpu(m->hdr.data_len), | 509 | le32_to_cpu(m->hdr.data_len), |
510 | m->nr_pages); | 510 | m->nr_pages); |
511 | BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); | 511 | BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); |
512 | 512 | ||
513 | /* tag + hdr + front + middle */ | 513 | /* tag + hdr + front + middle */ |
514 | con->out_kvec[v].iov_base = &tag_msg; | 514 | con->out_kvec[v].iov_base = &tag_msg; |
515 | con->out_kvec[v++].iov_len = 1; | 515 | con->out_kvec[v++].iov_len = 1; |
516 | con->out_kvec[v].iov_base = &m->hdr; | 516 | con->out_kvec[v].iov_base = &m->hdr; |
517 | con->out_kvec[v++].iov_len = sizeof(m->hdr); | 517 | con->out_kvec[v++].iov_len = sizeof(m->hdr); |
518 | con->out_kvec[v++] = m->front; | 518 | con->out_kvec[v++] = m->front; |
519 | if (m->middle) | 519 | if (m->middle) |
520 | con->out_kvec[v++] = m->middle->vec; | 520 | con->out_kvec[v++] = m->middle->vec; |
521 | con->out_kvec_left = v; | 521 | con->out_kvec_left = v; |
522 | con->out_kvec_bytes += 1 + sizeof(m->hdr) + m->front.iov_len + | 522 | con->out_kvec_bytes += 1 + sizeof(m->hdr) + m->front.iov_len + |
523 | (m->middle ? m->middle->vec.iov_len : 0); | 523 | (m->middle ? m->middle->vec.iov_len : 0); |
524 | con->out_kvec_cur = con->out_kvec; | 524 | con->out_kvec_cur = con->out_kvec; |
525 | 525 | ||
526 | /* fill in crc (except data pages), footer */ | 526 | /* fill in crc (except data pages), footer */ |
527 | con->out_msg->hdr.crc = | 527 | con->out_msg->hdr.crc = |
528 | cpu_to_le32(crc32c(0, (void *)&m->hdr, | 528 | cpu_to_le32(crc32c(0, (void *)&m->hdr, |
529 | sizeof(m->hdr) - sizeof(m->hdr.crc))); | 529 | sizeof(m->hdr) - sizeof(m->hdr.crc))); |
530 | con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE; | 530 | con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE; |
531 | con->out_msg->footer.front_crc = | 531 | con->out_msg->footer.front_crc = |
532 | cpu_to_le32(crc32c(0, m->front.iov_base, m->front.iov_len)); | 532 | cpu_to_le32(crc32c(0, m->front.iov_base, m->front.iov_len)); |
533 | if (m->middle) | 533 | if (m->middle) |
534 | con->out_msg->footer.middle_crc = | 534 | con->out_msg->footer.middle_crc = |
535 | cpu_to_le32(crc32c(0, m->middle->vec.iov_base, | 535 | cpu_to_le32(crc32c(0, m->middle->vec.iov_base, |
536 | m->middle->vec.iov_len)); | 536 | m->middle->vec.iov_len)); |
537 | else | 537 | else |
538 | con->out_msg->footer.middle_crc = 0; | 538 | con->out_msg->footer.middle_crc = 0; |
539 | con->out_msg->footer.data_crc = 0; | 539 | con->out_msg->footer.data_crc = 0; |
540 | dout("prepare_write_message front_crc %u data_crc %u\n", | 540 | dout("prepare_write_message front_crc %u data_crc %u\n", |
541 | le32_to_cpu(con->out_msg->footer.front_crc), | 541 | le32_to_cpu(con->out_msg->footer.front_crc), |
542 | le32_to_cpu(con->out_msg->footer.middle_crc)); | 542 | le32_to_cpu(con->out_msg->footer.middle_crc)); |
543 | 543 | ||
544 | /* is there a data payload? */ | 544 | /* is there a data payload? */ |
545 | if (le32_to_cpu(m->hdr.data_len) > 0) { | 545 | if (le32_to_cpu(m->hdr.data_len) > 0) { |
546 | /* initialize page iterator */ | 546 | /* initialize page iterator */ |
547 | con->out_msg_pos.page = 0; | 547 | con->out_msg_pos.page = 0; |
548 | if (m->pages) | 548 | if (m->pages) |
549 | con->out_msg_pos.page_pos = m->page_alignment; | 549 | con->out_msg_pos.page_pos = m->page_alignment; |
550 | else | 550 | else |
551 | con->out_msg_pos.page_pos = 0; | 551 | con->out_msg_pos.page_pos = 0; |
552 | con->out_msg_pos.data_pos = 0; | 552 | con->out_msg_pos.data_pos = 0; |
553 | con->out_msg_pos.did_page_crc = 0; | 553 | con->out_msg_pos.did_page_crc = 0; |
554 | con->out_more = 1; /* data + footer will follow */ | 554 | con->out_more = 1; /* data + footer will follow */ |
555 | } else { | 555 | } else { |
556 | /* no, queue up footer too and be done */ | 556 | /* no, queue up footer too and be done */ |
557 | prepare_write_message_footer(con, v); | 557 | prepare_write_message_footer(con, v); |
558 | } | 558 | } |
559 | 559 | ||
560 | set_bit(WRITE_PENDING, &con->state); | 560 | set_bit(WRITE_PENDING, &con->state); |
561 | } | 561 | } |
562 | 562 | ||
563 | /* | 563 | /* |
564 | * Prepare an ack. | 564 | * Prepare an ack. |
565 | */ | 565 | */ |
566 | static void prepare_write_ack(struct ceph_connection *con) | 566 | static void prepare_write_ack(struct ceph_connection *con) |
567 | { | 567 | { |
568 | dout("prepare_write_ack %p %llu -> %llu\n", con, | 568 | dout("prepare_write_ack %p %llu -> %llu\n", con, |
569 | con->in_seq_acked, con->in_seq); | 569 | con->in_seq_acked, con->in_seq); |
570 | con->in_seq_acked = con->in_seq; | 570 | con->in_seq_acked = con->in_seq; |
571 | 571 | ||
572 | con->out_kvec[0].iov_base = &tag_ack; | 572 | con->out_kvec[0].iov_base = &tag_ack; |
573 | con->out_kvec[0].iov_len = 1; | 573 | con->out_kvec[0].iov_len = 1; |
574 | con->out_temp_ack = cpu_to_le64(con->in_seq_acked); | 574 | con->out_temp_ack = cpu_to_le64(con->in_seq_acked); |
575 | con->out_kvec[1].iov_base = &con->out_temp_ack; | 575 | con->out_kvec[1].iov_base = &con->out_temp_ack; |
576 | con->out_kvec[1].iov_len = sizeof(con->out_temp_ack); | 576 | con->out_kvec[1].iov_len = sizeof(con->out_temp_ack); |
577 | con->out_kvec_left = 2; | 577 | con->out_kvec_left = 2; |
578 | con->out_kvec_bytes = 1 + sizeof(con->out_temp_ack); | 578 | con->out_kvec_bytes = 1 + sizeof(con->out_temp_ack); |
579 | con->out_kvec_cur = con->out_kvec; | 579 | con->out_kvec_cur = con->out_kvec; |
580 | con->out_more = 1; /* more will follow.. eventually.. */ | 580 | con->out_more = 1; /* more will follow.. eventually.. */ |
581 | set_bit(WRITE_PENDING, &con->state); | 581 | set_bit(WRITE_PENDING, &con->state); |
582 | } | 582 | } |
583 | 583 | ||
584 | /* | 584 | /* |
585 | * Prepare to write keepalive byte. | 585 | * Prepare to write keepalive byte. |
586 | */ | 586 | */ |
587 | static void prepare_write_keepalive(struct ceph_connection *con) | 587 | static void prepare_write_keepalive(struct ceph_connection *con) |
588 | { | 588 | { |
589 | dout("prepare_write_keepalive %p\n", con); | 589 | dout("prepare_write_keepalive %p\n", con); |
590 | con->out_kvec[0].iov_base = &tag_keepalive; | 590 | con->out_kvec[0].iov_base = &tag_keepalive; |
591 | con->out_kvec[0].iov_len = 1; | 591 | con->out_kvec[0].iov_len = 1; |
592 | con->out_kvec_left = 1; | 592 | con->out_kvec_left = 1; |
593 | con->out_kvec_bytes = 1; | 593 | con->out_kvec_bytes = 1; |
594 | con->out_kvec_cur = con->out_kvec; | 594 | con->out_kvec_cur = con->out_kvec; |
595 | set_bit(WRITE_PENDING, &con->state); | 595 | set_bit(WRITE_PENDING, &con->state); |
596 | } | 596 | } |
597 | 597 | ||
598 | /* | 598 | /* |
599 | * Connection negotiation. | 599 | * Connection negotiation. |
600 | */ | 600 | */ |
601 | 601 | ||
602 | static void prepare_connect_authorizer(struct ceph_connection *con) | 602 | static void prepare_connect_authorizer(struct ceph_connection *con) |
603 | { | 603 | { |
604 | void *auth_buf; | 604 | void *auth_buf; |
605 | int auth_len = 0; | 605 | int auth_len = 0; |
606 | int auth_protocol = 0; | 606 | int auth_protocol = 0; |
607 | 607 | ||
608 | mutex_unlock(&con->mutex); | 608 | mutex_unlock(&con->mutex); |
609 | if (con->ops->get_authorizer) | 609 | if (con->ops->get_authorizer) |
610 | con->ops->get_authorizer(con, &auth_buf, &auth_len, | 610 | con->ops->get_authorizer(con, &auth_buf, &auth_len, |
611 | &auth_protocol, &con->auth_reply_buf, | 611 | &auth_protocol, &con->auth_reply_buf, |
612 | &con->auth_reply_buf_len, | 612 | &con->auth_reply_buf_len, |
613 | con->auth_retry); | 613 | con->auth_retry); |
614 | mutex_lock(&con->mutex); | 614 | mutex_lock(&con->mutex); |
615 | 615 | ||
616 | con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol); | 616 | con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol); |
617 | con->out_connect.authorizer_len = cpu_to_le32(auth_len); | 617 | con->out_connect.authorizer_len = cpu_to_le32(auth_len); |
618 | 618 | ||
619 | con->out_kvec[con->out_kvec_left].iov_base = auth_buf; | 619 | con->out_kvec[con->out_kvec_left].iov_base = auth_buf; |
620 | con->out_kvec[con->out_kvec_left].iov_len = auth_len; | 620 | con->out_kvec[con->out_kvec_left].iov_len = auth_len; |
621 | con->out_kvec_left++; | 621 | con->out_kvec_left++; |
622 | con->out_kvec_bytes += auth_len; | 622 | con->out_kvec_bytes += auth_len; |
623 | } | 623 | } |
624 | 624 | ||
625 | /* | 625 | /* |
626 | * We connected to a peer and are saying hello. | 626 | * We connected to a peer and are saying hello. |
627 | */ | 627 | */ |
628 | static void prepare_write_banner(struct ceph_messenger *msgr, | 628 | static void prepare_write_banner(struct ceph_messenger *msgr, |
629 | struct ceph_connection *con) | 629 | struct ceph_connection *con) |
630 | { | 630 | { |
631 | int len = strlen(CEPH_BANNER); | 631 | int len = strlen(CEPH_BANNER); |
632 | 632 | ||
633 | con->out_kvec[0].iov_base = CEPH_BANNER; | 633 | con->out_kvec[0].iov_base = CEPH_BANNER; |
634 | con->out_kvec[0].iov_len = len; | 634 | con->out_kvec[0].iov_len = len; |
635 | con->out_kvec[1].iov_base = &msgr->my_enc_addr; | 635 | con->out_kvec[1].iov_base = &msgr->my_enc_addr; |
636 | con->out_kvec[1].iov_len = sizeof(msgr->my_enc_addr); | 636 | con->out_kvec[1].iov_len = sizeof(msgr->my_enc_addr); |
637 | con->out_kvec_left = 2; | 637 | con->out_kvec_left = 2; |
638 | con->out_kvec_bytes = len + sizeof(msgr->my_enc_addr); | 638 | con->out_kvec_bytes = len + sizeof(msgr->my_enc_addr); |
639 | con->out_kvec_cur = con->out_kvec; | 639 | con->out_kvec_cur = con->out_kvec; |
640 | con->out_more = 0; | 640 | con->out_more = 0; |
641 | set_bit(WRITE_PENDING, &con->state); | 641 | set_bit(WRITE_PENDING, &con->state); |
642 | } | 642 | } |
643 | 643 | ||
/*
 * Stage an outgoing ceph_msg_connect in out_kvec, followed by the
 * authorizer (appended by prepare_connect_authorizer()).
 *
 * @after_banner: nonzero if the connect should be appended after a
 * banner already staged in out_kvec; otherwise the kvec array is
 * reset first.
 */
static void prepare_write_connect(struct ceph_messenger *msgr,
				  struct ceph_connection *con,
				  int after_banner)
{
	unsigned global_seq = get_global_seq(con->msgr, 0);
	int proto;

	/* pick the protocol version matching the peer's role */
	switch (con->peer_name.type) {
	case CEPH_ENTITY_TYPE_MON:
		proto = CEPH_MONC_PROTOCOL;
		break;
	case CEPH_ENTITY_TYPE_OSD:
		proto = CEPH_OSDC_PROTOCOL;
		break;
	case CEPH_ENTITY_TYPE_MDS:
		proto = CEPH_MDSC_PROTOCOL;
		break;
	default:
		BUG();	/* we only connect as a client to mon/osd/mds */
	}

	dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
	     con->connect_seq, global_seq, proto);

	/* all wire fields are little-endian */
	con->out_connect.features = cpu_to_le64(msgr->supported_features);
	con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
	con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
	con->out_connect.global_seq = cpu_to_le32(global_seq);
	con->out_connect.protocol_version = cpu_to_le32(proto);
	con->out_connect.flags = 0;

	if (!after_banner) {
		/* connect is the first thing queued: start kvec fresh */
		con->out_kvec_left = 0;
		con->out_kvec_bytes = 0;
	}
	con->out_kvec[con->out_kvec_left].iov_base = &con->out_connect;
	con->out_kvec[con->out_kvec_left].iov_len = sizeof(con->out_connect);
	con->out_kvec_left++;
	con->out_kvec_bytes += sizeof(con->out_connect);
	con->out_kvec_cur = con->out_kvec;
	con->out_more = 0;
	set_bit(WRITE_PENDING, &con->state);

	/* note: temporarily drops con->mutex */
	prepare_connect_authorizer(con);
}
689 | 689 | ||
690 | 690 | ||
/*
 * write as much of pending kvecs to the socket as we can.
 *  1 -> done
 *  0 -> socket full, but more to do
 * <0 -> error
 */
static int write_partial_kvec(struct ceph_connection *con)
{
	int ret;

	dout("write_partial_kvec %p %d left\n", con, con->out_kvec_bytes);
	while (con->out_kvec_bytes > 0) {
		ret = ceph_tcp_sendmsg(con->sock, con->out_kvec_cur,
				       con->out_kvec_left, con->out_kvec_bytes,
				       con->out_more);
		if (ret <= 0)
			goto out;
		con->out_kvec_bytes -= ret;
		if (con->out_kvec_bytes == 0)
			break;            /* done */
		/*
		 * Partial send: skip over kvecs that were fully sent,
		 * then trim the first partially-sent kvec in place so a
		 * later call resumes exactly where this one stopped.
		 */
		while (ret > 0) {
			if (ret >= con->out_kvec_cur->iov_len) {
				/* this kvec fully consumed; advance */
				ret -= con->out_kvec_cur->iov_len;
				con->out_kvec_cur++;
				con->out_kvec_left--;
			} else {
				/* partially consumed; shrink and stop */
				con->out_kvec_cur->iov_len -= ret;
				con->out_kvec_cur->iov_base += ret;
				ret = 0;
				break;
			}
		}
	}
	con->out_kvec_left = 0;
	con->out_kvec_is_msg = false;
	ret = 1;
out:
	dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con,
	     con->out_kvec_bytes, con->out_kvec_left, ret);
	return ret;  /* done! */
}
732 | 732 | ||
733 | #ifdef CONFIG_BLOCK | 733 | #ifdef CONFIG_BLOCK |
734 | static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg) | 734 | static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg) |
735 | { | 735 | { |
736 | if (!bio) { | 736 | if (!bio) { |
737 | *iter = NULL; | 737 | *iter = NULL; |
738 | *seg = 0; | 738 | *seg = 0; |
739 | return; | 739 | return; |
740 | } | 740 | } |
741 | *iter = bio; | 741 | *iter = bio; |
742 | *seg = bio->bi_idx; | 742 | *seg = bio->bi_idx; |
743 | } | 743 | } |
744 | 744 | ||
745 | static void iter_bio_next(struct bio **bio_iter, int *seg) | 745 | static void iter_bio_next(struct bio **bio_iter, int *seg) |
746 | { | 746 | { |
747 | if (*bio_iter == NULL) | 747 | if (*bio_iter == NULL) |
748 | return; | 748 | return; |
749 | 749 | ||
750 | BUG_ON(*seg >= (*bio_iter)->bi_vcnt); | 750 | BUG_ON(*seg >= (*bio_iter)->bi_vcnt); |
751 | 751 | ||
752 | (*seg)++; | 752 | (*seg)++; |
753 | if (*seg == (*bio_iter)->bi_vcnt) | 753 | if (*seg == (*bio_iter)->bi_vcnt) |
754 | init_bio_iter((*bio_iter)->bi_next, bio_iter, seg); | 754 | init_bio_iter((*bio_iter)->bi_next, bio_iter, seg); |
755 | } | 755 | } |
756 | #endif | 756 | #endif |
757 | 757 | ||
/*
 * Write as much message data payload as we can.  If we finish, queue
 * up the footer.
 *  1 -> done, footer is now queued in out_kvec[].
 *  0 -> socket full, but more to do
 * <0 -> error
 *
 * The payload may come from msg->pages[], a pagelist, a bio chain
 * (CONFIG_BLOCK), or a trailing pagelist (msg->trail) sent last; if
 * none is present, zero pages are sent (revoked data).  Progress is
 * tracked in con->out_msg_pos so the function can resume after a
 * short or -EAGAIN send.
 */
static int write_partial_msg_pages(struct ceph_connection *con)
{
	struct ceph_msg *msg = con->out_msg;
	unsigned data_len = le32_to_cpu(msg->hdr.data_len);
	size_t len;
	/* NOTE(review): 'crc' is assigned msgr->nocrc directly, yet is
	 * used below as "do compute data crc" and !crc sets the NOCRC
	 * footer flag — the polarity looks inverted; verify against the
	 * msgr->nocrc users elsewhere in this file. */
	int crc = con->msgr->nocrc;
	int ret;
	int total_max_write;
	int in_trail = 0;
	size_t trail_len = (msg->trail ? msg->trail->length : 0);

	dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n",
	     con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages,
	     con->out_msg_pos.page_pos);

#ifdef CONFIG_BLOCK
	if (msg->bio && !msg->bio_iter)
		init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
#endif

	while (data_len > con->out_msg_pos.data_pos) {
		struct page *page = NULL;
		void *kaddr = NULL;
		int max_write = PAGE_SIZE;
		int page_shift = 0;

		/* bytes remaining before the trail region begins */
		total_max_write = data_len - trail_len -
			con->out_msg_pos.data_pos;

		/*
		 * if we are calculating the data crc (the default), we need
		 * to map the page.  if our pages[] has been revoked, use the
		 * zero page.
		 */

		/* have we reached the trail part of the data? */
		if (con->out_msg_pos.data_pos >= data_len - trail_len) {
			in_trail = 1;

			total_max_write = data_len - con->out_msg_pos.data_pos;

			page = list_first_entry(&msg->trail->head,
						struct page, lru);
			if (crc)
				kaddr = kmap(page);
			max_write = PAGE_SIZE;
		} else if (msg->pages) {
			page = msg->pages[con->out_msg_pos.page];
			if (crc)
				kaddr = kmap(page);
		} else if (msg->pagelist) {
			page = list_first_entry(&msg->pagelist->head,
						struct page, lru);
			if (crc)
				kaddr = kmap(page);
#ifdef CONFIG_BLOCK
		} else if (msg->bio) {
			struct bio_vec *bv;

			bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg);
			page = bv->bv_page;
			page_shift = bv->bv_offset;
			if (crc)
				kaddr = kmap(page) + page_shift;
			max_write = bv->bv_len;
#endif
		} else {
			/* no data source: pages were revoked; send zeros */
			page = con->msgr->zero_page;
			if (crc)
				kaddr = page_address(con->msgr->zero_page);
		}
		/* send at most to the end of this page/segment/region */
		len = min_t(int, max_write - con->out_msg_pos.page_pos,
			    total_max_write);

		/* fold this page's contribution into the data crc once */
		if (crc && !con->out_msg_pos.did_page_crc) {
			void *base = kaddr + con->out_msg_pos.page_pos;
			u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc);

			BUG_ON(kaddr == NULL);
			con->out_msg->footer.data_crc =
				cpu_to_le32(crc32c(tmpcrc, base, len));
			con->out_msg_pos.did_page_crc = 1;
		}
		ret = kernel_sendpage(con->sock, page,
				      con->out_msg_pos.page_pos + page_shift,
				      len,
				      MSG_DONTWAIT | MSG_NOSIGNAL |
				      MSG_MORE);

		/* unmap iff we kmap'ed above (zero page uses page_address) */
		if (crc &&
		    (msg->pages || msg->pagelist || msg->bio || in_trail))
			kunmap(page);

		if (ret == -EAGAIN)
			ret = 0;	/* socket full: report "more to do" */
		if (ret <= 0)
			goto out;

		con->out_msg_pos.data_pos += ret;
		con->out_msg_pos.page_pos += ret;
		if (ret == len) {
			/* finished this page: advance to the next one */
			con->out_msg_pos.page_pos = 0;
			con->out_msg_pos.page++;
			con->out_msg_pos.did_page_crc = 0;
			if (in_trail)
				list_move_tail(&page->lru,
					       &msg->trail->head);
			else if (msg->pagelist)
				list_move_tail(&page->lru,
					       &msg->pagelist->head);
#ifdef CONFIG_BLOCK
			else if (msg->bio)
				iter_bio_next(&msg->bio_iter, &msg->bio_seg);
#endif
		}
	}

	dout("write_partial_msg_pages %p msg %p done\n", con, msg);

	/* prepare and queue up footer, too */
	if (!crc)
		con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
	con->out_kvec_bytes = 0;
	con->out_kvec_left = 0;
	con->out_kvec_cur = con->out_kvec;
	prepare_write_message_footer(con, 0);
	ret = 1;
out:
	return ret;
}
895 | 895 | ||
896 | /* | 896 | /* |
897 | * write some zeros | 897 | * write some zeros |
898 | */ | 898 | */ |
899 | static int write_partial_skip(struct ceph_connection *con) | 899 | static int write_partial_skip(struct ceph_connection *con) |
900 | { | 900 | { |
901 | int ret; | 901 | int ret; |
902 | 902 | ||
903 | while (con->out_skip > 0) { | 903 | while (con->out_skip > 0) { |
904 | struct kvec iov = { | 904 | struct kvec iov = { |
905 | .iov_base = page_address(con->msgr->zero_page), | 905 | .iov_base = page_address(con->msgr->zero_page), |
906 | .iov_len = min(con->out_skip, (int)PAGE_CACHE_SIZE) | 906 | .iov_len = min(con->out_skip, (int)PAGE_CACHE_SIZE) |
907 | }; | 907 | }; |
908 | 908 | ||
909 | ret = ceph_tcp_sendmsg(con->sock, &iov, 1, iov.iov_len, 1); | 909 | ret = ceph_tcp_sendmsg(con->sock, &iov, 1, iov.iov_len, 1); |
910 | if (ret <= 0) | 910 | if (ret <= 0) |
911 | goto out; | 911 | goto out; |
912 | con->out_skip -= ret; | 912 | con->out_skip -= ret; |
913 | } | 913 | } |
914 | ret = 1; | 914 | ret = 1; |
915 | out: | 915 | out: |
916 | return ret; | 916 | return ret; |
917 | } | 917 | } |
918 | 918 | ||
919 | /* | 919 | /* |
920 | * Prepare to read connection handshake, or an ack. | 920 | * Prepare to read connection handshake, or an ack. |
921 | */ | 921 | */ |
922 | static void prepare_read_banner(struct ceph_connection *con) | 922 | static void prepare_read_banner(struct ceph_connection *con) |
923 | { | 923 | { |
924 | dout("prepare_read_banner %p\n", con); | 924 | dout("prepare_read_banner %p\n", con); |
925 | con->in_base_pos = 0; | 925 | con->in_base_pos = 0; |
926 | } | 926 | } |
927 | 927 | ||
928 | static void prepare_read_connect(struct ceph_connection *con) | 928 | static void prepare_read_connect(struct ceph_connection *con) |
929 | { | 929 | { |
930 | dout("prepare_read_connect %p\n", con); | 930 | dout("prepare_read_connect %p\n", con); |
931 | con->in_base_pos = 0; | 931 | con->in_base_pos = 0; |
932 | } | 932 | } |
933 | 933 | ||
934 | static void prepare_read_ack(struct ceph_connection *con) | 934 | static void prepare_read_ack(struct ceph_connection *con) |
935 | { | 935 | { |
936 | dout("prepare_read_ack %p\n", con); | 936 | dout("prepare_read_ack %p\n", con); |
937 | con->in_base_pos = 0; | 937 | con->in_base_pos = 0; |
938 | } | 938 | } |
939 | 939 | ||
940 | static void prepare_read_tag(struct ceph_connection *con) | 940 | static void prepare_read_tag(struct ceph_connection *con) |
941 | { | 941 | { |
942 | dout("prepare_read_tag %p\n", con); | 942 | dout("prepare_read_tag %p\n", con); |
943 | con->in_base_pos = 0; | 943 | con->in_base_pos = 0; |
944 | con->in_tag = CEPH_MSGR_TAG_READY; | 944 | con->in_tag = CEPH_MSGR_TAG_READY; |
945 | } | 945 | } |
946 | 946 | ||
947 | /* | 947 | /* |
948 | * Prepare to read a message. | 948 | * Prepare to read a message. |
949 | */ | 949 | */ |
950 | static int prepare_read_message(struct ceph_connection *con) | 950 | static int prepare_read_message(struct ceph_connection *con) |
951 | { | 951 | { |
952 | dout("prepare_read_message %p\n", con); | 952 | dout("prepare_read_message %p\n", con); |
953 | BUG_ON(con->in_msg != NULL); | 953 | BUG_ON(con->in_msg != NULL); |
954 | con->in_base_pos = 0; | 954 | con->in_base_pos = 0; |
955 | con->in_front_crc = con->in_middle_crc = con->in_data_crc = 0; | 955 | con->in_front_crc = con->in_middle_crc = con->in_data_crc = 0; |
956 | return 0; | 956 | return 0; |
957 | } | 957 | } |
958 | 958 | ||
959 | 959 | ||
/*
 * Read up to 'size' bytes into 'object', resumably.
 *
 * *to is a running end-offset in the same coordinate space as
 * con->in_base_pos: it is advanced by 'size' up front, and
 * con->in_base_pos records total progress, so that after a short read
 * a re-entrant call with the same argument sequence picks up exactly
 * where it left off.
 *
 * Returns 1 once all 'size' bytes of 'object' are filled, or the
 * (<= 0) result of ceph_tcp_recvmsg on error / no data.
 */
static int read_partial(struct ceph_connection *con,
			int *to, int size, void *object)
{
	*to += size;
	while (con->in_base_pos < *to) {
		int left = *to - con->in_base_pos;	/* bytes still needed */
		int have = size - left;			/* bytes already read */
		int ret = ceph_tcp_recvmsg(con->sock, object + have, left);
		if (ret <= 0)
			return ret;
		con->in_base_pos += ret;
	}
	return 1;
}
974 | 974 | ||
975 | 975 | ||
976 | /* | 976 | /* |
977 | * Read all or part of the connect-side handshake on a new connection | 977 | * Read all or part of the connect-side handshake on a new connection |
978 | */ | 978 | */ |
979 | static int read_partial_banner(struct ceph_connection *con) | 979 | static int read_partial_banner(struct ceph_connection *con) |
980 | { | 980 | { |
981 | int ret, to = 0; | 981 | int ret, to = 0; |
982 | 982 | ||
983 | dout("read_partial_banner %p at %d\n", con, con->in_base_pos); | 983 | dout("read_partial_banner %p at %d\n", con, con->in_base_pos); |
984 | 984 | ||
985 | /* peer's banner */ | 985 | /* peer's banner */ |
986 | ret = read_partial(con, &to, strlen(CEPH_BANNER), con->in_banner); | 986 | ret = read_partial(con, &to, strlen(CEPH_BANNER), con->in_banner); |
987 | if (ret <= 0) | 987 | if (ret <= 0) |
988 | goto out; | 988 | goto out; |
989 | ret = read_partial(con, &to, sizeof(con->actual_peer_addr), | 989 | ret = read_partial(con, &to, sizeof(con->actual_peer_addr), |
990 | &con->actual_peer_addr); | 990 | &con->actual_peer_addr); |
991 | if (ret <= 0) | 991 | if (ret <= 0) |
992 | goto out; | 992 | goto out; |
993 | ret = read_partial(con, &to, sizeof(con->peer_addr_for_me), | 993 | ret = read_partial(con, &to, sizeof(con->peer_addr_for_me), |
994 | &con->peer_addr_for_me); | 994 | &con->peer_addr_for_me); |
995 | if (ret <= 0) | 995 | if (ret <= 0) |
996 | goto out; | 996 | goto out; |
997 | out: | 997 | out: |
998 | return ret; | 998 | return ret; |
999 | } | 999 | } |
1000 | 1000 | ||
1001 | static int read_partial_connect(struct ceph_connection *con) | 1001 | static int read_partial_connect(struct ceph_connection *con) |
1002 | { | 1002 | { |
1003 | int ret, to = 0; | 1003 | int ret, to = 0; |
1004 | 1004 | ||
1005 | dout("read_partial_connect %p at %d\n", con, con->in_base_pos); | 1005 | dout("read_partial_connect %p at %d\n", con, con->in_base_pos); |
1006 | 1006 | ||
1007 | ret = read_partial(con, &to, sizeof(con->in_reply), &con->in_reply); | 1007 | ret = read_partial(con, &to, sizeof(con->in_reply), &con->in_reply); |
1008 | if (ret <= 0) | 1008 | if (ret <= 0) |
1009 | goto out; | 1009 | goto out; |
1010 | ret = read_partial(con, &to, le32_to_cpu(con->in_reply.authorizer_len), | 1010 | ret = read_partial(con, &to, le32_to_cpu(con->in_reply.authorizer_len), |
1011 | con->auth_reply_buf); | 1011 | con->auth_reply_buf); |
1012 | if (ret <= 0) | 1012 | if (ret <= 0) |
1013 | goto out; | 1013 | goto out; |
1014 | 1014 | ||
1015 | dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n", | 1015 | dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n", |
1016 | con, (int)con->in_reply.tag, | 1016 | con, (int)con->in_reply.tag, |
1017 | le32_to_cpu(con->in_reply.connect_seq), | 1017 | le32_to_cpu(con->in_reply.connect_seq), |
1018 | le32_to_cpu(con->in_reply.global_seq)); | 1018 | le32_to_cpu(con->in_reply.global_seq)); |
1019 | out: | 1019 | out: |
1020 | return ret; | 1020 | return ret; |
1021 | 1021 | ||
1022 | } | 1022 | } |
1023 | 1023 | ||
1024 | /* | 1024 | /* |
1025 | * Verify the hello banner looks okay. | 1025 | * Verify the hello banner looks okay. |
1026 | */ | 1026 | */ |
1027 | static int verify_hello(struct ceph_connection *con) | 1027 | static int verify_hello(struct ceph_connection *con) |
1028 | { | 1028 | { |
1029 | if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { | 1029 | if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { |
1030 | pr_err("connect to %s got bad banner\n", | 1030 | pr_err("connect to %s got bad banner\n", |
1031 | ceph_pr_addr(&con->peer_addr.in_addr)); | 1031 | ceph_pr_addr(&con->peer_addr.in_addr)); |
1032 | con->error_msg = "protocol error, bad banner"; | 1032 | con->error_msg = "protocol error, bad banner"; |
1033 | return -1; | 1033 | return -1; |
1034 | } | 1034 | } |
1035 | return 0; | 1035 | return 0; |
1036 | } | 1036 | } |
1037 | 1037 | ||
1038 | static bool addr_is_blank(struct sockaddr_storage *ss) | 1038 | static bool addr_is_blank(struct sockaddr_storage *ss) |
1039 | { | 1039 | { |
1040 | switch (ss->ss_family) { | 1040 | switch (ss->ss_family) { |
1041 | case AF_INET: | 1041 | case AF_INET: |
1042 | return ((struct sockaddr_in *)ss)->sin_addr.s_addr == 0; | 1042 | return ((struct sockaddr_in *)ss)->sin_addr.s_addr == 0; |
1043 | case AF_INET6: | 1043 | case AF_INET6: |
1044 | return | 1044 | return |
1045 | ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[0] == 0 && | 1045 | ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[0] == 0 && |
1046 | ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[1] == 0 && | 1046 | ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[1] == 0 && |
1047 | ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[2] == 0 && | 1047 | ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[2] == 0 && |
1048 | ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[3] == 0; | 1048 | ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[3] == 0; |
1049 | } | 1049 | } |
1050 | return false; | 1050 | return false; |
1051 | } | 1051 | } |
1052 | 1052 | ||
/*
 * Return the port of an IPv4/IPv6 sockaddr in host byte order, or 0
 * for an unknown address family.
 */
static int addr_port(struct sockaddr_storage *ss)
{
	if (ss->ss_family == AF_INET)
		return ntohs(((struct sockaddr_in *)ss)->sin_port);
	if (ss->ss_family == AF_INET6)
		return ntohs(((struct sockaddr_in6 *)ss)->sin6_port);
	return 0;
}
1063 | 1063 | ||
/*
 * Store port @p (host byte order) into an IPv4/IPv6 sockaddr in
 * network byte order.  Unknown families are left untouched.
 *
 * Fix: the AF_INET case was missing a break and fell through into
 * the AF_INET6 store.  It happened to be harmless only because
 * sin_port and sin6_port share the same offset, but the fall-through
 * was unintended.
 */
static void addr_set_port(struct sockaddr_storage *ss, int p)
{
	switch (ss->ss_family) {
	case AF_INET:
		((struct sockaddr_in *)ss)->sin_port = htons(p);
		break;
	case AF_INET6:
		((struct sockaddr_in6 *)ss)->sin6_port = htons(p);
		break;
	}
}
1073 | 1073 | ||
1074 | /* | 1074 | /* |
1075 | * Parse an ip[:port] list into an addr array. Use the default | 1075 | * Parse an ip[:port] list into an addr array. Use the default |
1076 | * monitor port if a port isn't specified. | 1076 | * monitor port if a port isn't specified. |
1077 | */ | 1077 | */ |
1078 | int ceph_parse_ips(const char *c, const char *end, | 1078 | int ceph_parse_ips(const char *c, const char *end, |
1079 | struct ceph_entity_addr *addr, | 1079 | struct ceph_entity_addr *addr, |
1080 | int max_count, int *count) | 1080 | int max_count, int *count) |
1081 | { | 1081 | { |
1082 | int i; | 1082 | int i; |
1083 | const char *p = c; | 1083 | const char *p = c; |
1084 | 1084 | ||
1085 | dout("parse_ips on '%.*s'\n", (int)(end-c), c); | 1085 | dout("parse_ips on '%.*s'\n", (int)(end-c), c); |
1086 | for (i = 0; i < max_count; i++) { | 1086 | for (i = 0; i < max_count; i++) { |
1087 | const char *ipend; | 1087 | const char *ipend; |
1088 | struct sockaddr_storage *ss = &addr[i].in_addr; | 1088 | struct sockaddr_storage *ss = &addr[i].in_addr; |
1089 | struct sockaddr_in *in4 = (void *)ss; | 1089 | struct sockaddr_in *in4 = (void *)ss; |
1090 | struct sockaddr_in6 *in6 = (void *)ss; | 1090 | struct sockaddr_in6 *in6 = (void *)ss; |
1091 | int port; | 1091 | int port; |
1092 | char delim = ','; | 1092 | char delim = ','; |
1093 | 1093 | ||
1094 | if (*p == '[') { | 1094 | if (*p == '[') { |
1095 | delim = ']'; | 1095 | delim = ']'; |
1096 | p++; | 1096 | p++; |
1097 | } | 1097 | } |
1098 | 1098 | ||
1099 | memset(ss, 0, sizeof(*ss)); | 1099 | memset(ss, 0, sizeof(*ss)); |
1100 | if (in4_pton(p, end - p, (u8 *)&in4->sin_addr.s_addr, | 1100 | if (in4_pton(p, end - p, (u8 *)&in4->sin_addr.s_addr, |
1101 | delim, &ipend)) | 1101 | delim, &ipend)) |
1102 | ss->ss_family = AF_INET; | 1102 | ss->ss_family = AF_INET; |
1103 | else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr, | 1103 | else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr, |
1104 | delim, &ipend)) | 1104 | delim, &ipend)) |
1105 | ss->ss_family = AF_INET6; | 1105 | ss->ss_family = AF_INET6; |
1106 | else | 1106 | else |
1107 | goto bad; | 1107 | goto bad; |
1108 | p = ipend; | 1108 | p = ipend; |
1109 | 1109 | ||
1110 | if (delim == ']') { | 1110 | if (delim == ']') { |
1111 | if (*p != ']') { | 1111 | if (*p != ']') { |
1112 | dout("missing matching ']'\n"); | 1112 | dout("missing matching ']'\n"); |
1113 | goto bad; | 1113 | goto bad; |
1114 | } | 1114 | } |
1115 | p++; | 1115 | p++; |
1116 | } | 1116 | } |
1117 | 1117 | ||
1118 | /* port? */ | 1118 | /* port? */ |
1119 | if (p < end && *p == ':') { | 1119 | if (p < end && *p == ':') { |
1120 | port = 0; | 1120 | port = 0; |
1121 | p++; | 1121 | p++; |
1122 | while (p < end && *p >= '0' && *p <= '9') { | 1122 | while (p < end && *p >= '0' && *p <= '9') { |
1123 | port = (port * 10) + (*p - '0'); | 1123 | port = (port * 10) + (*p - '0'); |
1124 | p++; | 1124 | p++; |
1125 | } | 1125 | } |
1126 | if (port > 65535 || port == 0) | 1126 | if (port > 65535 || port == 0) |
1127 | goto bad; | 1127 | goto bad; |
1128 | } else { | 1128 | } else { |
1129 | port = CEPH_MON_PORT; | 1129 | port = CEPH_MON_PORT; |
1130 | } | 1130 | } |
1131 | 1131 | ||
1132 | addr_set_port(ss, port); | 1132 | addr_set_port(ss, port); |
1133 | 1133 | ||
1134 | dout("parse_ips got %s\n", ceph_pr_addr(ss)); | 1134 | dout("parse_ips got %s\n", ceph_pr_addr(ss)); |
1135 | 1135 | ||
1136 | if (p == end) | 1136 | if (p == end) |
1137 | break; | 1137 | break; |
1138 | if (*p != ',') | 1138 | if (*p != ',') |
1139 | goto bad; | 1139 | goto bad; |
1140 | p++; | 1140 | p++; |
1141 | } | 1141 | } |
1142 | 1142 | ||
1143 | if (p != end) | 1143 | if (p != end) |
1144 | goto bad; | 1144 | goto bad; |
1145 | 1145 | ||
1146 | if (count) | 1146 | if (count) |
1147 | *count = i + 1; | 1147 | *count = i + 1; |
1148 | return 0; | 1148 | return 0; |
1149 | 1149 | ||
1150 | bad: | 1150 | bad: |
1151 | pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); | 1151 | pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); |
1152 | return -EINVAL; | 1152 | return -EINVAL; |
1153 | } | 1153 | } |
1154 | EXPORT_SYMBOL(ceph_parse_ips); | 1154 | EXPORT_SYMBOL(ceph_parse_ips); |
1155 | 1155 | ||
/*
 * Process the handshake data gathered by read_partial_banner():
 * verify the banner, sanity-check the peer's identity against who we
 * meant to connect to, learn our own externally visible address if we
 * don't know it yet, and move the connection into NEGOTIATING.
 *
 * Returns 0 on success, -1 on a protocol/identity error (the failing
 * check sets con->error_msg for the caller's fault path).
 */
static int process_banner(struct ceph_connection *con)
{
	dout("process_banner on %p\n", con);

	if (verify_hello(con) < 0)
		return -1;

	/* addresses arrive in wire format; convert them in place */
	ceph_decode_addr(&con->actual_peer_addr);
	ceph_decode_addr(&con->peer_addr_for_me);

	/*
	 * Make sure the other end is who we wanted.  note that the other
	 * end may not yet know their ip address, so if it's 0.0.0.0, give
	 * them the benefit of the doubt.  (The memcmp covers the whole
	 * entity_addr, nonce included, so the blank-address exception
	 * still requires a matching nonce.)
	 */
	if (memcmp(&con->peer_addr, &con->actual_peer_addr,
		   sizeof(con->peer_addr)) != 0 &&
	    !(addr_is_blank(&con->actual_peer_addr.in_addr) &&
	      con->actual_peer_addr.nonce == con->peer_addr.nonce)) {
		pr_warning("wrong peer, want %s/%d, got %s/%d\n",
			   ceph_pr_addr(&con->peer_addr.in_addr),
			   (int)le32_to_cpu(con->peer_addr.nonce),
			   ceph_pr_addr(&con->actual_peer_addr.in_addr),
			   (int)le32_to_cpu(con->actual_peer_addr.nonce));
		con->error_msg = "wrong peer at address";
		return -1;
	}

	/*
	 * did we learn our address?
	 */
	if (addr_is_blank(&con->msgr->inst.addr.in_addr)) {
		int port = addr_port(&con->msgr->inst.addr.in_addr);

		/* adopt the peer's view of our address, but keep our own
		 * port, then re-encode the wire form of our address */
		memcpy(&con->msgr->inst.addr.in_addr,
		       &con->peer_addr_for_me.in_addr,
		       sizeof(con->peer_addr_for_me.in_addr));
		addr_set_port(&con->msgr->inst.addr.in_addr, port);
		encode_my_addr(con->msgr);
		dout("process_banner learned my addr is %s\n",
		     ceph_pr_addr(&con->msgr->inst.addr.in_addr));
	}

	/* banner done; next step is the connect negotiation */
	set_bit(NEGOTIATING, &con->state);
	prepare_read_connect(con);
	return 0;
}
1203 | 1203 | ||
/*
 * Give up on this connection because of an unrecoverable protocol
 * mismatch with the peer: reset all connection state and mark the
 * connection CLOSED so any queued work bails out, then notify the
 * owner through the optional ->bad_proto callback.
 *
 * The callback runs with con->mutex dropped — presumably so it may
 * call back into the messenger without deadlocking (NOTE(review):
 * confirm against the ops implementations).  The mutex is retaken
 * before returning, as the caller holds it.
 */
static void fail_protocol(struct ceph_connection *con)
{
	reset_connection(con);
	set_bit(CLOSED, &con->state); /* in case there's queued work */

	mutex_unlock(&con->mutex);
	if (con->ops->bad_proto)
		con->ops->bad_proto(con);
	mutex_lock(&con->mutex);
}
1214 | 1214 | ||
/*
 * Handle the server's reply to our connect attempt, dispatching on
 * the reply tag.  Returns 0 to keep going (possibly after queueing a
 * retried connect), or -1 on failure with con->error_msg set.
 *
 * Failures come in two flavors:
 *  - permanent (feature/protocol mismatch): fail_protocol() marks the
 *    connection CLOSED so we stop trying this server instance;
 *  - transient (e.g. repeated authorization failure): we return -1
 *    WITHOUT closing, so the caller's fault path applies the normal
 *    backoff-and-reconnect behavior.
 */
static int process_connect(struct ceph_connection *con)
{
	u64 sup_feat = con->msgr->supported_features;
	u64 req_feat = con->msgr->required_features;
	u64 server_feat = le64_to_cpu(con->in_reply.features);

	dout("process_connect on %p tag %d\n", con, (int)con->in_tag);

	switch (con->in_reply.tag) {
	case CEPH_MSGR_TAG_FEATURES:
		/* server requires features we don't support: permanent */
		pr_err("%s%lld %s feature set mismatch,"
		       " my %llx < server's %llx, missing %llx\n",
		       ENTITY_NAME(con->peer_name),
		       ceph_pr_addr(&con->peer_addr.in_addr),
		       sup_feat, server_feat, server_feat & ~sup_feat);
		con->error_msg = "missing required protocol features";
		fail_protocol(con);
		return -1;

	case CEPH_MSGR_TAG_BADPROTOVER:
		/* wire protocol version mismatch: permanent until the
		 * server restarts (at which point we get a new addr) */
		pr_err("%s%lld %s protocol version mismatch,"
		       " my %d != server's %d\n",
		       ENTITY_NAME(con->peer_name),
		       ceph_pr_addr(&con->peer_addr.in_addr),
		       le32_to_cpu(con->out_connect.protocol_version),
		       le32_to_cpu(con->in_reply.protocol_version));
		con->error_msg = "protocol version mismatch";
		fail_protocol(con);
		return -1;

	case CEPH_MSGR_TAG_BADAUTHORIZER:
		con->auth_retry++;
		dout("process_connect %p got BADAUTHORIZER attempt %d\n", con,
		     con->auth_retry);
		if (con->auth_retry == 2) {
			/* second failure in a row: give up on this attempt,
			 * but do NOT close the connection — auth failures
			 * (e.g. the server mishandling key rotation) are
			 * transient, so let the normal backoff/retry kick in */
			con->error_msg = "connect authorization failure";
			return -1;
		}
		/* retry once with a fresh authorizer */
		con->auth_retry = 1;
		prepare_write_connect(con->msgr, con, 0);
		prepare_read_connect(con);
		break;

	case CEPH_MSGR_TAG_RESETSESSION:
		/*
		 * If we connected with a large connect_seq but the peer
		 * has no record of a session with us (no connection, or
		 * connect_seq == 0), they will send RESETSESION to indicate
		 * that they must have reset their session, and may have
		 * dropped messages.
		 */
		dout("process_connect got RESET peer seq %u\n",
		     le32_to_cpu(con->in_connect.connect_seq));
		pr_err("%s%lld %s connection reset\n",
		       ENTITY_NAME(con->peer_name),
		       ceph_pr_addr(&con->peer_addr.in_addr));
		reset_connection(con);
		prepare_write_connect(con->msgr, con, 0);
		prepare_read_connect(con);

		/* Tell ceph about it. */
		mutex_unlock(&con->mutex);
		pr_info("reset on %s%lld\n", ENTITY_NAME(con->peer_name));
		if (con->ops->peer_reset)
			con->ops->peer_reset(con);
		mutex_lock(&con->mutex);
		break;

	case CEPH_MSGR_TAG_RETRY_SESSION:
		/*
		 * If we sent a smaller connect_seq than the peer has, try
		 * again with a larger value.
		 */
		dout("process_connect got RETRY my seq = %u, peer_seq = %u\n",
		     le32_to_cpu(con->out_connect.connect_seq),
		     le32_to_cpu(con->in_connect.connect_seq));
		con->connect_seq = le32_to_cpu(con->in_connect.connect_seq);
		prepare_write_connect(con->msgr, con, 0);
		prepare_read_connect(con);
		break;

	case CEPH_MSGR_TAG_RETRY_GLOBAL:
		/*
		 * If we sent a smaller global_seq than the peer has, try
		 * again with a larger value.
		 */
		dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n",
		     con->peer_global_seq,
		     le32_to_cpu(con->in_connect.global_seq));
		get_global_seq(con->msgr,
			       le32_to_cpu(con->in_connect.global_seq));
		prepare_write_connect(con->msgr, con, 0);
		prepare_read_connect(con);
		break;

	case CEPH_MSGR_TAG_READY:
		/* handshake accepted — but first re-check that the server
		 * offers every feature we strictly require */
		if (req_feat & ~server_feat) {
			pr_err("%s%lld %s protocol feature mismatch,"
			       " my required %llx > server's %llx, need %llx\n",
			       ENTITY_NAME(con->peer_name),
			       ceph_pr_addr(&con->peer_addr.in_addr),
			       req_feat, server_feat, req_feat & ~server_feat);
			con->error_msg = "missing required protocol features";
			fail_protocol(con);
			return -1;
		}
		clear_bit(CONNECTING, &con->state);
		con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
		con->connect_seq++;
		con->peer_features = server_feat;
		dout("process_connect got READY gseq %d cseq %d (%d)\n",
		     con->peer_global_seq,
		     le32_to_cpu(con->in_reply.connect_seq),
		     con->connect_seq);
		WARN_ON(con->connect_seq !=
			le32_to_cpu(con->in_reply.connect_seq));

		if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
			set_bit(LOSSYTX, &con->state);

		prepare_read_tag(con);
		break;

	case CEPH_MSGR_TAG_WAIT:
		/*
		 * If there is a connection race (we are opening
		 * connections to each other), one of us may just have
		 * to WAIT.  This shouldn't happen if we are the
		 * client.
		 */
		pr_err("process_connect peer connecting WAIT\n");
		/* fall through — treated like any other protocol error */

	default:
		pr_err("connect protocol error, will retry\n");
		con->error_msg = "protocol error, garbage tag during connect";
		return -1;
	}
	return 0;
}
1356 | 1354 | ||
1357 | 1355 | ||
1358 | /* | 1356 | /* |
1359 | * read (part of) an ack | 1357 | * read (part of) an ack |
1360 | */ | 1358 | */ |
1361 | static int read_partial_ack(struct ceph_connection *con) | 1359 | static int read_partial_ack(struct ceph_connection *con) |
1362 | { | 1360 | { |
1363 | int to = 0; | 1361 | int to = 0; |
1364 | 1362 | ||
1365 | return read_partial(con, &to, sizeof(con->in_temp_ack), | 1363 | return read_partial(con, &to, sizeof(con->in_temp_ack), |
1366 | &con->in_temp_ack); | 1364 | &con->in_temp_ack); |
1367 | } | 1365 | } |
1368 | 1366 | ||
1369 | 1367 | ||
1370 | /* | 1368 | /* |
1371 | * We can finally discard anything that's been acked. | 1369 | * We can finally discard anything that's been acked. |
1372 | */ | 1370 | */ |
1373 | static void process_ack(struct ceph_connection *con) | 1371 | static void process_ack(struct ceph_connection *con) |
1374 | { | 1372 | { |
1375 | struct ceph_msg *m; | 1373 | struct ceph_msg *m; |
1376 | u64 ack = le64_to_cpu(con->in_temp_ack); | 1374 | u64 ack = le64_to_cpu(con->in_temp_ack); |
1377 | u64 seq; | 1375 | u64 seq; |
1378 | 1376 | ||
1379 | while (!list_empty(&con->out_sent)) { | 1377 | while (!list_empty(&con->out_sent)) { |
1380 | m = list_first_entry(&con->out_sent, struct ceph_msg, | 1378 | m = list_first_entry(&con->out_sent, struct ceph_msg, |
1381 | list_head); | 1379 | list_head); |
1382 | seq = le64_to_cpu(m->hdr.seq); | 1380 | seq = le64_to_cpu(m->hdr.seq); |
1383 | if (seq > ack) | 1381 | if (seq > ack) |
1384 | break; | 1382 | break; |
1385 | dout("got ack for seq %llu type %d at %p\n", seq, | 1383 | dout("got ack for seq %llu type %d at %p\n", seq, |
1386 | le16_to_cpu(m->hdr.type), m); | 1384 | le16_to_cpu(m->hdr.type), m); |
1387 | ceph_msg_remove(m); | 1385 | ceph_msg_remove(m); |
1388 | } | 1386 | } |
1389 | prepare_read_tag(con); | 1387 | prepare_read_tag(con); |
1390 | } | 1388 | } |
1391 | 1389 | ||
1392 | 1390 | ||
1393 | 1391 | ||
1394 | 1392 | ||
/*
 * Read (the remainder of) one message section of up to @sec_len bytes
 * into @section, using section->iov_len as the running progress
 * cursor.  Once the section completes, its crc32c is stored in @crc.
 *
 * Returns 1 when the section is complete; otherwise propagates the
 * <= 0 result of ceph_tcp_recvmsg() (0 = try again later, negative =
 * socket error).
 */
static int read_partial_message_section(struct ceph_connection *con,
					struct kvec *section,
					unsigned int sec_len, u32 *crc)
{
	int ret, left;

	BUG_ON(!section);

	while (section->iov_len < sec_len) {
		BUG_ON(section->iov_base == NULL);
		left = sec_len - section->iov_len;
		ret = ceph_tcp_recvmsg(con->sock, (char *)section->iov_base +
				       section->iov_len, left);
		if (ret <= 0)
			return ret;
		section->iov_len += ret;
		/* checksum the whole section once it has fully arrived */
		if (section->iov_len == sec_len)
			*crc = crc32c(0, section->iov_base,
				      section->iov_len);
	}

	return 1;
}
1418 | 1416 | ||
1419 | static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, | 1417 | static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, |
1420 | struct ceph_msg_header *hdr, | 1418 | struct ceph_msg_header *hdr, |
1421 | int *skip); | 1419 | int *skip); |
1422 | 1420 | ||
1423 | 1421 | ||
/*
 * Read the next chunk of message payload into the page array, at the
 * page/offset tracked in con->in_msg_pos.  At most one page's worth
 * (and no more than the remaining @data_len) is requested per call.
 * When @datacrc is non-zero, received bytes are folded into the
 * running con->in_data_crc.
 *
 * Returns the (positive) number of bytes consumed, or the <= 0
 * result of ceph_tcp_recvmsg() on EAGAIN/error.
 */
static int read_partial_message_pages(struct ceph_connection *con,
				      struct page **pages,
				      unsigned data_len, int datacrc)
{
	void *p;
	int ret;
	int left;

	/* cap the read at the end of the data or of the current page,
	 * whichever comes first */
	left = min((int)(data_len - con->in_msg_pos.data_pos),
		   (int)(PAGE_SIZE - con->in_msg_pos.page_pos));
	/* (page) data */
	BUG_ON(pages == NULL);
	p = kmap(pages[con->in_msg_pos.page]);
	ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos,
			       left);
	if (ret > 0 && datacrc)
		con->in_data_crc =
			crc32c(con->in_data_crc,
			       p + con->in_msg_pos.page_pos, ret);
	/* unmap before the error check so the kmap is always balanced */
	kunmap(pages[con->in_msg_pos.page]);
	if (ret <= 0)
		return ret;
	con->in_msg_pos.data_pos += ret;
	con->in_msg_pos.page_pos += ret;
	/* advance to the next page once this one is full */
	if (con->in_msg_pos.page_pos == PAGE_SIZE) {
		con->in_msg_pos.page_pos = 0;
		con->in_msg_pos.page++;
	}

	return ret;
}
1455 | 1453 | ||
#ifdef CONFIG_BLOCK
/*
 * Like read_partial_message_pages(), but the payload destination is a
 * bio segment (*bio_iter / *bio_seg) instead of a page array.  At
 * most one bio_vec's worth (and no more than the remaining @data_len)
 * is requested per call; when @datacrc is non-zero, received bytes
 * are folded into con->in_data_crc.
 *
 * Returns the (positive) number of bytes consumed, the <= 0 result
 * of ceph_tcp_recvmsg(), or the error from an invalid bio_vec.
 */
static int read_partial_message_bio(struct ceph_connection *con,
				    struct bio **bio_iter, int *bio_seg,
				    unsigned data_len, int datacrc)
{
	struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg);
	void *p;
	int ret, left;

	if (IS_ERR(bv))
		return PTR_ERR(bv);

	/* cap the read at the end of the data or of this bio segment */
	left = min((int)(data_len - con->in_msg_pos.data_pos),
		   (int)(bv->bv_len - con->in_msg_pos.page_pos));

	p = kmap(bv->bv_page) + bv->bv_offset;

	ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos,
			       left);
	if (ret > 0 && datacrc)
		con->in_data_crc =
			crc32c(con->in_data_crc,
			       p + con->in_msg_pos.page_pos, ret);
	/* unmap before the error check so the kmap is always balanced */
	kunmap(bv->bv_page);
	if (ret <= 0)
		return ret;
	con->in_msg_pos.data_pos += ret;
	con->in_msg_pos.page_pos += ret;
	/* advance the bio iterator once this segment is consumed */
	if (con->in_msg_pos.page_pos == bv->bv_len) {
		con->in_msg_pos.page_pos = 0;
		iter_bio_next(bio_iter, bio_seg);
	}

	return ret;
}
#endif
1492 | 1490 | ||
1493 | /* | 1491 | /* |
1494 | * read (part of) a message. | 1492 | * read (part of) a message. |
1495 | */ | 1493 | */ |
1496 | static int read_partial_message(struct ceph_connection *con) | 1494 | static int read_partial_message(struct ceph_connection *con) |
1497 | { | 1495 | { |
1498 | struct ceph_msg *m = con->in_msg; | 1496 | struct ceph_msg *m = con->in_msg; |
1499 | int ret; | 1497 | int ret; |
1500 | int to, left; | 1498 | int to, left; |
1501 | unsigned front_len, middle_len, data_len; | 1499 | unsigned front_len, middle_len, data_len; |
1502 | int datacrc = con->msgr->nocrc; | 1500 | int datacrc = con->msgr->nocrc; |
1503 | int skip; | 1501 | int skip; |
1504 | u64 seq; | 1502 | u64 seq; |
1505 | 1503 | ||
1506 | dout("read_partial_message con %p msg %p\n", con, m); | 1504 | dout("read_partial_message con %p msg %p\n", con, m); |
1507 | 1505 | ||
1508 | /* header */ | 1506 | /* header */ |
1509 | while (con->in_base_pos < sizeof(con->in_hdr)) { | 1507 | while (con->in_base_pos < sizeof(con->in_hdr)) { |
1510 | left = sizeof(con->in_hdr) - con->in_base_pos; | 1508 | left = sizeof(con->in_hdr) - con->in_base_pos; |
1511 | ret = ceph_tcp_recvmsg(con->sock, | 1509 | ret = ceph_tcp_recvmsg(con->sock, |
1512 | (char *)&con->in_hdr + con->in_base_pos, | 1510 | (char *)&con->in_hdr + con->in_base_pos, |
1513 | left); | 1511 | left); |
1514 | if (ret <= 0) | 1512 | if (ret <= 0) |
1515 | return ret; | 1513 | return ret; |
1516 | con->in_base_pos += ret; | 1514 | con->in_base_pos += ret; |
1517 | if (con->in_base_pos == sizeof(con->in_hdr)) { | 1515 | if (con->in_base_pos == sizeof(con->in_hdr)) { |
1518 | u32 crc = crc32c(0, (void *)&con->in_hdr, | 1516 | u32 crc = crc32c(0, (void *)&con->in_hdr, |
1519 | sizeof(con->in_hdr) - sizeof(con->in_hdr.crc)); | 1517 | sizeof(con->in_hdr) - sizeof(con->in_hdr.crc)); |
1520 | if (crc != le32_to_cpu(con->in_hdr.crc)) { | 1518 | if (crc != le32_to_cpu(con->in_hdr.crc)) { |
1521 | pr_err("read_partial_message bad hdr " | 1519 | pr_err("read_partial_message bad hdr " |
1522 | " crc %u != expected %u\n", | 1520 | " crc %u != expected %u\n", |
1523 | crc, con->in_hdr.crc); | 1521 | crc, con->in_hdr.crc); |
1524 | return -EBADMSG; | 1522 | return -EBADMSG; |
1525 | } | 1523 | } |
1526 | } | 1524 | } |
1527 | } | 1525 | } |
1528 | front_len = le32_to_cpu(con->in_hdr.front_len); | 1526 | front_len = le32_to_cpu(con->in_hdr.front_len); |
1529 | if (front_len > CEPH_MSG_MAX_FRONT_LEN) | 1527 | if (front_len > CEPH_MSG_MAX_FRONT_LEN) |
1530 | return -EIO; | 1528 | return -EIO; |
1531 | middle_len = le32_to_cpu(con->in_hdr.middle_len); | 1529 | middle_len = le32_to_cpu(con->in_hdr.middle_len); |
1532 | if (middle_len > CEPH_MSG_MAX_DATA_LEN) | 1530 | if (middle_len > CEPH_MSG_MAX_DATA_LEN) |
1533 | return -EIO; | 1531 | return -EIO; |
1534 | data_len = le32_to_cpu(con->in_hdr.data_len); | 1532 | data_len = le32_to_cpu(con->in_hdr.data_len); |
1535 | if (data_len > CEPH_MSG_MAX_DATA_LEN) | 1533 | if (data_len > CEPH_MSG_MAX_DATA_LEN) |
1536 | return -EIO; | 1534 | return -EIO; |
1537 | 1535 | ||
1538 | /* verify seq# */ | 1536 | /* verify seq# */ |
1539 | seq = le64_to_cpu(con->in_hdr.seq); | 1537 | seq = le64_to_cpu(con->in_hdr.seq); |
1540 | if ((s64)seq - (s64)con->in_seq < 1) { | 1538 | if ((s64)seq - (s64)con->in_seq < 1) { |
1541 | pr_info("skipping %s%lld %s seq %lld expected %lld\n", | 1539 | pr_info("skipping %s%lld %s seq %lld expected %lld\n", |
1542 | ENTITY_NAME(con->peer_name), | 1540 | ENTITY_NAME(con->peer_name), |
1543 | ceph_pr_addr(&con->peer_addr.in_addr), | 1541 | ceph_pr_addr(&con->peer_addr.in_addr), |
1544 | seq, con->in_seq + 1); | 1542 | seq, con->in_seq + 1); |
1545 | con->in_base_pos = -front_len - middle_len - data_len - | 1543 | con->in_base_pos = -front_len - middle_len - data_len - |
1546 | sizeof(m->footer); | 1544 | sizeof(m->footer); |
1547 | con->in_tag = CEPH_MSGR_TAG_READY; | 1545 | con->in_tag = CEPH_MSGR_TAG_READY; |
1548 | return 0; | 1546 | return 0; |
1549 | } else if ((s64)seq - (s64)con->in_seq > 1) { | 1547 | } else if ((s64)seq - (s64)con->in_seq > 1) { |
1550 | pr_err("read_partial_message bad seq %lld expected %lld\n", | 1548 | pr_err("read_partial_message bad seq %lld expected %lld\n", |
1551 | seq, con->in_seq + 1); | 1549 | seq, con->in_seq + 1); |
1552 | con->error_msg = "bad message sequence # for incoming message"; | 1550 | con->error_msg = "bad message sequence # for incoming message"; |
1553 | return -EBADMSG; | 1551 | return -EBADMSG; |
1554 | } | 1552 | } |
1555 | 1553 | ||
1556 | /* allocate message? */ | 1554 | /* allocate message? */ |
1557 | if (!con->in_msg) { | 1555 | if (!con->in_msg) { |
1558 | dout("got hdr type %d front %d data %d\n", con->in_hdr.type, | 1556 | dout("got hdr type %d front %d data %d\n", con->in_hdr.type, |
1559 | con->in_hdr.front_len, con->in_hdr.data_len); | 1557 | con->in_hdr.front_len, con->in_hdr.data_len); |
1560 | skip = 0; | 1558 | skip = 0; |
1561 | con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); | 1559 | con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); |
1562 | if (skip) { | 1560 | if (skip) { |
1563 | /* skip this message */ | 1561 | /* skip this message */ |
1564 | dout("alloc_msg said skip message\n"); | 1562 | dout("alloc_msg said skip message\n"); |
1565 | BUG_ON(con->in_msg); | 1563 | BUG_ON(con->in_msg); |
1566 | con->in_base_pos = -front_len - middle_len - data_len - | 1564 | con->in_base_pos = -front_len - middle_len - data_len - |
1567 | sizeof(m->footer); | 1565 | sizeof(m->footer); |
1568 | con->in_tag = CEPH_MSGR_TAG_READY; | 1566 | con->in_tag = CEPH_MSGR_TAG_READY; |
1569 | con->in_seq++; | 1567 | con->in_seq++; |
1570 | return 0; | 1568 | return 0; |
1571 | } | 1569 | } |
1572 | if (!con->in_msg) { | 1570 | if (!con->in_msg) { |
1573 | con->error_msg = | 1571 | con->error_msg = |
1574 | "error allocating memory for incoming message"; | 1572 | "error allocating memory for incoming message"; |
1575 | return -ENOMEM; | 1573 | return -ENOMEM; |
1576 | } | 1574 | } |
1577 | m = con->in_msg; | 1575 | m = con->in_msg; |
1578 | m->front.iov_len = 0; /* haven't read it yet */ | 1576 | m->front.iov_len = 0; /* haven't read it yet */ |
1579 | if (m->middle) | 1577 | if (m->middle) |
1580 | m->middle->vec.iov_len = 0; | 1578 | m->middle->vec.iov_len = 0; |
1581 | 1579 | ||
1582 | con->in_msg_pos.page = 0; | 1580 | con->in_msg_pos.page = 0; |
1583 | if (m->pages) | 1581 | if (m->pages) |
1584 | con->in_msg_pos.page_pos = m->page_alignment; | 1582 | con->in_msg_pos.page_pos = m->page_alignment; |
1585 | else | 1583 | else |
1586 | con->in_msg_pos.page_pos = 0; | 1584 | con->in_msg_pos.page_pos = 0; |
1587 | con->in_msg_pos.data_pos = 0; | 1585 | con->in_msg_pos.data_pos = 0; |
1588 | } | 1586 | } |
1589 | 1587 | ||
1590 | /* front */ | 1588 | /* front */ |
1591 | ret = read_partial_message_section(con, &m->front, front_len, | 1589 | ret = read_partial_message_section(con, &m->front, front_len, |
1592 | &con->in_front_crc); | 1590 | &con->in_front_crc); |
1593 | if (ret <= 0) | 1591 | if (ret <= 0) |
1594 | return ret; | 1592 | return ret; |
1595 | 1593 | ||
1596 | /* middle */ | 1594 | /* middle */ |
1597 | if (m->middle) { | 1595 | if (m->middle) { |
1598 | ret = read_partial_message_section(con, &m->middle->vec, | 1596 | ret = read_partial_message_section(con, &m->middle->vec, |
1599 | middle_len, | 1597 | middle_len, |
1600 | &con->in_middle_crc); | 1598 | &con->in_middle_crc); |
1601 | if (ret <= 0) | 1599 | if (ret <= 0) |
1602 | return ret; | 1600 | return ret; |
1603 | } | 1601 | } |
1604 | #ifdef CONFIG_BLOCK | 1602 | #ifdef CONFIG_BLOCK |
1605 | if (m->bio && !m->bio_iter) | 1603 | if (m->bio && !m->bio_iter) |
1606 | init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg); | 1604 | init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg); |
1607 | #endif | 1605 | #endif |
1608 | 1606 | ||
1609 | /* (page) data */ | 1607 | /* (page) data */ |
1610 | while (con->in_msg_pos.data_pos < data_len) { | 1608 | while (con->in_msg_pos.data_pos < data_len) { |
1611 | if (m->pages) { | 1609 | if (m->pages) { |
1612 | ret = read_partial_message_pages(con, m->pages, | 1610 | ret = read_partial_message_pages(con, m->pages, |
1613 | data_len, datacrc); | 1611 | data_len, datacrc); |
1614 | if (ret <= 0) | 1612 | if (ret <= 0) |
1615 | return ret; | 1613 | return ret; |
1616 | #ifdef CONFIG_BLOCK | 1614 | #ifdef CONFIG_BLOCK |
1617 | } else if (m->bio) { | 1615 | } else if (m->bio) { |
1618 | 1616 | ||
1619 | ret = read_partial_message_bio(con, | 1617 | ret = read_partial_message_bio(con, |
1620 | &m->bio_iter, &m->bio_seg, | 1618 | &m->bio_iter, &m->bio_seg, |
1621 | data_len, datacrc); | 1619 | data_len, datacrc); |
1622 | if (ret <= 0) | 1620 | if (ret <= 0) |
1623 | return ret; | 1621 | return ret; |
1624 | #endif | 1622 | #endif |
1625 | } else { | 1623 | } else { |
1626 | BUG_ON(1); | 1624 | BUG_ON(1); |
1627 | } | 1625 | } |
1628 | } | 1626 | } |
1629 | 1627 | ||
1630 | /* footer */ | 1628 | /* footer */ |
1631 | to = sizeof(m->hdr) + sizeof(m->footer); | 1629 | to = sizeof(m->hdr) + sizeof(m->footer); |
1632 | while (con->in_base_pos < to) { | 1630 | while (con->in_base_pos < to) { |
1633 | left = to - con->in_base_pos; | 1631 | left = to - con->in_base_pos; |
1634 | ret = ceph_tcp_recvmsg(con->sock, (char *)&m->footer + | 1632 | ret = ceph_tcp_recvmsg(con->sock, (char *)&m->footer + |
1635 | (con->in_base_pos - sizeof(m->hdr)), | 1633 | (con->in_base_pos - sizeof(m->hdr)), |
1636 | left); | 1634 | left); |
1637 | if (ret <= 0) | 1635 | if (ret <= 0) |
1638 | return ret; | 1636 | return ret; |
1639 | con->in_base_pos += ret; | 1637 | con->in_base_pos += ret; |
1640 | } | 1638 | } |
1641 | dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n", | 1639 | dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n", |
1642 | m, front_len, m->footer.front_crc, middle_len, | 1640 | m, front_len, m->footer.front_crc, middle_len, |
1643 | m->footer.middle_crc, data_len, m->footer.data_crc); | 1641 | m->footer.middle_crc, data_len, m->footer.data_crc); |
1644 | 1642 | ||
1645 | /* crc ok? */ | 1643 | /* crc ok? */ |
1646 | if (con->in_front_crc != le32_to_cpu(m->footer.front_crc)) { | 1644 | if (con->in_front_crc != le32_to_cpu(m->footer.front_crc)) { |
1647 | pr_err("read_partial_message %p front crc %u != exp. %u\n", | 1645 | pr_err("read_partial_message %p front crc %u != exp. %u\n", |
1648 | m, con->in_front_crc, m->footer.front_crc); | 1646 | m, con->in_front_crc, m->footer.front_crc); |
1649 | return -EBADMSG; | 1647 | return -EBADMSG; |
1650 | } | 1648 | } |
1651 | if (con->in_middle_crc != le32_to_cpu(m->footer.middle_crc)) { | 1649 | if (con->in_middle_crc != le32_to_cpu(m->footer.middle_crc)) { |
1652 | pr_err("read_partial_message %p middle crc %u != exp %u\n", | 1650 | pr_err("read_partial_message %p middle crc %u != exp %u\n", |
1653 | m, con->in_middle_crc, m->footer.middle_crc); | 1651 | m, con->in_middle_crc, m->footer.middle_crc); |
1654 | return -EBADMSG; | 1652 | return -EBADMSG; |
1655 | } | 1653 | } |
1656 | if (datacrc && | 1654 | if (datacrc && |
1657 | (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 && | 1655 | (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 && |
1658 | con->in_data_crc != le32_to_cpu(m->footer.data_crc)) { | 1656 | con->in_data_crc != le32_to_cpu(m->footer.data_crc)) { |
1659 | pr_err("read_partial_message %p data crc %u != exp. %u\n", m, | 1657 | pr_err("read_partial_message %p data crc %u != exp. %u\n", m, |
1660 | con->in_data_crc, le32_to_cpu(m->footer.data_crc)); | 1658 | con->in_data_crc, le32_to_cpu(m->footer.data_crc)); |
1661 | return -EBADMSG; | 1659 | return -EBADMSG; |
1662 | } | 1660 | } |
1663 | 1661 | ||
1664 | return 1; /* done! */ | 1662 | return 1; /* done! */ |
1665 | } | 1663 | } |
1666 | 1664 | ||
/*
 * Process message.  This happens in the worker thread.  The callback should
 * be careful not to do anything that waits on other incoming messages or it
 * may deadlock.
 *
 * Called with con->mutex held; the mutex is dropped across the dispatch
 * callback and reacquired afterwards.
 */
static void process_message(struct ceph_connection *con)
{
	struct ceph_msg *msg;

	/* detach the message from the connection; it is handed to dispatch */
	msg = con->in_msg;
	con->in_msg = NULL;

	/* if first message, set peer_name */
	if (con->peer_name.type == 0)
		con->peer_name = msg->hdr.src;

	con->in_seq++;

	/*
	 * Drop con->mutex while calling into the dispatch callback so
	 * the handler can queue replies on this connection without
	 * deadlocking (see comment above).
	 */
	mutex_unlock(&con->mutex);

	dout("===== %p %llu from %s%lld %d=%s len %d+%d (%u %u %u) =====\n",
	     msg, le64_to_cpu(msg->hdr.seq),
	     ENTITY_NAME(msg->hdr.src),
	     le16_to_cpu(msg->hdr.type),
	     ceph_msg_type_name(le16_to_cpu(msg->hdr.type)),
	     le32_to_cpu(msg->hdr.front_len),
	     le32_to_cpu(msg->hdr.data_len),
	     con->in_front_crc, con->in_middle_crc, con->in_data_crc);
	con->ops->dispatch(con, msg);

	mutex_lock(&con->mutex);
	/* set up to read the next tag from the wire */
	prepare_read_tag(con);
}
1699 | 1697 | ||
1700 | 1698 | ||
/*
 * Write something to the socket.  Called in a worker thread when the
 * socket appears to be writeable and we have something ready to send.
 *
 * Opens the socket (initiating the banner/connect handshake) if it is
 * not open yet, then drains queued kvecs and message pages, queueing
 * more work (messages, acks, keepalives) as each item completes.
 *
 * Returns 0 when there is nothing more to write (or the socket would
 * block), negative on error (con->error_msg is set for the fault path).
 */
static int try_write(struct ceph_connection *con)
{
	struct ceph_messenger *msgr = con->msgr;
	int ret = 1;

	dout("try_write start %p state %lu nref %d\n", con, con->state,
	     atomic_read(&con->nref));

more:
	dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);

	/* open the socket first? */
	if (con->sock == NULL) {
		/*
		 * if we were STANDBY and are reconnecting _this_
		 * connection, bump connect_seq now.  Always bump
		 * global_seq.
		 */
		if (test_and_clear_bit(STANDBY, &con->state))
			con->connect_seq++;

		/* queue the outgoing banner + connect negotiation */
		prepare_write_banner(msgr, con);
		prepare_write_connect(msgr, con, 1);
		prepare_read_banner(con);
		set_bit(CONNECTING, &con->state);
		clear_bit(NEGOTIATING, &con->state);

		BUG_ON(con->in_msg);
		con->in_tag = CEPH_MSGR_TAG_READY;
		dout("try_write initiating connect on %p new state %lu\n",
		     con, con->state);
		con->sock = ceph_tcp_connect(con);
		if (IS_ERR(con->sock)) {
			con->sock = NULL;
			con->error_msg = "connect error";
			ret = -1;
			goto out;
		}
	}

more_kvec:
	/* kvec data queued? */
	if (con->out_skip) {
		ret = write_partial_skip(con);
		if (ret <= 0)
			goto out;
	}
	if (con->out_kvec_left) {
		ret = write_partial_kvec(con);
		if (ret <= 0)
			goto out;
	}

	/* msg pages? */
	if (con->out_msg) {
		if (con->out_msg_done) {
			ceph_msg_put(con->out_msg);
			con->out_msg = NULL;   /* we're done with this one */
			goto do_next;
		}

		ret = write_partial_msg_pages(con);
		if (ret == 1)
			goto more_kvec;  /* we need to send the footer, too! */
		if (ret == 0)
			goto out;
		if (ret < 0) {
			dout("try_write write_partial_msg_pages err %d\n",
			     ret);
			goto out;
		}
	}

do_next:
	/* don't queue message data until the handshake has completed */
	if (!test_bit(CONNECTING, &con->state)) {
		/* is anything else pending? */
		if (!list_empty(&con->out_queue)) {
			prepare_write_message(con);
			goto more;
		}
		if (con->in_seq > con->in_seq_acked) {
			prepare_write_ack(con);
			goto more;
		}
		if (test_and_clear_bit(KEEPALIVE_PENDING, &con->state)) {
			prepare_write_keepalive(con);
			goto more;
		}
	}

	/* Nothing to do! */
	clear_bit(WRITE_PENDING, &con->state);
	dout("try_write nothing else to write.\n");
	ret = 0;
out:
	dout("try_write done on %p ret %d\n", con, ret);
	return ret;
}
1803 | 1801 | ||
1804 | 1802 | ||
1805 | 1803 | ||
/*
 * Read what we can from the socket.
 *
 * Drives the incoming side of the connection state machine: handshake
 * (banner + connect negotiation) while CONNECTING, then tag-dispatched
 * reads of messages and acks.  Returns a negative value on error (with
 * con->error_msg set for the fault path), otherwise <= 0 when no more
 * data is available right now.
 */
static int try_read(struct ceph_connection *con)
{
	int ret = -1;

	if (!con->sock)
		return 0;

	if (test_bit(STANDBY, &con->state))
		return 0;

	dout("try_read start on %p\n", con);

more:
	dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
	     con->in_base_pos);
	if (test_bit(CONNECTING, &con->state)) {
		/* NEGOTIATING is set once the banner has been processed */
		if (!test_bit(NEGOTIATING, &con->state)) {
			dout("try_read connecting\n");
			ret = read_partial_banner(con);
			if (ret <= 0)
				goto out;
			ret = process_banner(con);
			if (ret < 0)
				goto out;
		}
		ret = read_partial_connect(con);
		if (ret <= 0)
			goto out;
		ret = process_connect(con);
		if (ret < 0)
			goto out;
		goto more;
	}

	/* negative in_base_pos == that many bytes still to be discarded */
	if (con->in_base_pos < 0) {
		/*
		 * skipping + discarding content.
		 *
		 * FIXME: there must be a better way to do this!
		 */
		/*
		 * NOTE(review): buf is static and shared; assumed safe
		 * only because its contents are thrown away — confirm
		 * msgr work for different connections cannot corrupt it.
		 */
		static char buf[1024];
		int skip = min(1024, -con->in_base_pos);
		dout("skipping %d / %d bytes\n", skip, -con->in_base_pos);
		ret = ceph_tcp_recvmsg(con->sock, buf, skip);
		if (ret <= 0)
			goto out;
		con->in_base_pos += ret;
		if (con->in_base_pos)
			goto more;
	}
	if (con->in_tag == CEPH_MSGR_TAG_READY) {
		/*
		 * what's next?
		 */
		ret = ceph_tcp_recvmsg(con->sock, &con->in_tag, 1);
		if (ret <= 0)
			goto out;
		dout("try_read got tag %d\n", (int)con->in_tag);
		switch (con->in_tag) {
		case CEPH_MSGR_TAG_MSG:
			prepare_read_message(con);
			break;
		case CEPH_MSGR_TAG_ACK:
			prepare_read_ack(con);
			break;
		case CEPH_MSGR_TAG_CLOSE:
			set_bit(CLOSED, &con->state);   /* fixme */
			goto out;
		default:
			goto bad_tag;
		}
	}
	if (con->in_tag == CEPH_MSGR_TAG_MSG) {
		ret = read_partial_message(con);
		if (ret <= 0) {
			/* map protocol failures onto a fault message */
			switch (ret) {
			case -EBADMSG:
				con->error_msg = "bad crc";
				ret = -EIO;
				break;
			case -EIO:
				con->error_msg = "io error";
				break;
			}
			goto out;
		}
		/* tag flipped back to READY: message was skipped/discarded */
		if (con->in_tag == CEPH_MSGR_TAG_READY)
			goto more;
		process_message(con);
		goto more;
	}
	if (con->in_tag == CEPH_MSGR_TAG_ACK) {
		ret = read_partial_ack(con);
		if (ret <= 0)
			goto out;
		process_ack(con);
		goto more;
	}

out:
	dout("try_read done on %p ret %d\n", con, ret);
	return ret;

bad_tag:
	pr_err("try_read bad con->in_tag = %d\n", (int)con->in_tag);
	con->error_msg = "protocol error, garbage tag";
	ret = -1;
	goto out;
}
1918 | 1916 | ||
1919 | 1917 | ||
1920 | /* | 1918 | /* |
1921 | * Atomically queue work on a connection. Bump @con reference to | 1919 | * Atomically queue work on a connection. Bump @con reference to |
1922 | * avoid races with connection teardown. | 1920 | * avoid races with connection teardown. |
1923 | */ | 1921 | */ |
1924 | static void queue_con(struct ceph_connection *con) | 1922 | static void queue_con(struct ceph_connection *con) |
1925 | { | 1923 | { |
1926 | if (test_bit(DEAD, &con->state)) { | 1924 | if (test_bit(DEAD, &con->state)) { |
1927 | dout("queue_con %p ignoring: DEAD\n", | 1925 | dout("queue_con %p ignoring: DEAD\n", |
1928 | con); | 1926 | con); |
1929 | return; | 1927 | return; |
1930 | } | 1928 | } |
1931 | 1929 | ||
1932 | if (!con->ops->get(con)) { | 1930 | if (!con->ops->get(con)) { |
1933 | dout("queue_con %p ref count 0\n", con); | 1931 | dout("queue_con %p ref count 0\n", con); |
1934 | return; | 1932 | return; |
1935 | } | 1933 | } |
1936 | 1934 | ||
1937 | if (!queue_delayed_work(ceph_msgr_wq, &con->work, 0)) { | 1935 | if (!queue_delayed_work(ceph_msgr_wq, &con->work, 0)) { |
1938 | dout("queue_con %p - already queued\n", con); | 1936 | dout("queue_con %p - already queued\n", con); |
1939 | con->ops->put(con); | 1937 | con->ops->put(con); |
1940 | } else { | 1938 | } else { |
1941 | dout("queue_con %p\n", con); | 1939 | dout("queue_con %p\n", con); |
1942 | } | 1940 | } |
1943 | } | 1941 | } |
1944 | 1942 | ||
/*
 * Do some work on a connection.  Drop a connection ref when we're done.
 *
 * Workqueue entry point: the reference being dropped here is the one
 * taken by queue_con() (or by the delayed requeue in the fault path).
 */
static void con_work(struct work_struct *work)
{
	struct ceph_connection *con = container_of(work, struct ceph_connection,
						   work.work);

	mutex_lock(&con->mutex);

	if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
		dout("con_work CLOSED\n");
		con_close_socket(con);
		goto done;
	}
	if (test_and_clear_bit(OPENING, &con->state)) {
		/* reopen w/ new peer */
		dout("con_work OPENING\n");
		con_close_socket(con);
	}

	/* any of: socket closed under us, read error, write error => fault */
	if (test_and_clear_bit(SOCK_CLOSED, &con->state) ||
	    try_read(con) < 0 ||
	    try_write(con) < 0) {
		/* ceph_fault() takes con->mutex itself, so drop it first */
		mutex_unlock(&con->mutex);
		ceph_fault(con);     /* error/fault path */
		goto done_unlocked;
	}

done:
	mutex_unlock(&con->mutex);
done_unlocked:
	con->ops->put(con);
}
1979 | 1977 | ||
1980 | 1978 | ||
/*
 * Generic error/fault handler.  A retry mechanism is used with
 * exponential backoff
 */
static void ceph_fault(struct ceph_connection *con)
{
	pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
	       ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
	dout("fault %p state %lu to peer %s\n",
	     con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));

	/* Lossy channels are never retried; skip all the requeue work. */
	if (test_bit(LOSSYTX, &con->state)) {
		dout("fault on LOSSYTX channel\n");
		goto out;
	}

	mutex_lock(&con->mutex);
	/* Connection was closed while we faulted: nothing left to do. */
	if (test_bit(CLOSED, &con->state))
		goto out_unlock;

	con_close_socket(con);

	/* Drop any partially received incoming message. */
	if (con->in_msg) {
		ceph_msg_put(con->in_msg);
		con->in_msg = NULL;
	}

	/* Requeue anything that hasn't been acked */
	list_splice_init(&con->out_sent, &con->out_queue);

	/* If there are no messages in the queue, place the connection
	 * in a STANDBY state (i.e., don't try to reconnect just yet). */
	if (list_empty(&con->out_queue) && !con->out_keepalive_pending) {
		dout("fault setting STANDBY\n");
		set_bit(STANDBY, &con->state);
	} else {
		/* retry after a delay. */
		if (con->delay == 0)
			con->delay = BASE_DELAY_INTERVAL;
		else if (con->delay < MAX_DELAY_INTERVAL)
			con->delay *= 2;
		/* take a ref for the queued work; drop it again if the
		 * work was already pending and did not get queued */
		dout("fault queueing %p delay %lu\n", con, con->delay);
		con->ops->get(con);
		if (queue_delayed_work(ceph_msgr_wq, &con->work,
				       round_jiffies_relative(con->delay)) == 0)
			con->ops->put(con);
	}

out_unlock:
	mutex_unlock(&con->mutex);
out:
	/*
	 * in case we faulted due to authentication, invalidate our
	 * current tickets so that we can get new ones.
	 */
	if (con->auth_retry && con->ops->invalidate_authorizer) {
		dout("calling invalidate_authorizer()\n");
		con->ops->invalidate_authorizer(con);
	}

	/* Let the higher layer (mon/osd/mds client) react to the fault. */
	if (con->ops->fault)
		con->ops->fault(con);
}
2044 | 2042 | ||
2045 | 2043 | ||
2046 | 2044 | ||
/*
 * create a new messenger instance
 *
 * @myaddr: optional fixed local address; copied into the instance when
 *          non-NULL
 * @supported_features: feature bits this client supports
 * @required_features: feature bits the peer must supply
 *
 * Returns the messenger, or an ERR_PTR on allocation failure.
 */
struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
					     u32 supported_features,
					     u32 required_features)
{
	struct ceph_messenger *msgr;

	msgr = kzalloc(sizeof(*msgr), GFP_KERNEL);
	if (msgr == NULL)
		return ERR_PTR(-ENOMEM);

	msgr->supported_features = supported_features;
	msgr->required_features = required_features;

	spin_lock_init(&msgr->global_seq_lock);

	/* the zero page is needed if a request is "canceled" while the message
	 * is being written over the socket */
	msgr->zero_page = __page_cache_alloc(GFP_KERNEL | __GFP_ZERO);
	if (!msgr->zero_page) {
		kfree(msgr);
		return ERR_PTR(-ENOMEM);
	}
	/* keep the page mapped for the messenger's lifetime; unmapped in
	 * ceph_messenger_destroy() */
	kmap(msgr->zero_page);

	if (myaddr)
		msgr->inst.addr = *myaddr;

	/* select a random nonce */
	msgr->inst.addr.type = 0;
	get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
	encode_my_addr(msgr);

	dout("messenger_create %p\n", msgr);
	return msgr;
}
EXPORT_SYMBOL(ceph_messenger_create);
2086 | 2084 | ||
/*
 * Tear down a messenger created by ceph_messenger_create(): unmap and
 * free the shared zero page, then free the messenger itself.
 */
void ceph_messenger_destroy(struct ceph_messenger *msgr)
{
	dout("destroy %p\n", msgr);
	kunmap(msgr->zero_page);	/* pairs with kmap() in create */
	__free_page(msgr->zero_page);
	kfree(msgr);
	dout("destroyed messenger %p\n", msgr);
}
EXPORT_SYMBOL(ceph_messenger_destroy);
2096 | 2094 | ||
/*
 * Queue up an outgoing message on the given connection.
 *
 * Takes ownership of the caller's reference to @msg: if the connection
 * is CLOSED the message is dropped (and put) immediately; otherwise the
 * out_queue holds the reference until the message is sent or revoked.
 */
void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
{
	if (test_bit(CLOSED, &con->state)) {
		dout("con_send %p closed, dropping %p\n", con, msg);
		ceph_msg_put(msg);
		return;
	}

	/* set src+dst */
	msg->hdr.src = con->msgr->inst.name;

	BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));

	/* the sequence number is assigned later, at send time */
	msg->needs_out_seq = true;

	/* queue */
	mutex_lock(&con->mutex);
	BUG_ON(!list_empty(&msg->list_head));
	list_add_tail(&msg->list_head, &con->out_queue);
	dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg,
	     ENTITY_NAME(con->peer_name), le16_to_cpu(msg->hdr.type),
	     ceph_msg_type_name(le16_to_cpu(msg->hdr.type)),
	     le32_to_cpu(msg->hdr.front_len),
	     le32_to_cpu(msg->hdr.middle_len),
	     le32_to_cpu(msg->hdr.data_len));
	mutex_unlock(&con->mutex);

	/* if there wasn't anything waiting to send before, queue
	 * new work */
	if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
		queue_con(con);
}
EXPORT_SYMBOL(ceph_con_send);
2133 | 2131 | ||
2134 | /* | 2132 | /* |
2135 | * Revoke a message that was previously queued for send | 2133 | * Revoke a message that was previously queued for send |
2136 | */ | 2134 | */ |
2137 | void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg) | 2135 | void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg) |
2138 | { | 2136 | { |
2139 | mutex_lock(&con->mutex); | 2137 | mutex_lock(&con->mutex); |
2140 | if (!list_empty(&msg->list_head)) { | 2138 | if (!list_empty(&msg->list_head)) { |
2141 | dout("con_revoke %p msg %p - was on queue\n", con, msg); | 2139 | dout("con_revoke %p msg %p - was on queue\n", con, msg); |
2142 | list_del_init(&msg->list_head); | 2140 | list_del_init(&msg->list_head); |
2143 | ceph_msg_put(msg); | 2141 | ceph_msg_put(msg); |
2144 | msg->hdr.seq = 0; | 2142 | msg->hdr.seq = 0; |
2145 | } | 2143 | } |
2146 | if (con->out_msg == msg) { | 2144 | if (con->out_msg == msg) { |
2147 | dout("con_revoke %p msg %p - was sending\n", con, msg); | 2145 | dout("con_revoke %p msg %p - was sending\n", con, msg); |
2148 | con->out_msg = NULL; | 2146 | con->out_msg = NULL; |
2149 | if (con->out_kvec_is_msg) { | 2147 | if (con->out_kvec_is_msg) { |
2150 | con->out_skip = con->out_kvec_bytes; | 2148 | con->out_skip = con->out_kvec_bytes; |
2151 | con->out_kvec_is_msg = false; | 2149 | con->out_kvec_is_msg = false; |
2152 | } | 2150 | } |
2153 | ceph_msg_put(msg); | 2151 | ceph_msg_put(msg); |
2154 | msg->hdr.seq = 0; | 2152 | msg->hdr.seq = 0; |
2155 | } | 2153 | } |
2156 | mutex_unlock(&con->mutex); | 2154 | mutex_unlock(&con->mutex); |
2157 | } | 2155 | } |
2158 | 2156 | ||
2159 | /* | 2157 | /* |
2160 | * Revoke a message that we may be reading data into | 2158 | * Revoke a message that we may be reading data into |
2161 | */ | 2159 | */ |
2162 | void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) | 2160 | void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) |
2163 | { | 2161 | { |
2164 | mutex_lock(&con->mutex); | 2162 | mutex_lock(&con->mutex); |
2165 | if (con->in_msg && con->in_msg == msg) { | 2163 | if (con->in_msg && con->in_msg == msg) { |
2166 | unsigned front_len = le32_to_cpu(con->in_hdr.front_len); | 2164 | unsigned front_len = le32_to_cpu(con->in_hdr.front_len); |
2167 | unsigned middle_len = le32_to_cpu(con->in_hdr.middle_len); | 2165 | unsigned middle_len = le32_to_cpu(con->in_hdr.middle_len); |
2168 | unsigned data_len = le32_to_cpu(con->in_hdr.data_len); | 2166 | unsigned data_len = le32_to_cpu(con->in_hdr.data_len); |
2169 | 2167 | ||
2170 | /* skip rest of message */ | 2168 | /* skip rest of message */ |
2171 | dout("con_revoke_pages %p msg %p revoked\n", con, msg); | 2169 | dout("con_revoke_pages %p msg %p revoked\n", con, msg); |
2172 | con->in_base_pos = con->in_base_pos - | 2170 | con->in_base_pos = con->in_base_pos - |
2173 | sizeof(struct ceph_msg_header) - | 2171 | sizeof(struct ceph_msg_header) - |
2174 | front_len - | 2172 | front_len - |
2175 | middle_len - | 2173 | middle_len - |
2176 | data_len - | 2174 | data_len - |
2177 | sizeof(struct ceph_msg_footer); | 2175 | sizeof(struct ceph_msg_footer); |
2178 | ceph_msg_put(con->in_msg); | 2176 | ceph_msg_put(con->in_msg); |
2179 | con->in_msg = NULL; | 2177 | con->in_msg = NULL; |
2180 | con->in_tag = CEPH_MSGR_TAG_READY; | 2178 | con->in_tag = CEPH_MSGR_TAG_READY; |
2181 | con->in_seq++; | 2179 | con->in_seq++; |
2182 | } else { | 2180 | } else { |
2183 | dout("con_revoke_pages %p msg %p pages %p no-op\n", | 2181 | dout("con_revoke_pages %p msg %p pages %p no-op\n", |
2184 | con, con->in_msg, msg); | 2182 | con, con->in_msg, msg); |
2185 | } | 2183 | } |
2186 | mutex_unlock(&con->mutex); | 2184 | mutex_unlock(&con->mutex); |
2187 | } | 2185 | } |
2188 | 2186 | ||
/*
 * Queue a keepalive byte to ensure the tcp connection is alive.
 *
 * Note the short-circuit &&: if KEEPALIVE_PENDING was already set, a
 * keepalive is already scheduled and WRITE_PENDING is deliberately not
 * touched; work is queued only when we newly set both bits.
 */
void ceph_con_keepalive(struct ceph_connection *con)
{
	if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 &&
	    test_and_set_bit(WRITE_PENDING, &con->state) == 0)
		queue_con(con);
}
EXPORT_SYMBOL(ceph_con_keepalive);
2199 | 2197 | ||
2200 | 2198 | ||
/*
 * construct a new message with given type, size
 * the new msg has a ref count of 1.
 *
 * @type:      ceph message type (stored little-endian in the header)
 * @front_len: byte size of the "front" payload buffer to allocate
 * @flags:     gfp flags for all allocations
 *
 * Returns the message, or NULL on allocation failure.
 */
struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
{
	struct ceph_msg *m;

	m = kmalloc(sizeof(*m), flags);
	if (m == NULL)
		goto out;
	kref_init(&m->kref);
	INIT_LIST_HEAD(&m->list_head);

	/* header: everything not set here is filled in by the sender */
	m->hdr.tid = 0;
	m->hdr.type = cpu_to_le16(type);
	m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
	m->hdr.version = 0;
	m->hdr.front_len = cpu_to_le32(front_len);
	m->hdr.middle_len = 0;
	m->hdr.data_len = 0;
	m->hdr.data_off = 0;
	m->hdr.reserved = 0;
	m->footer.front_crc = 0;
	m->footer.middle_crc = 0;
	m->footer.data_crc = 0;
	m->footer.flags = 0;
	m->front_max = front_len;
	m->front_is_vmalloc = false;
	m->more_to_follow = false;
	m->pool = NULL;

	/* front */
	if (front_len) {
		/* large fronts go through vmalloc; remember which
		 * allocator was used so ceph_msg_kfree frees correctly */
		if (front_len > PAGE_CACHE_SIZE) {
			m->front.iov_base = __vmalloc(front_len, flags,
						      PAGE_KERNEL);
			m->front_is_vmalloc = true;
		} else {
			m->front.iov_base = kmalloc(front_len, flags);
		}
		if (m->front.iov_base == NULL) {
			pr_err("msg_new can't allocate %d bytes\n",
			       front_len);
			goto out2;
		}
	} else {
		m->front.iov_base = NULL;
	}
	m->front.iov_len = front_len;

	/* middle */
	m->middle = NULL;

	/* data */
	m->nr_pages = 0;
	m->page_alignment = 0;
	m->pages = NULL;
	m->pagelist = NULL;
	m->bio = NULL;
	m->bio_iter = NULL;
	m->bio_seg = 0;
	m->trail = NULL;

	dout("ceph_msg_new %p front %d\n", m, front_len);
	return m;

out2:
	ceph_msg_put(m);	/* drops the initial ref; frees m */
out:
	pr_err("msg_new can't create type %d front %d\n", type, front_len);
	return NULL;
}
EXPORT_SYMBOL(ceph_msg_new);
2275 | 2273 | ||
2276 | /* | 2274 | /* |
2277 | * Allocate "middle" portion of a message, if it is needed and wasn't | 2275 | * Allocate "middle" portion of a message, if it is needed and wasn't |
2278 | * allocated by alloc_msg. This allows us to read a small fixed-size | 2276 | * allocated by alloc_msg. This allows us to read a small fixed-size |
2279 | * per-type header in the front and then gracefully fail (i.e., | 2277 | * per-type header in the front and then gracefully fail (i.e., |
2280 | * propagate the error to the caller based on info in the front) when | 2278 | * propagate the error to the caller based on info in the front) when |
2281 | * the middle is too large. | 2279 | * the middle is too large. |
2282 | */ | 2280 | */ |
2283 | static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg) | 2281 | static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg) |
2284 | { | 2282 | { |
2285 | int type = le16_to_cpu(msg->hdr.type); | 2283 | int type = le16_to_cpu(msg->hdr.type); |
2286 | int middle_len = le32_to_cpu(msg->hdr.middle_len); | 2284 | int middle_len = le32_to_cpu(msg->hdr.middle_len); |
2287 | 2285 | ||
2288 | dout("alloc_middle %p type %d %s middle_len %d\n", msg, type, | 2286 | dout("alloc_middle %p type %d %s middle_len %d\n", msg, type, |
2289 | ceph_msg_type_name(type), middle_len); | 2287 | ceph_msg_type_name(type), middle_len); |
2290 | BUG_ON(!middle_len); | 2288 | BUG_ON(!middle_len); |
2291 | BUG_ON(msg->middle); | 2289 | BUG_ON(msg->middle); |
2292 | 2290 | ||
2293 | msg->middle = ceph_buffer_new(middle_len, GFP_NOFS); | 2291 | msg->middle = ceph_buffer_new(middle_len, GFP_NOFS); |
2294 | if (!msg->middle) | 2292 | if (!msg->middle) |
2295 | return -ENOMEM; | 2293 | return -ENOMEM; |
2296 | return 0; | 2294 | return 0; |
2297 | } | 2295 | } |
2298 | 2296 | ||
/*
 * Generic message allocator, for incoming messages.
 *
 * Called with con->mutex held (it is dropped around the connection's
 * alloc_msg callback, which may block).  If the callback sets *skip,
 * the incoming message should be discarded and NULL is returned.
 * Falls back to a plain ceph_msg_new() when no callback supplies one.
 *
 * Returns the message to read into, or NULL (skip or allocation
 * failure).
 */
static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
				       struct ceph_msg_header *hdr,
				       int *skip)
{
	int type = le16_to_cpu(hdr->type);
	int front_len = le32_to_cpu(hdr->front_len);
	int middle_len = le32_to_cpu(hdr->middle_len);
	struct ceph_msg *msg = NULL;
	int ret;

	if (con->ops->alloc_msg) {
		/* drop the mutex while the callback runs */
		mutex_unlock(&con->mutex);
		msg = con->ops->alloc_msg(con, hdr, skip);
		mutex_lock(&con->mutex);
		if (!msg || *skip)
			return NULL;
	}
	if (!msg) {
		*skip = 0;
		msg = ceph_msg_new(type, front_len, GFP_NOFS);
		if (!msg) {
			pr_err("unable to allocate msg type %d len %d\n",
			       type, front_len);
			return NULL;
		}
		msg->page_alignment = le16_to_cpu(hdr->data_off);
	}
	/* copy the full received header into the message */
	memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr));

	if (middle_len && !msg->middle) {
		ret = ceph_alloc_middle(con, msg);
		if (ret < 0) {
			ceph_msg_put(msg);
			return NULL;
		}
	}

	return msg;
}
2341 | 2339 | ||
2342 | 2340 | ||
2343 | /* | 2341 | /* |
2344 | * Free a generically kmalloc'd message. | 2342 | * Free a generically kmalloc'd message. |
2345 | */ | 2343 | */ |
2346 | void ceph_msg_kfree(struct ceph_msg *m) | 2344 | void ceph_msg_kfree(struct ceph_msg *m) |
2347 | { | 2345 | { |
2348 | dout("msg_kfree %p\n", m); | 2346 | dout("msg_kfree %p\n", m); |
2349 | if (m->front_is_vmalloc) | 2347 | if (m->front_is_vmalloc) |
2350 | vfree(m->front.iov_base); | 2348 | vfree(m->front.iov_base); |
2351 | else | 2349 | else |
2352 | kfree(m->front.iov_base); | 2350 | kfree(m->front.iov_base); |
2353 | kfree(m); | 2351 | kfree(m); |
2354 | } | 2352 | } |
2355 | 2353 | ||
/*
 * Drop a msg ref.  Destroy as needed.
 *
 * kref release callback, invoked when the last reference to a message
 * is dropped.  Releases the middle buffer and pagelist, then returns
 * the message to its pool (if it came from one) or frees it.
 */
void ceph_msg_last_put(struct kref *kref)
{
	struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);

	dout("ceph_msg_put last one on %p\n", m);
	WARN_ON(!list_empty(&m->list_head));

	/* drop middle, data, if any */
	if (m->middle) {
		ceph_buffer_put(m->middle);
		m->middle = NULL;
	}
	/* pages are only unlinked here, not freed — presumably owned by
	 * the caller that attached them; verify against callers */
	m->nr_pages = 0;
	m->pages = NULL;

	if (m->pagelist) {
		ceph_pagelist_release(m->pagelist);
		kfree(m->pagelist);
		m->pagelist = NULL;
	}

	m->trail = NULL;

	/* pool messages are recycled rather than freed */
	if (m->pool)
		ceph_msgpool_put(m->pool, m);
	else
		ceph_msg_kfree(m);
}
EXPORT_SYMBOL(ceph_msg_last_put);
2388 | 2386 | ||
/*
 * Debug helper: hex-dump a message's header, front, optional middle,
 * and footer to the kernel log at KERN_DEBUG level.
 */
void ceph_msg_dump(struct ceph_msg *msg)
{
	pr_debug("msg_dump %p (front_max %d nr_pages %d)\n", msg,
		 msg->front_max, msg->nr_pages);
	print_hex_dump(KERN_DEBUG, "header: ",
		       DUMP_PREFIX_OFFSET, 16, 1,
		       &msg->hdr, sizeof(msg->hdr), true);
	print_hex_dump(KERN_DEBUG, " front: ",
		       DUMP_PREFIX_OFFSET, 16, 1,
		       msg->front.iov_base, msg->front.iov_len, true);
	if (msg->middle)
		print_hex_dump(KERN_DEBUG, "middle: ",
			       DUMP_PREFIX_OFFSET, 16, 1,
			       msg->middle->vec.iov_base,
			       msg->middle->vec.iov_len, true);
	print_hex_dump(KERN_DEBUG, "footer: ",
		       DUMP_PREFIX_OFFSET, 16, 1,
		       &msg->footer, sizeof(msg->footer), true);
}
EXPORT_SYMBOL(ceph_msg_dump);
2409 | 2407 |