Blame view

fs/dlm/lowcomms.c 38.3 KB
fdda387f7   Patrick Caulfield   [DLM] Add support...
1
2
3
4
  /******************************************************************************
  *******************************************************************************
  **
  **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
5e9ccc372   Christine Caulfield   dlm: replace idr ...
5
  **  Copyright (C) 2004-2009 Red Hat, Inc.  All rights reserved.
fdda387f7   Patrick Caulfield   [DLM] Add support...
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
  **
  **  This copyrighted material is made available to anyone wishing to use,
  **  modify, copy, or redistribute it subject to the terms and conditions
  **  of the GNU General Public License v.2.
  **
  *******************************************************************************
  ******************************************************************************/
  
  /*
   * lowcomms.c
   *
   * This is the "low-level" comms layer.
   *
   * It is responsible for sending/receiving messages
   * from other nodes in the cluster.
   *
   * Cluster nodes are referred to by their nodeids. nodeids are
   * simply 32 bit numbers to the locking module - if they need to
2cf12c0bf   Joe Perches   dlm: comment typo...
24
   * be expanded for the cluster infrastructure then that is its
fdda387f7   Patrick Caulfield   [DLM] Add support...
25
26
27
28
29
30
31
32
33
34
35
36
37
38
   * responsibility. It is this layer's
   * responsibility to resolve these into IP address or
   * whatever it needs for inter-node communication.
   *
   * The comms level consists of two kernel threads: a receive thread
   * that deals mainly with receiving messages from other nodes and
   * passing them up to the mid-level comms layer (which understands the
   * message format) for execution by the locking core, and a send thread
   * that does all the setting up of connections to remote nodes and the
   * sending of data. Threads are not allowed to send their own data
   * because doing so may cause them to wait in times of high load.
   * Also, this way, the sending thread can collect together messages
   * bound for one node and send them in one block.
   *
2cf12c0bf   Joe Perches   dlm: comment typo...
39
   * lowcomms will choose to use either TCP or SCTP as its transport layer
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
40
   * depending on the configuration variable 'protocol'. This should be set
2cf12c0bf   Joe Perches   dlm: comment typo...
41
   * to 0 (default) for TCP or 1 for SCTP. It should be configured using a
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
42
43
   * cluster-wide mechanism as it must be the same on all nodes of the cluster
   * for the DLM to function.
fdda387f7   Patrick Caulfield   [DLM] Add support...
44
45
   *
   */
fdda387f7   Patrick Caulfield   [DLM] Add support...
46
47
48
49
  #include <asm/ioctls.h>
  #include <net/sock.h>
  #include <net/tcp.h>
  #include <linux/pagemap.h>
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
50
  #include <linux/file.h>
7a936ce71   Matthias Kaehlcke   dlm: convert conn...
51
  #include <linux/mutex.h>
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
52
  #include <linux/sctp.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
53
  #include <linux/slab.h>
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
54
  #include <net/sctp/user.h>
44ad532b3   Joe Perches   dlm: use ipv6_add...
55
  #include <net/ipv6.h>
fdda387f7   Patrick Caulfield   [DLM] Add support...
56
57
58
59
60
  
  #include "dlm_internal.h"
  #include "lowcomms.h"
  #include "midcomms.h"
  #include "config.h"
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
61
  /* 4 MiB. NOTE(review): presumably the receive-buffer size requested for
     sockets; the use site is outside this view - confirm. */
  #define NEEDED_RMEM (4*1024*1024)
5e9ccc372   Christine Caulfield   dlm: replace idr ...
62
  /* Number of buckets in connection_hash[].  nodeid_hash() masks with
     (CONN_HASH_SIZE-1), so this must remain a power of two. */
  #define CONN_HASH_SIZE 32
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
63

f92c8dd7a   Bob Peterson   dlm: reduce cond_...
64
65
  /* Number of messages to send before rescheduling */
  #define MAX_SEND_MSG_COUNT 25
fdda387f7   Patrick Caulfield   [DLM] Add support...
66
  /*
   * Bookkeeping for a circular buffer.  Only offsets are tracked here; the
   * storage itself lives elsewhere.
   */
  struct cbuf {
  	unsigned int base;	/* offset of the first unconsumed byte */
  	unsigned int len;	/* number of unconsumed bytes */
  	unsigned int mask;	/* buffer size - 1, set by cbuf_init() */
  };
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
71
72
73
74
  /* Account for @n bytes that have just been appended to the buffer. */
  static void cbuf_add(struct cbuf *cb, int n)
  {
  	cb->len = cb->len + n;
  }
fdda387f7   Patrick Caulfield   [DLM] Add support...
75

ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
  /* Offset just past the stored data (base + len, wrapped by the mask). */
  static int cbuf_data(struct cbuf *cb)
  {
  	unsigned int tail = cb->base + cb->len;
  
  	return tail & cb->mask;
  }
  
  /*
   * Reset @cb to empty for a buffer of @size bytes.  The mask is size - 1,
   * so the wrap arithmetic is only correct when @size is a power of two.
   */
  static void cbuf_init(struct cbuf *cb, int size)
  {
  	cb->mask = size - 1;
  	cb->len = 0;
  	cb->base = 0;
  }
  
  /* Consume @n bytes from the front of the buffer, wrapping the base. */
  static void cbuf_eat(struct cbuf *cb, int n)
  {
  	cb->base = (cb->base + n) & cb->mask;
  	cb->len -= n;
  }
  
  /* True when no unconsumed bytes remain in the buffer. */
  static bool cbuf_empty(struct cbuf *cb)
  {
  	return !cb->len;
  }
fdda387f7   Patrick Caulfield   [DLM] Add support...
98

fdda387f7   Patrick Caulfield   [DLM] Add support...
99
100
101
  struct connection {
  	struct socket *sock;	/* NULL if not connected */
  	uint32_t nodeid;	/* So we know who we are in the list */
f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
102
  	struct mutex sock_mutex;
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
103
  	unsigned long flags;
fdda387f7   Patrick Caulfield   [DLM] Add support...
104
105
106
  #define CF_READ_PENDING 1
  #define CF_WRITE_PENDING 2
  #define CF_CONNECT_PENDING 3
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
107
108
  #define CF_INIT_PENDING 4
  #define CF_IS_OTHERCON 5
063c4c996   Lars Marowsky-Bree   dlm: fix connecti...
109
  #define CF_CLOSE 6
b36930dd5   David Miller   dlm: Handle appli...
110
  #define CF_APP_LIMITED 7
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
111
  	struct list_head writequeue;  /* List of outgoing writequeue_entries */
fdda387f7   Patrick Caulfield   [DLM] Add support...
112
113
  	spinlock_t writequeue_lock;
  	int (*rx_action) (struct connection *);	/* What to do when active */
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
114
  	void (*connect_action) (struct connection *);	/* What to do to connect */
fdda387f7   Patrick Caulfield   [DLM] Add support...
115
116
117
  	struct page *rx_page;
  	struct cbuf cb;
  	int retries;
fdda387f7   Patrick Caulfield   [DLM] Add support...
118
  #define MAX_CONNECT_RETRIES 3
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
119
  	int sctp_assoc;
5e9ccc372   Christine Caulfield   dlm: replace idr ...
120
  	struct hlist_node list;
fdda387f7   Patrick Caulfield   [DLM] Add support...
121
  	struct connection *othercon;
1d6e8131c   Patrick Caulfield   [DLM] Use workque...
122
123
  	struct work_struct rwork; /* Receive workqueue */
  	struct work_struct swork; /* Send workqueue */
fdda387f7   Patrick Caulfield   [DLM] Add support...
124
125
126
127
128
129
130
131
132
133
134
135
136
  };
  /* Fetch the struct connection stored in a struct sock's sk_user_data
     (add_sock() stores it there); may be NULL. */
  #define sock2con(x) ((struct connection *)(x)->sk_user_data)
  
  /*
   * An entry waiting to be sent.
   *
   * NOTE(review): the code that fills in and consumes these entries is
   * outside this view; the per-field notes below are assumptions to confirm.
   */
  struct writequeue_entry {
  	struct list_head list;	/* presumably links into connection->writequeue */
  	struct page *page;	/* presumably the page holding the outgoing data */
  	int offset;
  	int len;
  	int end;
  	int users;
  	struct connection *con;	/* presumably the owning connection */
  };
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
137
138
  /* Local addresses.  Entry 0's ss_family selects IPv4 vs IPv6 handling in
     nodeid_to_addr() and make_sockaddr(). */
  static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
  /* Zero means no local address is known yet; nodeid_to_addr() then fails. */
  static int dlm_local_count;
fdda387f7   Patrick Caulfield   [DLM] Add support...
139

1d6e8131c   Patrick Caulfield   [DLM] Use workque...
140
141
142
  /* Work queues: connection->rwork is queued on recv_workqueue and
     connection->swork on send_workqueue. */
  static struct workqueue_struct *recv_workqueue;
  static struct workqueue_struct *send_workqueue;
fdda387f7   Patrick Caulfield   [DLM] Add support...
143

5e9ccc372   Christine Caulfield   dlm: replace idr ...
144
  /* All connections, bucketed by nodeid_hash(nodeid). */
  static struct hlist_head connection_hash[CONN_HASH_SIZE];
7a936ce71   Matthias Kaehlcke   dlm: convert conn...
145
  /* Taken around lookups, insertions and walks of connection_hash[]. */
  static DEFINE_MUTEX(connections_lock);
c80e7c83d   Patrick Caulfield   [DLM] fix compile...
146
  /* Slab cache from which __nodeid2con() allocates struct connection. */
  static struct kmem_cache *con_cache;
fdda387f7   Patrick Caulfield   [DLM] Add support...
147

1d6e8131c   Patrick Caulfield   [DLM] Use workque...
148
149
  /* Work handlers installed on connection->rwork / ->swork by __nodeid2con() */
  static void process_recv_sockets(struct work_struct *work);
  static void process_send_sockets(struct work_struct *work);
fdda387f7   Patrick Caulfield   [DLM] Add support...
150

5e9ccc372   Christine Caulfield   dlm: replace idr ...
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
  
  /*
   * Deliberately trivial: most clusters use small sequential nodeids, so
   * the low bits alone usually give each node its own hash bucket.
   */
  static inline int nodeid_hash(int nodeid)
  {
  	const int bucket_mask = CONN_HASH_SIZE - 1;
  
  	return nodeid & bucket_mask;
  }
  
  static struct connection *__find_con(int nodeid)
  {
  	int r;
  	struct hlist_node *h;
  	struct connection *con;
  
  	r = nodeid_hash(nodeid);
  
  	hlist_for_each_entry(con, h, &connection_hash[r], list) {
  		if (con->nodeid == nodeid)
  			return con;
  	}
  	return NULL;
  }
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
174
175
176
177
178
  /*
   * Return the connection for @nodeid.  If none exists and @alloc is
   * non-zero, a new one is allocated with @alloc as the allocation flags,
   * hashed and initialised.  If @alloc is zero we don't attempt to create
   * a new connection structure and NULL is returned instead.
   */
  static struct connection *__nodeid2con(int nodeid, gfp_t alloc)
  {
  	struct connection *zerocon;
  	struct connection *con;
  
  	con = __find_con(nodeid);
  	if (con)
  		return con;
  	if (!alloc)
  		return NULL;
  
  	con = kmem_cache_zalloc(con_cache, alloc);
  	if (!con)
  		return NULL;
  
  	hlist_add_head(&con->list, &connection_hash[nodeid_hash(nodeid)]);
  
  	con->nodeid = nodeid;
  	mutex_init(&con->sock_mutex);
  	INIT_LIST_HEAD(&con->writequeue);
  	spin_lock_init(&con->writequeue_lock);
  	INIT_WORK(&con->swork, process_send_sockets);
  	INIT_WORK(&con->rwork, process_recv_sockets);
  
  	/* Connection 0 has no template to copy its actions from */
  	if (!con->nodeid)
  		return con;
  
  	/*
  	 * Setup action pointers for child sockets from connection 0.
  	 * NOTE(review): zerocon is dereferenced without a NULL check, so
  	 * this relies on connection 0 already existing - confirm.
  	 */
  	zerocon = __find_con(0);
  	con->connect_action = zerocon->connect_action;
  	if (!con->rx_action)
  		con->rx_action = zerocon->rx_action;
  
  	return con;
  }
5e9ccc372   Christine Caulfield   dlm: replace idr ...
211
212
213
214
215
216
217
218
219
220
221
222
223
  /* Loop round all connections */
  static void foreach_conn(void (*conn_func)(struct connection *c))
  {
  	int i;
  	struct hlist_node *h, *n;
  	struct connection *con;
  
  	for (i = 0; i < CONN_HASH_SIZE; i++) {
  		hlist_for_each_entry_safe(con, h, n, &connection_hash[i], list){
  			conn_func(con);
  		}
  	}
  }
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
224
225
226
  /*
   * Locked wrapper around __nodeid2con(): look up (and, when @allocation is
   * non-zero, create) the connection for @nodeid under connections_lock.
   */
  static struct connection *nodeid2con(int nodeid, gfp_t allocation)
  {
  	struct connection *found;
  
  	mutex_lock(&connections_lock);
  	found = __nodeid2con(nodeid, allocation);
  	mutex_unlock(&connections_lock);
  
  	return found;
  }
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
233
234
235
236
  /* This is a bit drastic, but only called when things go wrong */
  static struct connection *assoc2con(int assoc_id)
  {
  	int i;
5e9ccc372   Christine Caulfield   dlm: replace idr ...
237
  	struct hlist_node *h;
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
238
  	struct connection *con;
7a936ce71   Matthias Kaehlcke   dlm: convert conn...
239
  	mutex_lock(&connections_lock);
5e9ccc372   Christine Caulfield   dlm: replace idr ...
240
241
242
  
  	for (i = 0 ; i < CONN_HASH_SIZE; i++) {
  		hlist_for_each_entry(con, h, &connection_hash[i], list) {
f70cb33b9   Julia Lawall   fs/dlm: Drop unne...
243
  			if (con->sctp_assoc == assoc_id) {
5e9ccc372   Christine Caulfield   dlm: replace idr ...
244
245
246
  				mutex_unlock(&connections_lock);
  				return con;
  			}
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
247
248
  		}
  	}
7a936ce71   Matthias Kaehlcke   dlm: convert conn...
249
  	mutex_unlock(&connections_lock);
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
  	return NULL;
  }
  
  static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
  {
  	struct sockaddr_storage addr;
  	int error;
  
  	if (!dlm_local_count)
  		return -1;
  
  	error = dlm_nodeid_to_addr(nodeid, &addr);
  	if (error)
  		return error;
  
  	if (dlm_local_addr[0]->ss_family == AF_INET) {
  		struct sockaddr_in *in4  = (struct sockaddr_in *) &addr;
  		struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr;
  		ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
  	} else {
  		struct sockaddr_in6 *in6  = (struct sockaddr_in6 *) &addr;
  		struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr;
4e3fd7a06   Alexey Dobriyan   net: remove ipv6_...
272
  		ret6->sin6_addr = in6->sin6_addr;
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
273
274
275
276
  	}
  
  	return 0;
  }
fdda387f7   Patrick Caulfield   [DLM] Add support...
277
278
279
280
  /* Data available on socket or listen socket received a connect */
  static void lowcomms_data_ready(struct sock *sk, int count_unused)
  {
  	struct connection *con = sock2con(sk);
afb853fb4   Patrick Caulfield   [DLM] fix socket ...
281
  	if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
1d6e8131c   Patrick Caulfield   [DLM] Use workque...
282
  		queue_work(recv_workqueue, &con->rwork);
fdda387f7   Patrick Caulfield   [DLM] Add support...
283
284
285
286
287
  }
  
  static void lowcomms_write_space(struct sock *sk)
  {
  	struct connection *con = sock2con(sk);
b36930dd5   David Miller   dlm: Handle appli...
288
289
290
291
292
293
294
295
296
297
298
  	if (!con)
  		return;
  
  	clear_bit(SOCK_NOSPACE, &con->sock->flags);
  
  	if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) {
  		con->sock->sk->sk_write_pending--;
  		clear_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags);
  	}
  
  	if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
1d6e8131c   Patrick Caulfield   [DLM] Use workque...
299
  		queue_work(send_workqueue, &con->swork);
fdda387f7   Patrick Caulfield   [DLM] Add support...
300
301
302
303
  }
  
  static inline void lowcomms_connect_sock(struct connection *con)
  {
063c4c996   Lars Marowsky-Bree   dlm: fix connecti...
304
305
  	if (test_bit(CF_CLOSE, &con->flags))
  		return;
1d6e8131c   Patrick Caulfield   [DLM] Use workque...
306
307
  	if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
  		queue_work(send_workqueue, &con->swork);
fdda387f7   Patrick Caulfield   [DLM] Add support...
308
309
310
311
  }
  
  static void lowcomms_state_change(struct sock *sk)
  {
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
312
  	if (sk->sk_state == TCP_ESTABLISHED)
fdda387f7   Patrick Caulfield   [DLM] Add support...
313
  		lowcomms_write_space(sk);
fdda387f7   Patrick Caulfield   [DLM] Add support...
314
  }
391fbdc5d   Christine Caulfield   dlm: connect to n...
315
316
317
  int dlm_lowcomms_connect_node(int nodeid)
  {
  	struct connection *con;
04bedd79a   David Teigland   dlm: fix lowcomms...
318
319
320
  	/* with sctp there's no connecting without sending */
  	if (dlm_config.ci_protocol != 0)
  		return 0;
391fbdc5d   Christine Caulfield   dlm: connect to n...
321
322
323
324
325
326
327
328
329
  	if (nodeid == dlm_our_nodeid())
  		return 0;
  
  	con = nodeid2con(nodeid, GFP_NOFS);
  	if (!con)
  		return -ENOMEM;
  	lowcomms_connect_sock(con);
  	return 0;
  }
fdda387f7   Patrick Caulfield   [DLM] Add support...
330
331
332
333
334
335
336
337
338
  /* Make a socket active */
  static int add_sock(struct socket *sock, struct connection *con)
  {
  	con->sock = sock;
  
  	/* Install a data_ready callback */
  	con->sock->sk->sk_data_ready = lowcomms_data_ready;
  	con->sock->sk->sk_write_space = lowcomms_write_space;
  	con->sock->sk->sk_state_change = lowcomms_state_change;
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
339
  	con->sock->sk->sk_user_data = con;
d6d7b702a   Steven Whitehouse   dlm: fix up memor...
340
  	con->sock->sk->sk_allocation = GFP_NOFS;
fdda387f7   Patrick Caulfield   [DLM] Add support...
341
342
  	return 0;
  }
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
343
  /*
   * Add the port number to an IPv6 or 4 sockaddr and store the address
   * length in @addr_len.  The family is taken from the first local address.
   * For IPv4 the sin_zero padding is cleared, and in both cases the part of
   * the sockaddr_storage beyond the address structure is zeroed.
   */
  static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
  			  int *addr_len)
  {
  	saddr->ss_family = dlm_local_addr[0]->ss_family;
  
  	if (saddr->ss_family != AF_INET) {
  		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)saddr;
  
  		sin6->sin6_port = cpu_to_be16(port);
  		*addr_len = sizeof(struct sockaddr_in6);
  	} else {
  		struct sockaddr_in *sin = (struct sockaddr_in *)saddr;
  
  		sin->sin_port = cpu_to_be16(port);
  		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
  		*addr_len = sizeof(struct sockaddr_in);
  	}
  
  	memset((char *)saddr + *addr_len, 0,
  	       sizeof(struct sockaddr_storage) - *addr_len);
  }
  
  /* Close a remote connection and tidy up */
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
363
  static void close_connection(struct connection *con, bool and_other)
fdda387f7   Patrick Caulfield   [DLM] Add support...
364
  {
f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
365
  	mutex_lock(&con->sock_mutex);
fdda387f7   Patrick Caulfield   [DLM] Add support...
366
367
368
369
370
371
  
  	if (con->sock) {
  		sock_release(con->sock);
  		con->sock = NULL;
  	}
  	if (con->othercon && and_other) {
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
372
373
  		/* Will only re-enter once. */
  		close_connection(con->othercon, false);
fdda387f7   Patrick Caulfield   [DLM] Add support...
374
375
376
377
378
  	}
  	if (con->rx_page) {
  		__free_page(con->rx_page);
  		con->rx_page = NULL;
  	}
9e5f2825a   Patrick Caulfield   [DLM] More otherc...
379

61d96be0f   Patrick Caulfield   [DLM] Fix lowcomm...
380
381
  	con->retries = 0;
  	mutex_unlock(&con->sock_mutex);
fdda387f7   Patrick Caulfield   [DLM] Add support...
382
  }
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
  /*
   * Send an empty SCTP message flagged MSG_EOF for @associd on connection
   * 0's socket.  We only send shutdown messages to nodes that are not part
   * of the cluster.  Connection 0 must already exist (BUG otherwise).  The
   * control buffer is static, so this function is not reentrant.
   */
  static void sctp_send_shutdown(sctp_assoc_t associd)
  {
  	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
  	struct connection *base_con = nodeid2con(0,0);
  	struct sctp_sndrcvinfo *sinfo;
  	struct msghdr outmessage;
  	struct cmsghdr *cmsg;
  	int ret;
  
  	BUG_ON(base_con == NULL);
  
  	outmessage.msg_name = NULL;
  	outmessage.msg_namelen = 0;
  	outmessage.msg_flags = MSG_EOR;
  	outmessage.msg_control = outcmsg;
  	/* Full buffer size first, so that CMSG_FIRSTHDR() finds the header */
  	outmessage.msg_controllen = sizeof(outcmsg);
  
  	cmsg = CMSG_FIRSTHDR(&outmessage);
  	cmsg->cmsg_level = IPPROTO_SCTP;
  	cmsg->cmsg_type = SCTP_SNDRCV;
  	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
  	outmessage.msg_controllen = cmsg->cmsg_len;
  
  	sinfo = CMSG_DATA(cmsg);
  	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
  	sinfo->sinfo_flags = MSG_EOF;
  	sinfo->sinfo_assoc_id = associd;
  
  	ret = kernel_sendmsg(base_con->sock, &outmessage, NULL, 0, 0);
  	if (ret != 0)
  		log_print("send EOF to node failed: %d", ret);
  }
5e9ccc372   Christine Caulfield   dlm: replace idr ...
418
419
420
421
422
423
424
425
  /*
   * Per-connection step of sctp_init_failed(): forget the association id
   * and, if a connect was pending, queue the send work (unless a write is
   * already pending).
   */
  static void sctp_init_failed_foreach(struct connection *con)
  {
  	con->sctp_assoc = 0;
  
  	if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags) &&
  	    !test_and_set_bit(CF_WRITE_PENDING, &con->flags))
  		queue_work(send_workqueue, &con->swork);
  }
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
426
427
428
429
  /* INIT failed but we don't know which node...
     restart INIT on all pending nodes */
  static void sctp_init_failed(void)
  {
7a936ce71   Matthias Kaehlcke   dlm: convert conn...
430
  	mutex_lock(&connections_lock);
5e9ccc372   Christine Caulfield   dlm: replace idr ...
431
432
  
  	foreach_conn(sctp_init_failed_foreach);
7a936ce71   Matthias Kaehlcke   dlm: convert conn...
433
  	mutex_unlock(&connections_lock);
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
434
435
436
  }
  
  /* Something happened to an association */
617e82e10   David Teigland   [DLM] lowcomms style
437
438
  static void process_sctp_notification(struct connection *con,
  				      struct msghdr *msg, char *buf)
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
  {
  	union sctp_notification *sn = (union sctp_notification *)buf;
  
  	if (sn->sn_header.sn_type == SCTP_ASSOC_CHANGE) {
  		switch (sn->sn_assoc_change.sac_state) {
  
  		case SCTP_COMM_UP:
  		case SCTP_RESTART:
  		{
  			/* Check that the new node is in the lockspace */
  			struct sctp_prim prim;
  			int nodeid;
  			int prim_len, ret;
  			int addr_len;
  			struct connection *new_con;
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
454
455
  			sctp_peeloff_arg_t parg;
  			int parglen = sizeof(parg);
6861f3507   David Teigland   dlm: fix socket f...
456
  			int err;
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
457
458
459
460
461
462
463
464
  
  			/*
  			 * We get this before any data for an association.
  			 * We verify that the node is in the cluster and
  			 * then peel off a socket for it.
  			 */
  			if ((int)sn->sn_assoc_change.sac_assoc_id <= 0) {
  				log_print("COMM_UP for invalid assoc ID %d",
617e82e10   David Teigland   [DLM] lowcomms style
465
  					 (int)sn->sn_assoc_change.sac_assoc_id);
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
  				sctp_init_failed();
  				return;
  			}
  			memset(&prim, 0, sizeof(struct sctp_prim));
  			prim_len = sizeof(struct sctp_prim);
  			prim.ssp_assoc_id = sn->sn_assoc_change.sac_assoc_id;
  
  			ret = kernel_getsockopt(con->sock,
  						IPPROTO_SCTP,
  						SCTP_PRIMARY_ADDR,
  						(char*)&prim,
  						&prim_len);
  			if (ret < 0) {
  				log_print("getsockopt/sctp_primary_addr on "
  					  "new assoc %d failed : %d",
  					  (int)sn->sn_assoc_change.sac_assoc_id,
  					  ret);
  
  				/* Retry INIT later */
  				new_con = assoc2con(sn->sn_assoc_change.sac_assoc_id);
  				if (new_con)
  					clear_bit(CF_CONNECT_PENDING, &con->flags);
  				return;
  			}
  			make_sockaddr(&prim.ssp_addr, 0, &addr_len);
  			if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
492
493
  				unsigned char *b=(unsigned char *)&prim.ssp_addr;
  				log_print("reject connect from unknown addr");
bcaadf5c1   Masatake YAMATO   dlm: dump address...
494
495
  				print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, 
  						     b, sizeof(struct sockaddr_storage));
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
496
497
498
  				sctp_send_shutdown(prim.ssp_assoc_id);
  				return;
  			}
748285ccf   David Teigland   dlm: use more NOF...
499
  			new_con = nodeid2con(nodeid, GFP_NOFS);
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
500
501
502
503
504
  			if (!new_con)
  				return;
  
  			/* Peel off a new sock */
  			parg.associd = sn->sn_assoc_change.sac_assoc_id;
617e82e10   David Teigland   [DLM] lowcomms style
505
506
  			ret = kernel_getsockopt(con->sock, IPPROTO_SCTP,
  						SCTP_SOCKOPT_PEELOFF,
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
507
  						(void *)&parg, &parglen);
6861f3507   David Teigland   dlm: fix socket f...
508
  			if (ret < 0) {
617e82e10   David Teigland   [DLM] lowcomms style
509
  				log_print("Can't peel off a socket for "
6861f3507   David Teigland   dlm: fix socket f...
510
  					  "connection %d to node %d: err=%d",
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
511
  					  parg.associd, nodeid, ret);
6861f3507   David Teigland   dlm: fix socket f...
512
513
514
515
516
517
  				return;
  			}
  			new_con->sock = sockfd_lookup(parg.sd, &err);
  			if (!new_con->sock) {
  				log_print("sockfd_lookup error %d", err);
  				return;
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
518
  			}
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
519
  			add_sock(new_con->sock, new_con);
6861f3507   David Teigland   dlm: fix socket f...
520
  			sockfd_put(new_con->sock);
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
521

6861f3507   David Teigland   dlm: fix socket f...
522
523
  			log_print("connecting to %d sctp association %d",
  				 nodeid, (int)sn->sn_assoc_change.sac_assoc_id);
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
  
  			/* Send any pending writes */
  			clear_bit(CF_CONNECT_PENDING, &new_con->flags);
  			clear_bit(CF_INIT_PENDING, &con->flags);
  			if (!test_and_set_bit(CF_WRITE_PENDING, &new_con->flags)) {
  				queue_work(send_workqueue, &new_con->swork);
  			}
  			if (!test_and_set_bit(CF_READ_PENDING, &new_con->flags))
  				queue_work(recv_workqueue, &new_con->rwork);
  		}
  		break;
  
  		case SCTP_COMM_LOST:
  		case SCTP_SHUTDOWN_COMP:
  		{
  			con = assoc2con(sn->sn_assoc_change.sac_assoc_id);
  			if (con) {
  				con->sctp_assoc = 0;
  			}
  		}
  		break;
  
  		/* We don't know which INIT failed, so clear the PENDING flags
  		 * on them all.  if assoc_id is zero then it will then try
  		 * again */
  
  		case SCTP_CANT_STR_ASSOC:
  		{
  			log_print("Can't start SCTP association - retrying");
  			sctp_init_failed();
  		}
  		break;
  
  		default:
  			log_print("unexpected SCTP assoc change id=%d state=%d",
  				  (int)sn->sn_assoc_change.sac_assoc_id,
  				  sn->sn_assoc_change.sac_state);
  		}
  	}
  }
fdda387f7   Patrick Caulfield   [DLM] Add support...
564
565
566
567
  /* Data received from remote end */
  static int receive_from_sock(struct connection *con)
  {
  	int ret = 0;
58addbffd   Al Viro   [PATCH] dlm: use ...
568
569
  	struct msghdr msg = {};
  	struct kvec iov[2];
fdda387f7   Patrick Caulfield   [DLM] Add support...
570
571
572
  	unsigned len;
  	int r;
  	int call_again_soon = 0;
58addbffd   Al Viro   [PATCH] dlm: use ...
573
  	int nvec;
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
574
  	char incmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
fdda387f7   Patrick Caulfield   [DLM] Add support...
575

f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
576
  	mutex_lock(&con->sock_mutex);
fdda387f7   Patrick Caulfield   [DLM] Add support...
577

a34fbc636   Patrick Caulfield   [DLM] fix softloc...
578
579
580
581
  	if (con->sock == NULL) {
  		ret = -EAGAIN;
  		goto out_close;
  	}
fdda387f7   Patrick Caulfield   [DLM] Add support...
582
583
584
585
586
587
588
589
  	if (con->rx_page == NULL) {
  		/*
  		 * This doesn't need to be atomic, but I think it should
  		 * improve performance if it is.
  		 */
  		con->rx_page = alloc_page(GFP_ATOMIC);
  		if (con->rx_page == NULL)
  			goto out_resched;
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
590
  		cbuf_init(&con->cb, PAGE_CACHE_SIZE);
fdda387f7   Patrick Caulfield   [DLM] Add support...
591
  	}
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
592
593
594
595
  	/* Only SCTP needs these really */
  	memset(&incmsg, 0, sizeof(incmsg));
  	msg.msg_control = incmsg;
  	msg.msg_controllen = sizeof(incmsg);
fdda387f7   Patrick Caulfield   [DLM] Add support...
596
597
598
599
  	/*
  	 * iov[0] is the bit of the circular buffer between the current end
  	 * point (cb.base + cb.len) and the end of the buffer.
  	 */
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
600
601
  	iov[0].iov_len = con->cb.base - cbuf_data(&con->cb);
  	iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb);
89adc934f   Patrick Caulfield   [DLM] Fix uniniti...
602
  	iov[1].iov_len = 0;
58addbffd   Al Viro   [PATCH] dlm: use ...
603
  	nvec = 1;
fdda387f7   Patrick Caulfield   [DLM] Add support...
604
605
606
607
608
  
  	/*
  	 * iov[1] is the bit of the circular buffer between the start of the
  	 * buffer and the start of the currently used section (cb.base)
  	 */
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
609
610
  	if (cbuf_data(&con->cb) >= con->cb.base) {
  		iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&con->cb);
fdda387f7   Patrick Caulfield   [DLM] Add support...
611
612
  		iov[1].iov_len = con->cb.base;
  		iov[1].iov_base = page_address(con->rx_page);
58addbffd   Al Viro   [PATCH] dlm: use ...
613
  		nvec = 2;
fdda387f7   Patrick Caulfield   [DLM] Add support...
614
615
  	}
  	len = iov[0].iov_len + iov[1].iov_len;
58addbffd   Al Viro   [PATCH] dlm: use ...
616
  	r = ret = kernel_recvmsg(con->sock, &msg, iov, nvec, len,
fdda387f7   Patrick Caulfield   [DLM] Add support...
617
  			       MSG_DONTWAIT | MSG_NOSIGNAL);
fdda387f7   Patrick Caulfield   [DLM] Add support...
618
619
  	if (ret <= 0)
  		goto out_close;
bd44e2b00   Patrick Caulfield   [DLM] fix lowcomm...
620

6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
621
622
  	/* Process SCTP notifications */
  	if (msg.msg_flags & MSG_NOTIFICATION) {
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
623
624
625
626
  		msg.msg_control = incmsg;
  		msg.msg_controllen = sizeof(incmsg);
  
  		process_sctp_notification(con, &msg,
617e82e10   David Teigland   [DLM] lowcomms style
627
  				page_address(con->rx_page) + con->cb.base);
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
628
629
630
631
  		mutex_unlock(&con->sock_mutex);
  		return 0;
  	}
  	BUG_ON(con->nodeid == 0);
fdda387f7   Patrick Caulfield   [DLM] Add support...
632
633
  	if (ret == len)
  		call_again_soon = 1;
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
634
  	cbuf_add(&con->cb, ret);
fdda387f7   Patrick Caulfield   [DLM] Add support...
635
636
637
638
639
  	ret = dlm_process_incoming_buffer(con->nodeid,
  					  page_address(con->rx_page),
  					  con->cb.base, con->cb.len,
  					  PAGE_CACHE_SIZE);
  	if (ret == -EBADMSG) {
617e82e10   David Teigland   [DLM] lowcomms style
640
641
642
643
  		log_print("lowcomms: addr=%p, base=%u, len=%u, "
  			  "iov_len=%u, iov_base[0]=%p, read=%d",
  			  page_address(con->rx_page), con->cb.base, con->cb.len,
  			  len, iov[0].iov_base, r);
fdda387f7   Patrick Caulfield   [DLM] Add support...
644
645
646
  	}
  	if (ret < 0)
  		goto out_close;
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
647
  	cbuf_eat(&con->cb, ret);
fdda387f7   Patrick Caulfield   [DLM] Add support...
648

ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
649
  	if (cbuf_empty(&con->cb) && !call_again_soon) {
fdda387f7   Patrick Caulfield   [DLM] Add support...
650
651
652
  		__free_page(con->rx_page);
  		con->rx_page = NULL;
  	}
fdda387f7   Patrick Caulfield   [DLM] Add support...
653
654
  	if (call_again_soon)
  		goto out_resched;
f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
655
  	mutex_unlock(&con->sock_mutex);
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
656
  	return 0;
fdda387f7   Patrick Caulfield   [DLM] Add support...
657

ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
658
  out_resched:
1d6e8131c   Patrick Caulfield   [DLM] Use workque...
659
660
  	if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
  		queue_work(recv_workqueue, &con->rwork);
f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
661
  	mutex_unlock(&con->sock_mutex);
bd44e2b00   Patrick Caulfield   [DLM] fix lowcomm...
662
  	return -EAGAIN;
fdda387f7   Patrick Caulfield   [DLM] Add support...
663

ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
664
  out_close:
f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
665
  	mutex_unlock(&con->sock_mutex);
9e5f2825a   Patrick Caulfield   [DLM] More otherc...
666
  	if (ret != -EAGAIN) {
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
667
  		close_connection(con, false);
fdda387f7   Patrick Caulfield   [DLM] Add support...
668
669
  		/* Reconnect when there is something to send */
  	}
a34fbc636   Patrick Caulfield   [DLM] fix softloc...
670
671
672
  	/* Don't return success if we really got EOF */
  	if (ret == 0)
  		ret = -EAGAIN;
fdda387f7   Patrick Caulfield   [DLM] Add support...
673

fdda387f7   Patrick Caulfield   [DLM] Add support...
674
675
676
677
  	return ret;
  }
  
  /* Listening socket is busy, accept a connection */
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
678
  static int tcp_accept_from_sock(struct connection *con)
fdda387f7   Patrick Caulfield   [DLM] Add support...
679
680
681
682
683
684
685
  {
  	int result;
  	struct sockaddr_storage peeraddr;
  	struct socket *newsock;
  	int len;
  	int nodeid;
  	struct connection *newcon;
bd44e2b00   Patrick Caulfield   [DLM] fix lowcomm...
686
  	struct connection *addcon;
fdda387f7   Patrick Caulfield   [DLM] Add support...
687
688
  
  	memset(&peeraddr, 0, sizeof(peeraddr));
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
689
  	result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM,
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
690
  				  IPPROTO_TCP, &newsock);
fdda387f7   Patrick Caulfield   [DLM] Add support...
691
692
  	if (result < 0)
  		return -ENOMEM;
f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
693
  	mutex_lock_nested(&con->sock_mutex, 0);
fdda387f7   Patrick Caulfield   [DLM] Add support...
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
  
  	result = -ENOTCONN;
  	if (con->sock == NULL)
  		goto accept_err;
  
  	newsock->type = con->sock->type;
  	newsock->ops = con->sock->ops;
  
  	result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK);
  	if (result < 0)
  		goto accept_err;
  
  	/* Get the connected socket's peer */
  	memset(&peeraddr, 0, sizeof(peeraddr));
  	if (newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr,
  				  &len, 2)) {
  		result = -ECONNABORTED;
  		goto accept_err;
  	}
  
  	/* Get the new node's NODEID */
  	make_sockaddr(&peeraddr, 0, &len);
  	if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
bcaadf5c1   Masatake YAMATO   dlm: dump address...
717
  		unsigned char *b=(unsigned char *)&peeraddr;
617e82e10   David Teigland   [DLM] lowcomms style
718
  		log_print("connect from non cluster node");
bcaadf5c1   Masatake YAMATO   dlm: dump address...
719
720
  		print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, 
  				     b, sizeof(struct sockaddr_storage));
fdda387f7   Patrick Caulfield   [DLM] Add support...
721
  		sock_release(newsock);
f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
722
  		mutex_unlock(&con->sock_mutex);
fdda387f7   Patrick Caulfield   [DLM] Add support...
723
724
725
726
727
728
729
730
  		return -1;
  	}
  
  	log_print("got connection from %d", nodeid);
  
  	/*  Check to see if we already have a connection to this node. This
  	 *  could happen if the two nodes initiate a connection at roughly
  	 *  the same time and the connections cross on the wire.
fdda387f7   Patrick Caulfield   [DLM] Add support...
731
732
  	 *  In this case we store the incoming one in "othercon"
  	 */
748285ccf   David Teigland   dlm: use more NOF...
733
  	newcon = nodeid2con(nodeid, GFP_NOFS);
fdda387f7   Patrick Caulfield   [DLM] Add support...
734
735
736
737
  	if (!newcon) {
  		result = -ENOMEM;
  		goto accept_err;
  	}
f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
738
  	mutex_lock_nested(&newcon->sock_mutex, 1);
fdda387f7   Patrick Caulfield   [DLM] Add support...
739
  	if (newcon->sock) {
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
740
  		struct connection *othercon = newcon->othercon;
fdda387f7   Patrick Caulfield   [DLM] Add support...
741
742
  
  		if (!othercon) {
748285ccf   David Teigland   dlm: use more NOF...
743
  			othercon = kmem_cache_zalloc(con_cache, GFP_NOFS);
fdda387f7   Patrick Caulfield   [DLM] Add support...
744
  			if (!othercon) {
617e82e10   David Teigland   [DLM] lowcomms style
745
  				log_print("failed to allocate incoming socket");
f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
746
  				mutex_unlock(&newcon->sock_mutex);
fdda387f7   Patrick Caulfield   [DLM] Add support...
747
748
749
  				result = -ENOMEM;
  				goto accept_err;
  			}
fdda387f7   Patrick Caulfield   [DLM] Add support...
750
751
  			othercon->nodeid = nodeid;
  			othercon->rx_action = receive_from_sock;
f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
752
  			mutex_init(&othercon->sock_mutex);
1d6e8131c   Patrick Caulfield   [DLM] Use workque...
753
754
  			INIT_WORK(&othercon->swork, process_send_sockets);
  			INIT_WORK(&othercon->rwork, process_recv_sockets);
fdda387f7   Patrick Caulfield   [DLM] Add support...
755
  			set_bit(CF_IS_OTHERCON, &othercon->flags);
61d96be0f   Patrick Caulfield   [DLM] Fix lowcomm...
756
757
  		}
  		if (!othercon->sock) {
fdda387f7   Patrick Caulfield   [DLM] Add support...
758
  			newcon->othercon = othercon;
97d848365   Patrick Caulfield   [DLM] Telnet to p...
759
760
761
762
763
764
765
766
767
  			othercon->sock = newsock;
  			newsock->sk->sk_user_data = othercon;
  			add_sock(newsock, othercon);
  			addcon = othercon;
  		}
  		else {
  			printk("Extra connection from node %d attempted
  ", nodeid);
  			result = -EAGAIN;
f4fadb23c   akpm@linux-foundation.org   [GFS2] git-gfs2-n...
768
  			mutex_unlock(&newcon->sock_mutex);
97d848365   Patrick Caulfield   [DLM] Telnet to p...
769
  			goto accept_err;
fdda387f7   Patrick Caulfield   [DLM] Add support...
770
  		}
fdda387f7   Patrick Caulfield   [DLM] Add support...
771
772
773
774
775
  	}
  	else {
  		newsock->sk->sk_user_data = newcon;
  		newcon->rx_action = receive_from_sock;
  		add_sock(newsock, newcon);
bd44e2b00   Patrick Caulfield   [DLM] fix lowcomm...
776
  		addcon = newcon;
fdda387f7   Patrick Caulfield   [DLM] Add support...
777
  	}
f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
778
  	mutex_unlock(&newcon->sock_mutex);
fdda387f7   Patrick Caulfield   [DLM] Add support...
779
780
781
  
  	/*
  	 * Add it to the active queue in case we got data
25985edce   Lucas De Marchi   Fix common misspe...
782
  	 * between processing the accept adding the socket
fdda387f7   Patrick Caulfield   [DLM] Add support...
783
784
  	 * to the read_sockets list
  	 */
bd44e2b00   Patrick Caulfield   [DLM] fix lowcomm...
785
786
  	if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
  		queue_work(recv_workqueue, &addcon->rwork);
f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
787
  	mutex_unlock(&con->sock_mutex);
fdda387f7   Patrick Caulfield   [DLM] Add support...
788
789
  
  	return 0;
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
790
  accept_err:
f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
791
  	mutex_unlock(&con->sock_mutex);
fdda387f7   Patrick Caulfield   [DLM] Add support...
792
793
794
  	sock_release(newsock);
  
  	if (result != -EAGAIN)
617e82e10   David Teigland   [DLM] lowcomms style
795
  		log_print("error accepting connection from node: %d", result);
fdda387f7   Patrick Caulfield   [DLM] Add support...
796
797
  	return result;
  }
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
  /* Release a write queue entry together with the page attached to it. */
  static void free_entry(struct writequeue_entry *e)
  {
  	struct page *data_page = e->page;

  	__free_page(data_page);
  	kfree(e);
  }
  
  /* Initiate an SCTP association.
     This is a special case of send_to_sock() in that we don't yet have a
     peeled-off socket for this association, so we use the listening socket
     and add the primary IP address of the remote node.
   */
  static void sctp_init_assoc(struct connection *con)
  {
  	struct sockaddr_storage rem_addr;
  	char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
  	struct msghdr outmessage;
  	struct cmsghdr *cmsg;
  	struct sctp_sndrcvinfo *sinfo;
  	struct connection *base_con;
  	struct writequeue_entry *e;
  	int len, offset;
  	int ret;
  	int addrlen;
  	struct kvec iov[1];
  
  	if (test_and_set_bit(CF_INIT_PENDING, &con->flags))
  		return;
  
  	if (con->retries++ > MAX_CONNECT_RETRIES)
  		return;
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
  	if (nodeid_to_addr(con->nodeid, (struct sockaddr *)&rem_addr)) {
  		log_print("no address for nodeid %d", con->nodeid);
  		return;
  	}
  	base_con = nodeid2con(0, 0);
  	BUG_ON(base_con == NULL);
  
  	make_sockaddr(&rem_addr, dlm_config.ci_tcp_port, &addrlen);
  
  	outmessage.msg_name = &rem_addr;
  	outmessage.msg_namelen = addrlen;
  	outmessage.msg_control = outcmsg;
  	outmessage.msg_controllen = sizeof(outcmsg);
  	outmessage.msg_flags = MSG_EOR;
  
  	spin_lock(&con->writequeue_lock);
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
844

04bedd79a   David Teigland   dlm: fix lowcomms...
845
846
847
848
849
  	if (list_empty(&con->writequeue)) {
  		spin_unlock(&con->writequeue_lock);
  		log_print("writequeue empty for nodeid %d", con->nodeid);
  		return;
  	}
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
850

04bedd79a   David Teigland   dlm: fix lowcomms...
851
  	e = list_first_entry(&con->writequeue, struct writequeue_entry, list);
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
852
853
854
  	len = e->len;
  	offset = e->offset;
  	spin_unlock(&con->writequeue_lock);
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
  
  	/* Send the first block off the write queue */
  	iov[0].iov_base = page_address(e->page)+offset;
  	iov[0].iov_len = len;
  
  	cmsg = CMSG_FIRSTHDR(&outmessage);
  	cmsg->cmsg_level = IPPROTO_SCTP;
  	cmsg->cmsg_type = SCTP_SNDRCV;
  	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
  	sinfo = CMSG_DATA(cmsg);
  	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
  	sinfo->sinfo_ppid = cpu_to_le32(dlm_our_nodeid());
  	outmessage.msg_controllen = cmsg->cmsg_len;
  
  	ret = kernel_sendmsg(base_con->sock, &outmessage, iov, 1, len);
  	if (ret < 0) {
617e82e10   David Teigland   [DLM] lowcomms style
871
872
  		log_print("Send first packet to node %d failed: %d",
  			  con->nodeid, ret);
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
873
874
875
876
877
878
879
880
881
882
883
884
  
  		/* Try again later */
  		clear_bit(CF_CONNECT_PENDING, &con->flags);
  		clear_bit(CF_INIT_PENDING, &con->flags);
  	}
  	else {
  		spin_lock(&con->writequeue_lock);
  		e->offset += ret;
  		e->len -= ret;
  
  		if (e->len == 0 && e->users == 0) {
  			list_del(&e->list);
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
885
886
887
888
889
  			free_entry(e);
  		}
  		spin_unlock(&con->writequeue_lock);
  	}
  }
fdda387f7   Patrick Caulfield   [DLM] Add support...
890
  /* Connect a new socket to its peer */
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
891
  static void tcp_connect_to_sock(struct connection *con)
fdda387f7   Patrick Caulfield   [DLM] Add support...
892
893
  {
  	int result = -EHOSTUNREACH;
6bd8fedaa   Lon Hohberger   dlm: bind connect...
894
  	struct sockaddr_storage saddr, src_addr;
fdda387f7   Patrick Caulfield   [DLM] Add support...
895
  	int addr_len;
a89d63a15   Casey Dahlin   dlm: free socket ...
896
  	struct socket *sock = NULL;
cb2d45da8   David Teigland   dlm: use TCP_NODELAY
897
  	int one = 1;
fdda387f7   Patrick Caulfield   [DLM] Add support...
898
899
900
  
  	if (con->nodeid == 0) {
  		log_print("attempt to connect sock 0 foiled");
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
901
  		return;
fdda387f7   Patrick Caulfield   [DLM] Add support...
902
  	}
f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
903
  	mutex_lock(&con->sock_mutex);
fdda387f7   Patrick Caulfield   [DLM] Add support...
904
905
906
907
908
909
910
911
912
913
  	if (con->retries++ > MAX_CONNECT_RETRIES)
  		goto out;
  
  	/* Some odd races can cause double-connects, ignore them */
  	if (con->sock) {
  		result = 0;
  		goto out;
  	}
  
  	/* Create a socket to communicate with */
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
914
  	result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM,
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
915
  				  IPPROTO_TCP, &sock);
fdda387f7   Patrick Caulfield   [DLM] Add support...
916
917
918
919
  	if (result < 0)
  		goto out_err;
  
  	memset(&saddr, 0, sizeof(saddr));
b5711b8e5   Casey Dahlin   dlm: fix double-r...
920
  	if (dlm_nodeid_to_addr(con->nodeid, &saddr))
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
921
  		goto out_err;
fdda387f7   Patrick Caulfield   [DLM] Add support...
922
923
924
  
  	sock->sk->sk_user_data = con;
  	con->rx_action = receive_from_sock;
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
925
926
  	con->connect_action = tcp_connect_to_sock;
  	add_sock(sock, con);
fdda387f7   Patrick Caulfield   [DLM] Add support...
927

6bd8fedaa   Lon Hohberger   dlm: bind connect...
928
929
930
931
932
933
934
935
936
937
  	/* Bind to our cluster-known address connecting to avoid
  	   routing problems */
  	memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr));
  	make_sockaddr(&src_addr, 0, &addr_len);
  	result = sock->ops->bind(sock, (struct sockaddr *) &src_addr,
  				 addr_len);
  	if (result < 0) {
  		log_print("could not bind for connect: %d", result);
  		/* This *may* not indicate a critical error */
  	}
68c817a1c   David Teigland   [DLM] rename dlm_...
938
  	make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);
fdda387f7   Patrick Caulfield   [DLM] Add support...
939

fdda387f7   Patrick Caulfield   [DLM] Add support...
940
  	log_print("connecting to %d", con->nodeid);
cb2d45da8   David Teigland   dlm: use TCP_NODELAY
941
942
943
944
  
  	/* Turn off Nagle's algorithm */
  	kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
  			  sizeof(one));
fdda387f7   Patrick Caulfield   [DLM] Add support...
945
946
  	result =
  		sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
947
  				   O_NONBLOCK);
fdda387f7   Patrick Caulfield   [DLM] Add support...
948
949
  	if (result == -EINPROGRESS)
  		result = 0;
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
950
951
  	if (result == 0)
  		goto out;
fdda387f7   Patrick Caulfield   [DLM] Add support...
952

ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
953
  out_err:
fdda387f7   Patrick Caulfield   [DLM] Add support...
954
955
956
  	if (con->sock) {
  		sock_release(con->sock);
  		con->sock = NULL;
a89d63a15   Casey Dahlin   dlm: free socket ...
957
958
  	} else if (sock) {
  		sock_release(sock);
fdda387f7   Patrick Caulfield   [DLM] Add support...
959
960
961
962
963
964
  	}
  	/*
  	 * Some errors are fatal and this list might need adjusting. For other
  	 * errors we try again until the max number of retries is reached.
  	 */
  	if (result != -EHOSTUNREACH && result != -ENETUNREACH &&
0035a4b14   Marcin Slusarz   dlm: tcp_connect_...
965
  	    result != -ENETDOWN && result != -EINVAL
fdda387f7   Patrick Caulfield   [DLM] Add support...
966
967
968
969
  	    && result != -EPROTONOSUPPORT) {
  		lowcomms_connect_sock(con);
  		result = 0;
  	}
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
970
  out:
f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
971
  	mutex_unlock(&con->sock_mutex);
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
972
  	return;
fdda387f7   Patrick Caulfield   [DLM] Add support...
973
  }
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
974
975
  static struct socket *tcp_create_listen_sock(struct connection *con,
  					     struct sockaddr_storage *saddr)
fdda387f7   Patrick Caulfield   [DLM] Add support...
976
  {
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
977
  	struct socket *sock = NULL;
fdda387f7   Patrick Caulfield   [DLM] Add support...
978
979
980
  	int result = 0;
  	int one = 1;
  	int addr_len;
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
981
  	if (dlm_local_addr[0]->ss_family == AF_INET)
fdda387f7   Patrick Caulfield   [DLM] Add support...
982
983
984
985
986
  		addr_len = sizeof(struct sockaddr_in);
  	else
  		addr_len = sizeof(struct sockaddr_in6);
  
  	/* Create a socket to communicate with */
617e82e10   David Teigland   [DLM] lowcomms style
987
988
  	result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM,
  				  IPPROTO_TCP, &sock);
fdda387f7   Patrick Caulfield   [DLM] Add support...
989
  	if (result < 0) {
617e82e10   David Teigland   [DLM] lowcomms style
990
  		log_print("Can't create listening comms socket");
fdda387f7   Patrick Caulfield   [DLM] Add support...
991
992
  		goto create_out;
  	}
cb2d45da8   David Teigland   dlm: use TCP_NODELAY
993
994
995
  	/* Turn off Nagle's algorithm */
  	kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
  			  sizeof(one));
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
996
997
  	result = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
  				   (char *)&one, sizeof(one));
fdda387f7   Patrick Caulfield   [DLM] Add support...
998
  	if (result < 0) {
617e82e10   David Teigland   [DLM] lowcomms style
999
  		log_print("Failed to set SO_REUSEADDR on socket: %d", result);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1000
1001
  	}
  	sock->sk->sk_user_data = con;
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1002
1003
  	con->rx_action = tcp_accept_from_sock;
  	con->connect_action = tcp_connect_to_sock;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1004
1005
1006
  	con->sock = sock;
  
  	/* Bind to our port */
68c817a1c   David Teigland   [DLM] rename dlm_...
1007
  	make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1008
1009
  	result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len);
  	if (result < 0) {
617e82e10   David Teigland   [DLM] lowcomms style
1010
  		log_print("Can't bind to port %d", dlm_config.ci_tcp_port);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1011
1012
1013
1014
1015
  		sock_release(sock);
  		sock = NULL;
  		con->sock = NULL;
  		goto create_out;
  	}
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1016
  	result = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
1017
  				 (char *)&one, sizeof(one));
fdda387f7   Patrick Caulfield   [DLM] Add support...
1018
  	if (result < 0) {
617e82e10   David Teigland   [DLM] lowcomms style
1019
  		log_print("Set keepalive failed: %d", result);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1020
1021
1022
1023
  	}
  
  	result = sock->ops->listen(sock, 5);
  	if (result < 0) {
617e82e10   David Teigland   [DLM] lowcomms style
1024
  		log_print("Can't listen on port %d", dlm_config.ci_tcp_port);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1025
1026
1027
1028
  		sock_release(sock);
  		sock = NULL;
  		goto create_out;
  	}
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
1029
  create_out:
fdda387f7   Patrick Caulfield   [DLM] Add support...
1030
1031
  	return sock;
  }
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1032
1033
1034
1035
1036
  /* Get local addresses */
  static void init_local(void)
  {
  	struct sockaddr_storage sas, *addr;
  	int i;
30d3a2373   Patrick Caulfield   [DLM] Lowcomms no...
1037
  	dlm_local_count = 0;
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1038
1039
1040
  	for (i = 0; i < DLM_MAX_ADDR_COUNT - 1; i++) {
  		if (dlm_our_addr(&sas, i))
  			break;
573c24c4a   David Teigland   dlm: always use G...
1041
  		addr = kmalloc(sizeof(*addr), GFP_NOFS);
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1042
1043
1044
1045
1046
1047
  		if (!addr)
  			break;
  		memcpy(addr, &sas, sizeof(*addr));
  		dlm_local_addr[dlm_local_count++] = addr;
  	}
  }
617e82e10   David Teigland   [DLM] lowcomms style
1048
1049
1050
1051
1052
  /* Bind to an IP address. SCTP allows multiple address so it can do
     multi-homing */
  static int add_sctp_bind_addr(struct connection *sctp_con,
  			      struct sockaddr_storage *addr,
  			      int addr_len, int num)
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
  {
  	int result = 0;
  
  	if (num == 1)
  		result = kernel_bind(sctp_con->sock,
  				     (struct sockaddr *) addr,
  				     addr_len);
  	else
  		result = kernel_setsockopt(sctp_con->sock, SOL_SCTP,
  					   SCTP_SOCKOPT_BINDX_ADD,
  					   (char *)addr, addr_len);
  
  	if (result < 0)
  		log_print("Can't bind to port %d addr number %d",
  			  dlm_config.ci_tcp_port, num);
  
  	return result;
  }
fdda387f7   Patrick Caulfield   [DLM] Add support...
1071

6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1072
1073
1074
1075
1076
1077
1078
  /* Initialise SCTP socket and bind to all interfaces */
  static int sctp_listen_for_all(void)
  {
  	struct socket *sock = NULL;
  	struct sockaddr_storage localaddr;
  	struct sctp_event_subscribe subscribe;
  	int result = -EINVAL, num = 1, i, addr_len;
573c24c4a   David Teigland   dlm: always use G...
1079
  	struct connection *con = nodeid2con(0, GFP_NOFS);
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
  	int bufsize = NEEDED_RMEM;
  
  	if (!con)
  		return -ENOMEM;
  
  	log_print("Using SCTP for communications");
  
  	result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_SEQPACKET,
  				  IPPROTO_SCTP, &sock);
  	if (result < 0) {
  		log_print("Can't create comms socket, check SCTP is loaded");
  		goto out;
  	}
  
  	/* Listen for events */
  	memset(&subscribe, 0, sizeof(subscribe));
  	subscribe.sctp_data_io_event = 1;
  	subscribe.sctp_association_event = 1;
  	subscribe.sctp_send_failure_event = 1;
  	subscribe.sctp_shutdown_event = 1;
  	subscribe.sctp_partial_delivery_event = 1;
df61c9526   David S. Miller   [DLM] lowcomms: D...
1101
  	result = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUFFORCE,
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1102
1103
  				 (char *)&bufsize, sizeof(bufsize));
  	if (result)
617e82e10   David Teigland   [DLM] lowcomms style
1104
  		log_print("Error increasing buffer space on socket %d", result);
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1105
1106
  
  	result = kernel_setsockopt(sock, SOL_SCTP, SCTP_EVENTS,
617e82e10   David Teigland   [DLM] lowcomms style
1107
  				   (char *)&subscribe, sizeof(subscribe));
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
  	if (result < 0) {
  		log_print("Failed to set SCTP_EVENTS on socket: result=%d",
  			  result);
  		goto create_delsock;
  	}
  
  	/* Init con struct */
  	sock->sk->sk_user_data = con;
  	con->sock = sock;
  	con->sock->sk->sk_data_ready = lowcomms_data_ready;
  	con->rx_action = receive_from_sock;
  	con->connect_action = sctp_init_assoc;
  
  	/* Bind to all interfaces. */
  	for (i = 0; i < dlm_local_count; i++) {
  		memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
  		make_sockaddr(&localaddr, dlm_config.ci_tcp_port, &addr_len);
  
  		result = add_sctp_bind_addr(con, &localaddr, addr_len, num);
  		if (result)
  			goto create_delsock;
  		++num;
  	}
  
  	result = sock->ops->listen(sock, 5);
  	if (result < 0) {
  		log_print("Can't set socket listening");
  		goto create_delsock;
  	}
  
  	return 0;
  
  create_delsock:
  	sock_release(sock);
  	con->sock = NULL;
  out:
  	return result;
  }
  
  static int tcp_listen_for_all(void)
fdda387f7   Patrick Caulfield   [DLM] Add support...
1148
1149
  {
  	struct socket *sock = NULL;
573c24c4a   David Teigland   dlm: always use G...
1150
  	struct connection *con = nodeid2con(0, GFP_NOFS);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1151
  	int result = -EINVAL;
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1152
1153
  	if (!con)
  		return -ENOMEM;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1154
  	/* We don't support multi-homed hosts */
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1155
  	if (dlm_local_addr[1] != NULL) {
617e82e10   David Teigland   [DLM] lowcomms style
1156
1157
  		log_print("TCP protocol can't handle multi-homed hosts, "
  			  "try SCTP");
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1158
1159
1160
1161
  		return -EINVAL;
  	}
  
  	log_print("Using TCP for communications");
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1162
  	sock = tcp_create_listen_sock(con, dlm_local_addr[0]);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
  	if (sock) {
  		add_sock(sock, con);
  		result = 0;
  	}
  	else {
  		result = -EADDRINUSE;
  	}
  
  	return result;
  }
  
  
  
  /*
   * Allocate a write queue entry and the page that backs it.
   *
   * The entry starts with offset, len, end and users all zero and its con
   * pointer set to @con; it is not added to any list here.
   *
   * Returns the new entry, or NULL if either allocation fails.
   */
  static struct writequeue_entry *new_writequeue_entry(struct connection *con,
  						     gfp_t allocation)
  {
  	struct writequeue_entry *entry;

  	entry = kmalloc(sizeof(struct writequeue_entry), allocation);
  	if (!entry)
  		return NULL;

  	entry->page = alloc_page(allocation);
  	if (entry->page) {
  		entry->offset = 0;
  		entry->len = 0;
  		entry->end = 0;
  		entry->users = 0;
  		entry->con = con;
  		return entry;
  	}

  	/* No page: give the entry back as well */
  	kfree(entry);
  	return NULL;
  }
617e82e10   David Teigland   [DLM] lowcomms style
1199
  /*
   * Reserve @len bytes of send buffer space for @nodeid.
   *
   * @nodeid:     destination node
   * @len:        number of bytes to reserve; must fit in one page
   * @allocation: gfp flags for any connection/entry/page allocation
   * @ppc:        on success, set to the start of the reserved space
   *
   * The space is taken from the last entry on the connection's write queue
   * when it has room, otherwise a new entry is allocated and appended.  The
   * entry's user count is raised; dlm_lowcomms_commit_buffer() drops it.
   *
   * Returns an opaque handle (the write queue entry) to pass to
   * dlm_lowcomms_commit_buffer(), or NULL if @len is out of range or an
   * allocation fails.
   */
  void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
  {
  	struct connection *con;
  	struct writequeue_entry *e;
  	int offset = 0;

  	/*
  	 * Each entry is backed by a single page, so a reservation that is
  	 * negative or larger than a page can never be satisfied and would
  	 * push e->end outside the page.
  	 */
  	if (len < 0 || len > PAGE_CACHE_SIZE)
  		return NULL;

  	con = nodeid2con(nodeid, allocation);
  	if (!con)
  		return NULL;

  	/* Try to append to the last entry already on the queue */
  	spin_lock(&con->writequeue_lock);
  	e = list_entry(con->writequeue.prev, struct writequeue_entry, list);
  	if ((&e->list == &con->writequeue) ||
  	    (PAGE_CACHE_SIZE - e->end < len)) {
  		/* Queue is empty or the last page has no room left */
  		e = NULL;
  	} else {
  		offset = e->end;
  		e->end += len;
  		e->users++;
  	}
  	spin_unlock(&con->writequeue_lock);

  	if (!e) {
  		/* Allocate outside the spinlock, then queue the new entry */
  		e = new_writequeue_entry(con, allocation);
  		if (!e)
  			return NULL;

  		spin_lock(&con->writequeue_lock);
  		offset = e->end;
  		e->end += len;
  		e->users++;
  		list_add_tail(&e->list, &con->writequeue);
  		spin_unlock(&con->writequeue_lock);
  	}

  	*ppc = page_address(e->page) + offset;
  	return e;
  }
  
  void dlm_lowcomms_commit_buffer(void *mh)
  {
  	struct writequeue_entry *e = (struct writequeue_entry *)mh;
  	struct connection *con = e->con;
  	int users;
4edde74ee   Patrick Caulfield   [DLM] Fix spin lo...
1244
  	spin_lock(&con->writequeue_lock);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1245
1246
1247
1248
  	users = --e->users;
  	if (users)
  		goto out;
  	e->len = e->end - e->offset;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1249
  	spin_unlock(&con->writequeue_lock);
1d6e8131c   Patrick Caulfield   [DLM] Use workque...
1250
1251
  	if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) {
  		queue_work(send_workqueue, &con->swork);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1252
1253
  	}
  	return;
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
1254
  out:
fdda387f7   Patrick Caulfield   [DLM] Add support...
1255
1256
1257
  	spin_unlock(&con->writequeue_lock);
  	return;
  }
fdda387f7   Patrick Caulfield   [DLM] Add support...
1258
  /* Send a message */
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
1259
  static void send_to_sock(struct connection *con)
fdda387f7   Patrick Caulfield   [DLM] Add support...
1260
1261
  {
  	int ret = 0;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1262
1263
1264
  	const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
  	struct writequeue_entry *e;
  	int len, offset;
f92c8dd7a   Bob Peterson   dlm: reduce cond_...
1265
  	int count = 0;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1266

f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
1267
  	mutex_lock(&con->sock_mutex);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1268
1269
  	if (con->sock == NULL)
  		goto out_connect;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
  	spin_lock(&con->writequeue_lock);
  	for (;;) {
  		e = list_entry(con->writequeue.next, struct writequeue_entry,
  			       list);
  		if ((struct list_head *) e == &con->writequeue)
  			break;
  
  		len = e->len;
  		offset = e->offset;
  		BUG_ON(len == 0 && e->users == 0);
  		spin_unlock(&con->writequeue_lock);
  
  		ret = 0;
  		if (len) {
1329e3f2c   Paolo Bonzini   dlm: use kernel_s...
1284
1285
  			ret = kernel_sendpage(con->sock, e->page, offset, len,
  					      msg_flags);
d66f8277f   Patrick Caulfield   [DLM] Make dlm_se...
1286
  			if (ret == -EAGAIN || ret == 0) {
b36930dd5   David Miller   dlm: Handle appli...
1287
1288
1289
1290
1291
1292
1293
1294
1295
  				if (ret == -EAGAIN &&
  				    test_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags) &&
  				    !test_and_set_bit(CF_APP_LIMITED, &con->flags)) {
  					/* Notify TCP that we're limited by the
  					 * application window size.
  					 */
  					set_bit(SOCK_NOSPACE, &con->sock->flags);
  					con->sock->sk->sk_write_pending++;
  				}
d66f8277f   Patrick Caulfield   [DLM] Make dlm_se...
1296
  				cond_resched();
fdda387f7   Patrick Caulfield   [DLM] Add support...
1297
  				goto out;
d66f8277f   Patrick Caulfield   [DLM] Make dlm_se...
1298
  			}
fdda387f7   Patrick Caulfield   [DLM] Add support...
1299
1300
  			if (ret <= 0)
  				goto send_error;
d66f8277f   Patrick Caulfield   [DLM] Make dlm_se...
1301
  		}
f92c8dd7a   Bob Peterson   dlm: reduce cond_...
1302
1303
1304
  
  		/* Don't starve people filling buffers */
  		if (++count >= MAX_SEND_MSG_COUNT) {
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
1305
  			cond_resched();
f92c8dd7a   Bob Peterson   dlm: reduce cond_...
1306
1307
  			count = 0;
  		}
fdda387f7   Patrick Caulfield   [DLM] Add support...
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
  
  		spin_lock(&con->writequeue_lock);
  		e->offset += ret;
  		e->len -= ret;
  
  		if (e->len == 0 && e->users == 0) {
  			list_del(&e->list);
  			free_entry(e);
  			continue;
  		}
  	}
  	spin_unlock(&con->writequeue_lock);
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
1320
  out:
f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
1321
  	mutex_unlock(&con->sock_mutex);
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
1322
  	return;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1323

ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
1324
  send_error:
f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
1325
  	mutex_unlock(&con->sock_mutex);
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
1326
  	close_connection(con, false);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1327
  	lowcomms_connect_sock(con);
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
1328
  	return;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1329

ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
1330
  out_connect:
f1f1c1ccf   Patrick Caulfield   [DLM] Make sock_s...
1331
  	mutex_unlock(&con->sock_mutex);
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1332
1333
  	if (!test_bit(CF_INIT_PENDING, &con->flags))
  		lowcomms_connect_sock(con);
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
1334
  	return;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1335
1336
1337
1338
  }
  
  static void clean_one_writequeue(struct connection *con)
  {
5e9ccc372   Christine Caulfield   dlm: replace idr ...
1339
  	struct writequeue_entry *e, *safe;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1340
1341
  
  	spin_lock(&con->writequeue_lock);
5e9ccc372   Christine Caulfield   dlm: replace idr ...
1342
  	list_for_each_entry_safe(e, safe, &con->writequeue, list) {
fdda387f7   Patrick Caulfield   [DLM] Add support...
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
  		list_del(&e->list);
  		free_entry(e);
  	}
  	spin_unlock(&con->writequeue_lock);
  }
  
  /* Called from recovery when it knows that a node has
     left the cluster */
  int dlm_lowcomms_close(int nodeid)
  {
  	struct connection *con;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1354
1355
1356
  	log_print("closing connection to node %d", nodeid);
  	con = nodeid2con(nodeid, 0);
  	if (con) {
063c4c996   Lars Marowsky-Bree   dlm: fix connecti...
1357
1358
1359
1360
1361
1362
1363
  		clear_bit(CF_CONNECT_PENDING, &con->flags);
  		clear_bit(CF_WRITE_PENDING, &con->flags);
  		set_bit(CF_CLOSE, &con->flags);
  		if (cancel_work_sync(&con->swork))
  			log_print("canceled swork for node %d", nodeid);
  		if (cancel_work_sync(&con->rwork))
  			log_print("canceled rwork for node %d", nodeid);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1364
  		clean_one_writequeue(con);
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
1365
  		close_connection(con, true);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1366
1367
  	}
  	return 0;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1368
  }
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1369
  /* Receive workqueue function */
1d6e8131c   Patrick Caulfield   [DLM] Use workque...
1370
  static void process_recv_sockets(struct work_struct *work)
fdda387f7   Patrick Caulfield   [DLM] Add support...
1371
  {
1d6e8131c   Patrick Caulfield   [DLM] Use workque...
1372
1373
  	struct connection *con = container_of(work, struct connection, rwork);
  	int err;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1374

1d6e8131c   Patrick Caulfield   [DLM] Use workque...
1375
1376
1377
1378
  	clear_bit(CF_READ_PENDING, &con->flags);
  	do {
  		err = con->rx_action(con);
  	} while (!err);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1379
  }
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1380
  /* Send workqueue function */
1d6e8131c   Patrick Caulfield   [DLM] Use workque...
1381
  static void process_send_sockets(struct work_struct *work)
fdda387f7   Patrick Caulfield   [DLM] Add support...
1382
  {
1d6e8131c   Patrick Caulfield   [DLM] Use workque...
1383
  	struct connection *con = container_of(work, struct connection, swork);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1384

1d6e8131c   Patrick Caulfield   [DLM] Use workque...
1385
  	if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) {
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1386
  		con->connect_action(con);
063c4c996   Lars Marowsky-Bree   dlm: fix connecti...
1387
  		set_bit(CF_WRITE_PENDING, &con->flags);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1388
  	}
063c4c996   Lars Marowsky-Bree   dlm: fix connecti...
1389
1390
  	if (test_and_clear_bit(CF_WRITE_PENDING, &con->flags))
  		send_to_sock(con);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1391
1392
1393
1394
1395
1396
  }
  
  
  /* Discard all entries on the write queues */
  static void clean_writequeues(void)
  {
5e9ccc372   Christine Caulfield   dlm: replace idr ...
1397
  	foreach_conn(clean_one_writequeue);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1398
  }
1d6e8131c   Patrick Caulfield   [DLM] Use workque...
1399
  static void work_stop(void)
fdda387f7   Patrick Caulfield   [DLM] Add support...
1400
  {
1d6e8131c   Patrick Caulfield   [DLM] Use workque...
1401
1402
  	destroy_workqueue(recv_workqueue);
  	destroy_workqueue(send_workqueue);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1403
  }
1d6e8131c   Patrick Caulfield   [DLM] Use workque...
1404
  static int work_start(void)
fdda387f7   Patrick Caulfield   [DLM] Add support...
1405
  {
e43f055a9   David Teigland   dlm: use alloc_wo...
1406
1407
  	recv_workqueue = alloc_workqueue("dlm_recv",
  					 WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
b9d410527   Namhyung Kim   dlm: sanitize wor...
1408
1409
1410
  	if (!recv_workqueue) {
  		log_print("can't start dlm_recv");
  		return -ENOMEM;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1411
  	}
fdda387f7   Patrick Caulfield   [DLM] Add support...
1412

e43f055a9   David Teigland   dlm: use alloc_wo...
1413
1414
  	send_workqueue = alloc_workqueue("dlm_send",
  					 WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
b9d410527   Namhyung Kim   dlm: sanitize wor...
1415
1416
  	if (!send_workqueue) {
  		log_print("can't start dlm_send");
1d6e8131c   Patrick Caulfield   [DLM] Use workque...
1417
  		destroy_workqueue(recv_workqueue);
b9d410527   Namhyung Kim   dlm: sanitize wor...
1418
  		return -ENOMEM;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1419
  	}
fdda387f7   Patrick Caulfield   [DLM] Add support...
1420
1421
1422
  
  	return 0;
  }
5e9ccc372   Christine Caulfield   dlm: replace idr ...
1423
  static void stop_conn(struct connection *con)
fdda387f7   Patrick Caulfield   [DLM] Add support...
1424
  {
5e9ccc372   Christine Caulfield   dlm: replace idr ...
1425
  	con->flags |= 0x0F;
391fbdc5d   Christine Caulfield   dlm: connect to n...
1426
  	if (con->sock && con->sock->sk)
5e9ccc372   Christine Caulfield   dlm: replace idr ...
1427
1428
  		con->sock->sk->sk_user_data = NULL;
  }
fdda387f7   Patrick Caulfield   [DLM] Add support...
1429

5e9ccc372   Christine Caulfield   dlm: replace idr ...
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
  /*
   * Close a connection, remove it from the hash list it is linked on,
   * and return it (and its othercon, if any) to con_cache.
   */
  static void free_conn(struct connection *con)
  {
  	close_connection(con, true);

  	/* Unlink first so nothing can find the connection once freed */
  	hlist_del(&con->list);

  	if (con->othercon)
  		kmem_cache_free(con_cache, con->othercon);
  	kmem_cache_free(con_cache, con);
  }
  
  void dlm_lowcomms_stop(void)
  {
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
1441
  	/* Set all the flags to prevent any
fdda387f7   Patrick Caulfield   [DLM] Add support...
1442
1443
  	   socket activity.
  	*/
7a936ce71   Matthias Kaehlcke   dlm: convert conn...
1444
  	mutex_lock(&connections_lock);
5e9ccc372   Christine Caulfield   dlm: replace idr ...
1445
  	foreach_conn(stop_conn);
7a936ce71   Matthias Kaehlcke   dlm: convert conn...
1446
  	mutex_unlock(&connections_lock);
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
1447

1d6e8131c   Patrick Caulfield   [DLM] Use workque...
1448
  	work_stop();
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1449

7a936ce71   Matthias Kaehlcke   dlm: convert conn...
1450
  	mutex_lock(&connections_lock);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1451
  	clean_writequeues();
5e9ccc372   Christine Caulfield   dlm: replace idr ...
1452
  	foreach_conn(free_conn);
7a936ce71   Matthias Kaehlcke   dlm: convert conn...
1453
  	mutex_unlock(&connections_lock);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1454
1455
  	kmem_cache_destroy(con_cache);
  }
fdda387f7   Patrick Caulfield   [DLM] Add support...
1456
1457
  int dlm_lowcomms_start(void)
  {
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1458
1459
  	int error = -EINVAL;
  	struct connection *con;
5e9ccc372   Christine Caulfield   dlm: replace idr ...
1460
1461
1462
1463
  	int i;
  
  	for (i = 0; i < CONN_HASH_SIZE; i++)
  		INIT_HLIST_HEAD(&connection_hash[i]);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1464

6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1465
1466
  	init_local();
  	if (!dlm_local_count) {
617e82e10   David Teigland   [DLM] lowcomms style
1467
  		error = -ENOTCONN;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1468
  		log_print("no local IP address has been set");
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1469
  		goto out;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1470
  	}
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1471
  	error = -ENOMEM;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1472
  	con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection),
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
1473
  				      __alignof__(struct connection), 0,
20c2df83d   Paul Mundt   mm: Remove slab d...
1474
  				      NULL);
fdda387f7   Patrick Caulfield   [DLM] Add support...
1475
  	if (!con_cache)
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1476
  		goto out;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1477

fdda387f7   Patrick Caulfield   [DLM] Add support...
1478
  	/* Start listening */
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1479
1480
1481
1482
  	if (dlm_config.ci_protocol == 0)
  		error = tcp_listen_for_all();
  	else
  		error = sctp_listen_for_all();
fdda387f7   Patrick Caulfield   [DLM] Add support...
1483
1484
  	if (error)
  		goto fail_unlisten;
1d6e8131c   Patrick Caulfield   [DLM] Use workque...
1485
  	error = work_start();
fdda387f7   Patrick Caulfield   [DLM] Add support...
1486
1487
  	if (error)
  		goto fail_unlisten;
fdda387f7   Patrick Caulfield   [DLM] Add support...
1488
  	return 0;
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
1489
  fail_unlisten:
6ed7257b4   Patrick Caulfield   [DLM] Consolidate...
1490
1491
1492
1493
1494
  	con = nodeid2con(0,0);
  	if (con) {
  		close_connection(con, false);
  		kmem_cache_free(con_cache, con);
  	}
fdda387f7   Patrick Caulfield   [DLM] Add support...
1495
  	kmem_cache_destroy(con_cache);
ac33d0710   Patrick Caulfield   [DLM] Clean up lo...
1496
  out:
fdda387f7   Patrick Caulfield   [DLM] Add support...
1497
1498
  	return error;
  }