Commit 9dcbe1b87c4a8e3ed62e95369c18709541a3dc8f

Authored by Geert Uytterhoeven
Committed by Simon Horman
1 parent 6a649f3398

ipvs: Remove unused variable ret from sync_thread_master()

net/netfilter/ipvs/ip_vs_sync.c: In function 'sync_thread_master':
net/netfilter/ipvs/ip_vs_sync.c:1640:8: warning: unused variable 'ret' [-Wunused-variable]

Commit 35a2af94c7ce7130ca292c68b1d27fcfdb648f6b ("sched/wait: Make the
__wait_event*() interface more friendly") changed how the interruption
state is returned. However, sync_thread_master() ignores this state,
now causing a compile warning.

According to Julian Anastasov <ja@ssi.bg>, this behavior is OK:

    "Yes, your patch looks ok to me. In the past we used ssleep() but IPVS
     users were confused why IPVS threads increase the load average. So, we
     switched to _interruptible calls and later the socket polling was
     added."

Document this, as requested by Peter Zijlstra, to avoid precious developers
disappearing in this pitfall in the future.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>

Showing 1 changed file with 4 additions and 1 deletion Inline Diff

net/netfilter/ipvs/ip_vs_sync.c
1 /* 1 /*
2 * IPVS An implementation of the IP virtual server support for the 2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module 3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a 4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a 5 * high-performance and highly available server based on a
6 * cluster of servers. 6 * cluster of servers.
7 * 7 *
8 * Version 1, is capable of handling both version 0 and 1 messages. 8 * Version 1, is capable of handling both version 0 and 1 messages.
9 * Version 0 is the plain old format. 9 * Version 0 is the plain old format.
10 * Note Version 0 receivers will just drop Ver 1 messages. 10 * Note Version 0 receivers will just drop Ver 1 messages.
11 * Version 1 is capable of handle IPv6, Persistence data, 11 * Version 1 is capable of handle IPv6, Persistence data,
12 * time-outs, and firewall marks. 12 * time-outs, and firewall marks.
13 * In ver.1 "ip_vs_sync_conn_options" will be sent in netw. order. 13 * In ver.1 "ip_vs_sync_conn_options" will be sent in netw. order.
14 * Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0 14 * Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0
15 * 15 *
16 * Definitions Message: is a complete datagram 16 * Definitions Message: is a complete datagram
17 * Sync_conn: is a part of a Message 17 * Sync_conn: is a part of a Message
18 * Param Data is an option to a Sync_conn. 18 * Param Data is an option to a Sync_conn.
19 * 19 *
20 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 20 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
21 * 21 *
22 * ip_vs_sync: sync connection info from master load balancer to backups 22 * ip_vs_sync: sync connection info from master load balancer to backups
23 * through multicast 23 * through multicast
24 * 24 *
25 * Changes: 25 * Changes:
26 * Alexandre Cassen : Added master & backup support at a time. 26 * Alexandre Cassen : Added master & backup support at a time.
27 * Alexandre Cassen : Added SyncID support for incoming sync 27 * Alexandre Cassen : Added SyncID support for incoming sync
28 * messages filtering. 28 * messages filtering.
29 * Justin Ossevoort : Fix endian problem on sync message size. 29 * Justin Ossevoort : Fix endian problem on sync message size.
30 * Hans Schillstrom : Added Version 1: i.e. IPv6, 30 * Hans Schillstrom : Added Version 1: i.e. IPv6,
31 * Persistence support, fwmark and time-out. 31 * Persistence support, fwmark and time-out.
32 */ 32 */
33 33
34 #define KMSG_COMPONENT "IPVS" 34 #define KMSG_COMPONENT "IPVS"
35 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 35 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
36 36
37 #include <linux/module.h> 37 #include <linux/module.h>
38 #include <linux/slab.h> 38 #include <linux/slab.h>
39 #include <linux/inetdevice.h> 39 #include <linux/inetdevice.h>
40 #include <linux/net.h> 40 #include <linux/net.h>
41 #include <linux/completion.h> 41 #include <linux/completion.h>
42 #include <linux/delay.h> 42 #include <linux/delay.h>
43 #include <linux/skbuff.h> 43 #include <linux/skbuff.h>
44 #include <linux/in.h> 44 #include <linux/in.h>
45 #include <linux/igmp.h> /* for ip_mc_join_group */ 45 #include <linux/igmp.h> /* for ip_mc_join_group */
46 #include <linux/udp.h> 46 #include <linux/udp.h>
47 #include <linux/err.h> 47 #include <linux/err.h>
48 #include <linux/kthread.h> 48 #include <linux/kthread.h>
49 #include <linux/wait.h> 49 #include <linux/wait.h>
50 #include <linux/kernel.h> 50 #include <linux/kernel.h>
51 51
52 #include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */ 52 #include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */
53 53
54 #include <net/ip.h> 54 #include <net/ip.h>
55 #include <net/sock.h> 55 #include <net/sock.h>
56 56
57 #include <net/ip_vs.h> 57 #include <net/ip_vs.h>
58 58
59 #define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */ 59 #define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */
60 #define IP_VS_SYNC_PORT 8848 /* multicast port */ 60 #define IP_VS_SYNC_PORT 8848 /* multicast port */
61 61
62 #define SYNC_PROTO_VER 1 /* Protocol version in header */ 62 #define SYNC_PROTO_VER 1 /* Protocol version in header */
63 63
64 static struct lock_class_key __ipvs_sync_key; 64 static struct lock_class_key __ipvs_sync_key;
65 /* 65 /*
66 * IPVS sync connection entry 66 * IPVS sync connection entry
67 * Version 0, i.e. original version. 67 * Version 0, i.e. original version.
68 */ 68 */
69 struct ip_vs_sync_conn_v0 { 69 struct ip_vs_sync_conn_v0 {
70 __u8 reserved; 70 __u8 reserved;
71 71
72 /* Protocol, addresses and port numbers */ 72 /* Protocol, addresses and port numbers */
73 __u8 protocol; /* Which protocol (TCP/UDP) */ 73 __u8 protocol; /* Which protocol (TCP/UDP) */
74 __be16 cport; 74 __be16 cport;
75 __be16 vport; 75 __be16 vport;
76 __be16 dport; 76 __be16 dport;
77 __be32 caddr; /* client address */ 77 __be32 caddr; /* client address */
78 __be32 vaddr; /* virtual address */ 78 __be32 vaddr; /* virtual address */
79 __be32 daddr; /* destination address */ 79 __be32 daddr; /* destination address */
80 80
81 /* Flags and state transition */ 81 /* Flags and state transition */
82 __be16 flags; /* status flags */ 82 __be16 flags; /* status flags */
83 __be16 state; /* state info */ 83 __be16 state; /* state info */
84 84
85 /* The sequence options start here */ 85 /* The sequence options start here */
86 }; 86 };
87 87
88 struct ip_vs_sync_conn_options { 88 struct ip_vs_sync_conn_options {
89 struct ip_vs_seq in_seq; /* incoming seq. struct */ 89 struct ip_vs_seq in_seq; /* incoming seq. struct */
90 struct ip_vs_seq out_seq; /* outgoing seq. struct */ 90 struct ip_vs_seq out_seq; /* outgoing seq. struct */
91 }; 91 };
92 92
93 /* 93 /*
94 Sync Connection format (sync_conn) 94 Sync Connection format (sync_conn)
95 95
96 0 1 2 3 96 0 1 2 3
97 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 97 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
98 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 98 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
99 | Type | Protocol | Ver. | Size | 99 | Type | Protocol | Ver. | Size |
100 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 100 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
101 | Flags | 101 | Flags |
102 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 102 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
103 | State | cport | 103 | State | cport |
104 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 104 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
105 | vport | dport | 105 | vport | dport |
106 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 106 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
107 | fwmark | 107 | fwmark |
108 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 108 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
109 | timeout (in sec.) | 109 | timeout (in sec.) |
110 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 110 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
111 | ... | 111 | ... |
112 | IP-Addresses (v4 or v6) | 112 | IP-Addresses (v4 or v6) |
113 | ... | 113 | ... |
114 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 114 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
115 Optional Parameters. 115 Optional Parameters.
116 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 116 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
117 | Param. Type | Param. Length | Param. data | 117 | Param. Type | Param. Length | Param. data |
118 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 118 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
119 | ... | 119 | ... |
120 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 120 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
121 | | Param Type | Param. Length | 121 | | Param Type | Param. Length |
122 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 122 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
123 | Param data | 123 | Param data |
124 | Last Param data should be padded for 32 bit alignment | 124 | Last Param data should be padded for 32 bit alignment |
125 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 125 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
126 */ 126 */
127 127
128 /* 128 /*
129 * Type 0, IPv4 sync connection format 129 * Type 0, IPv4 sync connection format
130 */ 130 */
131 struct ip_vs_sync_v4 { 131 struct ip_vs_sync_v4 {
132 __u8 type; 132 __u8 type;
133 __u8 protocol; /* Which protocol (TCP/UDP) */ 133 __u8 protocol; /* Which protocol (TCP/UDP) */
134 __be16 ver_size; /* Version msb 4 bits */ 134 __be16 ver_size; /* Version msb 4 bits */
135 /* Flags and state transition */ 135 /* Flags and state transition */
136 __be32 flags; /* status flags */ 136 __be32 flags; /* status flags */
137 __be16 state; /* state info */ 137 __be16 state; /* state info */
138 /* Protocol, addresses and port numbers */ 138 /* Protocol, addresses and port numbers */
139 __be16 cport; 139 __be16 cport;
140 __be16 vport; 140 __be16 vport;
141 __be16 dport; 141 __be16 dport;
142 __be32 fwmark; /* Firewall mark from skb */ 142 __be32 fwmark; /* Firewall mark from skb */
143 __be32 timeout; /* cp timeout */ 143 __be32 timeout; /* cp timeout */
144 __be32 caddr; /* client address */ 144 __be32 caddr; /* client address */
145 __be32 vaddr; /* virtual address */ 145 __be32 vaddr; /* virtual address */
146 __be32 daddr; /* destination address */ 146 __be32 daddr; /* destination address */
147 /* The sequence options start here */ 147 /* The sequence options start here */
148 /* PE data padded to 32bit alignment after seq. options */ 148 /* PE data padded to 32bit alignment after seq. options */
149 }; 149 };
150 /* 150 /*
151 * Type 2 messages IPv6 151 * Type 2 messages IPv6
152 */ 152 */
153 struct ip_vs_sync_v6 { 153 struct ip_vs_sync_v6 {
154 __u8 type; 154 __u8 type;
155 __u8 protocol; /* Which protocol (TCP/UDP) */ 155 __u8 protocol; /* Which protocol (TCP/UDP) */
156 __be16 ver_size; /* Version msb 4 bits */ 156 __be16 ver_size; /* Version msb 4 bits */
157 /* Flags and state transition */ 157 /* Flags and state transition */
158 __be32 flags; /* status flags */ 158 __be32 flags; /* status flags */
159 __be16 state; /* state info */ 159 __be16 state; /* state info */
160 /* Protocol, addresses and port numbers */ 160 /* Protocol, addresses and port numbers */
161 __be16 cport; 161 __be16 cport;
162 __be16 vport; 162 __be16 vport;
163 __be16 dport; 163 __be16 dport;
164 __be32 fwmark; /* Firewall mark from skb */ 164 __be32 fwmark; /* Firewall mark from skb */
165 __be32 timeout; /* cp timeout */ 165 __be32 timeout; /* cp timeout */
166 struct in6_addr caddr; /* client address */ 166 struct in6_addr caddr; /* client address */
167 struct in6_addr vaddr; /* virtual address */ 167 struct in6_addr vaddr; /* virtual address */
168 struct in6_addr daddr; /* destination address */ 168 struct in6_addr daddr; /* destination address */
169 /* The sequence options start here */ 169 /* The sequence options start here */
170 /* PE data padded to 32bit alignment after seq. options */ 170 /* PE data padded to 32bit alignment after seq. options */
171 }; 171 };
172 172
173 union ip_vs_sync_conn { 173 union ip_vs_sync_conn {
174 struct ip_vs_sync_v4 v4; 174 struct ip_vs_sync_v4 v4;
175 struct ip_vs_sync_v6 v6; 175 struct ip_vs_sync_v6 v6;
176 }; 176 };
177 177
/* Bits in the Type field of a sync entry */
#define STYPE_INET6		0
#define STYPE_F_INET6		(1 << STYPE_INET6)

#define SVER_SHIFT		12	/* Shift to get version */
#define SVER_MASK		0x0fff	/* Mask to strip version */

/* Optional-parameter type codes ... */
#define IPVS_OPT_SEQ_DATA	1
#define IPVS_OPT_PE_DATA	2
#define IPVS_OPT_PE_NAME	3
#define IPVS_OPT_PARAM		7

/* ... and their corresponding presence flag bits */
#define IPVS_OPT_F_SEQ_DATA	(1 << (IPVS_OPT_SEQ_DATA-1))
#define IPVS_OPT_F_PE_DATA	(1 << (IPVS_OPT_PE_DATA-1))
#define IPVS_OPT_F_PE_NAME	(1 << (IPVS_OPT_PE_NAME-1))
#define IPVS_OPT_F_PARAM	(1 << (IPVS_OPT_PARAM-1))
194 194
/* Per-kthread context passed to the master/backup sync threads. */
struct ip_vs_sync_thread_data {
	struct net *net;
	struct socket *sock;	/* multicast socket used by this thread */
	char *buf;		/* receive buffer (backup threads) */
	int id;			/* thread index */
};
201 201
/* Version 0 definition of packet sizes */
#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn_v0))
#define FULL_CONN_SIZE  \
(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
206 206
207 207
208 /* 208 /*
 209 The master multicasts messages (Datagrams) to the backup load balancers 209 The master multicasts messages (Datagrams) to the backup load balancers
210 in the following format. 210 in the following format.
211 211
212 Version 1: 212 Version 1:
213 Note, first byte should be Zero, so ver 0 receivers will drop the packet. 213 Note, first byte should be Zero, so ver 0 receivers will drop the packet.
214 214
215 0 1 2 3 215 0 1 2 3
216 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 216 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
217 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 217 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
218 | 0 | SyncID | Size | 218 | 0 | SyncID | Size |
219 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 219 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
220 | Count Conns | Version | Reserved, set to Zero | 220 | Count Conns | Version | Reserved, set to Zero |
221 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 221 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
222 | | 222 | |
223 | IPVS Sync Connection (1) | 223 | IPVS Sync Connection (1) |
224 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 224 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
225 | . | 225 | . |
226 ~ . ~ 226 ~ . ~
227 | . | 227 | . |
228 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 228 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
229 | | 229 | |
230 | IPVS Sync Connection (n) | 230 | IPVS Sync Connection (n) |
231 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 231 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
232 232
233 Version 0 Header 233 Version 0 Header
234 0 1 2 3 234 0 1 2 3
235 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 235 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
236 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 236 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
237 | Count Conns | SyncID | Size | 237 | Count Conns | SyncID | Size |
238 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 238 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
239 | IPVS Sync Connection (1) | 239 | IPVS Sync Connection (1) |
240 */ 240 */
241 241
242 #define SYNC_MESG_HEADER_LEN 4 242 #define SYNC_MESG_HEADER_LEN 4
243 #define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ 243 #define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
244 244
245 /* Version 0 header */ 245 /* Version 0 header */
246 struct ip_vs_sync_mesg_v0 { 246 struct ip_vs_sync_mesg_v0 {
247 __u8 nr_conns; 247 __u8 nr_conns;
248 __u8 syncid; 248 __u8 syncid;
249 __be16 size; 249 __be16 size;
250 250
251 /* ip_vs_sync_conn entries start here */ 251 /* ip_vs_sync_conn entries start here */
252 }; 252 };
253 253
254 /* Version 1 header */ 254 /* Version 1 header */
255 struct ip_vs_sync_mesg { 255 struct ip_vs_sync_mesg {
256 __u8 reserved; /* must be zero */ 256 __u8 reserved; /* must be zero */
257 __u8 syncid; 257 __u8 syncid;
258 __be16 size; 258 __be16 size;
259 __u8 nr_conns; 259 __u8 nr_conns;
260 __s8 version; /* SYNC_PROTO_VER */ 260 __s8 version; /* SYNC_PROTO_VER */
261 __u16 spare; 261 __u16 spare;
262 /* ip_vs_sync_conn entries start here */ 262 /* ip_vs_sync_conn entries start here */
263 }; 263 };
264 264
265 struct ip_vs_sync_buff { 265 struct ip_vs_sync_buff {
266 struct list_head list; 266 struct list_head list;
267 unsigned long firstuse; 267 unsigned long firstuse;
268 268
269 /* pointers for the message data */ 269 /* pointers for the message data */
270 struct ip_vs_sync_mesg *mesg; 270 struct ip_vs_sync_mesg *mesg;
271 unsigned char *head; 271 unsigned char *head;
272 unsigned char *end; 272 unsigned char *end;
273 }; 273 };
274 274
275 /* 275 /*
276 * Copy of struct ip_vs_seq 276 * Copy of struct ip_vs_seq
277 * From unaligned network order to aligned host order 277 * From unaligned network order to aligned host order
278 */ 278 */
279 static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho) 279 static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
280 { 280 {
281 ho->init_seq = get_unaligned_be32(&no->init_seq); 281 ho->init_seq = get_unaligned_be32(&no->init_seq);
282 ho->delta = get_unaligned_be32(&no->delta); 282 ho->delta = get_unaligned_be32(&no->delta);
283 ho->previous_delta = get_unaligned_be32(&no->previous_delta); 283 ho->previous_delta = get_unaligned_be32(&no->previous_delta);
284 } 284 }
285 285
286 /* 286 /*
287 * Copy of struct ip_vs_seq 287 * Copy of struct ip_vs_seq
288 * From Aligned host order to unaligned network order 288 * From Aligned host order to unaligned network order
289 */ 289 */
290 static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no) 290 static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
291 { 291 {
292 put_unaligned_be32(ho->init_seq, &no->init_seq); 292 put_unaligned_be32(ho->init_seq, &no->init_seq);
293 put_unaligned_be32(ho->delta, &no->delta); 293 put_unaligned_be32(ho->delta, &no->delta);
294 put_unaligned_be32(ho->previous_delta, &no->previous_delta); 294 put_unaligned_be32(ho->previous_delta, &no->previous_delta);
295 } 295 }
296 296
297 static inline struct ip_vs_sync_buff * 297 static inline struct ip_vs_sync_buff *
298 sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) 298 sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
299 { 299 {
300 struct ip_vs_sync_buff *sb; 300 struct ip_vs_sync_buff *sb;
301 301
302 spin_lock_bh(&ipvs->sync_lock); 302 spin_lock_bh(&ipvs->sync_lock);
303 if (list_empty(&ms->sync_queue)) { 303 if (list_empty(&ms->sync_queue)) {
304 sb = NULL; 304 sb = NULL;
305 __set_current_state(TASK_INTERRUPTIBLE); 305 __set_current_state(TASK_INTERRUPTIBLE);
306 } else { 306 } else {
307 sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff, 307 sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff,
308 list); 308 list);
309 list_del(&sb->list); 309 list_del(&sb->list);
310 ms->sync_queue_len--; 310 ms->sync_queue_len--;
311 if (!ms->sync_queue_len) 311 if (!ms->sync_queue_len)
312 ms->sync_queue_delay = 0; 312 ms->sync_queue_delay = 0;
313 } 313 }
314 spin_unlock_bh(&ipvs->sync_lock); 314 spin_unlock_bh(&ipvs->sync_lock);
315 315
316 return sb; 316 return sb;
317 } 317 }
318 318
319 /* 319 /*
320 * Create a new sync buffer for Version 1 proto. 320 * Create a new sync buffer for Version 1 proto.
321 */ 321 */
322 static inline struct ip_vs_sync_buff * 322 static inline struct ip_vs_sync_buff *
323 ip_vs_sync_buff_create(struct netns_ipvs *ipvs) 323 ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
324 { 324 {
325 struct ip_vs_sync_buff *sb; 325 struct ip_vs_sync_buff *sb;
326 326
327 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) 327 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
328 return NULL; 328 return NULL;
329 329
330 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); 330 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
331 if (!sb->mesg) { 331 if (!sb->mesg) {
332 kfree(sb); 332 kfree(sb);
333 return NULL; 333 return NULL;
334 } 334 }
335 sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */ 335 sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */
336 sb->mesg->version = SYNC_PROTO_VER; 336 sb->mesg->version = SYNC_PROTO_VER;
337 sb->mesg->syncid = ipvs->master_syncid; 337 sb->mesg->syncid = ipvs->master_syncid;
338 sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg)); 338 sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg));
339 sb->mesg->nr_conns = 0; 339 sb->mesg->nr_conns = 0;
340 sb->mesg->spare = 0; 340 sb->mesg->spare = 0;
341 sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); 341 sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
342 sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen; 342 sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
343 343
344 sb->firstuse = jiffies; 344 sb->firstuse = jiffies;
345 return sb; 345 return sb;
346 } 346 }
347 347
348 static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) 348 static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
349 { 349 {
350 kfree(sb->mesg); 350 kfree(sb->mesg);
351 kfree(sb); 351 kfree(sb);
352 } 352 }
353 353
354 static inline void sb_queue_tail(struct netns_ipvs *ipvs, 354 static inline void sb_queue_tail(struct netns_ipvs *ipvs,
355 struct ipvs_master_sync_state *ms) 355 struct ipvs_master_sync_state *ms)
356 { 356 {
357 struct ip_vs_sync_buff *sb = ms->sync_buff; 357 struct ip_vs_sync_buff *sb = ms->sync_buff;
358 358
359 spin_lock(&ipvs->sync_lock); 359 spin_lock(&ipvs->sync_lock);
360 if (ipvs->sync_state & IP_VS_STATE_MASTER && 360 if (ipvs->sync_state & IP_VS_STATE_MASTER &&
361 ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) { 361 ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) {
362 if (!ms->sync_queue_len) 362 if (!ms->sync_queue_len)
363 schedule_delayed_work(&ms->master_wakeup_work, 363 schedule_delayed_work(&ms->master_wakeup_work,
364 max(IPVS_SYNC_SEND_DELAY, 1)); 364 max(IPVS_SYNC_SEND_DELAY, 1));
365 ms->sync_queue_len++; 365 ms->sync_queue_len++;
366 list_add_tail(&sb->list, &ms->sync_queue); 366 list_add_tail(&sb->list, &ms->sync_queue);
367 if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE) 367 if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE)
368 wake_up_process(ms->master_thread); 368 wake_up_process(ms->master_thread);
369 } else 369 } else
370 ip_vs_sync_buff_release(sb); 370 ip_vs_sync_buff_release(sb);
371 spin_unlock(&ipvs->sync_lock); 371 spin_unlock(&ipvs->sync_lock);
372 } 372 }
373 373
374 /* 374 /*
375 * Get the current sync buffer if it has been created for more 375 * Get the current sync buffer if it has been created for more
376 * than the specified time or the specified time is zero. 376 * than the specified time or the specified time is zero.
377 */ 377 */
378 static inline struct ip_vs_sync_buff * 378 static inline struct ip_vs_sync_buff *
379 get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms, 379 get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms,
380 unsigned long time) 380 unsigned long time)
381 { 381 {
382 struct ip_vs_sync_buff *sb; 382 struct ip_vs_sync_buff *sb;
383 383
384 spin_lock_bh(&ipvs->sync_buff_lock); 384 spin_lock_bh(&ipvs->sync_buff_lock);
385 sb = ms->sync_buff; 385 sb = ms->sync_buff;
386 if (sb && time_after_eq(jiffies - sb->firstuse, time)) { 386 if (sb && time_after_eq(jiffies - sb->firstuse, time)) {
387 ms->sync_buff = NULL; 387 ms->sync_buff = NULL;
388 __set_current_state(TASK_RUNNING); 388 __set_current_state(TASK_RUNNING);
389 } else 389 } else
390 sb = NULL; 390 sb = NULL;
391 spin_unlock_bh(&ipvs->sync_buff_lock); 391 spin_unlock_bh(&ipvs->sync_buff_lock);
392 return sb; 392 return sb;
393 } 393 }
394 394
395 static inline int 395 static inline int
396 select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp) 396 select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
397 { 397 {
398 return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask; 398 return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask;
399 } 399 }
400 400
401 /* 401 /*
402 * Create a new sync buffer for Version 0 proto. 402 * Create a new sync buffer for Version 0 proto.
403 */ 403 */
404 static inline struct ip_vs_sync_buff * 404 static inline struct ip_vs_sync_buff *
405 ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) 405 ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
406 { 406 {
407 struct ip_vs_sync_buff *sb; 407 struct ip_vs_sync_buff *sb;
408 struct ip_vs_sync_mesg_v0 *mesg; 408 struct ip_vs_sync_mesg_v0 *mesg;
409 409
410 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) 410 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
411 return NULL; 411 return NULL;
412 412
413 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); 413 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
414 if (!sb->mesg) { 414 if (!sb->mesg) {
415 kfree(sb); 415 kfree(sb);
416 return NULL; 416 return NULL;
417 } 417 }
418 mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg; 418 mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
419 mesg->nr_conns = 0; 419 mesg->nr_conns = 0;
420 mesg->syncid = ipvs->master_syncid; 420 mesg->syncid = ipvs->master_syncid;
421 mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0)); 421 mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0));
422 sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0); 422 sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
423 sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen; 423 sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
424 sb->firstuse = jiffies; 424 sb->firstuse = jiffies;
425 return sb; 425 return sb;
426 } 426 }
427 427
428 /* Check if connection is controlled by persistence */ 428 /* Check if connection is controlled by persistence */
429 static inline bool in_persistence(struct ip_vs_conn *cp) 429 static inline bool in_persistence(struct ip_vs_conn *cp)
430 { 430 {
431 for (cp = cp->control; cp; cp = cp->control) { 431 for (cp = cp->control; cp; cp = cp->control) {
432 if (cp->flags & IP_VS_CONN_F_TEMPLATE) 432 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
433 return true; 433 return true;
434 } 434 }
435 return false; 435 return false;
436 } 436 }
437 437
438 /* Check if conn should be synced. 438 /* Check if conn should be synced.
439 * pkts: conn packets, use sysctl_sync_threshold to avoid packet check 439 * pkts: conn packets, use sysctl_sync_threshold to avoid packet check
440 * - (1) sync_refresh_period: reduce sync rate. Additionally, retry 440 * - (1) sync_refresh_period: reduce sync rate. Additionally, retry
441 * sync_retries times with period of sync_refresh_period/8 441 * sync_retries times with period of sync_refresh_period/8
442 * - (2) if both sync_refresh_period and sync_period are 0 send sync only 442 * - (2) if both sync_refresh_period and sync_period are 0 send sync only
443 * for state changes or only once when pkts matches sync_threshold 443 * for state changes or only once when pkts matches sync_threshold
444 * - (3) templates: rate can be reduced only with sync_refresh_period or 444 * - (3) templates: rate can be reduced only with sync_refresh_period or
445 * with (2) 445 * with (2)
446 */ 446 */
447 static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, 447 static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
448 struct ip_vs_conn *cp, int pkts) 448 struct ip_vs_conn *cp, int pkts)
449 { 449 {
450 unsigned long orig = ACCESS_ONCE(cp->sync_endtime); 450 unsigned long orig = ACCESS_ONCE(cp->sync_endtime);
451 unsigned long now = jiffies; 451 unsigned long now = jiffies;
452 unsigned long n = (now + cp->timeout) & ~3UL; 452 unsigned long n = (now + cp->timeout) & ~3UL;
453 unsigned int sync_refresh_period; 453 unsigned int sync_refresh_period;
454 int sync_period; 454 int sync_period;
455 int force; 455 int force;
456 456
457 /* Check if we sync in current state */ 457 /* Check if we sync in current state */
458 if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE)) 458 if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
459 force = 0; 459 force = 0;
460 else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp))) 460 else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp)))
461 return 0; 461 return 0;
462 else if (likely(cp->protocol == IPPROTO_TCP)) { 462 else if (likely(cp->protocol == IPPROTO_TCP)) {
463 if (!((1 << cp->state) & 463 if (!((1 << cp->state) &
464 ((1 << IP_VS_TCP_S_ESTABLISHED) | 464 ((1 << IP_VS_TCP_S_ESTABLISHED) |
465 (1 << IP_VS_TCP_S_FIN_WAIT) | 465 (1 << IP_VS_TCP_S_FIN_WAIT) |
466 (1 << IP_VS_TCP_S_CLOSE) | 466 (1 << IP_VS_TCP_S_CLOSE) |
467 (1 << IP_VS_TCP_S_CLOSE_WAIT) | 467 (1 << IP_VS_TCP_S_CLOSE_WAIT) |
468 (1 << IP_VS_TCP_S_TIME_WAIT)))) 468 (1 << IP_VS_TCP_S_TIME_WAIT))))
469 return 0; 469 return 0;
470 force = cp->state != cp->old_state; 470 force = cp->state != cp->old_state;
471 if (force && cp->state != IP_VS_TCP_S_ESTABLISHED) 471 if (force && cp->state != IP_VS_TCP_S_ESTABLISHED)
472 goto set; 472 goto set;
473 } else if (unlikely(cp->protocol == IPPROTO_SCTP)) { 473 } else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
474 if (!((1 << cp->state) & 474 if (!((1 << cp->state) &
475 ((1 << IP_VS_SCTP_S_ESTABLISHED) | 475 ((1 << IP_VS_SCTP_S_ESTABLISHED) |
476 (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) | 476 (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) |
477 (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) | 477 (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) |
478 (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) | 478 (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) |
479 (1 << IP_VS_SCTP_S_CLOSED)))) 479 (1 << IP_VS_SCTP_S_CLOSED))))
480 return 0; 480 return 0;
481 force = cp->state != cp->old_state; 481 force = cp->state != cp->old_state;
482 if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED) 482 if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
483 goto set; 483 goto set;
484 } else { 484 } else {
485 /* UDP or another protocol with single state */ 485 /* UDP or another protocol with single state */
486 force = 0; 486 force = 0;
487 } 487 }
488 488
489 sync_refresh_period = sysctl_sync_refresh_period(ipvs); 489 sync_refresh_period = sysctl_sync_refresh_period(ipvs);
490 if (sync_refresh_period > 0) { 490 if (sync_refresh_period > 0) {
491 long diff = n - orig; 491 long diff = n - orig;
492 long min_diff = max(cp->timeout >> 1, 10UL * HZ); 492 long min_diff = max(cp->timeout >> 1, 10UL * HZ);
493 493
494 /* Avoid sync if difference is below sync_refresh_period 494 /* Avoid sync if difference is below sync_refresh_period
495 * and below the half timeout. 495 * and below the half timeout.
496 */ 496 */
497 if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) { 497 if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) {
498 int retries = orig & 3; 498 int retries = orig & 3;
499 499
500 if (retries >= sysctl_sync_retries(ipvs)) 500 if (retries >= sysctl_sync_retries(ipvs))
501 return 0; 501 return 0;
502 if (time_before(now, orig - cp->timeout + 502 if (time_before(now, orig - cp->timeout +
503 (sync_refresh_period >> 3))) 503 (sync_refresh_period >> 3)))
504 return 0; 504 return 0;
505 n |= retries + 1; 505 n |= retries + 1;
506 } 506 }
507 } 507 }
508 sync_period = sysctl_sync_period(ipvs); 508 sync_period = sysctl_sync_period(ipvs);
509 if (sync_period > 0) { 509 if (sync_period > 0) {
510 if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) && 510 if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
511 pkts % sync_period != sysctl_sync_threshold(ipvs)) 511 pkts % sync_period != sysctl_sync_threshold(ipvs))
512 return 0; 512 return 0;
513 } else if (sync_refresh_period <= 0 && 513 } else if (sync_refresh_period <= 0 &&
514 pkts != sysctl_sync_threshold(ipvs)) 514 pkts != sysctl_sync_threshold(ipvs))
515 return 0; 515 return 0;
516 516
517 set: 517 set:
518 cp->old_state = cp->state; 518 cp->old_state = cp->state;
519 n = cmpxchg(&cp->sync_endtime, orig, n); 519 n = cmpxchg(&cp->sync_endtime, orig, n);
520 return n == orig || force; 520 return n == orig || force;
521 } 521 }
522 522
523 /* 523 /*
524 * Version 0 , could be switched in by sys_ctl. 524 * Version 0 , could be switched in by sys_ctl.
525 * Add an ip_vs_conn information into the current sync_buff. 525 * Add an ip_vs_conn information into the current sync_buff.
526 */ 526 */
527 static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, 527 static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
528 int pkts) 528 int pkts)
529 { 529 {
530 struct netns_ipvs *ipvs = net_ipvs(net); 530 struct netns_ipvs *ipvs = net_ipvs(net);
531 struct ip_vs_sync_mesg_v0 *m; 531 struct ip_vs_sync_mesg_v0 *m;
532 struct ip_vs_sync_conn_v0 *s; 532 struct ip_vs_sync_conn_v0 *s;
533 struct ip_vs_sync_buff *buff; 533 struct ip_vs_sync_buff *buff;
534 struct ipvs_master_sync_state *ms; 534 struct ipvs_master_sync_state *ms;
535 int id; 535 int id;
536 int len; 536 int len;
537 537
538 if (unlikely(cp->af != AF_INET)) 538 if (unlikely(cp->af != AF_INET))
539 return; 539 return;
540 /* Do not sync ONE PACKET */ 540 /* Do not sync ONE PACKET */
541 if (cp->flags & IP_VS_CONN_F_ONE_PACKET) 541 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
542 return; 542 return;
543 543
544 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts)) 544 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
545 return; 545 return;
546 546
547 spin_lock_bh(&ipvs->sync_buff_lock); 547 spin_lock_bh(&ipvs->sync_buff_lock);
548 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { 548 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
549 spin_unlock_bh(&ipvs->sync_buff_lock); 549 spin_unlock_bh(&ipvs->sync_buff_lock);
550 return; 550 return;
551 } 551 }
552 552
553 id = select_master_thread_id(ipvs, cp); 553 id = select_master_thread_id(ipvs, cp);
554 ms = &ipvs->ms[id]; 554 ms = &ipvs->ms[id];
555 buff = ms->sync_buff; 555 buff = ms->sync_buff;
556 if (buff) { 556 if (buff) {
557 m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; 557 m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
558 /* Send buffer if it is for v1 */ 558 /* Send buffer if it is for v1 */
559 if (!m->nr_conns) { 559 if (!m->nr_conns) {
560 sb_queue_tail(ipvs, ms); 560 sb_queue_tail(ipvs, ms);
561 ms->sync_buff = NULL; 561 ms->sync_buff = NULL;
562 buff = NULL; 562 buff = NULL;
563 } 563 }
564 } 564 }
565 if (!buff) { 565 if (!buff) {
566 buff = ip_vs_sync_buff_create_v0(ipvs); 566 buff = ip_vs_sync_buff_create_v0(ipvs);
567 if (!buff) { 567 if (!buff) {
568 spin_unlock_bh(&ipvs->sync_buff_lock); 568 spin_unlock_bh(&ipvs->sync_buff_lock);
569 pr_err("ip_vs_sync_buff_create failed.\n"); 569 pr_err("ip_vs_sync_buff_create failed.\n");
570 return; 570 return;
571 } 571 }
572 ms->sync_buff = buff; 572 ms->sync_buff = buff;
573 } 573 }
574 574
575 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : 575 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
576 SIMPLE_CONN_SIZE; 576 SIMPLE_CONN_SIZE;
577 m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; 577 m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
578 s = (struct ip_vs_sync_conn_v0 *) buff->head; 578 s = (struct ip_vs_sync_conn_v0 *) buff->head;
579 579
580 /* copy members */ 580 /* copy members */
581 s->reserved = 0; 581 s->reserved = 0;
582 s->protocol = cp->protocol; 582 s->protocol = cp->protocol;
583 s->cport = cp->cport; 583 s->cport = cp->cport;
584 s->vport = cp->vport; 584 s->vport = cp->vport;
585 s->dport = cp->dport; 585 s->dport = cp->dport;
586 s->caddr = cp->caddr.ip; 586 s->caddr = cp->caddr.ip;
587 s->vaddr = cp->vaddr.ip; 587 s->vaddr = cp->vaddr.ip;
588 s->daddr = cp->daddr.ip; 588 s->daddr = cp->daddr.ip;
589 s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED); 589 s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
590 s->state = htons(cp->state); 590 s->state = htons(cp->state);
591 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { 591 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
592 struct ip_vs_sync_conn_options *opt = 592 struct ip_vs_sync_conn_options *opt =
593 (struct ip_vs_sync_conn_options *)&s[1]; 593 (struct ip_vs_sync_conn_options *)&s[1];
594 memcpy(opt, &cp->in_seq, sizeof(*opt)); 594 memcpy(opt, &cp->in_seq, sizeof(*opt));
595 } 595 }
596 596
597 m->nr_conns++; 597 m->nr_conns++;
598 m->size = htons(ntohs(m->size) + len); 598 m->size = htons(ntohs(m->size) + len);
599 buff->head += len; 599 buff->head += len;
600 600
601 /* check if there is a space for next one */ 601 /* check if there is a space for next one */
602 if (buff->head + FULL_CONN_SIZE > buff->end) { 602 if (buff->head + FULL_CONN_SIZE > buff->end) {
603 sb_queue_tail(ipvs, ms); 603 sb_queue_tail(ipvs, ms);
604 ms->sync_buff = NULL; 604 ms->sync_buff = NULL;
605 } 605 }
606 spin_unlock_bh(&ipvs->sync_buff_lock); 606 spin_unlock_bh(&ipvs->sync_buff_lock);
607 607
608 /* synchronize its controller if it has */ 608 /* synchronize its controller if it has */
609 cp = cp->control; 609 cp = cp->control;
610 if (cp) { 610 if (cp) {
611 if (cp->flags & IP_VS_CONN_F_TEMPLATE) 611 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
612 pkts = atomic_add_return(1, &cp->in_pkts); 612 pkts = atomic_add_return(1, &cp->in_pkts);
613 else 613 else
614 pkts = sysctl_sync_threshold(ipvs); 614 pkts = sysctl_sync_threshold(ipvs);
615 ip_vs_sync_conn(net, cp->control, pkts); 615 ip_vs_sync_conn(net, cp->control, pkts);
616 } 616 }
617 } 617 }
618 618
619 /* 619 /*
620 * Add an ip_vs_conn information into the current sync_buff. 620 * Add an ip_vs_conn information into the current sync_buff.
621 * Called by ip_vs_in. 621 * Called by ip_vs_in.
622 * Sending Version 1 messages 622 * Sending Version 1 messages
623 */ 623 */
void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_sync_mesg *m;
	union ip_vs_sync_conn *s;
	struct ip_vs_sync_buff *buff;
	struct ipvs_master_sync_state *ms;
	int id;
	__u8 *p;
	unsigned int len, pe_name_len, pad;

	/* Handle old version of the protocol */
	if (sysctl_sync_ver(ipvs) == 0) {
		ip_vs_sync_conn_v0(net, cp, pkts);
		return;
	}
	/* Do not sync ONE PACKET */
	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
		goto control;
sloop:
	/* Rate/state based filter: skip entries that need no sync now */
	if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
		goto control;

	/* Sanity checks */
	pe_name_len = 0;
	if (cp->pe_data_len) {
		if (!cp->pe_data || !cp->dest) {
			IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
			return;
		}
		pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
	}

	spin_lock_bh(&ipvs->sync_buff_lock);
	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
		spin_unlock_bh(&ipvs->sync_buff_lock);
		return;
	}

	/* Pick the master sync thread this connection hashes to */
	id = select_master_thread_id(ipvs, cp);
	ms = &ipvs->ms[id];

	/* Compute the on-wire record size before reserving buffer space */
#ifdef CONFIG_IP_VS_IPV6
	if (cp->af == AF_INET6)
		len = sizeof(struct ip_vs_sync_v6);
	else
#endif
		len = sizeof(struct ip_vs_sync_v4);

	if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
		len += sizeof(struct ip_vs_sync_conn_options) + 2;

	if (cp->pe_data_len)
		len += cp->pe_data_len + 2;	/* + Param hdr field */
	if (pe_name_len)
		len += pe_name_len + 2;

	/* check if there is a space for this one */
	pad = 0;
	buff = ms->sync_buff;
	if (buff) {
		m = buff->mesg;
		/* Records are 4-byte aligned within the message */
		pad = (4 - (size_t) buff->head) & 3;
		/* Send buffer if it is for v0 */
		if (buff->head + len + pad > buff->end || m->reserved) {
			sb_queue_tail(ipvs, ms);
			ms->sync_buff = NULL;
			buff = NULL;
			pad = 0;
		}
	}

	if (!buff) {
		buff = ip_vs_sync_buff_create(ipvs);
		if (!buff) {
			spin_unlock_bh(&ipvs->sync_buff_lock);
			pr_err("ip_vs_sync_buff_create failed.\n");
			return;
		}
		ms->sync_buff = buff;
		m = buff->mesg;
	}

	/* Reserve pad + len bytes, then fill them in below */
	p = buff->head;
	buff->head += pad + len;
	m->size = htons(ntohs(m->size) + pad + len);
	/* Add ev. padding from prev. sync_conn */
	while (pad--)
		*(p++) = 0;

	s = (union ip_vs_sync_conn *)p;

	/* Set message type & copy members */
	s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
	s->v4.ver_size = htons(len & SVER_MASK);	/* Version 0 */
	s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
	s->v4.state = htons(cp->state);
	s->v4.protocol = cp->protocol;
	s->v4.cport = cp->cport;
	s->v4.vport = cp->vport;
	s->v4.dport = cp->dport;
	s->v4.fwmark = htonl(cp->fwmark);
	s->v4.timeout = htonl(cp->timeout / HZ);
	m->nr_conns++;

#ifdef CONFIG_IP_VS_IPV6
	if (cp->af == AF_INET6) {
		p += sizeof(struct ip_vs_sync_v6);
		s->v6.caddr = cp->caddr.in6;
		s->v6.vaddr = cp->vaddr.in6;
		s->v6.daddr = cp->daddr.in6;
	} else
#endif
	{
		p += sizeof(struct ip_vs_sync_v4);	/* options ptr */
		s->v4.caddr = cp->caddr.ip;
		s->v4.vaddr = cp->vaddr.ip;
		s->v4.daddr = cp->daddr.ip;
	}
	/* Optional TLV: TCP sequence data (type, len, in_seq, out_seq) */
	if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
		*(p++) = IPVS_OPT_SEQ_DATA;
		*(p++) = sizeof(struct ip_vs_sync_conn_options);
		hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
		p += sizeof(struct ip_vs_seq);
		hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
		p += sizeof(struct ip_vs_seq);
	}
	/* Handle pe data */
	if (cp->pe_data_len && cp->pe_data) {
		*(p++) = IPVS_OPT_PE_DATA;
		*(p++) = cp->pe_data_len;
		memcpy(p, cp->pe_data, cp->pe_data_len);
		p += cp->pe_data_len;
		if (pe_name_len) {
			/* Add PE_NAME */
			*(p++) = IPVS_OPT_PE_NAME;
			*(p++) = pe_name_len;
			memcpy(p, cp->pe->name, pe_name_len);
			p += pe_name_len;
		}
	}

	spin_unlock_bh(&ipvs->sync_buff_lock);

control:
	/* synchronize its controller if it has */
	cp = cp->control;
	if (!cp)
		return;
	/* Recompute pkts for the controller, then loop to sync it too */
	if (cp->flags & IP_VS_CONN_F_TEMPLATE)
		pkts = atomic_add_return(1, &cp->in_pkts);
	else
		pkts = sysctl_sync_threshold(ipvs);
	goto sloop;
}
779 779
780 /* 780 /*
781 * fill_param used by version 1 781 * fill_param used by version 1
782 */ 782 */
783 static inline int 783 static inline int
784 ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, 784 ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
785 struct ip_vs_conn_param *p, 785 struct ip_vs_conn_param *p,
786 __u8 *pe_data, unsigned int pe_data_len, 786 __u8 *pe_data, unsigned int pe_data_len,
787 __u8 *pe_name, unsigned int pe_name_len) 787 __u8 *pe_name, unsigned int pe_name_len)
788 { 788 {
789 #ifdef CONFIG_IP_VS_IPV6 789 #ifdef CONFIG_IP_VS_IPV6
790 if (af == AF_INET6) 790 if (af == AF_INET6)
791 ip_vs_conn_fill_param(net, af, sc->v6.protocol, 791 ip_vs_conn_fill_param(net, af, sc->v6.protocol,
792 (const union nf_inet_addr *)&sc->v6.caddr, 792 (const union nf_inet_addr *)&sc->v6.caddr,
793 sc->v6.cport, 793 sc->v6.cport,
794 (const union nf_inet_addr *)&sc->v6.vaddr, 794 (const union nf_inet_addr *)&sc->v6.vaddr,
795 sc->v6.vport, p); 795 sc->v6.vport, p);
796 else 796 else
797 #endif 797 #endif
798 ip_vs_conn_fill_param(net, af, sc->v4.protocol, 798 ip_vs_conn_fill_param(net, af, sc->v4.protocol,
799 (const union nf_inet_addr *)&sc->v4.caddr, 799 (const union nf_inet_addr *)&sc->v4.caddr,
800 sc->v4.cport, 800 sc->v4.cport,
801 (const union nf_inet_addr *)&sc->v4.vaddr, 801 (const union nf_inet_addr *)&sc->v4.vaddr,
802 sc->v4.vport, p); 802 sc->v4.vport, p);
803 /* Handle pe data */ 803 /* Handle pe data */
804 if (pe_data_len) { 804 if (pe_data_len) {
805 if (pe_name_len) { 805 if (pe_name_len) {
806 char buff[IP_VS_PENAME_MAXLEN+1]; 806 char buff[IP_VS_PENAME_MAXLEN+1];
807 807
808 memcpy(buff, pe_name, pe_name_len); 808 memcpy(buff, pe_name, pe_name_len);
809 buff[pe_name_len]=0; 809 buff[pe_name_len]=0;
810 p->pe = __ip_vs_pe_getbyname(buff); 810 p->pe = __ip_vs_pe_getbyname(buff);
811 if (!p->pe) { 811 if (!p->pe) {
812 IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", 812 IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
813 buff); 813 buff);
814 return 1; 814 return 1;
815 } 815 }
816 } else { 816 } else {
817 IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n"); 817 IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
818 return 1; 818 return 1;
819 } 819 }
820 820
821 p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC); 821 p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC);
822 if (!p->pe_data) { 822 if (!p->pe_data) {
823 if (p->pe->module) 823 if (p->pe->module)
824 module_put(p->pe->module); 824 module_put(p->pe->module);
825 return -ENOMEM; 825 return -ENOMEM;
826 } 826 }
827 p->pe_data_len = pe_data_len; 827 p->pe_data_len = pe_data_len;
828 } 828 }
829 return 0; 829 return 0;
830 } 830 }
831 831
832 /* 832 /*
833 * Connection Add / Update. 833 * Connection Add / Update.
834 * Common for version 0 and 1 reception of backup sync_conns. 834 * Common for version 0 and 1 reception of backup sync_conns.
835 * Param: ... 835 * Param: ...
836 * timeout is in sec. 836 * timeout is in sec.
837 */ 837 */
838 static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, 838 static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
839 unsigned int flags, unsigned int state, 839 unsigned int flags, unsigned int state,
840 unsigned int protocol, unsigned int type, 840 unsigned int protocol, unsigned int type,
841 const union nf_inet_addr *daddr, __be16 dport, 841 const union nf_inet_addr *daddr, __be16 dport,
842 unsigned long timeout, __u32 fwmark, 842 unsigned long timeout, __u32 fwmark,
843 struct ip_vs_sync_conn_options *opt) 843 struct ip_vs_sync_conn_options *opt)
844 { 844 {
845 struct ip_vs_dest *dest; 845 struct ip_vs_dest *dest;
846 struct ip_vs_conn *cp; 846 struct ip_vs_conn *cp;
847 struct netns_ipvs *ipvs = net_ipvs(net); 847 struct netns_ipvs *ipvs = net_ipvs(net);
848 848
849 if (!(flags & IP_VS_CONN_F_TEMPLATE)) 849 if (!(flags & IP_VS_CONN_F_TEMPLATE))
850 cp = ip_vs_conn_in_get(param); 850 cp = ip_vs_conn_in_get(param);
851 else 851 else
852 cp = ip_vs_ct_in_get(param); 852 cp = ip_vs_ct_in_get(param);
853 853
854 if (cp) { 854 if (cp) {
855 /* Free pe_data */ 855 /* Free pe_data */
856 kfree(param->pe_data); 856 kfree(param->pe_data);
857 857
858 dest = cp->dest; 858 dest = cp->dest;
859 spin_lock_bh(&cp->lock); 859 spin_lock_bh(&cp->lock);
860 if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE && 860 if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
861 !(flags & IP_VS_CONN_F_TEMPLATE) && dest) { 861 !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
862 if (flags & IP_VS_CONN_F_INACTIVE) { 862 if (flags & IP_VS_CONN_F_INACTIVE) {
863 atomic_dec(&dest->activeconns); 863 atomic_dec(&dest->activeconns);
864 atomic_inc(&dest->inactconns); 864 atomic_inc(&dest->inactconns);
865 } else { 865 } else {
866 atomic_inc(&dest->activeconns); 866 atomic_inc(&dest->activeconns);
867 atomic_dec(&dest->inactconns); 867 atomic_dec(&dest->inactconns);
868 } 868 }
869 } 869 }
870 flags &= IP_VS_CONN_F_BACKUP_UPD_MASK; 870 flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
871 flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK; 871 flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
872 cp->flags = flags; 872 cp->flags = flags;
873 spin_unlock_bh(&cp->lock); 873 spin_unlock_bh(&cp->lock);
874 if (!dest) 874 if (!dest)
875 ip_vs_try_bind_dest(cp); 875 ip_vs_try_bind_dest(cp);
876 } else { 876 } else {
877 /* 877 /*
878 * Find the appropriate destination for the connection. 878 * Find the appropriate destination for the connection.
879 * If it is not found the connection will remain unbound 879 * If it is not found the connection will remain unbound
880 * but still handled. 880 * but still handled.
881 */ 881 */
882 rcu_read_lock(); 882 rcu_read_lock();
883 dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, 883 dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
884 param->vport, protocol, fwmark, flags); 884 param->vport, protocol, fwmark, flags);
885 885
886 cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); 886 cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
887 rcu_read_unlock(); 887 rcu_read_unlock();
888 if (!cp) { 888 if (!cp) {
889 if (param->pe_data) 889 if (param->pe_data)
890 kfree(param->pe_data); 890 kfree(param->pe_data);
891 IP_VS_DBG(2, "BACKUP, add new conn. failed\n"); 891 IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
892 return; 892 return;
893 } 893 }
894 } 894 }
895 895
896 if (opt) 896 if (opt)
897 memcpy(&cp->in_seq, opt, sizeof(*opt)); 897 memcpy(&cp->in_seq, opt, sizeof(*opt));
898 atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs)); 898 atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
899 cp->state = state; 899 cp->state = state;
900 cp->old_state = cp->state; 900 cp->old_state = cp->state;
901 /* 901 /*
902 * For Ver 0 messages style 902 * For Ver 0 messages style
903 * - Not possible to recover the right timeout for templates 903 * - Not possible to recover the right timeout for templates
904 * - can not find the right fwmark 904 * - can not find the right fwmark
905 * virtual service. If needed, we can do it for 905 * virtual service. If needed, we can do it for
906 * non-fwmark persistent services. 906 * non-fwmark persistent services.
907 * Ver 1 messages style. 907 * Ver 1 messages style.
908 * - No problem. 908 * - No problem.
909 */ 909 */
910 if (timeout) { 910 if (timeout) {
911 if (timeout > MAX_SCHEDULE_TIMEOUT / HZ) 911 if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
912 timeout = MAX_SCHEDULE_TIMEOUT / HZ; 912 timeout = MAX_SCHEDULE_TIMEOUT / HZ;
913 cp->timeout = timeout*HZ; 913 cp->timeout = timeout*HZ;
914 } else { 914 } else {
915 struct ip_vs_proto_data *pd; 915 struct ip_vs_proto_data *pd;
916 916
917 pd = ip_vs_proto_data_get(net, protocol); 917 pd = ip_vs_proto_data_get(net, protocol);
918 if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table) 918 if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
919 cp->timeout = pd->timeout_table[state]; 919 cp->timeout = pd->timeout_table[state];
920 else 920 else
921 cp->timeout = (3*60*HZ); 921 cp->timeout = (3*60*HZ);
922 } 922 }
923 ip_vs_conn_put(cp); 923 ip_vs_conn_put(cp);
924 } 924 }
925 925
926 /* 926 /*
927 * Process received multicast message for Version 0 927 * Process received multicast message for Version 0
928 */ 928 */
static void ip_vs_process_message_v0(struct net *net, const char *buffer,
				     const size_t buflen)
{
	struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
	struct ip_vs_sync_conn_v0 *s;
	struct ip_vs_sync_conn_options *opt;
	struct ip_vs_protocol *pp;
	struct ip_vs_conn_param param;
	char *p;
	int i;

	/* Walk the nr_conns fixed-size records following the header,
	 * validating that each record fits in the buffer before use.
	 */
	p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
	for (i=0; i<m->nr_conns; i++) {
		unsigned int flags, state;

		if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
			IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
			return;
		}
		s = (struct ip_vs_sync_conn_v0 *) p;
		flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
		flags &= ~IP_VS_CONN_F_HASHED;
		if (flags & IP_VS_CONN_F_SEQ_MASK) {
			/* Record carries trailing sequence options */
			opt = (struct ip_vs_sync_conn_options *)&s[1];
			p += FULL_CONN_SIZE;
			if (p > buffer+buflen) {
				IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
				return;
			}
		} else {
			opt = NULL;
			p += SIMPLE_CONN_SIZE;
		}

		/* Validate the advertised state against the protocol */
		state = ntohs(s->state);
		if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
			pp = ip_vs_proto_get(s->protocol);
			if (!pp) {
				IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
					s->protocol);
				continue;
			}
			if (state >= pp->num_states) {
				IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
					pp->name, state);
				continue;
			}
		} else {
			/* protocol in templates is not used for state/timeout */
			if (state > 0) {
				IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
					state);
				state = 0;
			}
		}

		/* v0 is IPv4 only */
		ip_vs_conn_fill_param(net, AF_INET, s->protocol,
				      (const union nf_inet_addr *)&s->caddr,
				      s->cport,
				      (const union nf_inet_addr *)&s->vaddr,
				      s->vport, &param);

		/* Send timeout as Zero */
		ip_vs_proc_conn(net, &param, flags, state, s->protocol, AF_INET,
				(union nf_inet_addr *)&s->daddr, s->dport,
				0, 0, opt);
	}
}
997 997
998 /* 998 /*
999 * Handle options 999 * Handle options
1000 */ 1000 */
1001 static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen, 1001 static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
1002 __u32 *opt_flags, 1002 __u32 *opt_flags,
1003 struct ip_vs_sync_conn_options *opt) 1003 struct ip_vs_sync_conn_options *opt)
1004 { 1004 {
1005 struct ip_vs_sync_conn_options *topt; 1005 struct ip_vs_sync_conn_options *topt;
1006 1006
1007 topt = (struct ip_vs_sync_conn_options *)p; 1007 topt = (struct ip_vs_sync_conn_options *)p;
1008 1008
1009 if (plen != sizeof(struct ip_vs_sync_conn_options)) { 1009 if (plen != sizeof(struct ip_vs_sync_conn_options)) {
1010 IP_VS_DBG(2, "BACKUP, bogus conn options length\n"); 1010 IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
1011 return -EINVAL; 1011 return -EINVAL;
1012 } 1012 }
1013 if (*opt_flags & IPVS_OPT_F_SEQ_DATA) { 1013 if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
1014 IP_VS_DBG(2, "BACKUP, conn options found twice\n"); 1014 IP_VS_DBG(2, "BACKUP, conn options found twice\n");
1015 return -EINVAL; 1015 return -EINVAL;
1016 } 1016 }
1017 ntoh_seq(&topt->in_seq, &opt->in_seq); 1017 ntoh_seq(&topt->in_seq, &opt->in_seq);
1018 ntoh_seq(&topt->out_seq, &opt->out_seq); 1018 ntoh_seq(&topt->out_seq, &opt->out_seq);
1019 *opt_flags |= IPVS_OPT_F_SEQ_DATA; 1019 *opt_flags |= IPVS_OPT_F_SEQ_DATA;
1020 return 0; 1020 return 0;
1021 } 1021 }
1022 1022
1023 static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len, 1023 static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
1024 __u8 **data, unsigned int maxlen, 1024 __u8 **data, unsigned int maxlen,
1025 __u32 *opt_flags, __u32 flag) 1025 __u32 *opt_flags, __u32 flag)
1026 { 1026 {
1027 if (plen > maxlen) { 1027 if (plen > maxlen) {
1028 IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen); 1028 IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
1029 return -EINVAL; 1029 return -EINVAL;
1030 } 1030 }
1031 if (*opt_flags & flag) { 1031 if (*opt_flags & flag) {
1032 IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag); 1032 IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
1033 return -EINVAL; 1033 return -EINVAL;
1034 } 1034 }
1035 *data_len = plen; 1035 *data_len = plen;
1036 *data = p; 1036 *data = p;
1037 *opt_flags |= flag; 1037 *opt_flags |= flag;
1038 return 0; 1038 return 0;
1039 } 1039 }
/*
 *      Process a Version 1 sync. connection
 *
 *      Decodes a single sync_conn entry located in [p, msg_end): the fixed
 *      v4/v6 part first, then any optional TLV parameters (sequence data,
 *      PE data, PE name), and finally hands the result to ip_vs_proc_conn().
 *      Return conventions (matching the caller in ip_vs_process_message):
 *      0 on success, negative when the whole buffer should be dropped,
 *      positive when only this single message is dropped.
 */
static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
{
	struct ip_vs_sync_conn_options opt;
	union ip_vs_sync_conn *s;
	struct ip_vs_protocol *pp;
	struct ip_vs_conn_param param;
	__u32 flags;
	unsigned int af, state, pe_data_len=0, pe_name_len=0;
	__u8 *pe_data=NULL, *pe_name=NULL;
	__u32 opt_flags=0;
	int retc=0;

	s = (union ip_vs_sync_conn *) p;

	/* Pick the address family from the type field and step over the
	 * fixed-size portion of the entry.
	 */
	if (s->v6.type & STYPE_F_INET6) {
#ifdef CONFIG_IP_VS_IPV6
		af = AF_INET6;
		p += sizeof(struct ip_vs_sync_v6);
#else
		IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
		retc = 10;
		goto out;
#endif
	} else if (!s->v4.type) {
		af = AF_INET;
		p += sizeof(struct ip_vs_sync_v4);
	} else {
		return -10;
	}
	if (p > msg_end)
		return -20;

	/* Process optional params check Type & Len. */
	while (p < msg_end) {
		int ptype;
		int plen;

		/* Need at least the 2-byte TLV header (type, len) */
		if (p+2 > msg_end)
			return -30;
		ptype = *(p++);
		plen  = *(p++);

		if (!plen || ((p + plen) > msg_end))
			return -40;
		/* Handle seq option  p = param data */
		switch (ptype & ~IPVS_OPT_F_PARAM) {
		case IPVS_OPT_SEQ_DATA:
			if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
				return -50;
			break;

		case IPVS_OPT_PE_DATA:
			if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
					   IP_VS_PEDATA_MAXLEN, &opt_flags,
					   IPVS_OPT_F_PE_DATA))
				return -60;
			break;

		case IPVS_OPT_PE_NAME:
			if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
					   IP_VS_PENAME_MAXLEN, &opt_flags,
					   IPVS_OPT_F_PE_NAME))
				return -70;
			break;

		default:
			/* Param data mandatory ? */
			if (!(ptype & IPVS_OPT_F_PARAM)) {
				IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
					  ptype & ~IPVS_OPT_F_PARAM);
				retc = 20;
				goto out;
			}
		}
		p += plen;  /* Next option */
	}

	/* Get flags and Mask off unsupported */
	flags  = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
	flags |= IP_VS_CONN_F_SYNC;
	state = ntohs(s->v4.state);

	if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
		pp = ip_vs_proto_get(s->v4.protocol);
		if (!pp) {
			IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
				s->v4.protocol);
			retc = 30;
			goto out;
		}
		/* State index must be valid for this protocol's table */
		if (state >= pp->num_states) {
			IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
				pp->name, state);
			retc = 40;
			goto out;
		}
	} else {
		/* protocol in templates is not used for state/timeout */
		if (state > 0) {
			IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
				state);
			state = 0;
		}
	}
	if (ip_vs_conn_fill_param_sync(net, af, s, &param, pe_data,
				       pe_data_len, pe_name, pe_name_len)) {
		retc = 50;
		goto out;
	}
	/* If only IPv4, just silent skip IPv6 */
	if (af == AF_INET)
		ip_vs_proc_conn(net, &param, flags, state, s->v4.protocol, af,
				(union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
				ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
				(opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
				);
#ifdef CONFIG_IP_VS_IPV6
	else
		ip_vs_proc_conn(net, &param, flags, state, s->v6.protocol, af,
				(union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
				ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
				(opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
				);
#endif
	return 0;
	/* Error exit */
out:
	IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
	return retc;

}
/*
 *      Process received multicast message and create the corresponding
 *      ip_vs_conn entries.
 *      Handles Version 0 & 1
 *
 *      Validates the common header (size, syncid), then either walks the
 *      Version 1 entry list, decoding each entry via ip_vs_proc_sync_conn(),
 *      or falls back to the legacy Version 0 handler.
 */
static void ip_vs_process_message(struct net *net, __u8 *buffer,
				  const size_t buflen)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
	__u8 *p, *msg_end;
	int i, nr_conns;

	/* Must hold at least the smaller (v0) header */
	if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
		IP_VS_DBG(2, "BACKUP, message header too short\n");
		return;
	}

	if (buflen != ntohs(m2->size)) {
		IP_VS_DBG(2, "BACKUP, bogus message size\n");
		return;
	}
	/* SyncID sanity check */
	if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
		IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
		return;
	}
	/* Handle version 1 message */
	if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
	    && (m2->spare == 0)) {

		msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
		nr_conns = m2->nr_conns;

		for (i=0; i<nr_conns; i++) {
			union ip_vs_sync_conn *s;
			unsigned int size;
			int retc;

			p = msg_end;
			/* The fixed v4 part must fit in what remains */
			if (p + sizeof(s->v4) > buffer+buflen) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
				return;
			}
			s = (union ip_vs_sync_conn *)p;
			size = ntohs(s->v4.ver_size) & SVER_MASK;
			msg_end = p + size;
			/* Basic sanity checks */
			if (msg_end > buffer+buflen) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
				return;
			}
			if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
					      ntohs(s->v4.ver_size) >> SVER_SHIFT);
				return;
			}
			/* Process a single sync_conn */
			retc = ip_vs_proc_sync_conn(net, p, msg_end);
			if (retc < 0) {
				IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
					     retc);
				return;
			}
			/* Make sure we have 32 bit alignment */
			msg_end = p + ((size + 3) & ~3);
		}
	} else {
		/* Old type of message */
		ip_vs_process_message_v0(net, buffer, buflen);
		return;
	}
}
1247 1247
1248 1248
1249 /* 1249 /*
1250 * Setup sndbuf (mode=1) or rcvbuf (mode=0) 1250 * Setup sndbuf (mode=1) or rcvbuf (mode=0)
1251 */ 1251 */
1252 static void set_sock_size(struct sock *sk, int mode, int val) 1252 static void set_sock_size(struct sock *sk, int mode, int val)
1253 { 1253 {
1254 /* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */ 1254 /* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */
1255 /* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */ 1255 /* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */
1256 lock_sock(sk); 1256 lock_sock(sk);
1257 if (mode) { 1257 if (mode) {
1258 val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2, 1258 val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
1259 sysctl_wmem_max); 1259 sysctl_wmem_max);
1260 sk->sk_sndbuf = val * 2; 1260 sk->sk_sndbuf = val * 2;
1261 sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 1261 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
1262 } else { 1262 } else {
1263 val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2, 1263 val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
1264 sysctl_rmem_max); 1264 sysctl_rmem_max);
1265 sk->sk_rcvbuf = val * 2; 1265 sk->sk_rcvbuf = val * 2;
1266 sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 1266 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
1267 } 1267 }
1268 release_sock(sk); 1268 release_sock(sk);
1269 } 1269 }
1270 1270
1271 /* 1271 /*
1272 * Setup loopback of outgoing multicasts on a sending socket 1272 * Setup loopback of outgoing multicasts on a sending socket
1273 */ 1273 */
1274 static void set_mcast_loop(struct sock *sk, u_char loop) 1274 static void set_mcast_loop(struct sock *sk, u_char loop)
1275 { 1275 {
1276 struct inet_sock *inet = inet_sk(sk); 1276 struct inet_sock *inet = inet_sk(sk);
1277 1277
1278 /* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */ 1278 /* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
1279 lock_sock(sk); 1279 lock_sock(sk);
1280 inet->mc_loop = loop ? 1 : 0; 1280 inet->mc_loop = loop ? 1 : 0;
1281 release_sock(sk); 1281 release_sock(sk);
1282 } 1282 }
1283 1283
1284 /* 1284 /*
1285 * Specify TTL for outgoing multicasts on a sending socket 1285 * Specify TTL for outgoing multicasts on a sending socket
1286 */ 1286 */
1287 static void set_mcast_ttl(struct sock *sk, u_char ttl) 1287 static void set_mcast_ttl(struct sock *sk, u_char ttl)
1288 { 1288 {
1289 struct inet_sock *inet = inet_sk(sk); 1289 struct inet_sock *inet = inet_sk(sk);
1290 1290
1291 /* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */ 1291 /* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
1292 lock_sock(sk); 1292 lock_sock(sk);
1293 inet->mc_ttl = ttl; 1293 inet->mc_ttl = ttl;
1294 release_sock(sk); 1294 release_sock(sk);
1295 } 1295 }
1296 1296
1297 /* 1297 /*
1298 * Specifiy default interface for outgoing multicasts 1298 * Specifiy default interface for outgoing multicasts
1299 */ 1299 */
1300 static int set_mcast_if(struct sock *sk, char *ifname) 1300 static int set_mcast_if(struct sock *sk, char *ifname)
1301 { 1301 {
1302 struct net_device *dev; 1302 struct net_device *dev;
1303 struct inet_sock *inet = inet_sk(sk); 1303 struct inet_sock *inet = inet_sk(sk);
1304 struct net *net = sock_net(sk); 1304 struct net *net = sock_net(sk);
1305 1305
1306 dev = __dev_get_by_name(net, ifname); 1306 dev = __dev_get_by_name(net, ifname);
1307 if (!dev) 1307 if (!dev)
1308 return -ENODEV; 1308 return -ENODEV;
1309 1309
1310 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) 1310 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
1311 return -EINVAL; 1311 return -EINVAL;
1312 1312
1313 lock_sock(sk); 1313 lock_sock(sk);
1314 inet->mc_index = dev->ifindex; 1314 inet->mc_index = dev->ifindex;
1315 /* inet->mc_addr = 0; */ 1315 /* inet->mc_addr = 0; */
1316 release_sock(sk); 1316 release_sock(sk);
1317 1317
1318 return 0; 1318 return 0;
1319 } 1319 }
1320 1320
1321 1321
1322 /* 1322 /*
1323 * Set the maximum length of sync message according to the 1323 * Set the maximum length of sync message according to the
1324 * specified interface's MTU. 1324 * specified interface's MTU.
1325 */ 1325 */
1326 static int set_sync_mesg_maxlen(struct net *net, int sync_state) 1326 static int set_sync_mesg_maxlen(struct net *net, int sync_state)
1327 { 1327 {
1328 struct netns_ipvs *ipvs = net_ipvs(net); 1328 struct netns_ipvs *ipvs = net_ipvs(net);
1329 struct net_device *dev; 1329 struct net_device *dev;
1330 int num; 1330 int num;
1331 1331
1332 if (sync_state == IP_VS_STATE_MASTER) { 1332 if (sync_state == IP_VS_STATE_MASTER) {
1333 dev = __dev_get_by_name(net, ipvs->master_mcast_ifn); 1333 dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
1334 if (!dev) 1334 if (!dev)
1335 return -ENODEV; 1335 return -ENODEV;
1336 1336
1337 num = (dev->mtu - sizeof(struct iphdr) - 1337 num = (dev->mtu - sizeof(struct iphdr) -
1338 sizeof(struct udphdr) - 1338 sizeof(struct udphdr) -
1339 SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; 1339 SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
1340 ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN + 1340 ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
1341 SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); 1341 SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
1342 IP_VS_DBG(7, "setting the maximum length of sync sending " 1342 IP_VS_DBG(7, "setting the maximum length of sync sending "
1343 "message %d.\n", ipvs->send_mesg_maxlen); 1343 "message %d.\n", ipvs->send_mesg_maxlen);
1344 } else if (sync_state == IP_VS_STATE_BACKUP) { 1344 } else if (sync_state == IP_VS_STATE_BACKUP) {
1345 dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn); 1345 dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
1346 if (!dev) 1346 if (!dev)
1347 return -ENODEV; 1347 return -ENODEV;
1348 1348
1349 ipvs->recv_mesg_maxlen = dev->mtu - 1349 ipvs->recv_mesg_maxlen = dev->mtu -
1350 sizeof(struct iphdr) - sizeof(struct udphdr); 1350 sizeof(struct iphdr) - sizeof(struct udphdr);
1351 IP_VS_DBG(7, "setting the maximum length of sync receiving " 1351 IP_VS_DBG(7, "setting the maximum length of sync receiving "
1352 "message %d.\n", ipvs->recv_mesg_maxlen); 1352 "message %d.\n", ipvs->recv_mesg_maxlen);
1353 } 1353 }
1354 1354
1355 return 0; 1355 return 0;
1356 } 1356 }
1357 1357
1358 1358
1359 /* 1359 /*
1360 * Join a multicast group. 1360 * Join a multicast group.
1361 * the group is specified by a class D multicast address 224.0.0.0/8 1361 * the group is specified by a class D multicast address 224.0.0.0/8
1362 * in the in_addr structure passed in as a parameter. 1362 * in the in_addr structure passed in as a parameter.
1363 */ 1363 */
1364 static int 1364 static int
1365 join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) 1365 join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
1366 { 1366 {
1367 struct net *net = sock_net(sk); 1367 struct net *net = sock_net(sk);
1368 struct ip_mreqn mreq; 1368 struct ip_mreqn mreq;
1369 struct net_device *dev; 1369 struct net_device *dev;
1370 int ret; 1370 int ret;
1371 1371
1372 memset(&mreq, 0, sizeof(mreq)); 1372 memset(&mreq, 0, sizeof(mreq));
1373 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); 1373 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
1374 1374
1375 dev = __dev_get_by_name(net, ifname); 1375 dev = __dev_get_by_name(net, ifname);
1376 if (!dev) 1376 if (!dev)
1377 return -ENODEV; 1377 return -ENODEV;
1378 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) 1378 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
1379 return -EINVAL; 1379 return -EINVAL;
1380 1380
1381 mreq.imr_ifindex = dev->ifindex; 1381 mreq.imr_ifindex = dev->ifindex;
1382 1382
1383 lock_sock(sk); 1383 lock_sock(sk);
1384 ret = ip_mc_join_group(sk, &mreq); 1384 ret = ip_mc_join_group(sk, &mreq);
1385 release_sock(sk); 1385 release_sock(sk);
1386 1386
1387 return ret; 1387 return ret;
1388 } 1388 }
1389 1389
1390 1390
/* Bind the socket to the primary address of the given multicast
 * interface.  A missing address is only warned about (the bind is
 * still attempted with 0.0.0.0); returns -ENODEV if the interface
 * does not exist, otherwise the bind() result.
 */
static int bind_mcastif_addr(struct socket *sock, char *ifname)
{
	struct net *net = sock_net(sock->sk);
	struct net_device *dev;
	__be32 addr;
	struct sockaddr_in sin;

	dev = __dev_get_by_name(net, ifname);
	if (!dev)
		return -ENODEV;

	addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	if (!addr)
		pr_err("You probably need to specify IP address on "
		       "multicast interface.\n");

	IP_VS_DBG(7, "binding socket with (%s) %pI4\n",
		  ifname, &addr);

	/* Now bind the socket with the address of multicast interface */
	sin.sin_family	     = AF_INET;
	sin.sin_addr.s_addr  = addr;
	sin.sin_port         = 0;

	return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
}
1417 1417
/*
 *      Set up sending multicast socket over UDP
 *
 *      Creates the socket, moves it into the target namespace, configures
 *      the outbound multicast interface/TTL/loopback and optional buffer
 *      size, binds to the interface address and connects to the sync
 *      multicast group.  Returns the socket or an ERR_PTR; on any failure
 *      after creation the socket is released via sk_release_kernel().
 */
static struct socket *make_send_sock(struct net *net, int id)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	/* multicast addr */
	struct sockaddr_in mcast_addr = {
		.sin_family		= AF_INET,
		.sin_port		= cpu_to_be16(IP_VS_SYNC_PORT + id),
		.sin_addr.s_addr	= cpu_to_be32(IP_VS_SYNC_GROUP),
	};
	struct socket *sock;
	int result;

	/* First create a socket move it to right name space later */
	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
	if (result < 0) {
		pr_err("Error during creation of socket; terminating\n");
		return ERR_PTR(result);
	}
	/*
	 * Kernel sockets that are a part of a namespace, should not
	 * hold a reference to a namespace in order to allow to stop it.
	 * After sk_change_net should be released using sk_release_kernel.
	 */
	sk_change_net(sock->sk, net);
	result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
	if (result < 0) {
		pr_err("Error setting outbound mcast interface\n");
		goto error;
	}

	/* Keep multicasts local to the wire and invisible to ourselves */
	set_mcast_loop(sock->sk, 0);
	set_mcast_ttl(sock->sk, 1);
	result = sysctl_sync_sock_size(ipvs);
	if (result > 0)
		set_sock_size(sock->sk, 1, result);

	result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
	if (result < 0) {
		pr_err("Error binding address of the mcast interface\n");
		goto error;
	}

	result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
			sizeof(struct sockaddr), 0);
	if (result < 0) {
		pr_err("Error connecting to the multicast addr\n");
		goto error;
	}

	return sock;

error:
	sk_release_kernel(sock->sk);
	return ERR_PTR(result);
}
1476 1476
1477 1477
/*
 *      Set up receiving multicast socket over UDP
 *
 *      Creates the socket, moves it into the target namespace, enables
 *      address reuse and optional buffer sizing, binds to the sync
 *      multicast address/port and joins the multicast group on the
 *      configured backup interface.  Returns the socket or an ERR_PTR;
 *      failures after creation release the socket via sk_release_kernel().
 */
static struct socket *make_receive_sock(struct net *net, int id)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	/* multicast addr */
	struct sockaddr_in mcast_addr = {
		.sin_family		= AF_INET,
		.sin_port		= cpu_to_be16(IP_VS_SYNC_PORT + id),
		.sin_addr.s_addr	= cpu_to_be32(IP_VS_SYNC_GROUP),
	};
	struct socket *sock;
	int result;

	/* First create a socket */
	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
	if (result < 0) {
		pr_err("Error during creation of socket; terminating\n");
		return ERR_PTR(result);
	}
	/*
	 * Kernel sockets that are a part of a namespace, should not
	 * hold a reference to a namespace in order to allow to stop it.
	 * After sk_change_net should be released using sk_release_kernel.
	 */
	sk_change_net(sock->sk, net);
	/* it is equivalent to the REUSEADDR option in user-space */
	sock->sk->sk_reuse = SK_CAN_REUSE;
	result = sysctl_sync_sock_size(ipvs);
	if (result > 0)
		set_sock_size(sock->sk, 0, result);

	result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
			sizeof(struct sockaddr));
	if (result < 0) {
		pr_err("Error binding to the multicast addr\n");
		goto error;
	}

	/* join the multicast group */
	result = join_mcast_group(sock->sk,
			(struct in_addr *) &mcast_addr.sin_addr,
			ipvs->backup_mcast_ifn);
	if (result < 0) {
		pr_err("Error joining to the multicast group\n");
		goto error;
	}

	return sock;

error:
	sk_release_kernel(sock->sk);
	return ERR_PTR(result);
}
1533 1533
1534 1534
1535 static int 1535 static int
1536 ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length) 1536 ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
1537 { 1537 {
1538 struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL}; 1538 struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL};
1539 struct kvec iov; 1539 struct kvec iov;
1540 int len; 1540 int len;
1541 1541
1542 EnterFunction(7); 1542 EnterFunction(7);
1543 iov.iov_base = (void *)buffer; 1543 iov.iov_base = (void *)buffer;
1544 iov.iov_len = length; 1544 iov.iov_len = length;
1545 1545
1546 len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length)); 1546 len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length));
1547 1547
1548 LeaveFunction(7); 1548 LeaveFunction(7);
1549 return len; 1549 return len;
1550 } 1550 }
1551 1551
1552 static int 1552 static int
1553 ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg) 1553 ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
1554 { 1554 {
1555 int msize; 1555 int msize;
1556 int ret; 1556 int ret;
1557 1557
1558 msize = ntohs(msg->size); 1558 msize = ntohs(msg->size);
1559 1559
1560 ret = ip_vs_send_async(sock, (char *)msg, msize); 1560 ret = ip_vs_send_async(sock, (char *)msg, msize);
1561 if (ret >= 0 || ret == -EAGAIN) 1561 if (ret >= 0 || ret == -EAGAIN)
1562 return ret; 1562 return ret;
1563 pr_err("ip_vs_send_async error %d\n", ret); 1563 pr_err("ip_vs_send_async error %d\n", ret);
1564 return 0; 1564 return 0;
1565 } 1565 }
1566 1566
1567 static int 1567 static int
1568 ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) 1568 ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
1569 { 1569 {
1570 struct msghdr msg = {NULL,}; 1570 struct msghdr msg = {NULL,};
1571 struct kvec iov; 1571 struct kvec iov;
1572 int len; 1572 int len;
1573 1573
1574 EnterFunction(7); 1574 EnterFunction(7);
1575 1575
1576 /* Receive a packet */ 1576 /* Receive a packet */
1577 iov.iov_base = buffer; 1577 iov.iov_base = buffer;
1578 iov.iov_len = (size_t)buflen; 1578 iov.iov_len = (size_t)buflen;
1579 1579
1580 len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT); 1580 len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT);
1581 1581
1582 if (len < 0) 1582 if (len < 0)
1583 return len; 1583 return len;
1584 1584
1585 LeaveFunction(7); 1585 LeaveFunction(7);
1586 return len; 1586 return len;
1587 } 1587 }
1588 1588
/* Wakeup the master thread for sending.
 * Runs from the delayed work queued when sync buffers pile up; under
 * sync_lock it wakes the master thread only if there is queued data
 * and the delay counter has not already been saturated (presumably to
 * rate-limit wakeups against IPVS_SYNC_WAKEUP_RATE -- confirm against
 * the sb_queue_tail() producer side).
 */
static void master_wakeup_work_handler(struct work_struct *work)
{
	struct ipvs_master_sync_state *ms =
		container_of(work, struct ipvs_master_sync_state,
			     master_wakeup_work.work);
	struct netns_ipvs *ipvs = ms->ipvs;

	spin_lock_bh(&ipvs->sync_lock);
	if (ms->sync_queue_len &&
	    ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
		ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
		wake_up_process(ms->master_thread);
	}
	spin_unlock_bh(&ipvs->sync_lock);
}
1605 1605
1606 /* Get next buffer to send */ 1606 /* Get next buffer to send */
1607 static inline struct ip_vs_sync_buff * 1607 static inline struct ip_vs_sync_buff *
1608 next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) 1608 next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
1609 { 1609 {
1610 struct ip_vs_sync_buff *sb; 1610 struct ip_vs_sync_buff *sb;
1611 1611
1612 sb = sb_dequeue(ipvs, ms); 1612 sb = sb_dequeue(ipvs, ms);
1613 if (sb) 1613 if (sb)
1614 return sb; 1614 return sb;
1615 /* Do not delay entries in buffer for more than 2 seconds */ 1615 /* Do not delay entries in buffer for more than 2 seconds */
1616 return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME); 1616 return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME);
1617 } 1617 }
1618 1618
/* Main loop of a master sync kthread: repeatedly pull sync buffers and
 * multicast them via the thread's send socket.  Runs until
 * kthread_stop() is called; on exit it drains and frees all pending
 * buffers, releases the socket and frees its own tinfo.
 */
static int sync_thread_master(void *data)
{
	struct ip_vs_sync_thread_data *tinfo = data;
	struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
	struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
	struct sock *sk = tinfo->sock->sk;
	struct ip_vs_sync_buff *sb;

	pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
		"syncid = %d, id = %d\n",
		ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id);

	for (;;) {
		sb = next_sync_buff(ipvs, ms);
		/* Check for stop before sleeping so a pending stop request
		 * is not missed; sb (possibly non-NULL) is freed below. */
		if (unlikely(kthread_should_stop()))
			break;
		if (!sb) {
			/* Nothing to send; nap before polling again */
			schedule_timeout(IPVS_SYNC_CHECK_PERIOD);
			continue;
		}
		while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
			/* (Ab)use interruptible sleep to avoid increasing
			 * the load avg.
			 */
			__wait_event_interruptible(*sk_sleep(sk),
						   sock_writeable(sk) ||
						   kthread_should_stop());
			/* Interruption state is deliberately ignored; only
			 * a stop request ends the retry loop. */
			if (unlikely(kthread_should_stop()))
				goto done;
		}
		ip_vs_sync_buff_release(sb);
	}

done:
	__set_current_state(TASK_RUNNING);
	/* Free the buffer we were holding when told to stop, if any */
	if (sb)
		ip_vs_sync_buff_release(sb);

	/* clean up the sync_buff queue */
	while ((sb = sb_dequeue(ipvs, ms)))
		ip_vs_sync_buff_release(sb);
	__set_current_state(TASK_RUNNING);

	/* clean up the current sync_buff */
	sb = get_curr_sync_buff(ipvs, ms, 0);
	if (sb)
		ip_vs_sync_buff_release(sb);

	/* release the sending multicast socket */
	sk_release_kernel(tinfo->sock->sk);
	kfree(tinfo);

	return 0;
}
1670 1673
1671 1674
/* Main loop of a backup sync kthread: sleep until the receive socket
 * has data (or a stop is requested), then drain and process every
 * queued sync message.  Frees its receive buffer, socket and tinfo on
 * exit.
 */
static int sync_thread_backup(void *data)
{
	struct ip_vs_sync_thread_data *tinfo = data;
	struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
	int len;

	pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
		"syncid = %d, id = %d\n",
		ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id);

	while (!kthread_should_stop()) {
		/* Interruptible sleep keeps these threads out of the load
		 * average; the return value is intentionally ignored. */
		wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
			 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
			 || kthread_should_stop());

		/* do we have data now? */
		while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
			len = ip_vs_receive(tinfo->sock, tinfo->buf,
					ipvs->recv_mesg_maxlen);
			if (len <= 0) {
				/* -EAGAIN just means the queue emptied */
				if (len != -EAGAIN)
					pr_err("receiving message error\n");
				break;
			}

			ip_vs_process_message(tinfo->net, tinfo->buf, len);
		}
	}

	/* release the sending multicast socket */
	sk_release_kernel(tinfo->sock->sk);
	kfree(tinfo->buf);
	kfree(tinfo);

	return 0;
}
1708 1711
1709 1712
/* Start the master or backup sync daemon for a netns.
 *
 * @net:       network namespace the threads belong to
 * @state:     IP_VS_STATE_MASTER or IP_VS_STATE_BACKUP
 * @mcast_ifn: name of the interface used for sync multicast traffic
 * @syncid:    sync daemon instance id
 *
 * Spawns one kthread per sync "port" (count derived from
 * sysctl_sync_ports on first start, otherwise from threads_mask).
 * Returns 0 on success, -EEXIST if this kind of daemon already runs,
 * -EINVAL for an unknown state, or a negative errno from socket or
 * thread creation.  NOTE(review): callers presumably hold
 * ipvs->sync_mutex (as ip_vs_sync_net_cleanup does) -- confirm at the
 * other call sites.
 */
int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
{
	struct ip_vs_sync_thread_data *tinfo;
	struct task_struct **array = NULL, *task;
	struct socket *sock;
	struct netns_ipvs *ipvs = net_ipvs(net);
	char *name;
	int (*threadfn)(void *data);
	int id, count;
	int result = -ENOMEM;

	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
		  sizeof(struct ip_vs_sync_conn_v0));

	if (!ipvs->sync_state) {
		/* First daemon to start fixes the thread count; the mask is
		 * count - 1 and is reused by the other daemon kind. */
		count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
		ipvs->threads_mask = count - 1;
	} else
		count = ipvs->threads_mask + 1;

	if (state == IP_VS_STATE_MASTER) {
		if (ipvs->ms)
			return -EEXIST;

		strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
			sizeof(ipvs->master_mcast_ifn));
		ipvs->master_syncid = syncid;
		name = "ipvs-m:%d:%d";
		threadfn = sync_thread_master;
	} else if (state == IP_VS_STATE_BACKUP) {
		if (ipvs->backup_threads)
			return -EEXIST;

		strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
			sizeof(ipvs->backup_mcast_ifn));
		ipvs->backup_syncid = syncid;
		name = "ipvs-b:%d:%d";
		threadfn = sync_thread_backup;
	} else {
		return -EINVAL;
	}

	if (state == IP_VS_STATE_MASTER) {
		struct ipvs_master_sync_state *ms;

		/* One per-thread state (queue + delayed wakeup work) */
		ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL);
		if (!ipvs->ms)
			goto out;
		ms = ipvs->ms;
		for (id = 0; id < count; id++, ms++) {
			INIT_LIST_HEAD(&ms->sync_queue);
			ms->sync_queue_len = 0;
			ms->sync_queue_delay = 0;
			INIT_DELAYED_WORK(&ms->master_wakeup_work,
					  master_wakeup_work_handler);
			ms->ipvs = ipvs;
		}
	} else {
		/* Backup threads are tracked via a plain task array */
		array = kzalloc(count * sizeof(struct task_struct *),
				GFP_KERNEL);
		if (!array)
			goto out;
	}
	set_sync_mesg_maxlen(net, state);

	tinfo = NULL;
	for (id = 0; id < count; id++) {
		if (state == IP_VS_STATE_MASTER)
			sock = make_send_sock(net, id);
		else
			sock = make_receive_sock(net, id);
		if (IS_ERR(sock)) {
			result = PTR_ERR(sock);
			goto outtinfo;
		}
		tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
		if (!tinfo)
			goto outsocket;
		tinfo->net = net;
		tinfo->sock = sock;
		if (state == IP_VS_STATE_BACKUP) {
			/* Only backup threads need a receive buffer */
			tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen,
					     GFP_KERNEL);
			if (!tinfo->buf)
				goto outtinfo;
		} else {
			tinfo->buf = NULL;
		}
		tinfo->id = id;

		task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
		if (IS_ERR(task)) {
			result = PTR_ERR(task);
			goto outtinfo;
		}
		/* Ownership of tinfo passed to the thread, which frees it */
		tinfo = NULL;
		if (state == IP_VS_STATE_MASTER)
			ipvs->ms[id].master_thread = task;
		else
			array[id] = task;
	}

	/* mark as active */

	if (state == IP_VS_STATE_BACKUP)
		ipvs->backup_threads = array;
	spin_lock_bh(&ipvs->sync_buff_lock);
	ipvs->sync_state |= state;
	spin_unlock_bh(&ipvs->sync_buff_lock);

	/* increase the module use count */
	ip_vs_use_count_inc();

	return 0;

outsocket:
	sk_release_kernel(sock->sk);

outtinfo:
	if (tinfo) {
		sk_release_kernel(tinfo->sock->sk);
		kfree(tinfo->buf);
		kfree(tinfo);
	}
	/* Stop and reap the threads that did start (ids [0, id)) */
	count = id;
	while (count-- > 0) {
		if (state == IP_VS_STATE_MASTER)
			kthread_stop(ipvs->ms[count].master_thread);
		else
			kthread_stop(array[count]);
	}
	kfree(array);

out:
	/* Drop the master state array only if no master daemon is active */
	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
		kfree(ipvs->ms);
		ipvs->ms = NULL;
	}
	return result;
}
1851 1854
1852 1855
/* Stop the master or backup sync daemon for a netns and free its
 * per-thread state.
 *
 * Returns 0 (or the last non-zero exit code returned by kthread_stop)
 * on success, -ESRCH if no daemon of the requested kind is running,
 * -EINVAL for an unknown state.
 */
int stop_sync_thread(struct net *net, int state)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct task_struct **array;
	int id;
	int retc = -EINVAL;

	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));

	if (state == IP_VS_STATE_MASTER) {
		if (!ipvs->ms)
			return -ESRCH;

		/*
		 * The lock synchronizes with sb_queue_tail(), so that we don't
		 * add sync buffers to the queue, when we are already in
		 * progress of stopping the master sync daemon.
		 */

		spin_lock_bh(&ipvs->sync_buff_lock);
		spin_lock(&ipvs->sync_lock);
		ipvs->sync_state &= ~IP_VS_STATE_MASTER;
		spin_unlock(&ipvs->sync_lock);
		spin_unlock_bh(&ipvs->sync_buff_lock);

		retc = 0;
		for (id = ipvs->threads_mask; id >= 0; id--) {
			struct ipvs_master_sync_state *ms = &ipvs->ms[id];
			int ret;

			pr_info("stopping master sync thread %d ...\n",
				task_pid_nr(ms->master_thread));
			/* Cancel the pending wakeup work before stopping the
			 * thread so it cannot be woken afterwards */
			cancel_delayed_work_sync(&ms->master_wakeup_work);
			ret = kthread_stop(ms->master_thread);
			/* Remember the first non-zero thread exit code */
			if (retc >= 0)
				retc = ret;
		}
		kfree(ipvs->ms);
		ipvs->ms = NULL;
	} else if (state == IP_VS_STATE_BACKUP) {
		if (!ipvs->backup_threads)
			return -ESRCH;

		ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
		array = ipvs->backup_threads;
		retc = 0;
		for (id = ipvs->threads_mask; id >= 0; id--) {
			int ret;

			pr_info("stopping backup sync thread %d ...\n",
				task_pid_nr(array[id]));
			ret = kthread_stop(array[id]);
			if (retc >= 0)
				retc = ret;
		}
		kfree(array);
		ipvs->backup_threads = NULL;
	}

	/* decrease the module use count */
	ip_vs_use_count_dec();

	return retc;
}
1917 1920
/*
 * Initialize data struct for each netns
 */
int __net_init ip_vs_sync_net_init(struct net *net)
{
	struct netns_ipvs *ipvs = net_ipvs(net);

	/* Explicit __mutex_init with a shared lockdep key so all netns
	 * instances of this mutex belong to one lockdep class */
	__mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
	spin_lock_init(&ipvs->sync_lock);
	spin_lock_init(&ipvs->sync_buff_lock);
	return 0;
}
1930 1933
/* Per-netns teardown: stop both sync daemons if they are running.
 * -ESRCH ("not running") is expected and silently ignored; any other
 * failure is only logged since cleanup cannot be aborted.
 */
void ip_vs_sync_net_cleanup(struct net *net)
{
	int retc;
	struct netns_ipvs *ipvs = net_ipvs(net);

	mutex_lock(&ipvs->sync_mutex);
	retc = stop_sync_thread(net, IP_VS_STATE_MASTER);
	if (retc && retc != -ESRCH)
		pr_err("Failed to stop Master Daemon\n");

	retc = stop_sync_thread(net, IP_VS_STATE_BACKUP);
	if (retc && retc != -ESRCH)
		pr_err("Failed to stop Backup Daemon\n");
	mutex_unlock(&ipvs->sync_mutex);
}
1946 1949