Commit 9dcbe1b87c4a8e3ed62e95369c18709541a3dc8f
Committed by
Simon Horman
1 parent
6a649f3398
Exists in
smarc-imx_3.14.28_1.0.0_ga
and in
1 other branch
ipvs: Remove unused variable ret from sync_thread_master()
net/netfilter/ipvs/ip_vs_sync.c: In function 'sync_thread_master': net/netfilter/ipvs/ip_vs_sync.c:1640:8: warning: unused variable 'ret' [-Wunused-variable] Commit 35a2af94c7ce7130ca292c68b1d27fcfdb648f6b ("sched/wait: Make the __wait_event*() interface more friendly") changed how the interruption state is returned. However, sync_thread_master() ignores this state, now causing a compile warning. According to Julian Anastasov <ja@ssi.bg>, this behavior is OK: "Yes, your patch looks ok to me. In the past we used ssleep() but IPVS users were confused why IPVS threads increase the load average. So, we switched to _interruptible calls and later the socket polling was added." Document this, as requested by Peter Zijlstra, to avoid precious developers disappearing in this pitfall in the future. Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org> Acked-by: Julian Anastasov <ja@ssi.bg> Signed-off-by: Simon Horman <horms@verge.net.au>
Showing 1 changed file with 4 additions and 1 deletions Inline Diff
net/netfilter/ipvs/ip_vs_sync.c
1 | /* | 1 | /* |
2 | * IPVS An implementation of the IP virtual server support for the | 2 | * IPVS An implementation of the IP virtual server support for the |
3 | * LINUX operating system. IPVS is now implemented as a module | 3 | * LINUX operating system. IPVS is now implemented as a module |
4 | * over the NetFilter framework. IPVS can be used to build a | 4 | * over the NetFilter framework. IPVS can be used to build a |
5 | * high-performance and highly available server based on a | 5 | * high-performance and highly available server based on a |
6 | * cluster of servers. | 6 | * cluster of servers. |
7 | * | 7 | * |
8 | * Version 1, is capable of handling both version 0 and 1 messages. | 8 | * Version 1, is capable of handling both version 0 and 1 messages. |
9 | * Version 0 is the plain old format. | 9 | * Version 0 is the plain old format. |
10 | * Note Version 0 receivers will just drop Ver 1 messages. | 10 | * Note Version 0 receivers will just drop Ver 1 messages. |
11 | * Version 1 is capable of handling IPv6, Persistence data, | 11 | * Version 1 is capable of handling IPv6, Persistence data, |
12 | * time-outs, and firewall marks. | 12 | * time-outs, and firewall marks. |
13 | * In ver.1 "ip_vs_sync_conn_options" will be sent in netw. order. | 13 | * In ver.1 "ip_vs_sync_conn_options" will be sent in netw. order. |
14 | * Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0 | 14 | * Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0 |
15 | * | 15 | * |
16 | * Definitions Message: is a complete datagram | 16 | * Definitions Message: is a complete datagram |
17 | * Sync_conn: is a part of a Message | 17 | * Sync_conn: is a part of a Message |
18 | * Param Data is an option to a Sync_conn. | 18 | * Param Data is an option to a Sync_conn. |
19 | * | 19 | * |
20 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | 20 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> |
21 | * | 21 | * |
22 | * ip_vs_sync: sync connection info from master load balancer to backups | 22 | * ip_vs_sync: sync connection info from master load balancer to backups |
23 | * through multicast | 23 | * through multicast |
24 | * | 24 | * |
25 | * Changes: | 25 | * Changes: |
26 | * Alexandre Cassen : Added master & backup support at a time. | 26 | * Alexandre Cassen : Added master & backup support at a time. |
27 | * Alexandre Cassen : Added SyncID support for incoming sync | 27 | * Alexandre Cassen : Added SyncID support for incoming sync |
28 | * messages filtering. | 28 | * messages filtering. |
29 | * Justin Ossevoort : Fix endian problem on sync message size. | 29 | * Justin Ossevoort : Fix endian problem on sync message size. |
30 | * Hans Schillstrom : Added Version 1: i.e. IPv6, | 30 | * Hans Schillstrom : Added Version 1: i.e. IPv6, |
31 | * Persistence support, fwmark and time-out. | 31 | * Persistence support, fwmark and time-out. |
32 | */ | 32 | */ |
33 | 33 | ||
/*
 * Message prefix for this file: pr_fmt() prepends KMSG_COMPONENT and ": "
 * to the format string. Must stay ahead of the #include lines below.
 */
#define KMSG_COMPONENT	"IPVS"
#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
36 | 36 | ||
37 | #include <linux/module.h> | 37 | #include <linux/module.h> |
38 | #include <linux/slab.h> | 38 | #include <linux/slab.h> |
39 | #include <linux/inetdevice.h> | 39 | #include <linux/inetdevice.h> |
40 | #include <linux/net.h> | 40 | #include <linux/net.h> |
41 | #include <linux/completion.h> | 41 | #include <linux/completion.h> |
42 | #include <linux/delay.h> | 42 | #include <linux/delay.h> |
43 | #include <linux/skbuff.h> | 43 | #include <linux/skbuff.h> |
44 | #include <linux/in.h> | 44 | #include <linux/in.h> |
45 | #include <linux/igmp.h> /* for ip_mc_join_group */ | 45 | #include <linux/igmp.h> /* for ip_mc_join_group */ |
46 | #include <linux/udp.h> | 46 | #include <linux/udp.h> |
47 | #include <linux/err.h> | 47 | #include <linux/err.h> |
48 | #include <linux/kthread.h> | 48 | #include <linux/kthread.h> |
49 | #include <linux/wait.h> | 49 | #include <linux/wait.h> |
50 | #include <linux/kernel.h> | 50 | #include <linux/kernel.h> |
51 | 51 | ||
52 | #include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */ | 52 | #include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */ |
53 | 53 | ||
54 | #include <net/ip.h> | 54 | #include <net/ip.h> |
55 | #include <net/sock.h> | 55 | #include <net/sock.h> |
56 | 56 | ||
57 | #include <net/ip_vs.h> | 57 | #include <net/ip_vs.h> |
58 | 58 | ||
59 | #define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */ | 59 | #define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */ |
60 | #define IP_VS_SYNC_PORT 8848 /* multicast port */ | 60 | #define IP_VS_SYNC_PORT 8848 /* multicast port */ |
61 | 61 | ||
62 | #define SYNC_PROTO_VER 1 /* Protocol version in header */ | 62 | #define SYNC_PROTO_VER 1 /* Protocol version in header */ |
63 | 63 | ||
64 | static struct lock_class_key __ipvs_sync_key; | 64 | static struct lock_class_key __ipvs_sync_key; |
65 | /* | 65 | /* |
66 | * IPVS sync connection entry | 66 | * IPVS sync connection entry |
67 | * Version 0, i.e. original version. | 67 | * Version 0, i.e. original version. |
68 | */ | 68 | */ |
69 | struct ip_vs_sync_conn_v0 { | 69 | struct ip_vs_sync_conn_v0 { |
70 | __u8 reserved; | 70 | __u8 reserved; |
71 | 71 | ||
72 | /* Protocol, addresses and port numbers */ | 72 | /* Protocol, addresses and port numbers */ |
73 | __u8 protocol; /* Which protocol (TCP/UDP) */ | 73 | __u8 protocol; /* Which protocol (TCP/UDP) */ |
74 | __be16 cport; | 74 | __be16 cport; |
75 | __be16 vport; | 75 | __be16 vport; |
76 | __be16 dport; | 76 | __be16 dport; |
77 | __be32 caddr; /* client address */ | 77 | __be32 caddr; /* client address */ |
78 | __be32 vaddr; /* virtual address */ | 78 | __be32 vaddr; /* virtual address */ |
79 | __be32 daddr; /* destination address */ | 79 | __be32 daddr; /* destination address */ |
80 | 80 | ||
81 | /* Flags and state transition */ | 81 | /* Flags and state transition */ |
82 | __be16 flags; /* status flags */ | 82 | __be16 flags; /* status flags */ |
83 | __be16 state; /* state info */ | 83 | __be16 state; /* state info */ |
84 | 84 | ||
85 | /* The sequence options start here */ | 85 | /* The sequence options start here */ |
86 | }; | 86 | }; |
87 | 87 | ||
88 | struct ip_vs_sync_conn_options { | 88 | struct ip_vs_sync_conn_options { |
89 | struct ip_vs_seq in_seq; /* incoming seq. struct */ | 89 | struct ip_vs_seq in_seq; /* incoming seq. struct */ |
90 | struct ip_vs_seq out_seq; /* outgoing seq. struct */ | 90 | struct ip_vs_seq out_seq; /* outgoing seq. struct */ |
91 | }; | 91 | }; |
92 | 92 | ||
93 | /* | 93 | /* |
94 | Sync Connection format (sync_conn) | 94 | Sync Connection format (sync_conn) |
95 | 95 | ||
96 | 0 1 2 3 | 96 | 0 1 2 3 |
97 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 97 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 |
98 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 98 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
99 | | Type | Protocol | Ver. | Size | | 99 | | Type | Protocol | Ver. | Size | |
100 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 100 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
101 | | Flags | | 101 | | Flags | |
102 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 102 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
103 | | State | cport | | 103 | | State | cport | |
104 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 104 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
105 | | vport | dport | | 105 | | vport | dport | |
106 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 106 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
107 | | fwmark | | 107 | | fwmark | |
108 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 108 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
109 | | timeout (in sec.) | | 109 | | timeout (in sec.) | |
110 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 110 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
111 | | ... | | 111 | | ... | |
112 | | IP-Addresses (v4 or v6) | | 112 | | IP-Addresses (v4 or v6) | |
113 | | ... | | 113 | | ... | |
114 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 114 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
115 | Optional Parameters. | 115 | Optional Parameters. |
116 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 116 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
117 | | Param. Type | Param. Length | Param. data | | 117 | | Param. Type | Param. Length | Param. data | |
118 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | 118 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
119 | | ... | | 119 | | ... | |
120 | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 120 | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
121 | | | Param Type | Param. Length | | 121 | | | Param Type | Param. Length | |
122 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 122 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
123 | | Param data | | 123 | | Param data | |
124 | | Last Param data should be padded for 32 bit alignment | | 124 | | Last Param data should be padded for 32 bit alignment | |
125 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 125 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
126 | */ | 126 | */ |
127 | 127 | ||
128 | /* | 128 | /* |
129 | * Type 0, IPv4 sync connection format | 129 | * Type 0, IPv4 sync connection format |
130 | */ | 130 | */ |
131 | struct ip_vs_sync_v4 { | 131 | struct ip_vs_sync_v4 { |
132 | __u8 type; | 132 | __u8 type; |
133 | __u8 protocol; /* Which protocol (TCP/UDP) */ | 133 | __u8 protocol; /* Which protocol (TCP/UDP) */ |
134 | __be16 ver_size; /* Version msb 4 bits */ | 134 | __be16 ver_size; /* Version msb 4 bits */ |
135 | /* Flags and state transition */ | 135 | /* Flags and state transition */ |
136 | __be32 flags; /* status flags */ | 136 | __be32 flags; /* status flags */ |
137 | __be16 state; /* state info */ | 137 | __be16 state; /* state info */ |
138 | /* Protocol, addresses and port numbers */ | 138 | /* Protocol, addresses and port numbers */ |
139 | __be16 cport; | 139 | __be16 cport; |
140 | __be16 vport; | 140 | __be16 vport; |
141 | __be16 dport; | 141 | __be16 dport; |
142 | __be32 fwmark; /* Firewall mark from skb */ | 142 | __be32 fwmark; /* Firewall mark from skb */ |
143 | __be32 timeout; /* cp timeout */ | 143 | __be32 timeout; /* cp timeout */ |
144 | __be32 caddr; /* client address */ | 144 | __be32 caddr; /* client address */ |
145 | __be32 vaddr; /* virtual address */ | 145 | __be32 vaddr; /* virtual address */ |
146 | __be32 daddr; /* destination address */ | 146 | __be32 daddr; /* destination address */ |
147 | /* The sequence options start here */ | 147 | /* The sequence options start here */ |
148 | /* PE data padded to 32bit alignment after seq. options */ | 148 | /* PE data padded to 32bit alignment after seq. options */ |
149 | }; | 149 | }; |
150 | /* | 150 | /* |
151 | * Type 2 messages IPv6 | 151 | * Type 2 messages IPv6 |
152 | */ | 152 | */ |
153 | struct ip_vs_sync_v6 { | 153 | struct ip_vs_sync_v6 { |
154 | __u8 type; | 154 | __u8 type; |
155 | __u8 protocol; /* Which protocol (TCP/UDP) */ | 155 | __u8 protocol; /* Which protocol (TCP/UDP) */ |
156 | __be16 ver_size; /* Version msb 4 bits */ | 156 | __be16 ver_size; /* Version msb 4 bits */ |
157 | /* Flags and state transition */ | 157 | /* Flags and state transition */ |
158 | __be32 flags; /* status flags */ | 158 | __be32 flags; /* status flags */ |
159 | __be16 state; /* state info */ | 159 | __be16 state; /* state info */ |
160 | /* Protocol, addresses and port numbers */ | 160 | /* Protocol, addresses and port numbers */ |
161 | __be16 cport; | 161 | __be16 cport; |
162 | __be16 vport; | 162 | __be16 vport; |
163 | __be16 dport; | 163 | __be16 dport; |
164 | __be32 fwmark; /* Firewall mark from skb */ | 164 | __be32 fwmark; /* Firewall mark from skb */ |
165 | __be32 timeout; /* cp timeout */ | 165 | __be32 timeout; /* cp timeout */ |
166 | struct in6_addr caddr; /* client address */ | 166 | struct in6_addr caddr; /* client address */ |
167 | struct in6_addr vaddr; /* virtual address */ | 167 | struct in6_addr vaddr; /* virtual address */ |
168 | struct in6_addr daddr; /* destination address */ | 168 | struct in6_addr daddr; /* destination address */ |
169 | /* The sequence options start here */ | 169 | /* The sequence options start here */ |
170 | /* PE data padded to 32bit alignment after seq. options */ | 170 | /* PE data padded to 32bit alignment after seq. options */ |
171 | }; | 171 | }; |
172 | 172 | ||
173 | union ip_vs_sync_conn { | 173 | union ip_vs_sync_conn { |
174 | struct ip_vs_sync_v4 v4; | 174 | struct ip_vs_sync_v4 v4; |
175 | struct ip_vs_sync_v6 v6; | 175 | struct ip_vs_sync_v6 v6; |
176 | }; | 176 | }; |
177 | 177 | ||
/* Bit positions and flags for the Type field of a sync connection entry */
#define STYPE_INET6		0
#define STYPE_F_INET6		(1 << STYPE_INET6)

/* ver_size layout: version in the top 4 bits, remaining 12 bits masked */
#define SVER_SHIFT		12	/* shift to get the version */
#define SVER_MASK		0x0fff	/* mask to strip the version */

/* Optional parameter type numbers */
#define IPVS_OPT_SEQ_DATA	1
#define IPVS_OPT_PE_DATA	2
#define IPVS_OPT_PE_NAME	3
#define IPVS_OPT_PARAM		7

/* One flag bit per parameter type: bit (type - 1) */
#define IPVS_OPT_F_SEQ_DATA	(1 << (IPVS_OPT_SEQ_DATA-1))
#define IPVS_OPT_F_PE_DATA	(1 << (IPVS_OPT_PE_DATA-1))
#define IPVS_OPT_F_PE_NAME	(1 << (IPVS_OPT_PE_NAME-1))
#define IPVS_OPT_F_PARAM	(1 << (IPVS_OPT_PARAM-1))
194 | 194 | ||
/* Context bundle for one sync thread: namespace, socket, buffer and id */
struct ip_vs_sync_thread_data {
	struct net	*net;
	struct socket	*sock;
	char		*buf;
	int		id;
};
201 | 201 | ||
/*
 * Version 0 entry sizes: the bare connection entry, and the entry
 * followed by its sequence options.
 */
#define SIMPLE_CONN_SIZE	(sizeof(struct ip_vs_sync_conn_v0))
#define FULL_CONN_SIZE		\
	(sizeof(struct ip_vs_sync_conn_v0) + \
	 sizeof(struct ip_vs_sync_conn_options))
206 | 206 | ||
207 | 207 | ||
208 | /* | 208 | /* |
209 | The master multicasts messages (Datagrams) to the backup load balancers | 209 | The master multicasts messages (Datagrams) to the backup load balancers |
210 | in the following format. | 210 | in the following format. |
211 | 211 | ||
212 | Version 1: | 212 | Version 1: |
213 | Note, first byte should be Zero, so ver 0 receivers will drop the packet. | 213 | Note, first byte should be Zero, so ver 0 receivers will drop the packet. |
214 | 214 | ||
215 | 0 1 2 3 | 215 | 0 1 2 3 |
216 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 216 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 |
217 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 217 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
218 | | 0 | SyncID | Size | | 218 | | 0 | SyncID | Size | |
219 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 219 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
220 | | Count Conns | Version | Reserved, set to Zero | | 220 | | Count Conns | Version | Reserved, set to Zero | |
221 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 221 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
222 | | | | 222 | | | |
223 | | IPVS Sync Connection (1) | | 223 | | IPVS Sync Connection (1) | |
224 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 224 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
225 | | . | | 225 | | . | |
226 | ~ . ~ | 226 | ~ . ~ |
227 | | . | | 227 | | . | |
228 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 228 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
229 | | | | 229 | | | |
230 | | IPVS Sync Connection (n) | | 230 | | IPVS Sync Connection (n) | |
231 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 231 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
232 | 232 | ||
233 | Version 0 Header | 233 | Version 0 Header |
234 | 0 1 2 3 | 234 | 0 1 2 3 |
235 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 235 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 |
236 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 236 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
237 | | Count Conns | SyncID | Size | | 237 | | Count Conns | SyncID | Size | |
238 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 238 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
239 | | IPVS Sync Connection (1) | | 239 | | IPVS Sync Connection (1) | |
240 | */ | 240 | */ |
241 | 241 | ||
#define SYNC_MESG_HEADER_LEN	4

/* nr_conns in the message header is an 8-bit field, hence at most 255 */
#define MAX_CONNS_PER_SYNCBUFF	255
244 | 244 | ||
245 | /* Version 0 header */ | 245 | /* Version 0 header */ |
246 | struct ip_vs_sync_mesg_v0 { | 246 | struct ip_vs_sync_mesg_v0 { |
247 | __u8 nr_conns; | 247 | __u8 nr_conns; |
248 | __u8 syncid; | 248 | __u8 syncid; |
249 | __be16 size; | 249 | __be16 size; |
250 | 250 | ||
251 | /* ip_vs_sync_conn entries start here */ | 251 | /* ip_vs_sync_conn entries start here */ |
252 | }; | 252 | }; |
253 | 253 | ||
254 | /* Version 1 header */ | 254 | /* Version 1 header */ |
255 | struct ip_vs_sync_mesg { | 255 | struct ip_vs_sync_mesg { |
256 | __u8 reserved; /* must be zero */ | 256 | __u8 reserved; /* must be zero */ |
257 | __u8 syncid; | 257 | __u8 syncid; |
258 | __be16 size; | 258 | __be16 size; |
259 | __u8 nr_conns; | 259 | __u8 nr_conns; |
260 | __s8 version; /* SYNC_PROTO_VER */ | 260 | __s8 version; /* SYNC_PROTO_VER */ |
261 | __u16 spare; | 261 | __u16 spare; |
262 | /* ip_vs_sync_conn entries start here */ | 262 | /* ip_vs_sync_conn entries start here */ |
263 | }; | 263 | }; |
264 | 264 | ||
265 | struct ip_vs_sync_buff { | 265 | struct ip_vs_sync_buff { |
266 | struct list_head list; | 266 | struct list_head list; |
267 | unsigned long firstuse; | 267 | unsigned long firstuse; |
268 | 268 | ||
269 | /* pointers for the message data */ | 269 | /* pointers for the message data */ |
270 | struct ip_vs_sync_mesg *mesg; | 270 | struct ip_vs_sync_mesg *mesg; |
271 | unsigned char *head; | 271 | unsigned char *head; |
272 | unsigned char *end; | 272 | unsigned char *end; |
273 | }; | 273 | }; |
274 | 274 | ||
275 | /* | 275 | /* |
276 | * Copy of struct ip_vs_seq | 276 | * Copy of struct ip_vs_seq |
277 | * From unaligned network order to aligned host order | 277 | * From unaligned network order to aligned host order |
278 | */ | 278 | */ |
279 | static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho) | 279 | static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho) |
280 | { | 280 | { |
281 | ho->init_seq = get_unaligned_be32(&no->init_seq); | 281 | ho->init_seq = get_unaligned_be32(&no->init_seq); |
282 | ho->delta = get_unaligned_be32(&no->delta); | 282 | ho->delta = get_unaligned_be32(&no->delta); |
283 | ho->previous_delta = get_unaligned_be32(&no->previous_delta); | 283 | ho->previous_delta = get_unaligned_be32(&no->previous_delta); |
284 | } | 284 | } |
285 | 285 | ||
286 | /* | 286 | /* |
287 | * Copy of struct ip_vs_seq | 287 | * Copy of struct ip_vs_seq |
288 | * From Aligned host order to unaligned network order | 288 | * From Aligned host order to unaligned network order |
289 | */ | 289 | */ |
290 | static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no) | 290 | static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no) |
291 | { | 291 | { |
292 | put_unaligned_be32(ho->init_seq, &no->init_seq); | 292 | put_unaligned_be32(ho->init_seq, &no->init_seq); |
293 | put_unaligned_be32(ho->delta, &no->delta); | 293 | put_unaligned_be32(ho->delta, &no->delta); |
294 | put_unaligned_be32(ho->previous_delta, &no->previous_delta); | 294 | put_unaligned_be32(ho->previous_delta, &no->previous_delta); |
295 | } | 295 | } |
296 | 296 | ||
297 | static inline struct ip_vs_sync_buff * | 297 | static inline struct ip_vs_sync_buff * |
298 | sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) | 298 | sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) |
299 | { | 299 | { |
300 | struct ip_vs_sync_buff *sb; | 300 | struct ip_vs_sync_buff *sb; |
301 | 301 | ||
302 | spin_lock_bh(&ipvs->sync_lock); | 302 | spin_lock_bh(&ipvs->sync_lock); |
303 | if (list_empty(&ms->sync_queue)) { | 303 | if (list_empty(&ms->sync_queue)) { |
304 | sb = NULL; | 304 | sb = NULL; |
305 | __set_current_state(TASK_INTERRUPTIBLE); | 305 | __set_current_state(TASK_INTERRUPTIBLE); |
306 | } else { | 306 | } else { |
307 | sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff, | 307 | sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff, |
308 | list); | 308 | list); |
309 | list_del(&sb->list); | 309 | list_del(&sb->list); |
310 | ms->sync_queue_len--; | 310 | ms->sync_queue_len--; |
311 | if (!ms->sync_queue_len) | 311 | if (!ms->sync_queue_len) |
312 | ms->sync_queue_delay = 0; | 312 | ms->sync_queue_delay = 0; |
313 | } | 313 | } |
314 | spin_unlock_bh(&ipvs->sync_lock); | 314 | spin_unlock_bh(&ipvs->sync_lock); |
315 | 315 | ||
316 | return sb; | 316 | return sb; |
317 | } | 317 | } |
318 | 318 | ||
319 | /* | 319 | /* |
320 | * Create a new sync buffer for Version 1 proto. | 320 | * Create a new sync buffer for Version 1 proto. |
321 | */ | 321 | */ |
322 | static inline struct ip_vs_sync_buff * | 322 | static inline struct ip_vs_sync_buff * |
323 | ip_vs_sync_buff_create(struct netns_ipvs *ipvs) | 323 | ip_vs_sync_buff_create(struct netns_ipvs *ipvs) |
324 | { | 324 | { |
325 | struct ip_vs_sync_buff *sb; | 325 | struct ip_vs_sync_buff *sb; |
326 | 326 | ||
327 | if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) | 327 | if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) |
328 | return NULL; | 328 | return NULL; |
329 | 329 | ||
330 | sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); | 330 | sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); |
331 | if (!sb->mesg) { | 331 | if (!sb->mesg) { |
332 | kfree(sb); | 332 | kfree(sb); |
333 | return NULL; | 333 | return NULL; |
334 | } | 334 | } |
335 | sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */ | 335 | sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */ |
336 | sb->mesg->version = SYNC_PROTO_VER; | 336 | sb->mesg->version = SYNC_PROTO_VER; |
337 | sb->mesg->syncid = ipvs->master_syncid; | 337 | sb->mesg->syncid = ipvs->master_syncid; |
338 | sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg)); | 338 | sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg)); |
339 | sb->mesg->nr_conns = 0; | 339 | sb->mesg->nr_conns = 0; |
340 | sb->mesg->spare = 0; | 340 | sb->mesg->spare = 0; |
341 | sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); | 341 | sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); |
342 | sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen; | 342 | sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen; |
343 | 343 | ||
344 | sb->firstuse = jiffies; | 344 | sb->firstuse = jiffies; |
345 | return sb; | 345 | return sb; |
346 | } | 346 | } |
347 | 347 | ||
348 | static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) | 348 | static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) |
349 | { | 349 | { |
350 | kfree(sb->mesg); | 350 | kfree(sb->mesg); |
351 | kfree(sb); | 351 | kfree(sb); |
352 | } | 352 | } |
353 | 353 | ||
354 | static inline void sb_queue_tail(struct netns_ipvs *ipvs, | 354 | static inline void sb_queue_tail(struct netns_ipvs *ipvs, |
355 | struct ipvs_master_sync_state *ms) | 355 | struct ipvs_master_sync_state *ms) |
356 | { | 356 | { |
357 | struct ip_vs_sync_buff *sb = ms->sync_buff; | 357 | struct ip_vs_sync_buff *sb = ms->sync_buff; |
358 | 358 | ||
359 | spin_lock(&ipvs->sync_lock); | 359 | spin_lock(&ipvs->sync_lock); |
360 | if (ipvs->sync_state & IP_VS_STATE_MASTER && | 360 | if (ipvs->sync_state & IP_VS_STATE_MASTER && |
361 | ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) { | 361 | ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) { |
362 | if (!ms->sync_queue_len) | 362 | if (!ms->sync_queue_len) |
363 | schedule_delayed_work(&ms->master_wakeup_work, | 363 | schedule_delayed_work(&ms->master_wakeup_work, |
364 | max(IPVS_SYNC_SEND_DELAY, 1)); | 364 | max(IPVS_SYNC_SEND_DELAY, 1)); |
365 | ms->sync_queue_len++; | 365 | ms->sync_queue_len++; |
366 | list_add_tail(&sb->list, &ms->sync_queue); | 366 | list_add_tail(&sb->list, &ms->sync_queue); |
367 | if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE) | 367 | if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE) |
368 | wake_up_process(ms->master_thread); | 368 | wake_up_process(ms->master_thread); |
369 | } else | 369 | } else |
370 | ip_vs_sync_buff_release(sb); | 370 | ip_vs_sync_buff_release(sb); |
371 | spin_unlock(&ipvs->sync_lock); | 371 | spin_unlock(&ipvs->sync_lock); |
372 | } | 372 | } |
373 | 373 | ||
374 | /* | 374 | /* |
375 | * Get the current sync buffer if it has been created for more | 375 | * Get the current sync buffer if it has been created for more |
376 | * than the specified time or the specified time is zero. | 376 | * than the specified time or the specified time is zero. |
377 | */ | 377 | */ |
378 | static inline struct ip_vs_sync_buff * | 378 | static inline struct ip_vs_sync_buff * |
379 | get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms, | 379 | get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms, |
380 | unsigned long time) | 380 | unsigned long time) |
381 | { | 381 | { |
382 | struct ip_vs_sync_buff *sb; | 382 | struct ip_vs_sync_buff *sb; |
383 | 383 | ||
384 | spin_lock_bh(&ipvs->sync_buff_lock); | 384 | spin_lock_bh(&ipvs->sync_buff_lock); |
385 | sb = ms->sync_buff; | 385 | sb = ms->sync_buff; |
386 | if (sb && time_after_eq(jiffies - sb->firstuse, time)) { | 386 | if (sb && time_after_eq(jiffies - sb->firstuse, time)) { |
387 | ms->sync_buff = NULL; | 387 | ms->sync_buff = NULL; |
388 | __set_current_state(TASK_RUNNING); | 388 | __set_current_state(TASK_RUNNING); |
389 | } else | 389 | } else |
390 | sb = NULL; | 390 | sb = NULL; |
391 | spin_unlock_bh(&ipvs->sync_buff_lock); | 391 | spin_unlock_bh(&ipvs->sync_buff_lock); |
392 | return sb; | 392 | return sb; |
393 | } | 393 | } |
394 | 394 | ||
395 | static inline int | 395 | static inline int |
396 | select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp) | 396 | select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp) |
397 | { | 397 | { |
398 | return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask; | 398 | return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask; |
399 | } | 399 | } |
400 | 400 | ||
401 | /* | 401 | /* |
402 | * Create a new sync buffer for Version 0 proto. | 402 | * Create a new sync buffer for Version 0 proto. |
403 | */ | 403 | */ |
404 | static inline struct ip_vs_sync_buff * | 404 | static inline struct ip_vs_sync_buff * |
405 | ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) | 405 | ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) |
406 | { | 406 | { |
407 | struct ip_vs_sync_buff *sb; | 407 | struct ip_vs_sync_buff *sb; |
408 | struct ip_vs_sync_mesg_v0 *mesg; | 408 | struct ip_vs_sync_mesg_v0 *mesg; |
409 | 409 | ||
410 | if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) | 410 | if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) |
411 | return NULL; | 411 | return NULL; |
412 | 412 | ||
413 | sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); | 413 | sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); |
414 | if (!sb->mesg) { | 414 | if (!sb->mesg) { |
415 | kfree(sb); | 415 | kfree(sb); |
416 | return NULL; | 416 | return NULL; |
417 | } | 417 | } |
418 | mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg; | 418 | mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg; |
419 | mesg->nr_conns = 0; | 419 | mesg->nr_conns = 0; |
420 | mesg->syncid = ipvs->master_syncid; | 420 | mesg->syncid = ipvs->master_syncid; |
421 | mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0)); | 421 | mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0)); |
422 | sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0); | 422 | sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0); |
423 | sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen; | 423 | sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen; |
424 | sb->firstuse = jiffies; | 424 | sb->firstuse = jiffies; |
425 | return sb; | 425 | return sb; |
426 | } | 426 | } |
427 | 427 | ||
428 | /* Check if connection is controlled by persistence */ | 428 | /* Check if connection is controlled by persistence */ |
429 | static inline bool in_persistence(struct ip_vs_conn *cp) | 429 | static inline bool in_persistence(struct ip_vs_conn *cp) |
430 | { | 430 | { |
431 | for (cp = cp->control; cp; cp = cp->control) { | 431 | for (cp = cp->control; cp; cp = cp->control) { |
432 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) | 432 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) |
433 | return true; | 433 | return true; |
434 | } | 434 | } |
435 | return false; | 435 | return false; |
436 | } | 436 | } |
437 | 437 | ||
438 | /* Check if conn should be synced. | 438 | /* Check if conn should be synced. |
439 | * pkts: conn packets, use sysctl_sync_threshold to avoid packet check | 439 | * pkts: conn packets, use sysctl_sync_threshold to avoid packet check |
440 | * - (1) sync_refresh_period: reduce sync rate. Additionally, retry | 440 | * - (1) sync_refresh_period: reduce sync rate. Additionally, retry |
441 | * sync_retries times with period of sync_refresh_period/8 | 441 | * sync_retries times with period of sync_refresh_period/8 |
442 | * - (2) if both sync_refresh_period and sync_period are 0 send sync only | 442 | * - (2) if both sync_refresh_period and sync_period are 0 send sync only |
443 | * for state changes or only once when pkts matches sync_threshold | 443 | * for state changes or only once when pkts matches sync_threshold |
444 | * - (3) templates: rate can be reduced only with sync_refresh_period or | 444 | * - (3) templates: rate can be reduced only with sync_refresh_period or |
445 | * with (2) | 445 | * with (2) |
446 | */ | 446 | */ |
447 | static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, | 447 | static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, |
448 | struct ip_vs_conn *cp, int pkts) | 448 | struct ip_vs_conn *cp, int pkts) |
449 | { | 449 | { |
450 | unsigned long orig = ACCESS_ONCE(cp->sync_endtime); | 450 | unsigned long orig = ACCESS_ONCE(cp->sync_endtime); |
451 | unsigned long now = jiffies; | 451 | unsigned long now = jiffies; |
452 | unsigned long n = (now + cp->timeout) & ~3UL; | 452 | unsigned long n = (now + cp->timeout) & ~3UL; |
453 | unsigned int sync_refresh_period; | 453 | unsigned int sync_refresh_period; |
454 | int sync_period; | 454 | int sync_period; |
455 | int force; | 455 | int force; |
456 | 456 | ||
457 | /* Check if we sync in current state */ | 457 | /* Check if we sync in current state */ |
458 | if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE)) | 458 | if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE)) |
459 | force = 0; | 459 | force = 0; |
460 | else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp))) | 460 | else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp))) |
461 | return 0; | 461 | return 0; |
462 | else if (likely(cp->protocol == IPPROTO_TCP)) { | 462 | else if (likely(cp->protocol == IPPROTO_TCP)) { |
463 | if (!((1 << cp->state) & | 463 | if (!((1 << cp->state) & |
464 | ((1 << IP_VS_TCP_S_ESTABLISHED) | | 464 | ((1 << IP_VS_TCP_S_ESTABLISHED) | |
465 | (1 << IP_VS_TCP_S_FIN_WAIT) | | 465 | (1 << IP_VS_TCP_S_FIN_WAIT) | |
466 | (1 << IP_VS_TCP_S_CLOSE) | | 466 | (1 << IP_VS_TCP_S_CLOSE) | |
467 | (1 << IP_VS_TCP_S_CLOSE_WAIT) | | 467 | (1 << IP_VS_TCP_S_CLOSE_WAIT) | |
468 | (1 << IP_VS_TCP_S_TIME_WAIT)))) | 468 | (1 << IP_VS_TCP_S_TIME_WAIT)))) |
469 | return 0; | 469 | return 0; |
470 | force = cp->state != cp->old_state; | 470 | force = cp->state != cp->old_state; |
471 | if (force && cp->state != IP_VS_TCP_S_ESTABLISHED) | 471 | if (force && cp->state != IP_VS_TCP_S_ESTABLISHED) |
472 | goto set; | 472 | goto set; |
473 | } else if (unlikely(cp->protocol == IPPROTO_SCTP)) { | 473 | } else if (unlikely(cp->protocol == IPPROTO_SCTP)) { |
474 | if (!((1 << cp->state) & | 474 | if (!((1 << cp->state) & |
475 | ((1 << IP_VS_SCTP_S_ESTABLISHED) | | 475 | ((1 << IP_VS_SCTP_S_ESTABLISHED) | |
476 | (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) | | 476 | (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) | |
477 | (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) | | 477 | (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) | |
478 | (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) | | 478 | (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) | |
479 | (1 << IP_VS_SCTP_S_CLOSED)))) | 479 | (1 << IP_VS_SCTP_S_CLOSED)))) |
480 | return 0; | 480 | return 0; |
481 | force = cp->state != cp->old_state; | 481 | force = cp->state != cp->old_state; |
482 | if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED) | 482 | if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED) |
483 | goto set; | 483 | goto set; |
484 | } else { | 484 | } else { |
485 | /* UDP or another protocol with single state */ | 485 | /* UDP or another protocol with single state */ |
486 | force = 0; | 486 | force = 0; |
487 | } | 487 | } |
488 | 488 | ||
489 | sync_refresh_period = sysctl_sync_refresh_period(ipvs); | 489 | sync_refresh_period = sysctl_sync_refresh_period(ipvs); |
490 | if (sync_refresh_period > 0) { | 490 | if (sync_refresh_period > 0) { |
491 | long diff = n - orig; | 491 | long diff = n - orig; |
492 | long min_diff = max(cp->timeout >> 1, 10UL * HZ); | 492 | long min_diff = max(cp->timeout >> 1, 10UL * HZ); |
493 | 493 | ||
494 | /* Avoid sync if difference is below sync_refresh_period | 494 | /* Avoid sync if difference is below sync_refresh_period |
495 | * and below the half timeout. | 495 | * and below the half timeout. |
496 | */ | 496 | */ |
497 | if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) { | 497 | if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) { |
498 | int retries = orig & 3; | 498 | int retries = orig & 3; |
499 | 499 | ||
500 | if (retries >= sysctl_sync_retries(ipvs)) | 500 | if (retries >= sysctl_sync_retries(ipvs)) |
501 | return 0; | 501 | return 0; |
502 | if (time_before(now, orig - cp->timeout + | 502 | if (time_before(now, orig - cp->timeout + |
503 | (sync_refresh_period >> 3))) | 503 | (sync_refresh_period >> 3))) |
504 | return 0; | 504 | return 0; |
505 | n |= retries + 1; | 505 | n |= retries + 1; |
506 | } | 506 | } |
507 | } | 507 | } |
508 | sync_period = sysctl_sync_period(ipvs); | 508 | sync_period = sysctl_sync_period(ipvs); |
509 | if (sync_period > 0) { | 509 | if (sync_period > 0) { |
510 | if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) && | 510 | if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) && |
511 | pkts % sync_period != sysctl_sync_threshold(ipvs)) | 511 | pkts % sync_period != sysctl_sync_threshold(ipvs)) |
512 | return 0; | 512 | return 0; |
513 | } else if (sync_refresh_period <= 0 && | 513 | } else if (sync_refresh_period <= 0 && |
514 | pkts != sysctl_sync_threshold(ipvs)) | 514 | pkts != sysctl_sync_threshold(ipvs)) |
515 | return 0; | 515 | return 0; |
516 | 516 | ||
517 | set: | 517 | set: |
518 | cp->old_state = cp->state; | 518 | cp->old_state = cp->state; |
519 | n = cmpxchg(&cp->sync_endtime, orig, n); | 519 | n = cmpxchg(&cp->sync_endtime, orig, n); |
520 | return n == orig || force; | 520 | return n == orig || force; |
521 | } | 521 | } |
522 | 522 | ||
523 | /* | 523 | /* |
524 | * Version 0 , could be switched in by sys_ctl. | 524 | * Version 0 , could be switched in by sys_ctl. |
525 | * Add an ip_vs_conn information into the current sync_buff. | 525 | * Add an ip_vs_conn information into the current sync_buff. |
526 | */ | 526 | */ |
527 | static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, | 527 | static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, |
528 | int pkts) | 528 | int pkts) |
529 | { | 529 | { |
530 | struct netns_ipvs *ipvs = net_ipvs(net); | 530 | struct netns_ipvs *ipvs = net_ipvs(net); |
531 | struct ip_vs_sync_mesg_v0 *m; | 531 | struct ip_vs_sync_mesg_v0 *m; |
532 | struct ip_vs_sync_conn_v0 *s; | 532 | struct ip_vs_sync_conn_v0 *s; |
533 | struct ip_vs_sync_buff *buff; | 533 | struct ip_vs_sync_buff *buff; |
534 | struct ipvs_master_sync_state *ms; | 534 | struct ipvs_master_sync_state *ms; |
535 | int id; | 535 | int id; |
536 | int len; | 536 | int len; |
537 | 537 | ||
538 | if (unlikely(cp->af != AF_INET)) | 538 | if (unlikely(cp->af != AF_INET)) |
539 | return; | 539 | return; |
540 | /* Do not sync ONE PACKET */ | 540 | /* Do not sync ONE PACKET */ |
541 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) | 541 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) |
542 | return; | 542 | return; |
543 | 543 | ||
544 | if (!ip_vs_sync_conn_needed(ipvs, cp, pkts)) | 544 | if (!ip_vs_sync_conn_needed(ipvs, cp, pkts)) |
545 | return; | 545 | return; |
546 | 546 | ||
547 | spin_lock_bh(&ipvs->sync_buff_lock); | 547 | spin_lock_bh(&ipvs->sync_buff_lock); |
548 | if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { | 548 | if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { |
549 | spin_unlock_bh(&ipvs->sync_buff_lock); | 549 | spin_unlock_bh(&ipvs->sync_buff_lock); |
550 | return; | 550 | return; |
551 | } | 551 | } |
552 | 552 | ||
553 | id = select_master_thread_id(ipvs, cp); | 553 | id = select_master_thread_id(ipvs, cp); |
554 | ms = &ipvs->ms[id]; | 554 | ms = &ipvs->ms[id]; |
555 | buff = ms->sync_buff; | 555 | buff = ms->sync_buff; |
556 | if (buff) { | 556 | if (buff) { |
557 | m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; | 557 | m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; |
558 | /* Send buffer if it is for v1 */ | 558 | /* Send buffer if it is for v1 */ |
559 | if (!m->nr_conns) { | 559 | if (!m->nr_conns) { |
560 | sb_queue_tail(ipvs, ms); | 560 | sb_queue_tail(ipvs, ms); |
561 | ms->sync_buff = NULL; | 561 | ms->sync_buff = NULL; |
562 | buff = NULL; | 562 | buff = NULL; |
563 | } | 563 | } |
564 | } | 564 | } |
565 | if (!buff) { | 565 | if (!buff) { |
566 | buff = ip_vs_sync_buff_create_v0(ipvs); | 566 | buff = ip_vs_sync_buff_create_v0(ipvs); |
567 | if (!buff) { | 567 | if (!buff) { |
568 | spin_unlock_bh(&ipvs->sync_buff_lock); | 568 | spin_unlock_bh(&ipvs->sync_buff_lock); |
569 | pr_err("ip_vs_sync_buff_create failed.\n"); | 569 | pr_err("ip_vs_sync_buff_create failed.\n"); |
570 | return; | 570 | return; |
571 | } | 571 | } |
572 | ms->sync_buff = buff; | 572 | ms->sync_buff = buff; |
573 | } | 573 | } |
574 | 574 | ||
575 | len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : | 575 | len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : |
576 | SIMPLE_CONN_SIZE; | 576 | SIMPLE_CONN_SIZE; |
577 | m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; | 577 | m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; |
578 | s = (struct ip_vs_sync_conn_v0 *) buff->head; | 578 | s = (struct ip_vs_sync_conn_v0 *) buff->head; |
579 | 579 | ||
580 | /* copy members */ | 580 | /* copy members */ |
581 | s->reserved = 0; | 581 | s->reserved = 0; |
582 | s->protocol = cp->protocol; | 582 | s->protocol = cp->protocol; |
583 | s->cport = cp->cport; | 583 | s->cport = cp->cport; |
584 | s->vport = cp->vport; | 584 | s->vport = cp->vport; |
585 | s->dport = cp->dport; | 585 | s->dport = cp->dport; |
586 | s->caddr = cp->caddr.ip; | 586 | s->caddr = cp->caddr.ip; |
587 | s->vaddr = cp->vaddr.ip; | 587 | s->vaddr = cp->vaddr.ip; |
588 | s->daddr = cp->daddr.ip; | 588 | s->daddr = cp->daddr.ip; |
589 | s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED); | 589 | s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED); |
590 | s->state = htons(cp->state); | 590 | s->state = htons(cp->state); |
591 | if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { | 591 | if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { |
592 | struct ip_vs_sync_conn_options *opt = | 592 | struct ip_vs_sync_conn_options *opt = |
593 | (struct ip_vs_sync_conn_options *)&s[1]; | 593 | (struct ip_vs_sync_conn_options *)&s[1]; |
594 | memcpy(opt, &cp->in_seq, sizeof(*opt)); | 594 | memcpy(opt, &cp->in_seq, sizeof(*opt)); |
595 | } | 595 | } |
596 | 596 | ||
597 | m->nr_conns++; | 597 | m->nr_conns++; |
598 | m->size = htons(ntohs(m->size) + len); | 598 | m->size = htons(ntohs(m->size) + len); |
599 | buff->head += len; | 599 | buff->head += len; |
600 | 600 | ||
601 | /* check if there is a space for next one */ | 601 | /* check if there is a space for next one */ |
602 | if (buff->head + FULL_CONN_SIZE > buff->end) { | 602 | if (buff->head + FULL_CONN_SIZE > buff->end) { |
603 | sb_queue_tail(ipvs, ms); | 603 | sb_queue_tail(ipvs, ms); |
604 | ms->sync_buff = NULL; | 604 | ms->sync_buff = NULL; |
605 | } | 605 | } |
606 | spin_unlock_bh(&ipvs->sync_buff_lock); | 606 | spin_unlock_bh(&ipvs->sync_buff_lock); |
607 | 607 | ||
608 | /* synchronize its controller if it has */ | 608 | /* synchronize its controller if it has */ |
609 | cp = cp->control; | 609 | cp = cp->control; |
610 | if (cp) { | 610 | if (cp) { |
611 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) | 611 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) |
612 | pkts = atomic_add_return(1, &cp->in_pkts); | 612 | pkts = atomic_add_return(1, &cp->in_pkts); |
613 | else | 613 | else |
614 | pkts = sysctl_sync_threshold(ipvs); | 614 | pkts = sysctl_sync_threshold(ipvs); |
615 | ip_vs_sync_conn(net, cp->control, pkts); | 615 | ip_vs_sync_conn(net, cp->control, pkts); |
616 | } | 616 | } |
617 | } | 617 | } |
618 | 618 | ||
619 | /* | 619 | /* |
620 | * Add an ip_vs_conn information into the current sync_buff. | 620 | * Add an ip_vs_conn information into the current sync_buff. |
621 | * Called by ip_vs_in. | 621 | * Called by ip_vs_in. |
622 | * Sending Version 1 messages | 622 | * Sending Version 1 messages |
623 | */ | 623 | */ |
624 | void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts) | 624 | void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts) |
625 | { | 625 | { |
626 | struct netns_ipvs *ipvs = net_ipvs(net); | 626 | struct netns_ipvs *ipvs = net_ipvs(net); |
627 | struct ip_vs_sync_mesg *m; | 627 | struct ip_vs_sync_mesg *m; |
628 | union ip_vs_sync_conn *s; | 628 | union ip_vs_sync_conn *s; |
629 | struct ip_vs_sync_buff *buff; | 629 | struct ip_vs_sync_buff *buff; |
630 | struct ipvs_master_sync_state *ms; | 630 | struct ipvs_master_sync_state *ms; |
631 | int id; | 631 | int id; |
632 | __u8 *p; | 632 | __u8 *p; |
633 | unsigned int len, pe_name_len, pad; | 633 | unsigned int len, pe_name_len, pad; |
634 | 634 | ||
635 | /* Handle old version of the protocol */ | 635 | /* Handle old version of the protocol */ |
636 | if (sysctl_sync_ver(ipvs) == 0) { | 636 | if (sysctl_sync_ver(ipvs) == 0) { |
637 | ip_vs_sync_conn_v0(net, cp, pkts); | 637 | ip_vs_sync_conn_v0(net, cp, pkts); |
638 | return; | 638 | return; |
639 | } | 639 | } |
640 | /* Do not sync ONE PACKET */ | 640 | /* Do not sync ONE PACKET */ |
641 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) | 641 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) |
642 | goto control; | 642 | goto control; |
643 | sloop: | 643 | sloop: |
644 | if (!ip_vs_sync_conn_needed(ipvs, cp, pkts)) | 644 | if (!ip_vs_sync_conn_needed(ipvs, cp, pkts)) |
645 | goto control; | 645 | goto control; |
646 | 646 | ||
647 | /* Sanity checks */ | 647 | /* Sanity checks */ |
648 | pe_name_len = 0; | 648 | pe_name_len = 0; |
649 | if (cp->pe_data_len) { | 649 | if (cp->pe_data_len) { |
650 | if (!cp->pe_data || !cp->dest) { | 650 | if (!cp->pe_data || !cp->dest) { |
651 | IP_VS_ERR_RL("SYNC, connection pe_data invalid\n"); | 651 | IP_VS_ERR_RL("SYNC, connection pe_data invalid\n"); |
652 | return; | 652 | return; |
653 | } | 653 | } |
654 | pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN); | 654 | pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN); |
655 | } | 655 | } |
656 | 656 | ||
657 | spin_lock_bh(&ipvs->sync_buff_lock); | 657 | spin_lock_bh(&ipvs->sync_buff_lock); |
658 | if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { | 658 | if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { |
659 | spin_unlock_bh(&ipvs->sync_buff_lock); | 659 | spin_unlock_bh(&ipvs->sync_buff_lock); |
660 | return; | 660 | return; |
661 | } | 661 | } |
662 | 662 | ||
663 | id = select_master_thread_id(ipvs, cp); | 663 | id = select_master_thread_id(ipvs, cp); |
664 | ms = &ipvs->ms[id]; | 664 | ms = &ipvs->ms[id]; |
665 | 665 | ||
666 | #ifdef CONFIG_IP_VS_IPV6 | 666 | #ifdef CONFIG_IP_VS_IPV6 |
667 | if (cp->af == AF_INET6) | 667 | if (cp->af == AF_INET6) |
668 | len = sizeof(struct ip_vs_sync_v6); | 668 | len = sizeof(struct ip_vs_sync_v6); |
669 | else | 669 | else |
670 | #endif | 670 | #endif |
671 | len = sizeof(struct ip_vs_sync_v4); | 671 | len = sizeof(struct ip_vs_sync_v4); |
672 | 672 | ||
673 | if (cp->flags & IP_VS_CONN_F_SEQ_MASK) | 673 | if (cp->flags & IP_VS_CONN_F_SEQ_MASK) |
674 | len += sizeof(struct ip_vs_sync_conn_options) + 2; | 674 | len += sizeof(struct ip_vs_sync_conn_options) + 2; |
675 | 675 | ||
676 | if (cp->pe_data_len) | 676 | if (cp->pe_data_len) |
677 | len += cp->pe_data_len + 2; /* + Param hdr field */ | 677 | len += cp->pe_data_len + 2; /* + Param hdr field */ |
678 | if (pe_name_len) | 678 | if (pe_name_len) |
679 | len += pe_name_len + 2; | 679 | len += pe_name_len + 2; |
680 | 680 | ||
681 | /* check if there is a space for this one */ | 681 | /* check if there is a space for this one */ |
682 | pad = 0; | 682 | pad = 0; |
683 | buff = ms->sync_buff; | 683 | buff = ms->sync_buff; |
684 | if (buff) { | 684 | if (buff) { |
685 | m = buff->mesg; | 685 | m = buff->mesg; |
686 | pad = (4 - (size_t) buff->head) & 3; | 686 | pad = (4 - (size_t) buff->head) & 3; |
687 | /* Send buffer if it is for v0 */ | 687 | /* Send buffer if it is for v0 */ |
688 | if (buff->head + len + pad > buff->end || m->reserved) { | 688 | if (buff->head + len + pad > buff->end || m->reserved) { |
689 | sb_queue_tail(ipvs, ms); | 689 | sb_queue_tail(ipvs, ms); |
690 | ms->sync_buff = NULL; | 690 | ms->sync_buff = NULL; |
691 | buff = NULL; | 691 | buff = NULL; |
692 | pad = 0; | 692 | pad = 0; |
693 | } | 693 | } |
694 | } | 694 | } |
695 | 695 | ||
696 | if (!buff) { | 696 | if (!buff) { |
697 | buff = ip_vs_sync_buff_create(ipvs); | 697 | buff = ip_vs_sync_buff_create(ipvs); |
698 | if (!buff) { | 698 | if (!buff) { |
699 | spin_unlock_bh(&ipvs->sync_buff_lock); | 699 | spin_unlock_bh(&ipvs->sync_buff_lock); |
700 | pr_err("ip_vs_sync_buff_create failed.\n"); | 700 | pr_err("ip_vs_sync_buff_create failed.\n"); |
701 | return; | 701 | return; |
702 | } | 702 | } |
703 | ms->sync_buff = buff; | 703 | ms->sync_buff = buff; |
704 | m = buff->mesg; | 704 | m = buff->mesg; |
705 | } | 705 | } |
706 | 706 | ||
707 | p = buff->head; | 707 | p = buff->head; |
708 | buff->head += pad + len; | 708 | buff->head += pad + len; |
709 | m->size = htons(ntohs(m->size) + pad + len); | 709 | m->size = htons(ntohs(m->size) + pad + len); |
710 | /* Add ev. padding from prev. sync_conn */ | 710 | /* Add ev. padding from prev. sync_conn */ |
711 | while (pad--) | 711 | while (pad--) |
712 | *(p++) = 0; | 712 | *(p++) = 0; |
713 | 713 | ||
714 | s = (union ip_vs_sync_conn *)p; | 714 | s = (union ip_vs_sync_conn *)p; |
715 | 715 | ||
716 | /* Set message type & copy members */ | 716 | /* Set message type & copy members */ |
717 | s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0); | 717 | s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0); |
718 | s->v4.ver_size = htons(len & SVER_MASK); /* Version 0 */ | 718 | s->v4.ver_size = htons(len & SVER_MASK); /* Version 0 */ |
719 | s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED); | 719 | s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED); |
720 | s->v4.state = htons(cp->state); | 720 | s->v4.state = htons(cp->state); |
721 | s->v4.protocol = cp->protocol; | 721 | s->v4.protocol = cp->protocol; |
722 | s->v4.cport = cp->cport; | 722 | s->v4.cport = cp->cport; |
723 | s->v4.vport = cp->vport; | 723 | s->v4.vport = cp->vport; |
724 | s->v4.dport = cp->dport; | 724 | s->v4.dport = cp->dport; |
725 | s->v4.fwmark = htonl(cp->fwmark); | 725 | s->v4.fwmark = htonl(cp->fwmark); |
726 | s->v4.timeout = htonl(cp->timeout / HZ); | 726 | s->v4.timeout = htonl(cp->timeout / HZ); |
727 | m->nr_conns++; | 727 | m->nr_conns++; |
728 | 728 | ||
729 | #ifdef CONFIG_IP_VS_IPV6 | 729 | #ifdef CONFIG_IP_VS_IPV6 |
730 | if (cp->af == AF_INET6) { | 730 | if (cp->af == AF_INET6) { |
731 | p += sizeof(struct ip_vs_sync_v6); | 731 | p += sizeof(struct ip_vs_sync_v6); |
732 | s->v6.caddr = cp->caddr.in6; | 732 | s->v6.caddr = cp->caddr.in6; |
733 | s->v6.vaddr = cp->vaddr.in6; | 733 | s->v6.vaddr = cp->vaddr.in6; |
734 | s->v6.daddr = cp->daddr.in6; | 734 | s->v6.daddr = cp->daddr.in6; |
735 | } else | 735 | } else |
736 | #endif | 736 | #endif |
737 | { | 737 | { |
738 | p += sizeof(struct ip_vs_sync_v4); /* options ptr */ | 738 | p += sizeof(struct ip_vs_sync_v4); /* options ptr */ |
739 | s->v4.caddr = cp->caddr.ip; | 739 | s->v4.caddr = cp->caddr.ip; |
740 | s->v4.vaddr = cp->vaddr.ip; | 740 | s->v4.vaddr = cp->vaddr.ip; |
741 | s->v4.daddr = cp->daddr.ip; | 741 | s->v4.daddr = cp->daddr.ip; |
742 | } | 742 | } |
743 | if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { | 743 | if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { |
744 | *(p++) = IPVS_OPT_SEQ_DATA; | 744 | *(p++) = IPVS_OPT_SEQ_DATA; |
745 | *(p++) = sizeof(struct ip_vs_sync_conn_options); | 745 | *(p++) = sizeof(struct ip_vs_sync_conn_options); |
746 | hton_seq((struct ip_vs_seq *)p, &cp->in_seq); | 746 | hton_seq((struct ip_vs_seq *)p, &cp->in_seq); |
747 | p += sizeof(struct ip_vs_seq); | 747 | p += sizeof(struct ip_vs_seq); |
748 | hton_seq((struct ip_vs_seq *)p, &cp->out_seq); | 748 | hton_seq((struct ip_vs_seq *)p, &cp->out_seq); |
749 | p += sizeof(struct ip_vs_seq); | 749 | p += sizeof(struct ip_vs_seq); |
750 | } | 750 | } |
751 | /* Handle pe data */ | 751 | /* Handle pe data */ |
752 | if (cp->pe_data_len && cp->pe_data) { | 752 | if (cp->pe_data_len && cp->pe_data) { |
753 | *(p++) = IPVS_OPT_PE_DATA; | 753 | *(p++) = IPVS_OPT_PE_DATA; |
754 | *(p++) = cp->pe_data_len; | 754 | *(p++) = cp->pe_data_len; |
755 | memcpy(p, cp->pe_data, cp->pe_data_len); | 755 | memcpy(p, cp->pe_data, cp->pe_data_len); |
756 | p += cp->pe_data_len; | 756 | p += cp->pe_data_len; |
757 | if (pe_name_len) { | 757 | if (pe_name_len) { |
758 | /* Add PE_NAME */ | 758 | /* Add PE_NAME */ |
759 | *(p++) = IPVS_OPT_PE_NAME; | 759 | *(p++) = IPVS_OPT_PE_NAME; |
760 | *(p++) = pe_name_len; | 760 | *(p++) = pe_name_len; |
761 | memcpy(p, cp->pe->name, pe_name_len); | 761 | memcpy(p, cp->pe->name, pe_name_len); |
762 | p += pe_name_len; | 762 | p += pe_name_len; |
763 | } | 763 | } |
764 | } | 764 | } |
765 | 765 | ||
766 | spin_unlock_bh(&ipvs->sync_buff_lock); | 766 | spin_unlock_bh(&ipvs->sync_buff_lock); |
767 | 767 | ||
768 | control: | 768 | control: |
769 | /* synchronize its controller if it has */ | 769 | /* synchronize its controller if it has */ |
770 | cp = cp->control; | 770 | cp = cp->control; |
771 | if (!cp) | 771 | if (!cp) |
772 | return; | 772 | return; |
773 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) | 773 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) |
774 | pkts = atomic_add_return(1, &cp->in_pkts); | 774 | pkts = atomic_add_return(1, &cp->in_pkts); |
775 | else | 775 | else |
776 | pkts = sysctl_sync_threshold(ipvs); | 776 | pkts = sysctl_sync_threshold(ipvs); |
777 | goto sloop; | 777 | goto sloop; |
778 | } | 778 | } |
779 | 779 | ||
780 | /* | 780 | /* |
781 | * fill_param used by version 1 | 781 | * fill_param used by version 1 |
782 | */ | 782 | */ |
783 | static inline int | 783 | static inline int |
784 | ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, | 784 | ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, |
785 | struct ip_vs_conn_param *p, | 785 | struct ip_vs_conn_param *p, |
786 | __u8 *pe_data, unsigned int pe_data_len, | 786 | __u8 *pe_data, unsigned int pe_data_len, |
787 | __u8 *pe_name, unsigned int pe_name_len) | 787 | __u8 *pe_name, unsigned int pe_name_len) |
788 | { | 788 | { |
789 | #ifdef CONFIG_IP_VS_IPV6 | 789 | #ifdef CONFIG_IP_VS_IPV6 |
790 | if (af == AF_INET6) | 790 | if (af == AF_INET6) |
791 | ip_vs_conn_fill_param(net, af, sc->v6.protocol, | 791 | ip_vs_conn_fill_param(net, af, sc->v6.protocol, |
792 | (const union nf_inet_addr *)&sc->v6.caddr, | 792 | (const union nf_inet_addr *)&sc->v6.caddr, |
793 | sc->v6.cport, | 793 | sc->v6.cport, |
794 | (const union nf_inet_addr *)&sc->v6.vaddr, | 794 | (const union nf_inet_addr *)&sc->v6.vaddr, |
795 | sc->v6.vport, p); | 795 | sc->v6.vport, p); |
796 | else | 796 | else |
797 | #endif | 797 | #endif |
798 | ip_vs_conn_fill_param(net, af, sc->v4.protocol, | 798 | ip_vs_conn_fill_param(net, af, sc->v4.protocol, |
799 | (const union nf_inet_addr *)&sc->v4.caddr, | 799 | (const union nf_inet_addr *)&sc->v4.caddr, |
800 | sc->v4.cport, | 800 | sc->v4.cport, |
801 | (const union nf_inet_addr *)&sc->v4.vaddr, | 801 | (const union nf_inet_addr *)&sc->v4.vaddr, |
802 | sc->v4.vport, p); | 802 | sc->v4.vport, p); |
803 | /* Handle pe data */ | 803 | /* Handle pe data */ |
804 | if (pe_data_len) { | 804 | if (pe_data_len) { |
805 | if (pe_name_len) { | 805 | if (pe_name_len) { |
806 | char buff[IP_VS_PENAME_MAXLEN+1]; | 806 | char buff[IP_VS_PENAME_MAXLEN+1]; |
807 | 807 | ||
808 | memcpy(buff, pe_name, pe_name_len); | 808 | memcpy(buff, pe_name, pe_name_len); |
809 | buff[pe_name_len]=0; | 809 | buff[pe_name_len]=0; |
810 | p->pe = __ip_vs_pe_getbyname(buff); | 810 | p->pe = __ip_vs_pe_getbyname(buff); |
811 | if (!p->pe) { | 811 | if (!p->pe) { |
812 | IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", | 812 | IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", |
813 | buff); | 813 | buff); |
814 | return 1; | 814 | return 1; |
815 | } | 815 | } |
816 | } else { | 816 | } else { |
817 | IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n"); | 817 | IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n"); |
818 | return 1; | 818 | return 1; |
819 | } | 819 | } |
820 | 820 | ||
821 | p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC); | 821 | p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC); |
822 | if (!p->pe_data) { | 822 | if (!p->pe_data) { |
823 | if (p->pe->module) | 823 | if (p->pe->module) |
824 | module_put(p->pe->module); | 824 | module_put(p->pe->module); |
825 | return -ENOMEM; | 825 | return -ENOMEM; |
826 | } | 826 | } |
827 | p->pe_data_len = pe_data_len; | 827 | p->pe_data_len = pe_data_len; |
828 | } | 828 | } |
829 | return 0; | 829 | return 0; |
830 | } | 830 | } |
831 | 831 | ||
832 | /* | 832 | /* |
833 | * Connection Add / Update. | 833 | * Connection Add / Update. |
834 | * Common for version 0 and 1 reception of backup sync_conns. | 834 | * Common for version 0 and 1 reception of backup sync_conns. |
835 | * Param: ... | 835 | * Param: ... |
836 | * timeout is in sec. | 836 | * timeout is in sec. |
837 | */ | 837 | */ |
838 | static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, | 838 | static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, |
839 | unsigned int flags, unsigned int state, | 839 | unsigned int flags, unsigned int state, |
840 | unsigned int protocol, unsigned int type, | 840 | unsigned int protocol, unsigned int type, |
841 | const union nf_inet_addr *daddr, __be16 dport, | 841 | const union nf_inet_addr *daddr, __be16 dport, |
842 | unsigned long timeout, __u32 fwmark, | 842 | unsigned long timeout, __u32 fwmark, |
843 | struct ip_vs_sync_conn_options *opt) | 843 | struct ip_vs_sync_conn_options *opt) |
844 | { | 844 | { |
845 | struct ip_vs_dest *dest; | 845 | struct ip_vs_dest *dest; |
846 | struct ip_vs_conn *cp; | 846 | struct ip_vs_conn *cp; |
847 | struct netns_ipvs *ipvs = net_ipvs(net); | 847 | struct netns_ipvs *ipvs = net_ipvs(net); |
848 | 848 | ||
849 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) | 849 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) |
850 | cp = ip_vs_conn_in_get(param); | 850 | cp = ip_vs_conn_in_get(param); |
851 | else | 851 | else |
852 | cp = ip_vs_ct_in_get(param); | 852 | cp = ip_vs_ct_in_get(param); |
853 | 853 | ||
854 | if (cp) { | 854 | if (cp) { |
855 | /* Free pe_data */ | 855 | /* Free pe_data */ |
856 | kfree(param->pe_data); | 856 | kfree(param->pe_data); |
857 | 857 | ||
858 | dest = cp->dest; | 858 | dest = cp->dest; |
859 | spin_lock_bh(&cp->lock); | 859 | spin_lock_bh(&cp->lock); |
860 | if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE && | 860 | if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE && |
861 | !(flags & IP_VS_CONN_F_TEMPLATE) && dest) { | 861 | !(flags & IP_VS_CONN_F_TEMPLATE) && dest) { |
862 | if (flags & IP_VS_CONN_F_INACTIVE) { | 862 | if (flags & IP_VS_CONN_F_INACTIVE) { |
863 | atomic_dec(&dest->activeconns); | 863 | atomic_dec(&dest->activeconns); |
864 | atomic_inc(&dest->inactconns); | 864 | atomic_inc(&dest->inactconns); |
865 | } else { | 865 | } else { |
866 | atomic_inc(&dest->activeconns); | 866 | atomic_inc(&dest->activeconns); |
867 | atomic_dec(&dest->inactconns); | 867 | atomic_dec(&dest->inactconns); |
868 | } | 868 | } |
869 | } | 869 | } |
870 | flags &= IP_VS_CONN_F_BACKUP_UPD_MASK; | 870 | flags &= IP_VS_CONN_F_BACKUP_UPD_MASK; |
871 | flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK; | 871 | flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK; |
872 | cp->flags = flags; | 872 | cp->flags = flags; |
873 | spin_unlock_bh(&cp->lock); | 873 | spin_unlock_bh(&cp->lock); |
874 | if (!dest) | 874 | if (!dest) |
875 | ip_vs_try_bind_dest(cp); | 875 | ip_vs_try_bind_dest(cp); |
876 | } else { | 876 | } else { |
877 | /* | 877 | /* |
878 | * Find the appropriate destination for the connection. | 878 | * Find the appropriate destination for the connection. |
879 | * If it is not found the connection will remain unbound | 879 | * If it is not found the connection will remain unbound |
880 | * but still handled. | 880 | * but still handled. |
881 | */ | 881 | */ |
882 | rcu_read_lock(); | 882 | rcu_read_lock(); |
883 | dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, | 883 | dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, |
884 | param->vport, protocol, fwmark, flags); | 884 | param->vport, protocol, fwmark, flags); |
885 | 885 | ||
886 | cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); | 886 | cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); |
887 | rcu_read_unlock(); | 887 | rcu_read_unlock(); |
888 | if (!cp) { | 888 | if (!cp) { |
889 | if (param->pe_data) | 889 | if (param->pe_data) |
890 | kfree(param->pe_data); | 890 | kfree(param->pe_data); |
891 | IP_VS_DBG(2, "BACKUP, add new conn. failed\n"); | 891 | IP_VS_DBG(2, "BACKUP, add new conn. failed\n"); |
892 | return; | 892 | return; |
893 | } | 893 | } |
894 | } | 894 | } |
895 | 895 | ||
896 | if (opt) | 896 | if (opt) |
897 | memcpy(&cp->in_seq, opt, sizeof(*opt)); | 897 | memcpy(&cp->in_seq, opt, sizeof(*opt)); |
898 | atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs)); | 898 | atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs)); |
899 | cp->state = state; | 899 | cp->state = state; |
900 | cp->old_state = cp->state; | 900 | cp->old_state = cp->state; |
901 | /* | 901 | /* |
902 | * For Ver 0 messages style | 902 | * For Ver 0 messages style |
903 | * - Not possible to recover the right timeout for templates | 903 | * - Not possible to recover the right timeout for templates |
904 | * - can not find the right fwmark | 904 | * - can not find the right fwmark |
905 | * virtual service. If needed, we can do it for | 905 | * virtual service. If needed, we can do it for |
906 | * non-fwmark persistent services. | 906 | * non-fwmark persistent services. |
907 | * Ver 1 messages style. | 907 | * Ver 1 messages style. |
908 | * - No problem. | 908 | * - No problem. |
909 | */ | 909 | */ |
910 | if (timeout) { | 910 | if (timeout) { |
911 | if (timeout > MAX_SCHEDULE_TIMEOUT / HZ) | 911 | if (timeout > MAX_SCHEDULE_TIMEOUT / HZ) |
912 | timeout = MAX_SCHEDULE_TIMEOUT / HZ; | 912 | timeout = MAX_SCHEDULE_TIMEOUT / HZ; |
913 | cp->timeout = timeout*HZ; | 913 | cp->timeout = timeout*HZ; |
914 | } else { | 914 | } else { |
915 | struct ip_vs_proto_data *pd; | 915 | struct ip_vs_proto_data *pd; |
916 | 916 | ||
917 | pd = ip_vs_proto_data_get(net, protocol); | 917 | pd = ip_vs_proto_data_get(net, protocol); |
918 | if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table) | 918 | if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table) |
919 | cp->timeout = pd->timeout_table[state]; | 919 | cp->timeout = pd->timeout_table[state]; |
920 | else | 920 | else |
921 | cp->timeout = (3*60*HZ); | 921 | cp->timeout = (3*60*HZ); |
922 | } | 922 | } |
923 | ip_vs_conn_put(cp); | 923 | ip_vs_conn_put(cp); |
924 | } | 924 | } |
925 | 925 | ||
926 | /* | 926 | /* |
927 | * Process received multicast message for Version 0 | 927 | * Process received multicast message for Version 0 |
928 | */ | 928 | */ |
929 | static void ip_vs_process_message_v0(struct net *net, const char *buffer, | 929 | static void ip_vs_process_message_v0(struct net *net, const char *buffer, |
930 | const size_t buflen) | 930 | const size_t buflen) |
931 | { | 931 | { |
932 | struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer; | 932 | struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer; |
933 | struct ip_vs_sync_conn_v0 *s; | 933 | struct ip_vs_sync_conn_v0 *s; |
934 | struct ip_vs_sync_conn_options *opt; | 934 | struct ip_vs_sync_conn_options *opt; |
935 | struct ip_vs_protocol *pp; | 935 | struct ip_vs_protocol *pp; |
936 | struct ip_vs_conn_param param; | 936 | struct ip_vs_conn_param param; |
937 | char *p; | 937 | char *p; |
938 | int i; | 938 | int i; |
939 | 939 | ||
940 | p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0); | 940 | p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0); |
941 | for (i=0; i<m->nr_conns; i++) { | 941 | for (i=0; i<m->nr_conns; i++) { |
942 | unsigned int flags, state; | 942 | unsigned int flags, state; |
943 | 943 | ||
944 | if (p + SIMPLE_CONN_SIZE > buffer+buflen) { | 944 | if (p + SIMPLE_CONN_SIZE > buffer+buflen) { |
945 | IP_VS_ERR_RL("BACKUP v0, bogus conn\n"); | 945 | IP_VS_ERR_RL("BACKUP v0, bogus conn\n"); |
946 | return; | 946 | return; |
947 | } | 947 | } |
948 | s = (struct ip_vs_sync_conn_v0 *) p; | 948 | s = (struct ip_vs_sync_conn_v0 *) p; |
949 | flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC; | 949 | flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC; |
950 | flags &= ~IP_VS_CONN_F_HASHED; | 950 | flags &= ~IP_VS_CONN_F_HASHED; |
951 | if (flags & IP_VS_CONN_F_SEQ_MASK) { | 951 | if (flags & IP_VS_CONN_F_SEQ_MASK) { |
952 | opt = (struct ip_vs_sync_conn_options *)&s[1]; | 952 | opt = (struct ip_vs_sync_conn_options *)&s[1]; |
953 | p += FULL_CONN_SIZE; | 953 | p += FULL_CONN_SIZE; |
954 | if (p > buffer+buflen) { | 954 | if (p > buffer+buflen) { |
955 | IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n"); | 955 | IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n"); |
956 | return; | 956 | return; |
957 | } | 957 | } |
958 | } else { | 958 | } else { |
959 | opt = NULL; | 959 | opt = NULL; |
960 | p += SIMPLE_CONN_SIZE; | 960 | p += SIMPLE_CONN_SIZE; |
961 | } | 961 | } |
962 | 962 | ||
963 | state = ntohs(s->state); | 963 | state = ntohs(s->state); |
964 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) { | 964 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) { |
965 | pp = ip_vs_proto_get(s->protocol); | 965 | pp = ip_vs_proto_get(s->protocol); |
966 | if (!pp) { | 966 | if (!pp) { |
967 | IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n", | 967 | IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n", |
968 | s->protocol); | 968 | s->protocol); |
969 | continue; | 969 | continue; |
970 | } | 970 | } |
971 | if (state >= pp->num_states) { | 971 | if (state >= pp->num_states) { |
972 | IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n", | 972 | IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n", |
973 | pp->name, state); | 973 | pp->name, state); |
974 | continue; | 974 | continue; |
975 | } | 975 | } |
976 | } else { | 976 | } else { |
977 | /* protocol in templates is not used for state/timeout */ | 977 | /* protocol in templates is not used for state/timeout */ |
978 | if (state > 0) { | 978 | if (state > 0) { |
979 | IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n", | 979 | IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n", |
980 | state); | 980 | state); |
981 | state = 0; | 981 | state = 0; |
982 | } | 982 | } |
983 | } | 983 | } |
984 | 984 | ||
985 | ip_vs_conn_fill_param(net, AF_INET, s->protocol, | 985 | ip_vs_conn_fill_param(net, AF_INET, s->protocol, |
986 | (const union nf_inet_addr *)&s->caddr, | 986 | (const union nf_inet_addr *)&s->caddr, |
987 | s->cport, | 987 | s->cport, |
988 | (const union nf_inet_addr *)&s->vaddr, | 988 | (const union nf_inet_addr *)&s->vaddr, |
989 | s->vport, ¶m); | 989 | s->vport, ¶m); |
990 | 990 | ||
991 | /* Send timeout as Zero */ | 991 | /* Send timeout as Zero */ |
992 | ip_vs_proc_conn(net, ¶m, flags, state, s->protocol, AF_INET, | 992 | ip_vs_proc_conn(net, ¶m, flags, state, s->protocol, AF_INET, |
993 | (union nf_inet_addr *)&s->daddr, s->dport, | 993 | (union nf_inet_addr *)&s->daddr, s->dport, |
994 | 0, 0, opt); | 994 | 0, 0, opt); |
995 | } | 995 | } |
996 | } | 996 | } |
997 | 997 | ||
998 | /* | 998 | /* |
999 | * Handle options | 999 | * Handle options |
1000 | */ | 1000 | */ |
1001 | static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen, | 1001 | static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen, |
1002 | __u32 *opt_flags, | 1002 | __u32 *opt_flags, |
1003 | struct ip_vs_sync_conn_options *opt) | 1003 | struct ip_vs_sync_conn_options *opt) |
1004 | { | 1004 | { |
1005 | struct ip_vs_sync_conn_options *topt; | 1005 | struct ip_vs_sync_conn_options *topt; |
1006 | 1006 | ||
1007 | topt = (struct ip_vs_sync_conn_options *)p; | 1007 | topt = (struct ip_vs_sync_conn_options *)p; |
1008 | 1008 | ||
1009 | if (plen != sizeof(struct ip_vs_sync_conn_options)) { | 1009 | if (plen != sizeof(struct ip_vs_sync_conn_options)) { |
1010 | IP_VS_DBG(2, "BACKUP, bogus conn options length\n"); | 1010 | IP_VS_DBG(2, "BACKUP, bogus conn options length\n"); |
1011 | return -EINVAL; | 1011 | return -EINVAL; |
1012 | } | 1012 | } |
1013 | if (*opt_flags & IPVS_OPT_F_SEQ_DATA) { | 1013 | if (*opt_flags & IPVS_OPT_F_SEQ_DATA) { |
1014 | IP_VS_DBG(2, "BACKUP, conn options found twice\n"); | 1014 | IP_VS_DBG(2, "BACKUP, conn options found twice\n"); |
1015 | return -EINVAL; | 1015 | return -EINVAL; |
1016 | } | 1016 | } |
1017 | ntoh_seq(&topt->in_seq, &opt->in_seq); | 1017 | ntoh_seq(&topt->in_seq, &opt->in_seq); |
1018 | ntoh_seq(&topt->out_seq, &opt->out_seq); | 1018 | ntoh_seq(&topt->out_seq, &opt->out_seq); |
1019 | *opt_flags |= IPVS_OPT_F_SEQ_DATA; | 1019 | *opt_flags |= IPVS_OPT_F_SEQ_DATA; |
1020 | return 0; | 1020 | return 0; |
1021 | } | 1021 | } |
1022 | 1022 | ||
1023 | static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len, | 1023 | static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len, |
1024 | __u8 **data, unsigned int maxlen, | 1024 | __u8 **data, unsigned int maxlen, |
1025 | __u32 *opt_flags, __u32 flag) | 1025 | __u32 *opt_flags, __u32 flag) |
1026 | { | 1026 | { |
1027 | if (plen > maxlen) { | 1027 | if (plen > maxlen) { |
1028 | IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen); | 1028 | IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen); |
1029 | return -EINVAL; | 1029 | return -EINVAL; |
1030 | } | 1030 | } |
1031 | if (*opt_flags & flag) { | 1031 | if (*opt_flags & flag) { |
1032 | IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag); | 1032 | IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag); |
1033 | return -EINVAL; | 1033 | return -EINVAL; |
1034 | } | 1034 | } |
1035 | *data_len = plen; | 1035 | *data_len = plen; |
1036 | *data = p; | 1036 | *data = p; |
1037 | *opt_flags |= flag; | 1037 | *opt_flags |= flag; |
1038 | return 0; | 1038 | return 0; |
1039 | } | 1039 | } |
1040 | /* | 1040 | /* |
1041 | * Process a Version 1 sync. connection | 1041 | * Process a Version 1 sync. connection |
1042 | */ | 1042 | */ |
1043 | static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) | 1043 | static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) |
1044 | { | 1044 | { |
1045 | struct ip_vs_sync_conn_options opt; | 1045 | struct ip_vs_sync_conn_options opt; |
1046 | union ip_vs_sync_conn *s; | 1046 | union ip_vs_sync_conn *s; |
1047 | struct ip_vs_protocol *pp; | 1047 | struct ip_vs_protocol *pp; |
1048 | struct ip_vs_conn_param param; | 1048 | struct ip_vs_conn_param param; |
1049 | __u32 flags; | 1049 | __u32 flags; |
1050 | unsigned int af, state, pe_data_len=0, pe_name_len=0; | 1050 | unsigned int af, state, pe_data_len=0, pe_name_len=0; |
1051 | __u8 *pe_data=NULL, *pe_name=NULL; | 1051 | __u8 *pe_data=NULL, *pe_name=NULL; |
1052 | __u32 opt_flags=0; | 1052 | __u32 opt_flags=0; |
1053 | int retc=0; | 1053 | int retc=0; |
1054 | 1054 | ||
1055 | s = (union ip_vs_sync_conn *) p; | 1055 | s = (union ip_vs_sync_conn *) p; |
1056 | 1056 | ||
1057 | if (s->v6.type & STYPE_F_INET6) { | 1057 | if (s->v6.type & STYPE_F_INET6) { |
1058 | #ifdef CONFIG_IP_VS_IPV6 | 1058 | #ifdef CONFIG_IP_VS_IPV6 |
1059 | af = AF_INET6; | 1059 | af = AF_INET6; |
1060 | p += sizeof(struct ip_vs_sync_v6); | 1060 | p += sizeof(struct ip_vs_sync_v6); |
1061 | #else | 1061 | #else |
1062 | IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n"); | 1062 | IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n"); |
1063 | retc = 10; | 1063 | retc = 10; |
1064 | goto out; | 1064 | goto out; |
1065 | #endif | 1065 | #endif |
1066 | } else if (!s->v4.type) { | 1066 | } else if (!s->v4.type) { |
1067 | af = AF_INET; | 1067 | af = AF_INET; |
1068 | p += sizeof(struct ip_vs_sync_v4); | 1068 | p += sizeof(struct ip_vs_sync_v4); |
1069 | } else { | 1069 | } else { |
1070 | return -10; | 1070 | return -10; |
1071 | } | 1071 | } |
1072 | if (p > msg_end) | 1072 | if (p > msg_end) |
1073 | return -20; | 1073 | return -20; |
1074 | 1074 | ||
1075 | /* Process optional params check Type & Len. */ | 1075 | /* Process optional params check Type & Len. */ |
1076 | while (p < msg_end) { | 1076 | while (p < msg_end) { |
1077 | int ptype; | 1077 | int ptype; |
1078 | int plen; | 1078 | int plen; |
1079 | 1079 | ||
1080 | if (p+2 > msg_end) | 1080 | if (p+2 > msg_end) |
1081 | return -30; | 1081 | return -30; |
1082 | ptype = *(p++); | 1082 | ptype = *(p++); |
1083 | plen = *(p++); | 1083 | plen = *(p++); |
1084 | 1084 | ||
1085 | if (!plen || ((p + plen) > msg_end)) | 1085 | if (!plen || ((p + plen) > msg_end)) |
1086 | return -40; | 1086 | return -40; |
1087 | /* Handle seq option p = param data */ | 1087 | /* Handle seq option p = param data */ |
1088 | switch (ptype & ~IPVS_OPT_F_PARAM) { | 1088 | switch (ptype & ~IPVS_OPT_F_PARAM) { |
1089 | case IPVS_OPT_SEQ_DATA: | 1089 | case IPVS_OPT_SEQ_DATA: |
1090 | if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt)) | 1090 | if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt)) |
1091 | return -50; | 1091 | return -50; |
1092 | break; | 1092 | break; |
1093 | 1093 | ||
1094 | case IPVS_OPT_PE_DATA: | 1094 | case IPVS_OPT_PE_DATA: |
1095 | if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data, | 1095 | if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data, |
1096 | IP_VS_PEDATA_MAXLEN, &opt_flags, | 1096 | IP_VS_PEDATA_MAXLEN, &opt_flags, |
1097 | IPVS_OPT_F_PE_DATA)) | 1097 | IPVS_OPT_F_PE_DATA)) |
1098 | return -60; | 1098 | return -60; |
1099 | break; | 1099 | break; |
1100 | 1100 | ||
1101 | case IPVS_OPT_PE_NAME: | 1101 | case IPVS_OPT_PE_NAME: |
1102 | if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name, | 1102 | if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name, |
1103 | IP_VS_PENAME_MAXLEN, &opt_flags, | 1103 | IP_VS_PENAME_MAXLEN, &opt_flags, |
1104 | IPVS_OPT_F_PE_NAME)) | 1104 | IPVS_OPT_F_PE_NAME)) |
1105 | return -70; | 1105 | return -70; |
1106 | break; | 1106 | break; |
1107 | 1107 | ||
1108 | default: | 1108 | default: |
1109 | /* Param data mandatory ? */ | 1109 | /* Param data mandatory ? */ |
1110 | if (!(ptype & IPVS_OPT_F_PARAM)) { | 1110 | if (!(ptype & IPVS_OPT_F_PARAM)) { |
1111 | IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n", | 1111 | IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n", |
1112 | ptype & ~IPVS_OPT_F_PARAM); | 1112 | ptype & ~IPVS_OPT_F_PARAM); |
1113 | retc = 20; | 1113 | retc = 20; |
1114 | goto out; | 1114 | goto out; |
1115 | } | 1115 | } |
1116 | } | 1116 | } |
1117 | p += plen; /* Next option */ | 1117 | p += plen; /* Next option */ |
1118 | } | 1118 | } |
1119 | 1119 | ||
1120 | /* Get flags and Mask off unsupported */ | 1120 | /* Get flags and Mask off unsupported */ |
1121 | flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK; | 1121 | flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK; |
1122 | flags |= IP_VS_CONN_F_SYNC; | 1122 | flags |= IP_VS_CONN_F_SYNC; |
1123 | state = ntohs(s->v4.state); | 1123 | state = ntohs(s->v4.state); |
1124 | 1124 | ||
1125 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) { | 1125 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) { |
1126 | pp = ip_vs_proto_get(s->v4.protocol); | 1126 | pp = ip_vs_proto_get(s->v4.protocol); |
1127 | if (!pp) { | 1127 | if (!pp) { |
1128 | IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n", | 1128 | IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n", |
1129 | s->v4.protocol); | 1129 | s->v4.protocol); |
1130 | retc = 30; | 1130 | retc = 30; |
1131 | goto out; | 1131 | goto out; |
1132 | } | 1132 | } |
1133 | if (state >= pp->num_states) { | 1133 | if (state >= pp->num_states) { |
1134 | IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n", | 1134 | IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n", |
1135 | pp->name, state); | 1135 | pp->name, state); |
1136 | retc = 40; | 1136 | retc = 40; |
1137 | goto out; | 1137 | goto out; |
1138 | } | 1138 | } |
1139 | } else { | 1139 | } else { |
1140 | /* protocol in templates is not used for state/timeout */ | 1140 | /* protocol in templates is not used for state/timeout */ |
1141 | if (state > 0) { | 1141 | if (state > 0) { |
1142 | IP_VS_DBG(3, "BACKUP, Invalid template state %u\n", | 1142 | IP_VS_DBG(3, "BACKUP, Invalid template state %u\n", |
1143 | state); | 1143 | state); |
1144 | state = 0; | 1144 | state = 0; |
1145 | } | 1145 | } |
1146 | } | 1146 | } |
1147 | if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data, | 1147 | if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data, |
1148 | pe_data_len, pe_name, pe_name_len)) { | 1148 | pe_data_len, pe_name, pe_name_len)) { |
1149 | retc = 50; | 1149 | retc = 50; |
1150 | goto out; | 1150 | goto out; |
1151 | } | 1151 | } |
1152 | /* If only IPv4, just silent skip IPv6 */ | 1152 | /* If only IPv4, just silent skip IPv6 */ |
1153 | if (af == AF_INET) | 1153 | if (af == AF_INET) |
1154 | ip_vs_proc_conn(net, ¶m, flags, state, s->v4.protocol, af, | 1154 | ip_vs_proc_conn(net, ¶m, flags, state, s->v4.protocol, af, |
1155 | (union nf_inet_addr *)&s->v4.daddr, s->v4.dport, | 1155 | (union nf_inet_addr *)&s->v4.daddr, s->v4.dport, |
1156 | ntohl(s->v4.timeout), ntohl(s->v4.fwmark), | 1156 | ntohl(s->v4.timeout), ntohl(s->v4.fwmark), |
1157 | (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) | 1157 | (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) |
1158 | ); | 1158 | ); |
1159 | #ifdef CONFIG_IP_VS_IPV6 | 1159 | #ifdef CONFIG_IP_VS_IPV6 |
1160 | else | 1160 | else |
1161 | ip_vs_proc_conn(net, ¶m, flags, state, s->v6.protocol, af, | 1161 | ip_vs_proc_conn(net, ¶m, flags, state, s->v6.protocol, af, |
1162 | (union nf_inet_addr *)&s->v6.daddr, s->v6.dport, | 1162 | (union nf_inet_addr *)&s->v6.daddr, s->v6.dport, |
1163 | ntohl(s->v6.timeout), ntohl(s->v6.fwmark), | 1163 | ntohl(s->v6.timeout), ntohl(s->v6.fwmark), |
1164 | (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) | 1164 | (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) |
1165 | ); | 1165 | ); |
1166 | #endif | 1166 | #endif |
1167 | return 0; | 1167 | return 0; |
1168 | /* Error exit */ | 1168 | /* Error exit */ |
1169 | out: | 1169 | out: |
1170 | IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc); | 1170 | IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc); |
1171 | return retc; | 1171 | return retc; |
1172 | 1172 | ||
1173 | } | 1173 | } |
1174 | /* | 1174 | /* |
1175 | * Process received multicast message and create the corresponding | 1175 | * Process received multicast message and create the corresponding |
1176 | * ip_vs_conn entries. | 1176 | * ip_vs_conn entries. |
1177 | * Handles Version 0 & 1 | 1177 | * Handles Version 0 & 1 |
1178 | */ | 1178 | */ |
1179 | static void ip_vs_process_message(struct net *net, __u8 *buffer, | 1179 | static void ip_vs_process_message(struct net *net, __u8 *buffer, |
1180 | const size_t buflen) | 1180 | const size_t buflen) |
1181 | { | 1181 | { |
1182 | struct netns_ipvs *ipvs = net_ipvs(net); | 1182 | struct netns_ipvs *ipvs = net_ipvs(net); |
1183 | struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; | 1183 | struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; |
1184 | __u8 *p, *msg_end; | 1184 | __u8 *p, *msg_end; |
1185 | int i, nr_conns; | 1185 | int i, nr_conns; |
1186 | 1186 | ||
1187 | if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) { | 1187 | if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) { |
1188 | IP_VS_DBG(2, "BACKUP, message header too short\n"); | 1188 | IP_VS_DBG(2, "BACKUP, message header too short\n"); |
1189 | return; | 1189 | return; |
1190 | } | 1190 | } |
1191 | 1191 | ||
1192 | if (buflen != ntohs(m2->size)) { | 1192 | if (buflen != ntohs(m2->size)) { |
1193 | IP_VS_DBG(2, "BACKUP, bogus message size\n"); | 1193 | IP_VS_DBG(2, "BACKUP, bogus message size\n"); |
1194 | return; | 1194 | return; |
1195 | } | 1195 | } |
1196 | /* SyncID sanity check */ | 1196 | /* SyncID sanity check */ |
1197 | if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) { | 1197 | if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) { |
1198 | IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid); | 1198 | IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid); |
1199 | return; | 1199 | return; |
1200 | } | 1200 | } |
1201 | /* Handle version 1 message */ | 1201 | /* Handle version 1 message */ |
1202 | if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0) | 1202 | if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0) |
1203 | && (m2->spare == 0)) { | 1203 | && (m2->spare == 0)) { |
1204 | 1204 | ||
1205 | msg_end = buffer + sizeof(struct ip_vs_sync_mesg); | 1205 | msg_end = buffer + sizeof(struct ip_vs_sync_mesg); |
1206 | nr_conns = m2->nr_conns; | 1206 | nr_conns = m2->nr_conns; |
1207 | 1207 | ||
1208 | for (i=0; i<nr_conns; i++) { | 1208 | for (i=0; i<nr_conns; i++) { |
1209 | union ip_vs_sync_conn *s; | 1209 | union ip_vs_sync_conn *s; |
1210 | unsigned int size; | 1210 | unsigned int size; |
1211 | int retc; | 1211 | int retc; |
1212 | 1212 | ||
1213 | p = msg_end; | 1213 | p = msg_end; |
1214 | if (p + sizeof(s->v4) > buffer+buflen) { | 1214 | if (p + sizeof(s->v4) > buffer+buflen) { |
1215 | IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n"); | 1215 | IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n"); |
1216 | return; | 1216 | return; |
1217 | } | 1217 | } |
1218 | s = (union ip_vs_sync_conn *)p; | 1218 | s = (union ip_vs_sync_conn *)p; |
1219 | size = ntohs(s->v4.ver_size) & SVER_MASK; | 1219 | size = ntohs(s->v4.ver_size) & SVER_MASK; |
1220 | msg_end = p + size; | 1220 | msg_end = p + size; |
1221 | /* Basic sanity checks */ | 1221 | /* Basic sanity checks */ |
1222 | if (msg_end > buffer+buflen) { | 1222 | if (msg_end > buffer+buflen) { |
1223 | IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n"); | 1223 | IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n"); |
1224 | return; | 1224 | return; |
1225 | } | 1225 | } |
1226 | if (ntohs(s->v4.ver_size) >> SVER_SHIFT) { | 1226 | if (ntohs(s->v4.ver_size) >> SVER_SHIFT) { |
1227 | IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n", | 1227 | IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n", |
1228 | ntohs(s->v4.ver_size) >> SVER_SHIFT); | 1228 | ntohs(s->v4.ver_size) >> SVER_SHIFT); |
1229 | return; | 1229 | return; |
1230 | } | 1230 | } |
1231 | /* Process a single sync_conn */ | 1231 | /* Process a single sync_conn */ |
1232 | retc = ip_vs_proc_sync_conn(net, p, msg_end); | 1232 | retc = ip_vs_proc_sync_conn(net, p, msg_end); |
1233 | if (retc < 0) { | 1233 | if (retc < 0) { |
1234 | IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n", | 1234 | IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n", |
1235 | retc); | 1235 | retc); |
1236 | return; | 1236 | return; |
1237 | } | 1237 | } |
1238 | /* Make sure we have 32 bit alignment */ | 1238 | /* Make sure we have 32 bit alignment */ |
1239 | msg_end = p + ((size + 3) & ~3); | 1239 | msg_end = p + ((size + 3) & ~3); |
1240 | } | 1240 | } |
1241 | } else { | 1241 | } else { |
1242 | /* Old type of message */ | 1242 | /* Old type of message */ |
1243 | ip_vs_process_message_v0(net, buffer, buflen); | 1243 | ip_vs_process_message_v0(net, buffer, buflen); |
1244 | return; | 1244 | return; |
1245 | } | 1245 | } |
1246 | } | 1246 | } |
1247 | 1247 | ||
1248 | 1248 | ||
1249 | /* | 1249 | /* |
1250 | * Setup sndbuf (mode=1) or rcvbuf (mode=0) | 1250 | * Setup sndbuf (mode=1) or rcvbuf (mode=0) |
1251 | */ | 1251 | */ |
1252 | static void set_sock_size(struct sock *sk, int mode, int val) | 1252 | static void set_sock_size(struct sock *sk, int mode, int val) |
1253 | { | 1253 | { |
1254 | /* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */ | 1254 | /* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */ |
1255 | /* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */ | 1255 | /* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */ |
1256 | lock_sock(sk); | 1256 | lock_sock(sk); |
1257 | if (mode) { | 1257 | if (mode) { |
1258 | val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2, | 1258 | val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2, |
1259 | sysctl_wmem_max); | 1259 | sysctl_wmem_max); |
1260 | sk->sk_sndbuf = val * 2; | 1260 | sk->sk_sndbuf = val * 2; |
1261 | sk->sk_userlocks |= SOCK_SNDBUF_LOCK; | 1261 | sk->sk_userlocks |= SOCK_SNDBUF_LOCK; |
1262 | } else { | 1262 | } else { |
1263 | val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2, | 1263 | val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2, |
1264 | sysctl_rmem_max); | 1264 | sysctl_rmem_max); |
1265 | sk->sk_rcvbuf = val * 2; | 1265 | sk->sk_rcvbuf = val * 2; |
1266 | sk->sk_userlocks |= SOCK_RCVBUF_LOCK; | 1266 | sk->sk_userlocks |= SOCK_RCVBUF_LOCK; |
1267 | } | 1267 | } |
1268 | release_sock(sk); | 1268 | release_sock(sk); |
1269 | } | 1269 | } |
1270 | 1270 | ||
1271 | /* | 1271 | /* |
1272 | * Setup loopback of outgoing multicasts on a sending socket | 1272 | * Setup loopback of outgoing multicasts on a sending socket |
1273 | */ | 1273 | */ |
1274 | static void set_mcast_loop(struct sock *sk, u_char loop) | 1274 | static void set_mcast_loop(struct sock *sk, u_char loop) |
1275 | { | 1275 | { |
1276 | struct inet_sock *inet = inet_sk(sk); | 1276 | struct inet_sock *inet = inet_sk(sk); |
1277 | 1277 | ||
1278 | /* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */ | 1278 | /* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */ |
1279 | lock_sock(sk); | 1279 | lock_sock(sk); |
1280 | inet->mc_loop = loop ? 1 : 0; | 1280 | inet->mc_loop = loop ? 1 : 0; |
1281 | release_sock(sk); | 1281 | release_sock(sk); |
1282 | } | 1282 | } |
1283 | 1283 | ||
1284 | /* | 1284 | /* |
1285 | * Specify TTL for outgoing multicasts on a sending socket | 1285 | * Specify TTL for outgoing multicasts on a sending socket |
1286 | */ | 1286 | */ |
1287 | static void set_mcast_ttl(struct sock *sk, u_char ttl) | 1287 | static void set_mcast_ttl(struct sock *sk, u_char ttl) |
1288 | { | 1288 | { |
1289 | struct inet_sock *inet = inet_sk(sk); | 1289 | struct inet_sock *inet = inet_sk(sk); |
1290 | 1290 | ||
1291 | /* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */ | 1291 | /* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */ |
1292 | lock_sock(sk); | 1292 | lock_sock(sk); |
1293 | inet->mc_ttl = ttl; | 1293 | inet->mc_ttl = ttl; |
1294 | release_sock(sk); | 1294 | release_sock(sk); |
1295 | } | 1295 | } |
1296 | 1296 | ||
1297 | /* | 1297 | /* |
1298 | * Specifiy default interface for outgoing multicasts | 1298 | * Specifiy default interface for outgoing multicasts |
1299 | */ | 1299 | */ |
1300 | static int set_mcast_if(struct sock *sk, char *ifname) | 1300 | static int set_mcast_if(struct sock *sk, char *ifname) |
1301 | { | 1301 | { |
1302 | struct net_device *dev; | 1302 | struct net_device *dev; |
1303 | struct inet_sock *inet = inet_sk(sk); | 1303 | struct inet_sock *inet = inet_sk(sk); |
1304 | struct net *net = sock_net(sk); | 1304 | struct net *net = sock_net(sk); |
1305 | 1305 | ||
1306 | dev = __dev_get_by_name(net, ifname); | 1306 | dev = __dev_get_by_name(net, ifname); |
1307 | if (!dev) | 1307 | if (!dev) |
1308 | return -ENODEV; | 1308 | return -ENODEV; |
1309 | 1309 | ||
1310 | if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) | 1310 | if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) |
1311 | return -EINVAL; | 1311 | return -EINVAL; |
1312 | 1312 | ||
1313 | lock_sock(sk); | 1313 | lock_sock(sk); |
1314 | inet->mc_index = dev->ifindex; | 1314 | inet->mc_index = dev->ifindex; |
1315 | /* inet->mc_addr = 0; */ | 1315 | /* inet->mc_addr = 0; */ |
1316 | release_sock(sk); | 1316 | release_sock(sk); |
1317 | 1317 | ||
1318 | return 0; | 1318 | return 0; |
1319 | } | 1319 | } |
1320 | 1320 | ||
1321 | 1321 | ||
1322 | /* | 1322 | /* |
1323 | * Set the maximum length of sync message according to the | 1323 | * Set the maximum length of sync message according to the |
1324 | * specified interface's MTU. | 1324 | * specified interface's MTU. |
1325 | */ | 1325 | */ |
1326 | static int set_sync_mesg_maxlen(struct net *net, int sync_state) | 1326 | static int set_sync_mesg_maxlen(struct net *net, int sync_state) |
1327 | { | 1327 | { |
1328 | struct netns_ipvs *ipvs = net_ipvs(net); | 1328 | struct netns_ipvs *ipvs = net_ipvs(net); |
1329 | struct net_device *dev; | 1329 | struct net_device *dev; |
1330 | int num; | 1330 | int num; |
1331 | 1331 | ||
1332 | if (sync_state == IP_VS_STATE_MASTER) { | 1332 | if (sync_state == IP_VS_STATE_MASTER) { |
1333 | dev = __dev_get_by_name(net, ipvs->master_mcast_ifn); | 1333 | dev = __dev_get_by_name(net, ipvs->master_mcast_ifn); |
1334 | if (!dev) | 1334 | if (!dev) |
1335 | return -ENODEV; | 1335 | return -ENODEV; |
1336 | 1336 | ||
1337 | num = (dev->mtu - sizeof(struct iphdr) - | 1337 | num = (dev->mtu - sizeof(struct iphdr) - |
1338 | sizeof(struct udphdr) - | 1338 | sizeof(struct udphdr) - |
1339 | SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; | 1339 | SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; |
1340 | ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN + | 1340 | ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN + |
1341 | SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); | 1341 | SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); |
1342 | IP_VS_DBG(7, "setting the maximum length of sync sending " | 1342 | IP_VS_DBG(7, "setting the maximum length of sync sending " |
1343 | "message %d.\n", ipvs->send_mesg_maxlen); | 1343 | "message %d.\n", ipvs->send_mesg_maxlen); |
1344 | } else if (sync_state == IP_VS_STATE_BACKUP) { | 1344 | } else if (sync_state == IP_VS_STATE_BACKUP) { |
1345 | dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn); | 1345 | dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn); |
1346 | if (!dev) | 1346 | if (!dev) |
1347 | return -ENODEV; | 1347 | return -ENODEV; |
1348 | 1348 | ||
1349 | ipvs->recv_mesg_maxlen = dev->mtu - | 1349 | ipvs->recv_mesg_maxlen = dev->mtu - |
1350 | sizeof(struct iphdr) - sizeof(struct udphdr); | 1350 | sizeof(struct iphdr) - sizeof(struct udphdr); |
1351 | IP_VS_DBG(7, "setting the maximum length of sync receiving " | 1351 | IP_VS_DBG(7, "setting the maximum length of sync receiving " |
1352 | "message %d.\n", ipvs->recv_mesg_maxlen); | 1352 | "message %d.\n", ipvs->recv_mesg_maxlen); |
1353 | } | 1353 | } |
1354 | 1354 | ||
1355 | return 0; | 1355 | return 0; |
1356 | } | 1356 | } |
1357 | 1357 | ||
1358 | 1358 | ||
1359 | /* | 1359 | /* |
1360 | * Join a multicast group. | 1360 | * Join a multicast group. |
1361 | * the group is specified by a class D multicast address 224.0.0.0/8 | 1361 | * the group is specified by a class D multicast address 224.0.0.0/8 |
1362 | * in the in_addr structure passed in as a parameter. | 1362 | * in the in_addr structure passed in as a parameter. |
1363 | */ | 1363 | */ |
1364 | static int | 1364 | static int |
1365 | join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) | 1365 | join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) |
1366 | { | 1366 | { |
1367 | struct net *net = sock_net(sk); | 1367 | struct net *net = sock_net(sk); |
1368 | struct ip_mreqn mreq; | 1368 | struct ip_mreqn mreq; |
1369 | struct net_device *dev; | 1369 | struct net_device *dev; |
1370 | int ret; | 1370 | int ret; |
1371 | 1371 | ||
1372 | memset(&mreq, 0, sizeof(mreq)); | 1372 | memset(&mreq, 0, sizeof(mreq)); |
1373 | memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); | 1373 | memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); |
1374 | 1374 | ||
1375 | dev = __dev_get_by_name(net, ifname); | 1375 | dev = __dev_get_by_name(net, ifname); |
1376 | if (!dev) | 1376 | if (!dev) |
1377 | return -ENODEV; | 1377 | return -ENODEV; |
1378 | if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) | 1378 | if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) |
1379 | return -EINVAL; | 1379 | return -EINVAL; |
1380 | 1380 | ||
1381 | mreq.imr_ifindex = dev->ifindex; | 1381 | mreq.imr_ifindex = dev->ifindex; |
1382 | 1382 | ||
1383 | lock_sock(sk); | 1383 | lock_sock(sk); |
1384 | ret = ip_mc_join_group(sk, &mreq); | 1384 | ret = ip_mc_join_group(sk, &mreq); |
1385 | release_sock(sk); | 1385 | release_sock(sk); |
1386 | 1386 | ||
1387 | return ret; | 1387 | return ret; |
1388 | } | 1388 | } |
1389 | 1389 | ||
1390 | 1390 | ||
1391 | static int bind_mcastif_addr(struct socket *sock, char *ifname) | 1391 | static int bind_mcastif_addr(struct socket *sock, char *ifname) |
1392 | { | 1392 | { |
1393 | struct net *net = sock_net(sock->sk); | 1393 | struct net *net = sock_net(sock->sk); |
1394 | struct net_device *dev; | 1394 | struct net_device *dev; |
1395 | __be32 addr; | 1395 | __be32 addr; |
1396 | struct sockaddr_in sin; | 1396 | struct sockaddr_in sin; |
1397 | 1397 | ||
1398 | dev = __dev_get_by_name(net, ifname); | 1398 | dev = __dev_get_by_name(net, ifname); |
1399 | if (!dev) | 1399 | if (!dev) |
1400 | return -ENODEV; | 1400 | return -ENODEV; |
1401 | 1401 | ||
1402 | addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); | 1402 | addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); |
1403 | if (!addr) | 1403 | if (!addr) |
1404 | pr_err("You probably need to specify IP address on " | 1404 | pr_err("You probably need to specify IP address on " |
1405 | "multicast interface.\n"); | 1405 | "multicast interface.\n"); |
1406 | 1406 | ||
1407 | IP_VS_DBG(7, "binding socket with (%s) %pI4\n", | 1407 | IP_VS_DBG(7, "binding socket with (%s) %pI4\n", |
1408 | ifname, &addr); | 1408 | ifname, &addr); |
1409 | 1409 | ||
1410 | /* Now bind the socket with the address of multicast interface */ | 1410 | /* Now bind the socket with the address of multicast interface */ |
1411 | sin.sin_family = AF_INET; | 1411 | sin.sin_family = AF_INET; |
1412 | sin.sin_addr.s_addr = addr; | 1412 | sin.sin_addr.s_addr = addr; |
1413 | sin.sin_port = 0; | 1413 | sin.sin_port = 0; |
1414 | 1414 | ||
1415 | return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin)); | 1415 | return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin)); |
1416 | } | 1416 | } |
1417 | 1417 | ||
1418 | /* | 1418 | /* |
1419 | * Set up sending multicast socket over UDP | 1419 | * Set up sending multicast socket over UDP |
1420 | */ | 1420 | */ |
1421 | static struct socket *make_send_sock(struct net *net, int id) | 1421 | static struct socket *make_send_sock(struct net *net, int id) |
1422 | { | 1422 | { |
1423 | struct netns_ipvs *ipvs = net_ipvs(net); | 1423 | struct netns_ipvs *ipvs = net_ipvs(net); |
1424 | /* multicast addr */ | 1424 | /* multicast addr */ |
1425 | struct sockaddr_in mcast_addr = { | 1425 | struct sockaddr_in mcast_addr = { |
1426 | .sin_family = AF_INET, | 1426 | .sin_family = AF_INET, |
1427 | .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id), | 1427 | .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id), |
1428 | .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), | 1428 | .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), |
1429 | }; | 1429 | }; |
1430 | struct socket *sock; | 1430 | struct socket *sock; |
1431 | int result; | 1431 | int result; |
1432 | 1432 | ||
1433 | /* First create a socket move it to right name space later */ | 1433 | /* First create a socket move it to right name space later */ |
1434 | result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); | 1434 | result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); |
1435 | if (result < 0) { | 1435 | if (result < 0) { |
1436 | pr_err("Error during creation of socket; terminating\n"); | 1436 | pr_err("Error during creation of socket; terminating\n"); |
1437 | return ERR_PTR(result); | 1437 | return ERR_PTR(result); |
1438 | } | 1438 | } |
1439 | /* | 1439 | /* |
1440 | * Kernel sockets that are a part of a namespace, should not | 1440 | * Kernel sockets that are a part of a namespace, should not |
1441 | * hold a reference to a namespace in order to allow to stop it. | 1441 | * hold a reference to a namespace in order to allow to stop it. |
1442 | * After sk_change_net should be released using sk_release_kernel. | 1442 | * After sk_change_net should be released using sk_release_kernel. |
1443 | */ | 1443 | */ |
1444 | sk_change_net(sock->sk, net); | 1444 | sk_change_net(sock->sk, net); |
1445 | result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); | 1445 | result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); |
1446 | if (result < 0) { | 1446 | if (result < 0) { |
1447 | pr_err("Error setting outbound mcast interface\n"); | 1447 | pr_err("Error setting outbound mcast interface\n"); |
1448 | goto error; | 1448 | goto error; |
1449 | } | 1449 | } |
1450 | 1450 | ||
1451 | set_mcast_loop(sock->sk, 0); | 1451 | set_mcast_loop(sock->sk, 0); |
1452 | set_mcast_ttl(sock->sk, 1); | 1452 | set_mcast_ttl(sock->sk, 1); |
1453 | result = sysctl_sync_sock_size(ipvs); | 1453 | result = sysctl_sync_sock_size(ipvs); |
1454 | if (result > 0) | 1454 | if (result > 0) |
1455 | set_sock_size(sock->sk, 1, result); | 1455 | set_sock_size(sock->sk, 1, result); |
1456 | 1456 | ||
1457 | result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn); | 1457 | result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn); |
1458 | if (result < 0) { | 1458 | if (result < 0) { |
1459 | pr_err("Error binding address of the mcast interface\n"); | 1459 | pr_err("Error binding address of the mcast interface\n"); |
1460 | goto error; | 1460 | goto error; |
1461 | } | 1461 | } |
1462 | 1462 | ||
1463 | result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr, | 1463 | result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr, |
1464 | sizeof(struct sockaddr), 0); | 1464 | sizeof(struct sockaddr), 0); |
1465 | if (result < 0) { | 1465 | if (result < 0) { |
1466 | pr_err("Error connecting to the multicast addr\n"); | 1466 | pr_err("Error connecting to the multicast addr\n"); |
1467 | goto error; | 1467 | goto error; |
1468 | } | 1468 | } |
1469 | 1469 | ||
1470 | return sock; | 1470 | return sock; |
1471 | 1471 | ||
1472 | error: | 1472 | error: |
1473 | sk_release_kernel(sock->sk); | 1473 | sk_release_kernel(sock->sk); |
1474 | return ERR_PTR(result); | 1474 | return ERR_PTR(result); |
1475 | } | 1475 | } |
1476 | 1476 | ||
1477 | 1477 | ||
1478 | /* | 1478 | /* |
1479 | * Set up receiving multicast socket over UDP | 1479 | * Set up receiving multicast socket over UDP |
1480 | */ | 1480 | */ |
1481 | static struct socket *make_receive_sock(struct net *net, int id) | 1481 | static struct socket *make_receive_sock(struct net *net, int id) |
1482 | { | 1482 | { |
1483 | struct netns_ipvs *ipvs = net_ipvs(net); | 1483 | struct netns_ipvs *ipvs = net_ipvs(net); |
1484 | /* multicast addr */ | 1484 | /* multicast addr */ |
1485 | struct sockaddr_in mcast_addr = { | 1485 | struct sockaddr_in mcast_addr = { |
1486 | .sin_family = AF_INET, | 1486 | .sin_family = AF_INET, |
1487 | .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id), | 1487 | .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id), |
1488 | .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), | 1488 | .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), |
1489 | }; | 1489 | }; |
1490 | struct socket *sock; | 1490 | struct socket *sock; |
1491 | int result; | 1491 | int result; |
1492 | 1492 | ||
1493 | /* First create a socket */ | 1493 | /* First create a socket */ |
1494 | result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); | 1494 | result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); |
1495 | if (result < 0) { | 1495 | if (result < 0) { |
1496 | pr_err("Error during creation of socket; terminating\n"); | 1496 | pr_err("Error during creation of socket; terminating\n"); |
1497 | return ERR_PTR(result); | 1497 | return ERR_PTR(result); |
1498 | } | 1498 | } |
1499 | /* | 1499 | /* |
1500 | * Kernel sockets that are a part of a namespace, should not | 1500 | * Kernel sockets that are a part of a namespace, should not |
1501 | * hold a reference to a namespace in order to allow to stop it. | 1501 | * hold a reference to a namespace in order to allow to stop it. |
1502 | * After sk_change_net should be released using sk_release_kernel. | 1502 | * After sk_change_net should be released using sk_release_kernel. |
1503 | */ | 1503 | */ |
1504 | sk_change_net(sock->sk, net); | 1504 | sk_change_net(sock->sk, net); |
1505 | /* it is equivalent to the REUSEADDR option in user-space */ | 1505 | /* it is equivalent to the REUSEADDR option in user-space */ |
1506 | sock->sk->sk_reuse = SK_CAN_REUSE; | 1506 | sock->sk->sk_reuse = SK_CAN_REUSE; |
1507 | result = sysctl_sync_sock_size(ipvs); | 1507 | result = sysctl_sync_sock_size(ipvs); |
1508 | if (result > 0) | 1508 | if (result > 0) |
1509 | set_sock_size(sock->sk, 0, result); | 1509 | set_sock_size(sock->sk, 0, result); |
1510 | 1510 | ||
1511 | result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr, | 1511 | result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr, |
1512 | sizeof(struct sockaddr)); | 1512 | sizeof(struct sockaddr)); |
1513 | if (result < 0) { | 1513 | if (result < 0) { |
1514 | pr_err("Error binding to the multicast addr\n"); | 1514 | pr_err("Error binding to the multicast addr\n"); |
1515 | goto error; | 1515 | goto error; |
1516 | } | 1516 | } |
1517 | 1517 | ||
1518 | /* join the multicast group */ | 1518 | /* join the multicast group */ |
1519 | result = join_mcast_group(sock->sk, | 1519 | result = join_mcast_group(sock->sk, |
1520 | (struct in_addr *) &mcast_addr.sin_addr, | 1520 | (struct in_addr *) &mcast_addr.sin_addr, |
1521 | ipvs->backup_mcast_ifn); | 1521 | ipvs->backup_mcast_ifn); |
1522 | if (result < 0) { | 1522 | if (result < 0) { |
1523 | pr_err("Error joining to the multicast group\n"); | 1523 | pr_err("Error joining to the multicast group\n"); |
1524 | goto error; | 1524 | goto error; |
1525 | } | 1525 | } |
1526 | 1526 | ||
1527 | return sock; | 1527 | return sock; |
1528 | 1528 | ||
1529 | error: | 1529 | error: |
1530 | sk_release_kernel(sock->sk); | 1530 | sk_release_kernel(sock->sk); |
1531 | return ERR_PTR(result); | 1531 | return ERR_PTR(result); |
1532 | } | 1532 | } |
1533 | 1533 | ||
1534 | 1534 | ||
1535 | static int | 1535 | static int |
1536 | ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length) | 1536 | ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length) |
1537 | { | 1537 | { |
1538 | struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL}; | 1538 | struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL}; |
1539 | struct kvec iov; | 1539 | struct kvec iov; |
1540 | int len; | 1540 | int len; |
1541 | 1541 | ||
1542 | EnterFunction(7); | 1542 | EnterFunction(7); |
1543 | iov.iov_base = (void *)buffer; | 1543 | iov.iov_base = (void *)buffer; |
1544 | iov.iov_len = length; | 1544 | iov.iov_len = length; |
1545 | 1545 | ||
1546 | len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length)); | 1546 | len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length)); |
1547 | 1547 | ||
1548 | LeaveFunction(7); | 1548 | LeaveFunction(7); |
1549 | return len; | 1549 | return len; |
1550 | } | 1550 | } |
1551 | 1551 | ||
1552 | static int | 1552 | static int |
1553 | ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg) | 1553 | ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg) |
1554 | { | 1554 | { |
1555 | int msize; | 1555 | int msize; |
1556 | int ret; | 1556 | int ret; |
1557 | 1557 | ||
1558 | msize = ntohs(msg->size); | 1558 | msize = ntohs(msg->size); |
1559 | 1559 | ||
1560 | ret = ip_vs_send_async(sock, (char *)msg, msize); | 1560 | ret = ip_vs_send_async(sock, (char *)msg, msize); |
1561 | if (ret >= 0 || ret == -EAGAIN) | 1561 | if (ret >= 0 || ret == -EAGAIN) |
1562 | return ret; | 1562 | return ret; |
1563 | pr_err("ip_vs_send_async error %d\n", ret); | 1563 | pr_err("ip_vs_send_async error %d\n", ret); |
1564 | return 0; | 1564 | return 0; |
1565 | } | 1565 | } |
1566 | 1566 | ||
1567 | static int | 1567 | static int |
1568 | ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) | 1568 | ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) |
1569 | { | 1569 | { |
1570 | struct msghdr msg = {NULL,}; | 1570 | struct msghdr msg = {NULL,}; |
1571 | struct kvec iov; | 1571 | struct kvec iov; |
1572 | int len; | 1572 | int len; |
1573 | 1573 | ||
1574 | EnterFunction(7); | 1574 | EnterFunction(7); |
1575 | 1575 | ||
1576 | /* Receive a packet */ | 1576 | /* Receive a packet */ |
1577 | iov.iov_base = buffer; | 1577 | iov.iov_base = buffer; |
1578 | iov.iov_len = (size_t)buflen; | 1578 | iov.iov_len = (size_t)buflen; |
1579 | 1579 | ||
1580 | len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT); | 1580 | len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT); |
1581 | 1581 | ||
1582 | if (len < 0) | 1582 | if (len < 0) |
1583 | return len; | 1583 | return len; |
1584 | 1584 | ||
1585 | LeaveFunction(7); | 1585 | LeaveFunction(7); |
1586 | return len; | 1586 | return len; |
1587 | } | 1587 | } |
1588 | 1588 | ||
1589 | /* Wakeup the master thread for sending */ | 1589 | /* Wakeup the master thread for sending */ |
1590 | static void master_wakeup_work_handler(struct work_struct *work) | 1590 | static void master_wakeup_work_handler(struct work_struct *work) |
1591 | { | 1591 | { |
1592 | struct ipvs_master_sync_state *ms = | 1592 | struct ipvs_master_sync_state *ms = |
1593 | container_of(work, struct ipvs_master_sync_state, | 1593 | container_of(work, struct ipvs_master_sync_state, |
1594 | master_wakeup_work.work); | 1594 | master_wakeup_work.work); |
1595 | struct netns_ipvs *ipvs = ms->ipvs; | 1595 | struct netns_ipvs *ipvs = ms->ipvs; |
1596 | 1596 | ||
1597 | spin_lock_bh(&ipvs->sync_lock); | 1597 | spin_lock_bh(&ipvs->sync_lock); |
1598 | if (ms->sync_queue_len && | 1598 | if (ms->sync_queue_len && |
1599 | ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) { | 1599 | ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) { |
1600 | ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE; | 1600 | ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE; |
1601 | wake_up_process(ms->master_thread); | 1601 | wake_up_process(ms->master_thread); |
1602 | } | 1602 | } |
1603 | spin_unlock_bh(&ipvs->sync_lock); | 1603 | spin_unlock_bh(&ipvs->sync_lock); |
1604 | } | 1604 | } |
1605 | 1605 | ||
1606 | /* Get next buffer to send */ | 1606 | /* Get next buffer to send */ |
1607 | static inline struct ip_vs_sync_buff * | 1607 | static inline struct ip_vs_sync_buff * |
1608 | next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) | 1608 | next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) |
1609 | { | 1609 | { |
1610 | struct ip_vs_sync_buff *sb; | 1610 | struct ip_vs_sync_buff *sb; |
1611 | 1611 | ||
1612 | sb = sb_dequeue(ipvs, ms); | 1612 | sb = sb_dequeue(ipvs, ms); |
1613 | if (sb) | 1613 | if (sb) |
1614 | return sb; | 1614 | return sb; |
1615 | /* Do not delay entries in buffer for more than 2 seconds */ | 1615 | /* Do not delay entries in buffer for more than 2 seconds */ |
1616 | return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME); | 1616 | return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME); |
1617 | } | 1617 | } |
1618 | 1618 | ||
/*
 * Kernel thread body for one master sync thread.
 *
 * @data is the struct ip_vs_sync_thread_data built by start_sync_thread().
 * The thread repeatedly takes the next sync buffer and sends it over
 * tinfo->sock until kthread_should_stop() becomes true.  On exit it
 * releases every remaining buffer, the socket and tinfo itself.
 *
 * Always returns 0.
 */
static int sync_thread_master(void *data)
{
	struct ip_vs_sync_thread_data *tinfo = data;
	struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
	struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
	struct sock *sk = tinfo->sock->sk;
	struct ip_vs_sync_buff *sb;

	pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
		"syncid = %d, id = %d\n",
		ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id);

	for (;;) {
		sb = next_sync_buff(ipvs, ms);
		/* sb may be non-NULL here; it is released after "done:" */
		if (unlikely(kthread_should_stop()))
			break;
		if (!sb) {
			/*
			 * Nothing to send: wait for the check period.
			 * NOTE(review): the task state is not set here;
			 * presumably a helper reached from next_sync_buff()
			 * sets it - confirm against sb_dequeue().
			 */
			schedule_timeout(IPVS_SYNC_CHECK_PERIOD);
			continue;
		}
		/*
		 * ip_vs_send_sync_msg() is negative only for -EAGAIN, so
		 * this loop retries the same buffer until it is accepted.
		 */
		while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
			/* (Ab)use interruptible sleep to avoid increasing
			 * the load avg.  The result of the wait is ignored
			 * on purpose; the stop request is checked below.
			 */
			__wait_event_interruptible(*sk_sleep(sk),
						   sock_writeable(sk) ||
						   kthread_should_stop());
			if (unlikely(kthread_should_stop()))
				goto done;
		}
		ip_vs_sync_buff_release(sb);
	}

done:
	__set_current_state(TASK_RUNNING);
	/* buffer that was in flight when the stop request arrived */
	if (sb)
		ip_vs_sync_buff_release(sb);

	/* clean up the sync_buff queue */
	while ((sb = sb_dequeue(ipvs, ms)))
		ip_vs_sync_buff_release(sb);
	/*
	 * NOTE(review): second reset of the task state; presumably
	 * sb_dequeue() can change it when the queue is empty - confirm.
	 */
	__set_current_state(TASK_RUNNING);

	/* clean up the current sync_buff */
	sb = get_curr_sync_buff(ipvs, ms, 0);
	if (sb)
		ip_vs_sync_buff_release(sb);

	/* release the sending multicast socket */
	sk_release_kernel(tinfo->sock->sk);
	kfree(tinfo);

	return 0;
}
1670 | 1673 | ||
1671 | 1674 | ||
/*
 * Kernel thread body for one backup sync thread.
 *
 * @data is the struct ip_vs_sync_thread_data built by start_sync_thread().
 * The thread sleeps until the socket's receive queue is non-empty or a
 * stop is requested, then drains the queue, passing every received
 * message to ip_vs_process_message().  On exit it releases the socket,
 * the receive buffer and tinfo itself.
 *
 * Always returns 0.
 */
static int sync_thread_backup(void *data)
{
	struct ip_vs_sync_thread_data *tinfo = data;
	struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
	int len;

	pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
		"syncid = %d, id = %d\n",
		ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id);

	while (!kthread_should_stop()) {
		/* the result of the wait is ignored; conditions are rechecked */
		wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
			 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
			 || kthread_should_stop());

		/* do we have data now? */
		while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
			len = ip_vs_receive(tinfo->sock, tinfo->buf,
					ipvs->recv_mesg_maxlen);
			if (len <= 0) {
				/* -EAGAIN is not logged; anything else is */
				if (len != -EAGAIN)
					pr_err("receiving message error\n");
				break;
			}

			ip_vs_process_message(tinfo->net, tinfo->buf, len);
		}
	}

	/* release the receiving multicast socket */
	sk_release_kernel(tinfo->sock->sk);
	kfree(tinfo->buf);
	kfree(tinfo);

	return 0;
}
1708 | 1711 | ||
1709 | 1712 | ||
1710 | int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) | 1713 | int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) |
1711 | { | 1714 | { |
1712 | struct ip_vs_sync_thread_data *tinfo; | 1715 | struct ip_vs_sync_thread_data *tinfo; |
1713 | struct task_struct **array = NULL, *task; | 1716 | struct task_struct **array = NULL, *task; |
1714 | struct socket *sock; | 1717 | struct socket *sock; |
1715 | struct netns_ipvs *ipvs = net_ipvs(net); | 1718 | struct netns_ipvs *ipvs = net_ipvs(net); |
1716 | char *name; | 1719 | char *name; |
1717 | int (*threadfn)(void *data); | 1720 | int (*threadfn)(void *data); |
1718 | int id, count; | 1721 | int id, count; |
1719 | int result = -ENOMEM; | 1722 | int result = -ENOMEM; |
1720 | 1723 | ||
1721 | IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); | 1724 | IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); |
1722 | IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", | 1725 | IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", |
1723 | sizeof(struct ip_vs_sync_conn_v0)); | 1726 | sizeof(struct ip_vs_sync_conn_v0)); |
1724 | 1727 | ||
1725 | if (!ipvs->sync_state) { | 1728 | if (!ipvs->sync_state) { |
1726 | count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX); | 1729 | count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX); |
1727 | ipvs->threads_mask = count - 1; | 1730 | ipvs->threads_mask = count - 1; |
1728 | } else | 1731 | } else |
1729 | count = ipvs->threads_mask + 1; | 1732 | count = ipvs->threads_mask + 1; |
1730 | 1733 | ||
1731 | if (state == IP_VS_STATE_MASTER) { | 1734 | if (state == IP_VS_STATE_MASTER) { |
1732 | if (ipvs->ms) | 1735 | if (ipvs->ms) |
1733 | return -EEXIST; | 1736 | return -EEXIST; |
1734 | 1737 | ||
1735 | strlcpy(ipvs->master_mcast_ifn, mcast_ifn, | 1738 | strlcpy(ipvs->master_mcast_ifn, mcast_ifn, |
1736 | sizeof(ipvs->master_mcast_ifn)); | 1739 | sizeof(ipvs->master_mcast_ifn)); |
1737 | ipvs->master_syncid = syncid; | 1740 | ipvs->master_syncid = syncid; |
1738 | name = "ipvs-m:%d:%d"; | 1741 | name = "ipvs-m:%d:%d"; |
1739 | threadfn = sync_thread_master; | 1742 | threadfn = sync_thread_master; |
1740 | } else if (state == IP_VS_STATE_BACKUP) { | 1743 | } else if (state == IP_VS_STATE_BACKUP) { |
1741 | if (ipvs->backup_threads) | 1744 | if (ipvs->backup_threads) |
1742 | return -EEXIST; | 1745 | return -EEXIST; |
1743 | 1746 | ||
1744 | strlcpy(ipvs->backup_mcast_ifn, mcast_ifn, | 1747 | strlcpy(ipvs->backup_mcast_ifn, mcast_ifn, |
1745 | sizeof(ipvs->backup_mcast_ifn)); | 1748 | sizeof(ipvs->backup_mcast_ifn)); |
1746 | ipvs->backup_syncid = syncid; | 1749 | ipvs->backup_syncid = syncid; |
1747 | name = "ipvs-b:%d:%d"; | 1750 | name = "ipvs-b:%d:%d"; |
1748 | threadfn = sync_thread_backup; | 1751 | threadfn = sync_thread_backup; |
1749 | } else { | 1752 | } else { |
1750 | return -EINVAL; | 1753 | return -EINVAL; |
1751 | } | 1754 | } |
1752 | 1755 | ||
1753 | if (state == IP_VS_STATE_MASTER) { | 1756 | if (state == IP_VS_STATE_MASTER) { |
1754 | struct ipvs_master_sync_state *ms; | 1757 | struct ipvs_master_sync_state *ms; |
1755 | 1758 | ||
1756 | ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL); | 1759 | ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL); |
1757 | if (!ipvs->ms) | 1760 | if (!ipvs->ms) |
1758 | goto out; | 1761 | goto out; |
1759 | ms = ipvs->ms; | 1762 | ms = ipvs->ms; |
1760 | for (id = 0; id < count; id++, ms++) { | 1763 | for (id = 0; id < count; id++, ms++) { |
1761 | INIT_LIST_HEAD(&ms->sync_queue); | 1764 | INIT_LIST_HEAD(&ms->sync_queue); |
1762 | ms->sync_queue_len = 0; | 1765 | ms->sync_queue_len = 0; |
1763 | ms->sync_queue_delay = 0; | 1766 | ms->sync_queue_delay = 0; |
1764 | INIT_DELAYED_WORK(&ms->master_wakeup_work, | 1767 | INIT_DELAYED_WORK(&ms->master_wakeup_work, |
1765 | master_wakeup_work_handler); | 1768 | master_wakeup_work_handler); |
1766 | ms->ipvs = ipvs; | 1769 | ms->ipvs = ipvs; |
1767 | } | 1770 | } |
1768 | } else { | 1771 | } else { |
1769 | array = kzalloc(count * sizeof(struct task_struct *), | 1772 | array = kzalloc(count * sizeof(struct task_struct *), |
1770 | GFP_KERNEL); | 1773 | GFP_KERNEL); |
1771 | if (!array) | 1774 | if (!array) |
1772 | goto out; | 1775 | goto out; |
1773 | } | 1776 | } |
1774 | set_sync_mesg_maxlen(net, state); | 1777 | set_sync_mesg_maxlen(net, state); |
1775 | 1778 | ||
1776 | tinfo = NULL; | 1779 | tinfo = NULL; |
1777 | for (id = 0; id < count; id++) { | 1780 | for (id = 0; id < count; id++) { |
1778 | if (state == IP_VS_STATE_MASTER) | 1781 | if (state == IP_VS_STATE_MASTER) |
1779 | sock = make_send_sock(net, id); | 1782 | sock = make_send_sock(net, id); |
1780 | else | 1783 | else |
1781 | sock = make_receive_sock(net, id); | 1784 | sock = make_receive_sock(net, id); |
1782 | if (IS_ERR(sock)) { | 1785 | if (IS_ERR(sock)) { |
1783 | result = PTR_ERR(sock); | 1786 | result = PTR_ERR(sock); |
1784 | goto outtinfo; | 1787 | goto outtinfo; |
1785 | } | 1788 | } |
1786 | tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); | 1789 | tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); |
1787 | if (!tinfo) | 1790 | if (!tinfo) |
1788 | goto outsocket; | 1791 | goto outsocket; |
1789 | tinfo->net = net; | 1792 | tinfo->net = net; |
1790 | tinfo->sock = sock; | 1793 | tinfo->sock = sock; |
1791 | if (state == IP_VS_STATE_BACKUP) { | 1794 | if (state == IP_VS_STATE_BACKUP) { |
1792 | tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen, | 1795 | tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen, |
1793 | GFP_KERNEL); | 1796 | GFP_KERNEL); |
1794 | if (!tinfo->buf) | 1797 | if (!tinfo->buf) |
1795 | goto outtinfo; | 1798 | goto outtinfo; |
1796 | } else { | 1799 | } else { |
1797 | tinfo->buf = NULL; | 1800 | tinfo->buf = NULL; |
1798 | } | 1801 | } |
1799 | tinfo->id = id; | 1802 | tinfo->id = id; |
1800 | 1803 | ||
1801 | task = kthread_run(threadfn, tinfo, name, ipvs->gen, id); | 1804 | task = kthread_run(threadfn, tinfo, name, ipvs->gen, id); |
1802 | if (IS_ERR(task)) { | 1805 | if (IS_ERR(task)) { |
1803 | result = PTR_ERR(task); | 1806 | result = PTR_ERR(task); |
1804 | goto outtinfo; | 1807 | goto outtinfo; |
1805 | } | 1808 | } |
1806 | tinfo = NULL; | 1809 | tinfo = NULL; |
1807 | if (state == IP_VS_STATE_MASTER) | 1810 | if (state == IP_VS_STATE_MASTER) |
1808 | ipvs->ms[id].master_thread = task; | 1811 | ipvs->ms[id].master_thread = task; |
1809 | else | 1812 | else |
1810 | array[id] = task; | 1813 | array[id] = task; |
1811 | } | 1814 | } |
1812 | 1815 | ||
1813 | /* mark as active */ | 1816 | /* mark as active */ |
1814 | 1817 | ||
1815 | if (state == IP_VS_STATE_BACKUP) | 1818 | if (state == IP_VS_STATE_BACKUP) |
1816 | ipvs->backup_threads = array; | 1819 | ipvs->backup_threads = array; |
1817 | spin_lock_bh(&ipvs->sync_buff_lock); | 1820 | spin_lock_bh(&ipvs->sync_buff_lock); |
1818 | ipvs->sync_state |= state; | 1821 | ipvs->sync_state |= state; |
1819 | spin_unlock_bh(&ipvs->sync_buff_lock); | 1822 | spin_unlock_bh(&ipvs->sync_buff_lock); |
1820 | 1823 | ||
1821 | /* increase the module use count */ | 1824 | /* increase the module use count */ |
1822 | ip_vs_use_count_inc(); | 1825 | ip_vs_use_count_inc(); |
1823 | 1826 | ||
1824 | return 0; | 1827 | return 0; |
1825 | 1828 | ||
1826 | outsocket: | 1829 | outsocket: |
1827 | sk_release_kernel(sock->sk); | 1830 | sk_release_kernel(sock->sk); |
1828 | 1831 | ||
1829 | outtinfo: | 1832 | outtinfo: |
1830 | if (tinfo) { | 1833 | if (tinfo) { |
1831 | sk_release_kernel(tinfo->sock->sk); | 1834 | sk_release_kernel(tinfo->sock->sk); |
1832 | kfree(tinfo->buf); | 1835 | kfree(tinfo->buf); |
1833 | kfree(tinfo); | 1836 | kfree(tinfo); |
1834 | } | 1837 | } |
1835 | count = id; | 1838 | count = id; |
1836 | while (count-- > 0) { | 1839 | while (count-- > 0) { |
1837 | if (state == IP_VS_STATE_MASTER) | 1840 | if (state == IP_VS_STATE_MASTER) |
1838 | kthread_stop(ipvs->ms[count].master_thread); | 1841 | kthread_stop(ipvs->ms[count].master_thread); |
1839 | else | 1842 | else |
1840 | kthread_stop(array[count]); | 1843 | kthread_stop(array[count]); |
1841 | } | 1844 | } |
1842 | kfree(array); | 1845 | kfree(array); |
1843 | 1846 | ||
1844 | out: | 1847 | out: |
1845 | if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { | 1848 | if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { |
1846 | kfree(ipvs->ms); | 1849 | kfree(ipvs->ms); |
1847 | ipvs->ms = NULL; | 1850 | ipvs->ms = NULL; |
1848 | } | 1851 | } |
1849 | return result; | 1852 | return result; |
1850 | } | 1853 | } |
1851 | 1854 | ||
1852 | 1855 | ||
1853 | int stop_sync_thread(struct net *net, int state) | 1856 | int stop_sync_thread(struct net *net, int state) |
1854 | { | 1857 | { |
1855 | struct netns_ipvs *ipvs = net_ipvs(net); | 1858 | struct netns_ipvs *ipvs = net_ipvs(net); |
1856 | struct task_struct **array; | 1859 | struct task_struct **array; |
1857 | int id; | 1860 | int id; |
1858 | int retc = -EINVAL; | 1861 | int retc = -EINVAL; |
1859 | 1862 | ||
1860 | IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); | 1863 | IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); |
1861 | 1864 | ||
1862 | if (state == IP_VS_STATE_MASTER) { | 1865 | if (state == IP_VS_STATE_MASTER) { |
1863 | if (!ipvs->ms) | 1866 | if (!ipvs->ms) |
1864 | return -ESRCH; | 1867 | return -ESRCH; |
1865 | 1868 | ||
1866 | /* | 1869 | /* |
1867 | * The lock synchronizes with sb_queue_tail(), so that we don't | 1870 | * The lock synchronizes with sb_queue_tail(), so that we don't |
1868 | * add sync buffers to the queue, when we are already in | 1871 | * add sync buffers to the queue, when we are already in |
1869 | * progress of stopping the master sync daemon. | 1872 | * progress of stopping the master sync daemon. |
1870 | */ | 1873 | */ |
1871 | 1874 | ||
1872 | spin_lock_bh(&ipvs->sync_buff_lock); | 1875 | spin_lock_bh(&ipvs->sync_buff_lock); |
1873 | spin_lock(&ipvs->sync_lock); | 1876 | spin_lock(&ipvs->sync_lock); |
1874 | ipvs->sync_state &= ~IP_VS_STATE_MASTER; | 1877 | ipvs->sync_state &= ~IP_VS_STATE_MASTER; |
1875 | spin_unlock(&ipvs->sync_lock); | 1878 | spin_unlock(&ipvs->sync_lock); |
1876 | spin_unlock_bh(&ipvs->sync_buff_lock); | 1879 | spin_unlock_bh(&ipvs->sync_buff_lock); |
1877 | 1880 | ||
1878 | retc = 0; | 1881 | retc = 0; |
1879 | for (id = ipvs->threads_mask; id >= 0; id--) { | 1882 | for (id = ipvs->threads_mask; id >= 0; id--) { |
1880 | struct ipvs_master_sync_state *ms = &ipvs->ms[id]; | 1883 | struct ipvs_master_sync_state *ms = &ipvs->ms[id]; |
1881 | int ret; | 1884 | int ret; |
1882 | 1885 | ||
1883 | pr_info("stopping master sync thread %d ...\n", | 1886 | pr_info("stopping master sync thread %d ...\n", |
1884 | task_pid_nr(ms->master_thread)); | 1887 | task_pid_nr(ms->master_thread)); |
1885 | cancel_delayed_work_sync(&ms->master_wakeup_work); | 1888 | cancel_delayed_work_sync(&ms->master_wakeup_work); |
1886 | ret = kthread_stop(ms->master_thread); | 1889 | ret = kthread_stop(ms->master_thread); |
1887 | if (retc >= 0) | 1890 | if (retc >= 0) |
1888 | retc = ret; | 1891 | retc = ret; |
1889 | } | 1892 | } |
1890 | kfree(ipvs->ms); | 1893 | kfree(ipvs->ms); |
1891 | ipvs->ms = NULL; | 1894 | ipvs->ms = NULL; |
1892 | } else if (state == IP_VS_STATE_BACKUP) { | 1895 | } else if (state == IP_VS_STATE_BACKUP) { |
1893 | if (!ipvs->backup_threads) | 1896 | if (!ipvs->backup_threads) |
1894 | return -ESRCH; | 1897 | return -ESRCH; |
1895 | 1898 | ||
1896 | ipvs->sync_state &= ~IP_VS_STATE_BACKUP; | 1899 | ipvs->sync_state &= ~IP_VS_STATE_BACKUP; |
1897 | array = ipvs->backup_threads; | 1900 | array = ipvs->backup_threads; |
1898 | retc = 0; | 1901 | retc = 0; |
1899 | for (id = ipvs->threads_mask; id >= 0; id--) { | 1902 | for (id = ipvs->threads_mask; id >= 0; id--) { |
1900 | int ret; | 1903 | int ret; |
1901 | 1904 | ||
1902 | pr_info("stopping backup sync thread %d ...\n", | 1905 | pr_info("stopping backup sync thread %d ...\n", |
1903 | task_pid_nr(array[id])); | 1906 | task_pid_nr(array[id])); |
1904 | ret = kthread_stop(array[id]); | 1907 | ret = kthread_stop(array[id]); |
1905 | if (retc >= 0) | 1908 | if (retc >= 0) |
1906 | retc = ret; | 1909 | retc = ret; |
1907 | } | 1910 | } |
1908 | kfree(array); | 1911 | kfree(array); |
1909 | ipvs->backup_threads = NULL; | 1912 | ipvs->backup_threads = NULL; |
1910 | } | 1913 | } |
1911 | 1914 | ||
1912 | /* decrease the module use count */ | 1915 | /* decrease the module use count */ |
1913 | ip_vs_use_count_dec(); | 1916 | ip_vs_use_count_dec(); |
1914 | 1917 | ||
1915 | return retc; | 1918 | return retc; |
1916 | } | 1919 | } |
1917 | 1920 | ||
1918 | /* | 1921 | /* |
1919 | * Initialize data struct for each netns | 1922 | * Initialize data struct for each netns |
1920 | */ | 1923 | */ |
1921 | int __net_init ip_vs_sync_net_init(struct net *net) | 1924 | int __net_init ip_vs_sync_net_init(struct net *net) |
1922 | { | 1925 | { |
1923 | struct netns_ipvs *ipvs = net_ipvs(net); | 1926 | struct netns_ipvs *ipvs = net_ipvs(net); |
1924 | 1927 | ||
1925 | __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key); | 1928 | __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key); |
1926 | spin_lock_init(&ipvs->sync_lock); | 1929 | spin_lock_init(&ipvs->sync_lock); |
1927 | spin_lock_init(&ipvs->sync_buff_lock); | 1930 | spin_lock_init(&ipvs->sync_buff_lock); |
1928 | return 0; | 1931 | return 0; |
1929 | } | 1932 | } |
1930 | 1933 | ||
1931 | void ip_vs_sync_net_cleanup(struct net *net) | 1934 | void ip_vs_sync_net_cleanup(struct net *net) |
1932 | { | 1935 | { |
1933 | int retc; | 1936 | int retc; |
1934 | struct netns_ipvs *ipvs = net_ipvs(net); | 1937 | struct netns_ipvs *ipvs = net_ipvs(net); |
1935 | 1938 | ||
1936 | mutex_lock(&ipvs->sync_mutex); | 1939 | mutex_lock(&ipvs->sync_mutex); |
1937 | retc = stop_sync_thread(net, IP_VS_STATE_MASTER); | 1940 | retc = stop_sync_thread(net, IP_VS_STATE_MASTER); |
1938 | if (retc && retc != -ESRCH) | 1941 | if (retc && retc != -ESRCH) |
1939 | pr_err("Failed to stop Master Daemon\n"); | 1942 | pr_err("Failed to stop Master Daemon\n"); |
1940 | 1943 | ||
1941 | retc = stop_sync_thread(net, IP_VS_STATE_BACKUP); | 1944 | retc = stop_sync_thread(net, IP_VS_STATE_BACKUP); |
1942 | if (retc && retc != -ESRCH) | 1945 | if (retc && retc != -ESRCH) |
1943 | pr_err("Failed to stop Backup Daemon\n"); | 1946 | pr_err("Failed to stop Backup Daemon\n"); |
1944 | mutex_unlock(&ipvs->sync_mutex); | 1947 | mutex_unlock(&ipvs->sync_mutex); |
1945 | } | 1948 | } |
1946 | 1949 |