Blame view
net/core/sock_reuseport.c
6.7 KB
b24413180 License cleanup: ... |
1 |
// SPDX-License-Identifier: GPL-2.0 |
ef456144d soreuseport: defi... |
2 3 4 |
/* * To speed up listener socket lookup, create an array to store all sockets * listening on the same port. This allows a decision to be made after finding |
538950a1b soreuseport: sets... |
5 6 |
* the first socket. An optional BPF program can also be configured for * selecting the socket index from the array of available sockets. |
ef456144d soreuseport: defi... |
7 8 9 |
*/ #include <net/sock_reuseport.h> |
538950a1b soreuseport: sets... |
10 |
#include <linux/bpf.h> |
ef456144d soreuseport: defi... |
11 12 13 14 15 |
#include <linux/rcupdate.h>

/* Number of socket slots requested when reuseport_alloc() creates a group. */
#define INIT_SOCKS 128

/*
 * Serializes updates to reuseport groups.  Writers in this file take it
 * with spin_lock_bh().
 */
static DEFINE_SPINLOCK(reuseport_lock);
822f9bb10 soreuseport: use ... |
16 |
static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks) |
ef456144d soreuseport: defi... |
17 |
{ |
822f9bb10 soreuseport: use ... |
18 |
unsigned int size = sizeof(struct sock_reuseport) + |
ef456144d soreuseport: defi... |
19 20 21 22 23 24 25 |
sizeof(struct sock *) * max_socks; struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC); if (!reuse) return NULL; reuse->max_socks = max_socks; |
538950a1b soreuseport: sets... |
26 |
RCU_INIT_POINTER(reuse->prog, NULL); |
ef456144d soreuseport: defi... |
27 28 29 30 31 32 33 34 35 36 37 |
return reuse; } int reuseport_alloc(struct sock *sk) { struct sock_reuseport *reuse; /* bh lock used since this function call may precede hlist lock in * soft irq of receive path or setsockopt from process context */ spin_lock_bh(&reuseport_lock); |
1b5f962e7 soreuseport: fix ... |
38 39 40 41 42 43 44 |
/* Allocation attempts can occur concurrently via the setsockopt path * and the bind/hash path. Nothing to do when we lose the race. */ if (rcu_dereference_protected(sk->sk_reuseport_cb, lockdep_is_held(&reuseport_lock))) goto out; |
ef456144d soreuseport: defi... |
45 46 47 48 49 50 51 52 53 |
reuse = __reuseport_alloc(INIT_SOCKS); if (!reuse) { spin_unlock_bh(&reuseport_lock); return -ENOMEM; } reuse->socks[0] = sk; reuse->num_socks = 1; rcu_assign_pointer(sk->sk_reuseport_cb, reuse); |
1b5f962e7 soreuseport: fix ... |
54 |
out: |
ef456144d soreuseport: defi... |
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
spin_unlock_bh(&reuseport_lock); return 0; } EXPORT_SYMBOL(reuseport_alloc); static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) { struct sock_reuseport *more_reuse; u32 more_socks_size, i; more_socks_size = reuse->max_socks * 2U; if (more_socks_size > U16_MAX) return NULL; more_reuse = __reuseport_alloc(more_socks_size); if (!more_reuse) return NULL; more_reuse->max_socks = more_socks_size; more_reuse->num_socks = reuse->num_socks; |
538950a1b soreuseport: sets... |
76 |
more_reuse->prog = reuse->prog; |
ef456144d soreuseport: defi... |
77 78 79 80 81 82 83 |
memcpy(more_reuse->socks, reuse->socks, reuse->num_socks * sizeof(struct sock *)); for (i = 0; i < reuse->num_socks; ++i) rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb, more_reuse); |
538950a1b soreuseport: sets... |
84 85 86 87 |
/* Note: we use kfree_rcu here instead of reuseport_free_rcu so * that reuse and more_reuse can temporarily share a reference * to prog. */ |
ef456144d soreuseport: defi... |
88 89 90 |
kfree_rcu(reuse, rcu); return more_reuse; } |
981f20bc7 soreuseport: fix ... |
91 92 93 94 95 96 97 98 99 |
/*
 * RCU callback that releases a reuseport group: destroys the attached BPF
 * program, if any, and then frees the group itself.
 */
static void reuseport_free_rcu(struct rcu_head *head)
{
	struct sock_reuseport *grp =
		container_of(head, struct sock_reuseport, rcu);

	if (grp->prog)
		bpf_prog_destroy(grp->prog);

	kfree(grp);
}
ef456144d soreuseport: defi... |
100 101 102 103 104 105 |
/** * reuseport_add_sock - Add a socket to the reuseport group of another. * @sk: New socket to add to the group. * @sk2: Socket belonging to the existing reuseport group. * May return ENOMEM and not add socket to group under memory pressure. */ |
b4ace4f1a soreuseport: fix ... |
106 |
int reuseport_add_sock(struct sock *sk, struct sock *sk2) |
ef456144d soreuseport: defi... |
107 |
{ |
981f20bc7 soreuseport: fix ... |
108 |
struct sock_reuseport *old_reuse, *reuse; |
ef456144d soreuseport: defi... |
109 |
|
b4ace4f1a soreuseport: fix ... |
110 111 112 113 114 115 |
if (!rcu_access_pointer(sk2->sk_reuseport_cb)) { int err = reuseport_alloc(sk2); if (err) return err; } |
ef456144d soreuseport: defi... |
116 117 |
spin_lock_bh(&reuseport_lock); reuse = rcu_dereference_protected(sk2->sk_reuseport_cb, |
981f20bc7 soreuseport: fix ... |
118 119 120 121 122 123 124 |
lockdep_is_held(&reuseport_lock)); old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb, lockdep_is_held(&reuseport_lock)); if (old_reuse && old_reuse->num_socks != 1) { spin_unlock_bh(&reuseport_lock); return -EBUSY; } |
ef456144d soreuseport: defi... |
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
if (reuse->num_socks == reuse->max_socks) { reuse = reuseport_grow(reuse); if (!reuse) { spin_unlock_bh(&reuseport_lock); return -ENOMEM; } } reuse->socks[reuse->num_socks] = sk; /* paired with smp_rmb() in reuseport_select_sock() */ smp_wmb(); reuse->num_socks++; rcu_assign_pointer(sk->sk_reuseport_cb, reuse); spin_unlock_bh(&reuseport_lock); |
981f20bc7 soreuseport: fix ... |
141 142 |
if (old_reuse) call_rcu(&old_reuse->rcu, reuseport_free_rcu); |
ef456144d soreuseport: defi... |
143 144 |
return 0; } |
ef456144d soreuseport: defi... |
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
void reuseport_detach_sock(struct sock *sk) { struct sock_reuseport *reuse; int i; spin_lock_bh(&reuseport_lock); reuse = rcu_dereference_protected(sk->sk_reuseport_cb, lockdep_is_held(&reuseport_lock)); rcu_assign_pointer(sk->sk_reuseport_cb, NULL); for (i = 0; i < reuse->num_socks; i++) { if (reuse->socks[i] == sk) { reuse->socks[i] = reuse->socks[reuse->num_socks - 1]; reuse->num_socks--; if (reuse->num_socks == 0) |
538950a1b soreuseport: sets... |
161 |
call_rcu(&reuse->rcu, reuseport_free_rcu); |
ef456144d soreuseport: defi... |
162 163 164 165 166 167 |
break; } } spin_unlock_bh(&reuseport_lock); } EXPORT_SYMBOL(reuseport_detach_sock); |
538950a1b soreuseport: sets... |
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
/*
 * Run @prog on @skb and use its result as an index into @reuse->socks.
 *
 * A shared skb is cloned first and the clone is used instead.  The data
 * pointer is advanced by @hdr_len for the duration of the program run and
 * restored afterwards.
 *
 * Returns the selected socket, or NULL if cloning or pulling the header
 * fails, or if the program's result is not below @socks.
 */
static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks,
			    struct bpf_prog *prog, struct sk_buff *skb,
			    int hdr_len)
{
	struct sk_buff *clone = NULL;
	u32 picked;

	if (skb_shared(skb)) {
		clone = skb_clone(skb, GFP_ATOMIC);
		if (!clone)
			return NULL;
		skb = clone;
	}

	/* temporarily advance data past protocol header */
	if (!pskb_pull(skb, hdr_len)) {
		kfree_skb(clone);
		return NULL;
	}

	picked = bpf_prog_run_save_cb(prog, skb);

	/* undo the pull, then drop the clone if one was made */
	__skb_push(skb, hdr_len);
	consume_skb(clone);

	return picked < socks ? reuse->socks[picked] : NULL;
}
ef456144d soreuseport: defi... |
197 198 199 |
/** * reuseport_select_sock - Select a socket from an SO_REUSEPORT group. * @sk: First socket in the group. |
538950a1b soreuseport: sets... |
200 201 202 203 204 |
* @hash: When no BPF filter is available, use this hash to select. * @skb: skb to run through BPF filter. * @hdr_len: BPF filter expects skb data pointer at payload data. If * the skb does not yet point at the payload, this parameter represents * how far the pointer needs to advance to reach the payload. |
ef456144d soreuseport: defi... |
205 206 |
* Returns a socket that should receive the packet (or NULL on error). */ |
538950a1b soreuseport: sets... |
207 208 209 210 |
struct sock *reuseport_select_sock(struct sock *sk, u32 hash, struct sk_buff *skb, int hdr_len) |
ef456144d soreuseport: defi... |
211 212 |
{ struct sock_reuseport *reuse; |
538950a1b soreuseport: sets... |
213 |
struct bpf_prog *prog; |
ef456144d soreuseport: defi... |
214 215 216 217 218 219 220 221 222 |
struct sock *sk2 = NULL; u16 socks; rcu_read_lock(); reuse = rcu_dereference(sk->sk_reuseport_cb); /* if memory allocation failed or add call is not yet complete */ if (!reuse) goto out; |
538950a1b soreuseport: sets... |
223 |
prog = rcu_dereference(reuse->prog); |
ef456144d soreuseport: defi... |
224 225 226 227 |
socks = READ_ONCE(reuse->num_socks); if (likely(socks)) { /* paired with smp_wmb() in reuseport_add_sock() */ smp_rmb(); |
538950a1b soreuseport: sets... |
228 229 230 231 |
if (prog && skb) sk2 = run_bpf(reuse, socks, prog, skb, hdr_len); else sk2 = reuse->socks[reciprocal_scale(hash, socks)]; |
ef456144d soreuseport: defi... |
232 233 234 235 236 237 238 |
} out: rcu_read_unlock(); return sk2; } EXPORT_SYMBOL(reuseport_select_sock); |
538950a1b soreuseport: sets... |
239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 |
/*
 * Install @prog as the BPF program of @sk's reuseport group.
 *
 * Returns the program that was attached before (possibly NULL).  This
 * function does not release it.
 */
struct bpf_prog *reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
{
	struct bpf_prog *previous;
	struct sock_reuseport *grp;

	spin_lock_bh(&reuseport_lock);

	grp = rcu_dereference_protected(sk->sk_reuseport_cb,
					lockdep_is_held(&reuseport_lock));
	previous = rcu_dereference_protected(grp->prog,
					     lockdep_is_held(&reuseport_lock));
	rcu_assign_pointer(grp->prog, prog);

	spin_unlock_bh(&reuseport_lock);

	return previous;
}
EXPORT_SYMBOL(reuseport_attach_prog);