Commit 16e5726269611b71c930054ffe9b858c1cea88eb
af_unix: dont send SCM_CREDENTIALS by default
Since commit 7361c36c5224 (af_unix: Allow credentials to work across user and pid namespaces), af_unix performance has dropped a lot.

This is because we now take a reference on pid and cred in each write(), and release them in read(), which is usually done from another process, possibly on another CPU. This triggers false sharing.

# Events: 154K cycles
#
# Overhead  Command    Shared Object       Symbol
# ........  .......  ..................  .........................
#
    10.40%  hackbench  [kernel.kallsyms]  [k] put_pid
     8.60%  hackbench  [kernel.kallsyms]  [k] unix_stream_recvmsg
     7.87%  hackbench  [kernel.kallsyms]  [k] unix_stream_sendmsg
     6.11%  hackbench  [kernel.kallsyms]  [k] do_raw_spin_lock
     4.95%  hackbench  [kernel.kallsyms]  [k] unix_scm_to_skb
     4.87%  hackbench  [kernel.kallsyms]  [k] pid_nr_ns
     4.34%  hackbench  [kernel.kallsyms]  [k] cred_to_ucred
     2.39%  hackbench  [kernel.kallsyms]  [k] unix_destruct_scm
     2.24%  hackbench  [kernel.kallsyms]  [k] sub_preempt_count
     1.75%  hackbench  [kernel.kallsyms]  [k] fget_light
     1.51%  hackbench  [kernel.kallsyms]  [k] __mutex_lock_interruptible_slowpath
     1.42%  hackbench  [kernel.kallsyms]  [k] sock_alloc_send_pskb

This patch includes SCM_CREDENTIALS information in an af_unix message/skb only if requested by the sender (see man 7 unix for details on how to include ancillary data using the sendmsg() system call).

Note: This might break buggy applications that expected SCM_CREDENTIALS from an unaware write() system call while the receiver was not using the SO_PASSCRED socket option.

If SOCK_PASSCRED is set on the source or destination socket, we still include credentials for mere write() syscalls.

Performance boost in hackbench: more than 50% gain on a 16-thread machine (2 quad-core CPUs, 2 threads per core).

hackbench 20 thread 2000

4.228 sec instead of 9.102 sec

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 4 changed files with 33 additions and 11 deletions (side-by-side diff):
... | ... | @@ -49,7 +49,7 @@ |
49 | 49 | struct pid *pid, const struct cred *cred) |
50 | 50 | { |
51 | 51 | scm->pid = get_pid(pid); |
52 | - scm->cred = get_cred(cred); | |
52 | + scm->cred = cred ? get_cred(cred) : NULL; | |
53 | 53 | cred_to_ucred(pid, cred, &scm->creds); |
54 | 54 | } |
55 | 55 | |
... | ... | @@ -73,8 +73,7 @@ |
73 | 73 | static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, |
74 | 74 | struct scm_cookie *scm) |
75 | 75 | { |
76 | - scm_set_cred(scm, task_tgid(current), current_cred()); | |
77 | - scm->fp = NULL; | |
76 | + memset(scm, 0, sizeof(*scm)); | |
78 | 77 | unix_get_peersec_dgram(sock, scm); |
79 | 78 | if (msg->msg_controllen <= 0) |
80 | 79 | return 0; |
... | ... | @@ -173,7 +173,7 @@ |
173 | 173 | if (err) |
174 | 174 | goto error; |
175 | 175 | |
176 | - if (pid_vnr(p->pid) != p->creds.pid) { | |
176 | + if (!p->pid || pid_vnr(p->pid) != p->creds.pid) { | |
177 | 177 | struct pid *pid; |
178 | 178 | err = -ESRCH; |
179 | 179 | pid = find_get_pid(p->creds.pid); |
... | ... | @@ -183,8 +183,9 @@ |
183 | 183 | p->pid = pid; |
184 | 184 | } |
185 | 185 | |
186 | - if ((p->cred->euid != p->creds.uid) || | |
187 | - (p->cred->egid != p->creds.gid)) { | |
186 | + if (!p->cred || | |
187 | + (p->cred->euid != p->creds.uid) || | |
188 | + (p->cred->egid != p->creds.gid)) { | |
188 | 189 | struct cred *cred; |
189 | 190 | err = -ENOMEM; |
190 | 191 | cred = prepare_creds(); |
... | ... | @@ -193,7 +194,8 @@ |
193 | 194 | |
194 | 195 | cred->uid = cred->euid = p->creds.uid; |
195 | 196 | cred->gid = cred->egid = p->creds.gid; |
196 | - put_cred(p->cred); | |
197 | + if (p->cred) | |
198 | + put_cred(p->cred); | |
197 | 199 | p->cred = cred; |
198 | 200 | } |
199 | 201 | break; |
... | ... | @@ -1324,10 +1324,9 @@ |
1324 | 1324 | if (msg->msg_flags&MSG_OOB) |
1325 | 1325 | return -EOPNOTSUPP; |
1326 | 1326 | |
1327 | - if (NULL == siocb->scm) { | |
1327 | + if (NULL == siocb->scm) | |
1328 | 1328 | siocb->scm = &scm; |
1329 | - memset(&scm, 0, sizeof(scm)); | |
1330 | - } | |
1329 | + | |
1331 | 1330 | err = scm_send(sock, msg, siocb->scm); |
1332 | 1331 | if (err < 0) |
1333 | 1332 | return err; |
... | ... | @@ -1381,8 +1381,10 @@ |
1381 | 1381 | static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) |
1382 | 1382 | { |
1383 | 1383 | int err = 0; |
1384 | + | |
1384 | 1385 | UNIXCB(skb).pid = get_pid(scm->pid); |
1385 | - UNIXCB(skb).cred = get_cred(scm->cred); | |
1386 | + if (scm->cred) | |
1387 | + UNIXCB(skb).cred = get_cred(scm->cred); | |
1386 | 1388 | UNIXCB(skb).fp = NULL; |
1387 | 1389 | if (scm->fp && send_fds) |
1388 | 1390 | err = unix_attach_fds(scm, skb); |
... | ... | @@ -1392,6 +1394,24 @@ |
1392 | 1394 | } |
1393 | 1395 | |
1394 | 1396 | /* |
1397 | + * Some apps rely on write() giving SCM_CREDENTIALS | |
1398 | + * We include credentials if source or destination socket | |
1399 | + * asserted SOCK_PASSCRED. | |
1400 | + */ | |
1401 | +static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, | |
1402 | + const struct sock *other) | |
1403 | +{ | |
1404 | + if (UNIXCB(skb).cred) | |
1405 | + return; | |
1406 | + if (test_bit(SOCK_PASSCRED, &sock->flags) || | |
1407 | + !other->sk_socket || | |
1408 | + test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { | |
1409 | + UNIXCB(skb).pid = get_pid(task_tgid(current)); | |
1410 | + UNIXCB(skb).cred = get_current_cred(); | |
1411 | + } | |
1412 | +} | |
1413 | + | |
1414 | +/* | |
1395 | 1415 | * Send AF_UNIX data. |
1396 | 1416 | */ |
1397 | 1417 | |
... | ... | @@ -1538,6 +1558,7 @@ |
1538 | 1558 | |
1539 | 1559 | if (sock_flag(other, SOCK_RCVTSTAMP)) |
1540 | 1560 | __net_timestamp(skb); |
1561 | + maybe_add_creds(skb, sock, other); | |
1541 | 1562 | skb_queue_tail(&other->sk_receive_queue, skb); |
1542 | 1563 | if (max_level > unix_sk(other)->recursion_level) |
1543 | 1564 | unix_sk(other)->recursion_level = max_level; |
... | ... | @@ -1652,6 +1673,7 @@ |
1652 | 1673 | (other->sk_shutdown & RCV_SHUTDOWN)) |
1653 | 1674 | goto pipe_err_free; |
1654 | 1675 | |
1676 | + maybe_add_creds(skb, sock, other); | |
1655 | 1677 | skb_queue_tail(&other->sk_receive_queue, skb); |
1656 | 1678 | if (max_level > unix_sk(other)->recursion_level) |
1657 | 1679 | unix_sk(other)->recursion_level = max_level; |
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce
-
mentioned in commit e0e3ce