Commit 16e5726269611b71c930054ffe9b858c1cea88eb

Authored by Eric Dumazet
Committed by David S. Miller
1 parent a9e9fd7182

af_unix: dont send SCM_CREDENTIALS by default

Since commit 7361c36c5224 (af_unix: Allow credentials to work across
user and pid namespaces) af_unix performance dropped a lot.

This is because we now take a reference on pid and cred in each write(),
and release them in read(), usually done from another process,
possibly on another cpu. This triggers false sharing.

# Events: 154K cycles
#
# Overhead  Command       Shared Object        Symbol
# ........  .......  ..................  .........................
#
    10.40%  hackbench  [kernel.kallsyms]   [k] put_pid
     8.60%  hackbench  [kernel.kallsyms]   [k] unix_stream_recvmsg
     7.87%  hackbench  [kernel.kallsyms]   [k] unix_stream_sendmsg
     6.11%  hackbench  [kernel.kallsyms]   [k] do_raw_spin_lock
     4.95%  hackbench  [kernel.kallsyms]   [k] unix_scm_to_skb
     4.87%  hackbench  [kernel.kallsyms]   [k] pid_nr_ns
     4.34%  hackbench  [kernel.kallsyms]   [k] cred_to_ucred
     2.39%  hackbench  [kernel.kallsyms]   [k] unix_destruct_scm
     2.24%  hackbench  [kernel.kallsyms]   [k] sub_preempt_count
     1.75%  hackbench  [kernel.kallsyms]   [k] fget_light
     1.51%  hackbench  [kernel.kallsyms]   [k] __mutex_lock_interruptible_slowpath
     1.42%  hackbench  [kernel.kallsyms]   [k] sock_alloc_send_pskb

This patch includes SCM_CREDENTIALS information in an af_unix message/skb
only if requested by the sender (see man 7 unix for details on how to
include ancillary data using the sendmsg() system call).

Note: This might break buggy applications that expected SCM_CREDENTIALS
from an unaware write() system call while the receiver is not using the
SO_PASSCRED socket option.

If SOCK_PASSCRED is set on source or destination socket, we still
include credentials for mere write() syscalls.

Performance boost in hackbench : more than 50% gain on a 16 thread
machine (2 quad-core cpus, 2 threads per core)

hackbench 20 thread 2000

4.228 sec instead of 9.102 sec

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 4 changed files with 33 additions and 11 deletions Side-by-side Diff

... ... @@ -49,7 +49,7 @@
49 49 struct pid *pid, const struct cred *cred)
50 50 {
51 51 scm->pid = get_pid(pid);
52   - scm->cred = get_cred(cred);
  52 + scm->cred = cred ? get_cred(cred) : NULL;
53 53 cred_to_ucred(pid, cred, &scm->creds);
54 54 }
55 55  
... ... @@ -73,8 +73,7 @@
73 73 static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
74 74 struct scm_cookie *scm)
75 75 {
76   - scm_set_cred(scm, task_tgid(current), current_cred());
77   - scm->fp = NULL;
  76 + memset(scm, 0, sizeof(*scm));
78 77 unix_get_peersec_dgram(sock, scm);
79 78 if (msg->msg_controllen <= 0)
80 79 return 0;
... ... @@ -173,7 +173,7 @@
173 173 if (err)
174 174 goto error;
175 175  
176   - if (pid_vnr(p->pid) != p->creds.pid) {
  176 + if (!p->pid || pid_vnr(p->pid) != p->creds.pid) {
177 177 struct pid *pid;
178 178 err = -ESRCH;
179 179 pid = find_get_pid(p->creds.pid);
... ... @@ -183,8 +183,9 @@
183 183 p->pid = pid;
184 184 }
185 185  
186   - if ((p->cred->euid != p->creds.uid) ||
187   - (p->cred->egid != p->creds.gid)) {
  186 + if (!p->cred ||
  187 + (p->cred->euid != p->creds.uid) ||
  188 + (p->cred->egid != p->creds.gid)) {
188 189 struct cred *cred;
189 190 err = -ENOMEM;
190 191 cred = prepare_creds();
... ... @@ -193,7 +194,8 @@
193 194  
194 195 cred->uid = cred->euid = p->creds.uid;
195 196 cred->gid = cred->egid = p->creds.gid;
196   - put_cred(p->cred);
  197 + if (p->cred)
  198 + put_cred(p->cred);
197 199 p->cred = cred;
198 200 }
199 201 break;
net/netlink/af_netlink.c
... ... @@ -1324,10 +1324,9 @@
1324 1324 if (msg->msg_flags&MSG_OOB)
1325 1325 return -EOPNOTSUPP;
1326 1326  
1327   - if (NULL == siocb->scm) {
  1327 + if (NULL == siocb->scm)
1328 1328 siocb->scm = &scm;
1329   - memset(&scm, 0, sizeof(scm));
1330   - }
  1329 +
1331 1330 err = scm_send(sock, msg, siocb->scm);
1332 1331 if (err < 0)
1333 1332 return err;
... ... @@ -1381,8 +1381,10 @@
1381 1381 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1382 1382 {
1383 1383 int err = 0;
  1384 +
1384 1385 UNIXCB(skb).pid = get_pid(scm->pid);
1385   - UNIXCB(skb).cred = get_cred(scm->cred);
  1386 + if (scm->cred)
  1387 + UNIXCB(skb).cred = get_cred(scm->cred);
1386 1388 UNIXCB(skb).fp = NULL;
1387 1389 if (scm->fp && send_fds)
1388 1390 err = unix_attach_fds(scm, skb);
... ... @@ -1392,6 +1394,24 @@
1392 1394 }
1393 1395  
1394 1396 /*
  1397 + * Some apps rely on write() giving SCM_CREDENTIALS
  1398 + * We include credentials if source or destination socket
  1399 + * asserted SOCK_PASSCRED.
  1400 + */
  1401 +static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
  1402 + const struct sock *other)
  1403 +{
  1404 + if (UNIXCB(skb).cred)
  1405 + return;
  1406 + if (test_bit(SOCK_PASSCRED, &sock->flags) ||
  1407 + !other->sk_socket ||
  1408 + test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
  1409 + UNIXCB(skb).pid = get_pid(task_tgid(current));
  1410 + UNIXCB(skb).cred = get_current_cred();
  1411 + }
  1412 +}
  1413 +
  1414 +/*
1395 1415 * Send AF_UNIX data.
1396 1416 */
1397 1417  
... ... @@ -1538,6 +1558,7 @@
1538 1558  
1539 1559 if (sock_flag(other, SOCK_RCVTSTAMP))
1540 1560 __net_timestamp(skb);
  1561 + maybe_add_creds(skb, sock, other);
1541 1562 skb_queue_tail(&other->sk_receive_queue, skb);
1542 1563 if (max_level > unix_sk(other)->recursion_level)
1543 1564 unix_sk(other)->recursion_level = max_level;
... ... @@ -1652,6 +1673,7 @@
1652 1673 (other->sk_shutdown & RCV_SHUTDOWN))
1653 1674 goto pipe_err_free;
1654 1675  
  1676 + maybe_add_creds(skb, sock, other);
1655 1677 skb_queue_tail(&other->sk_receive_queue, skb);
1656 1678 if (max_level > unix_sk(other)->recursion_level)
1657 1679 unix_sk(other)->recursion_level = max_level;