Commit aadd06e5c56b9ff5117ec77e59eada43dc46e2fc

Authored by Jens Axboe
1 parent b3cf257623

[PATCH] splice: fix problems with sys_tee()

Several issues noticed/fixed:

- We cannot reliably block in link_pipe() while holding both input and output
  mutexes. So do preparatory checks before locking down both mutexes and doing
  the link.

- The ipipe->nrbufs vs i check was bad, because we could have dropped the
  ipipe lock in-between. This causes us to potentially look at unknown
  buffers if we were racing with someone else reading this pipe.

Signed-off-by: Jens Axboe <axboe@suse.de>

Showing 1 changed file with 133 additions and 105 deletions Side-by-side Diff

... ... @@ -1307,6 +1307,85 @@
1307 1307 }
1308 1308  
1309 1309 /*
  1310 + * Make sure there's data to read. Wait for input if we can, otherwise
  1311 + * return an appropriate error.
  1312 + */
  1313 +static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
  1314 +{
  1315 + int ret;
  1316 +
  1317 + /*
  1318 + * Check ->nrbufs without the inode lock first. This function
  1319 + * is speculative anyways, so missing one is ok.
  1320 + */
  1321 + if (pipe->nrbufs)
  1322 + return 0;
  1323 +
  1324 + ret = 0;
  1325 + mutex_lock(&pipe->inode->i_mutex);
  1326 +
  1327 + while (!pipe->nrbufs) {
  1328 + if (signal_pending(current)) {
  1329 + ret = -ERESTARTSYS;
  1330 + break;
  1331 + }
  1332 + if (!pipe->writers)
  1333 + break;
  1334 + if (!pipe->waiting_writers) {
  1335 + if (flags & SPLICE_F_NONBLOCK) {
  1336 + ret = -EAGAIN;
  1337 + break;
  1338 + }
  1339 + }
  1340 + pipe_wait(pipe);
  1341 + }
  1342 +
  1343 + mutex_unlock(&pipe->inode->i_mutex);
  1344 + return ret;
  1345 +}
  1346 +
  1347 +/*
  1348 + * Make sure there's writeable room. Wait for room if we can, otherwise
  1349 + * return an appropriate error.
  1350 + */
  1351 +static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
  1352 +{
  1353 + int ret;
  1354 +
  1355 + /*
  1356 + * Check ->nrbufs without the inode lock first. This function
  1357 + * is speculative anyways, so missing one is ok.
  1358 + */
  1359 + if (pipe->nrbufs < PIPE_BUFFERS)
  1360 + return 0;
  1361 +
  1362 + ret = 0;
  1363 + mutex_lock(&pipe->inode->i_mutex);
  1364 +
  1365 + while (pipe->nrbufs >= PIPE_BUFFERS) {
  1366 + if (!pipe->readers) {
  1367 + send_sig(SIGPIPE, current, 0);
  1368 + ret = -EPIPE;
  1369 + break;
  1370 + }
  1371 + if (flags & SPLICE_F_NONBLOCK) {
  1372 + ret = -EAGAIN;
  1373 + break;
  1374 + }
  1375 + if (signal_pending(current)) {
  1376 + ret = -ERESTARTSYS;
  1377 + break;
  1378 + }
  1379 + pipe->waiting_writers++;
  1380 + pipe_wait(pipe);
  1381 + pipe->waiting_writers--;
  1382 + }
  1383 +
  1384 + mutex_unlock(&pipe->inode->i_mutex);
  1385 + return ret;
  1386 +}
  1387 +
  1388 +/*
1310 1389 * Link contents of ipipe to opipe.
1311 1390 */
1312 1391 static int link_pipe(struct pipe_inode_info *ipipe,
1313 1392  
1314 1393  
1315 1394  
1316 1395  
1317 1396  
1318 1397  
1319 1398  
1320 1399  
1321 1400  
1322 1401  
1323 1402  
1324 1403  
1325 1404  
1326 1405  
1327 1406  
... ... @@ -1314,136 +1393,70 @@
1314 1393 size_t len, unsigned int flags)
1315 1394 {
1316 1395 struct pipe_buffer *ibuf, *obuf;
1317   - int ret, do_wakeup, i, ipipe_first;
  1396 + int ret = 0, i = 0, nbuf;
1318 1397  
1319   - ret = do_wakeup = ipipe_first = 0;
1320   -
1321 1398 /*
1322 1399 * Potential ABBA deadlock, work around it by ordering lock
1323 1400 * grabbing by inode address. Otherwise two different processes
1324 1401 * could deadlock (one doing tee from A -> B, the other from B -> A).
1325 1402 */
1326 1403 if (ipipe->inode < opipe->inode) {
1327   - ipipe_first = 1;
1328   - mutex_lock(&ipipe->inode->i_mutex);
1329   - mutex_lock(&opipe->inode->i_mutex);
  1404 + mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT);
  1405 + mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD);
1330 1406 } else {
1331   - mutex_lock(&opipe->inode->i_mutex);
1332   - mutex_lock(&ipipe->inode->i_mutex);
  1407 + mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT);
  1408 + mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD);
1333 1409 }
1334 1410  
1335   - for (i = 0;; i++) {
  1411 + do {
1336 1412 if (!opipe->readers) {
1337 1413 send_sig(SIGPIPE, current, 0);
1338 1414 if (!ret)
1339 1415 ret = -EPIPE;
1340 1416 break;
1341 1417 }
1342   - if (ipipe->nrbufs - i) {
1343   - ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
1344 1418  
1345   - /*
1346   - * If we have room, fill this buffer
1347   - */
1348   - if (opipe->nrbufs < PIPE_BUFFERS) {
1349   - int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
  1419 + /*
  1420 + * If we have iterated all input buffers or ran out of
  1421 + * output room, break.
  1422 + */
  1423 + if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS)
  1424 + break;
1350 1425  
1351   - /*
1352   - * Get a reference to this pipe buffer,
1353   - * so we can copy the contents over.
1354   - */
1355   - ibuf->ops->get(ipipe, ibuf);
  1426 + ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
  1427 + nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
1356 1428  
1357   - obuf = opipe->bufs + nbuf;
1358   - *obuf = *ibuf;
  1429 + /*
  1430 + * Get a reference to this pipe buffer,
  1431 + * so we can copy the contents over.
  1432 + */
  1433 + ibuf->ops->get(ipipe, ibuf);
1359 1434  
1360   - /*
1361   - * Don't inherit the gift flag, we need to
1362   - * prevent multiple steals of this page.
1363   - */
1364   - obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
  1435 + obuf = opipe->bufs + nbuf;
  1436 + *obuf = *ibuf;
1365 1437  
1366   - if (obuf->len > len)
1367   - obuf->len = len;
1368   -
1369   - opipe->nrbufs++;
1370   - do_wakeup = 1;
1371   - ret += obuf->len;
1372   - len -= obuf->len;
1373   -
1374   - if (!len)
1375   - break;
1376   - if (opipe->nrbufs < PIPE_BUFFERS)
1377   - continue;
1378   - }
1379   -
1380   - /*
1381   - * We have input available, but no output room.
1382   - * If we already copied data, return that. If we
1383   - * need to drop the opipe lock, it must be ordered
1384   - * last to avoid deadlocks.
1385   - */
1386   - if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) {
1387   - if (!ret)
1388   - ret = -EAGAIN;
1389   - break;
1390   - }
1391   - if (signal_pending(current)) {
1392   - if (!ret)
1393   - ret = -ERESTARTSYS;
1394   - break;
1395   - }
1396   - if (do_wakeup) {
1397   - smp_mb();
1398   - if (waitqueue_active(&opipe->wait))
1399   - wake_up_interruptible(&opipe->wait);
1400   - kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1401   - do_wakeup = 0;
1402   - }
1403   -
1404   - opipe->waiting_writers++;
1405   - pipe_wait(opipe);
1406   - opipe->waiting_writers--;
1407   - continue;
1408   - }
1409   -
1410 1438 /*
1411   - * No input buffers, do the usual checks for available
1412   - * writers and blocking and wait if necessary
  1439 + * Don't inherit the gift flag, we need to
  1440 + * prevent multiple steals of this page.
1413 1441 */
1414   - if (!ipipe->writers)
1415   - break;
1416   - if (!ipipe->waiting_writers) {
1417   - if (ret)
1418   - break;
1419   - }
1420   - /*
1421   - * pipe_wait() drops the ipipe mutex. To avoid deadlocks
1422   - * with another process, we can only safely do that if
1423   - * the ipipe lock is ordered last.
1424   - */
1425   - if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) {
1426   - if (!ret)
1427   - ret = -EAGAIN;
1428   - break;
1429   - }
1430   - if (signal_pending(current)) {
1431   - if (!ret)
1432   - ret = -ERESTARTSYS;
1433   - break;
1434   - }
  1442 + obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1435 1443  
1436   - if (waitqueue_active(&ipipe->wait))
1437   - wake_up_interruptible_sync(&ipipe->wait);
1438   - kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT);
  1444 + if (obuf->len > len)
  1445 + obuf->len = len;
1439 1446  
1440   - pipe_wait(ipipe);
1441   - }
  1447 + opipe->nrbufs++;
  1448 + ret += obuf->len;
  1449 + len -= obuf->len;
  1450 + i++;
  1451 + } while (len);
1442 1452  
1443 1453 mutex_unlock(&ipipe->inode->i_mutex);
1444 1454 mutex_unlock(&opipe->inode->i_mutex);
1445 1455  
1446   - if (do_wakeup) {
  1456 + /*
  1457 + * If we put data in the output pipe, wakeup any potential readers.
  1458 + */
  1459 + if (ret > 0) {
1447 1460 smp_mb();
1448 1461 if (waitqueue_active(&opipe->wait))
1449 1462 wake_up_interruptible(&opipe->wait);
1450 1463  
1451 1464  
1452 1465  
... ... @@ -1464,14 +1477,29 @@
1464 1477 {
1465 1478 struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
1466 1479 struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
  1480 + int ret = -EINVAL;
1467 1481  
1468 1482 /*
1469   - * Link ipipe to the two output pipes, consuming as we go along.
  1483 + * Duplicate the contents of ipipe to opipe without actually
  1484 + * copying the data.
1470 1485 */
1471   - if (ipipe && opipe)
1472   - return link_pipe(ipipe, opipe, len, flags);
  1486 + if (ipipe && opipe && ipipe != opipe) {
  1487 + /*
  1488 + * Keep going, unless we encounter an error. The ipipe/opipe
  1489 + * ordering doesn't really matter.
  1490 + */
  1491 + ret = link_ipipe_prep(ipipe, flags);
  1492 + if (!ret) {
  1493 + ret = link_opipe_prep(opipe, flags);
  1494 + if (!ret) {
  1495 + ret = link_pipe(ipipe, opipe, len, flags);
  1496 + if (!ret && (flags & SPLICE_F_NONBLOCK))
  1497 + ret = -EAGAIN;
  1498 + }
  1499 + }
  1500 + }
1473 1501  
1474   - return -EINVAL;
  1502 + return ret;
1475 1503 }
1476 1504  
1477 1505 asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags)