Commit 498052bba55ecaff58db6a1436b0e25bfd75a7ff

Authored by Al Viro
1 parent 3e93cd6718

New locking/refcounting for fs_struct

* all changes of current->fs are done under task_lock and write_lock of
  old fs->lock
* refcount is not atomic anymore (same protection)
* its decrements are done when removing reference from current; at the
  same time we decide whether to free it.
* put_fs_struct() is gone
* new field - ->in_exec.  Set by check_unsafe_exec() if we are trying to do
  execve() and only subthreads share fs_struct.  Cleared when finishing exec
  (success and failure alike).  Makes CLONE_FS fail with -EAGAIN if set.
* check_unsafe_exec() may fail with -EAGAIN if another execve() from subthread
  is in progress.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Showing 7 changed files with 121 additions and 44 deletions (side-by-side diff)

... ... @@ -51,6 +51,7 @@
51 51 #include <linux/poll.h>
52 52 #include <linux/mm.h>
53 53 #include <linux/eventpoll.h>
  54 +#include <linux/fs_struct.h>
54 55  
55 56 #include <asm/uaccess.h>
56 57 #include <asm/mmu_context.h>
57 58  
58 59  
... ... @@ -1441,12 +1442,15 @@
1441 1442 bprm->cred = prepare_exec_creds();
1442 1443 if (!bprm->cred)
1443 1444 goto out_unlock;
1444   - check_unsafe_exec(bprm);
1445 1445  
  1446 + retval = check_unsafe_exec(bprm);
  1447 + if (retval)
  1448 + goto out_unlock;
  1449 +
1446 1450 file = open_exec(filename);
1447 1451 retval = PTR_ERR(file);
1448 1452 if (IS_ERR(file))
1449   - goto out_unlock;
  1453 + goto out_unmark;
1450 1454  
1451 1455 sched_exec();
1452 1456  
... ... @@ -1488,6 +1492,9 @@
1488 1492 goto out;
1489 1493  
1490 1494 /* execve succeeded */
  1495 + write_lock(&current->fs->lock);
  1496 + current->fs->in_exec = 0;
  1497 + write_unlock(&current->fs->lock);
1491 1498 current->in_execve = 0;
1492 1499 mutex_unlock(&current->cred_exec_mutex);
1493 1500 acct_update_integrals(current);
... ... @@ -1505,6 +1512,11 @@
1505 1512 allow_write_access(bprm->file);
1506 1513 fput(bprm->file);
1507 1514 }
  1515 +
  1516 +out_unmark:
  1517 + write_lock(&current->fs->lock);
  1518 + current->fs->in_exec = 0;
  1519 + write_unlock(&current->fs->lock);
1508 1520  
1509 1521 out_unlock:
1510 1522 current->in_execve = 0;
... ... @@ -1056,16 +1056,18 @@
1056 1056 * - the caller must hold current->cred_exec_mutex to protect against
1057 1057 * PTRACE_ATTACH
1058 1058 */
1059   -void check_unsafe_exec(struct linux_binprm *bprm)
  1059 +int check_unsafe_exec(struct linux_binprm *bprm)
1060 1060 {
1061 1061 struct task_struct *p = current, *t;
1062 1062 unsigned long flags;
1063 1063 unsigned n_fs, n_sighand;
  1064 + int res = 0;
1064 1065  
1065 1066 bprm->unsafe = tracehook_unsafe_exec(p);
1066 1067  
1067 1068 n_fs = 1;
1068 1069 n_sighand = 1;
  1070 + write_lock(&p->fs->lock);
1069 1071 lock_task_sighand(p, &flags);
1070 1072 for (t = next_thread(p); t != p; t = next_thread(t)) {
1071 1073 if (t->fs == p->fs)
1072 1074  
1073 1075  
... ... @@ -1073,11 +1075,19 @@
1073 1075 n_sighand++;
1074 1076 }
1075 1077  
1076   - if (atomic_read(&p->fs->count) > n_fs ||
1077   - atomic_read(&p->sighand->count) > n_sighand)
  1078 + if (p->fs->users > n_fs ||
  1079 + atomic_read(&p->sighand->count) > n_sighand) {
1078 1080 bprm->unsafe |= LSM_UNSAFE_SHARE;
  1081 + } else {
  1082 + if (p->fs->in_exec)
  1083 + res = -EAGAIN;
  1084 + p->fs->in_exec = 1;
  1085 + }
1079 1086  
1080 1087 unlock_task_sighand(p, &flags);
  1088 + write_unlock(&p->fs->lock);
  1089 +
  1090 + return res;
1081 1091 }
1082 1092  
1083 1093 /*
1084 1094  
1085 1095  
... ... @@ -1296,12 +1306,15 @@
1296 1306 bprm->cred = prepare_exec_creds();
1297 1307 if (!bprm->cred)
1298 1308 goto out_unlock;
1299   - check_unsafe_exec(bprm);
1300 1309  
  1310 + retval = check_unsafe_exec(bprm);
  1311 + if (retval)
  1312 + goto out_unlock;
  1313 +
1301 1314 file = open_exec(filename);
1302 1315 retval = PTR_ERR(file);
1303 1316 if (IS_ERR(file))
1304   - goto out_unlock;
  1317 + goto out_unmark;
1305 1318  
1306 1319 sched_exec();
1307 1320  
... ... @@ -1344,6 +1357,9 @@
1344 1357 goto out;
1345 1358  
1346 1359 /* execve succeeded */
  1360 + write_lock(&current->fs->lock);
  1361 + current->fs->in_exec = 0;
  1362 + write_unlock(&current->fs->lock);
1347 1363 current->in_execve = 0;
1348 1364 mutex_unlock(&current->cred_exec_mutex);
1349 1365 acct_update_integrals(current);
... ... @@ -1361,6 +1377,11 @@
1361 1377 allow_write_access(bprm->file);
1362 1378 fput(bprm->file);
1363 1379 }
  1380 +
  1381 +out_unmark:
  1382 + write_lock(&current->fs->lock);
  1383 + current->fs->in_exec = 0;
  1384 + write_unlock(&current->fs->lock);
1364 1385  
1365 1386 out_unlock:
1366 1387 current->in_execve = 0;
... ... @@ -72,25 +72,27 @@
72 72 path_put(old_root);
73 73 }
74 74  
75   -void put_fs_struct(struct fs_struct *fs)
  75 +void free_fs_struct(struct fs_struct *fs)
76 76 {
77   - /* No need to hold fs->lock if we are killing it */
78   - if (atomic_dec_and_test(&fs->count)) {
79   - path_put(&fs->root);
80   - path_put(&fs->pwd);
81   - kmem_cache_free(fs_cachep, fs);
82   - }
  77 + path_put(&fs->root);
  78 + path_put(&fs->pwd);
  79 + kmem_cache_free(fs_cachep, fs);
83 80 }
84 81  
85 82 void exit_fs(struct task_struct *tsk)
86 83 {
87   - struct fs_struct * fs = tsk->fs;
  84 + struct fs_struct *fs = tsk->fs;
88 85  
89 86 if (fs) {
  87 + int kill;
90 88 task_lock(tsk);
  89 + write_lock(&fs->lock);
91 90 tsk->fs = NULL;
  91 + kill = !--fs->users;
  92 + write_unlock(&fs->lock);
92 93 task_unlock(tsk);
93   - put_fs_struct(fs);
  94 + if (kill)
  95 + free_fs_struct(fs);
94 96 }
95 97 }
96 98  
... ... @@ -99,7 +101,8 @@
99 101 struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
100 102 /* We don't need to lock fs - think why ;-) */
101 103 if (fs) {
102   - atomic_set(&fs->count, 1);
  104 + fs->users = 1;
  105 + fs->in_exec = 0;
103 106 rwlock_init(&fs->lock);
104 107 fs->umask = old->umask;
105 108 read_lock(&old->lock);
106 109  
107 110  
108 111  
109 112  
... ... @@ -114,29 +117,55 @@
114 117  
115 118 int unshare_fs_struct(void)
116 119 {
117   - struct fs_struct *fsp = copy_fs_struct(current->fs);
118   - if (!fsp)
  120 + struct fs_struct *fs = current->fs;
  121 + struct fs_struct *new_fs = copy_fs_struct(fs);
  122 + int kill;
  123 +
  124 + if (!new_fs)
119 125 return -ENOMEM;
120   - exit_fs(current);
121   - current->fs = fsp;
  126 +
  127 + task_lock(current);
  128 + write_lock(&fs->lock);
  129 + kill = !--fs->users;
  130 + current->fs = new_fs;
  131 + write_unlock(&fs->lock);
  132 + task_unlock(current);
  133 +
  134 + if (kill)
  135 + free_fs_struct(fs);
  136 +
122 137 return 0;
123 138 }
124 139 EXPORT_SYMBOL_GPL(unshare_fs_struct);
125 140  
126 141 /* to be mentioned only in INIT_TASK */
127 142 struct fs_struct init_fs = {
128   - .count = ATOMIC_INIT(1),
  143 + .users = 1,
129 144 .lock = __RW_LOCK_UNLOCKED(init_fs.lock),
130 145 .umask = 0022,
131 146 };
132 147  
133 148 void daemonize_fs_struct(void)
134 149 {
135   - struct fs_struct *fs;
  150 + struct fs_struct *fs = current->fs;
136 151  
137   - exit_fs(current); /* current->fs->count--; */
138   - fs = &init_fs;
139   - current->fs = fs;
140   - atomic_inc(&fs->count);
  152 + if (fs) {
  153 + int kill;
  154 +
  155 + task_lock(current);
  156 +
  157 + write_lock(&init_fs.lock);
  158 + init_fs.users++;
  159 + write_unlock(&init_fs.lock);
  160 +
  161 + write_lock(&fs->lock);
  162 + current->fs = &init_fs;
  163 + kill = !--fs->users;
  164 + write_unlock(&fs->lock);
  165 +
  166 + task_unlock(current);
  167 + if (kill)
  168 + free_fs_struct(fs);
  169 + }
141 170 }
... ... @@ -44,7 +44,7 @@
44 44 /*
45 45 * exec.c
46 46 */
47   -extern void check_unsafe_exec(struct linux_binprm *);
  47 +extern int check_unsafe_exec(struct linux_binprm *);
48 48  
49 49 /*
50 50 * namespace.c
fs/proc/task_nommu.c
... ... @@ -49,7 +49,7 @@
49 49 else
50 50 bytes += kobjsize(mm);
51 51  
52   - if (current->fs && atomic_read(&current->fs->count) > 1)
  52 + if (current->fs && current->fs->users > 1)
53 53 sbytes += kobjsize(current->fs);
54 54 else
55 55 bytes += kobjsize(current->fs);
include/linux/fs_struct.h
... ... @@ -4,12 +4,10 @@
4 4 #include <linux/path.h>
5 5  
6 6 struct fs_struct {
7   - atomic_t count; /* This usage count is used by check_unsafe_exec() for
8   - * security checking purposes - therefore it may not be
9   - * incremented, except by clone(CLONE_FS).
10   - */
  7 + int users;
11 8 rwlock_t lock;
12 9 int umask;
  10 + int in_exec;
13 11 struct path root, pwd;
14 12 };
15 13  
... ... @@ -19,7 +17,7 @@
19 17 extern void set_fs_root(struct fs_struct *, struct path *);
20 18 extern void set_fs_pwd(struct fs_struct *, struct path *);
21 19 extern struct fs_struct *copy_fs_struct(struct fs_struct *);
22   -extern void put_fs_struct(struct fs_struct *);
  20 +extern void free_fs_struct(struct fs_struct *);
23 21 extern void daemonize_fs_struct(void);
24 22 extern int unshare_fs_struct(void);
25 23  
... ... @@ -683,11 +683,19 @@
683 683  
684 684 static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
685 685 {
  686 + struct fs_struct *fs = current->fs;
686 687 if (clone_flags & CLONE_FS) {
687   - atomic_inc(&current->fs->count);
  688 + /* tsk->fs is already what we want */
  689 + write_lock(&fs->lock);
  690 + if (fs->in_exec) {
  691 + write_unlock(&fs->lock);
  692 + return -EAGAIN;
  693 + }
  694 + fs->users++;
  695 + write_unlock(&fs->lock);
688 696 return 0;
689 697 }
690   - tsk->fs = copy_fs_struct(current->fs);
  698 + tsk->fs = copy_fs_struct(fs);
691 699 if (!tsk->fs)
692 700 return -ENOMEM;
693 701 return 0;
694 702  
... ... @@ -1518,13 +1526,17 @@
1518 1526 {
1519 1527 struct fs_struct *fs = current->fs;
1520 1528  
1521   - if ((unshare_flags & CLONE_FS) &&
1522   - (fs && atomic_read(&fs->count) > 1)) {
1523   - *new_fsp = copy_fs_struct(current->fs);
1524   - if (!*new_fsp)
1525   - return -ENOMEM;
1526   - }
  1529 + if (!(unshare_flags & CLONE_FS) || !fs)
  1530 + return 0;
1527 1531  
  1532 + /* don't need lock here; in the worst case we'll do useless copy */
  1533 + if (fs->users == 1)
  1534 + return 0;
  1535 +
  1536 + *new_fsp = copy_fs_struct(fs);
  1537 + if (!*new_fsp)
  1538 + return -ENOMEM;
  1539 +
1528 1540 return 0;
1529 1541 }
1530 1542  
1531 1543  
... ... @@ -1639,8 +1651,13 @@
1639 1651  
1640 1652 if (new_fs) {
1641 1653 fs = current->fs;
  1654 + write_lock(&fs->lock);
1642 1655 current->fs = new_fs;
1643   - new_fs = fs;
  1656 + if (--fs->users)
  1657 + new_fs = NULL;
  1658 + else
  1659 + new_fs = fs;
  1660 + write_unlock(&fs->lock);
1644 1661 }
1645 1662  
1646 1663 if (new_mm) {
... ... @@ -1679,7 +1696,7 @@
1679 1696  
1680 1697 bad_unshare_cleanup_fs:
1681 1698 if (new_fs)
1682   - put_fs_struct(new_fs);
  1699 + free_fs_struct(new_fs);
1683 1700  
1684 1701 bad_unshare_cleanup_thread:
1685 1702 bad_unshare_out: