Commit 498052bba55ecaff58db6a1436b0e25bfd75a7ff
1 parent
3e93cd6718
Exists in
master
and in
4 other branches
New locking/refcounting for fs_struct
* all changes of current->fs are done under task_lock and write_lock of old fs->lock
* refcount is not atomic anymore (same protection)
* its decrements are done when removing reference from current; at the same time we decide whether to free it.
* put_fs_struct() is gone
* new field - ->in_exec. Set by check_unsafe_exec() if we are trying to do execve() and only subthreads share fs_struct. Cleared when finishing exec (success and failure alike). Makes CLONE_FS fail with -EAGAIN if set.
* check_unsafe_exec() may fail with -EAGAIN if another execve() from subthread is in progress.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Showing 7 changed files with 121 additions and 44 deletions Side-by-side Diff
fs/compat.c
... | ... | @@ -51,6 +51,7 @@ |
51 | 51 | #include <linux/poll.h> |
52 | 52 | #include <linux/mm.h> |
53 | 53 | #include <linux/eventpoll.h> |
54 | +#include <linux/fs_struct.h> | |
54 | 55 | |
55 | 56 | #include <asm/uaccess.h> |
56 | 57 | #include <asm/mmu_context.h> |
57 | 58 | |
58 | 59 | |
... | ... | @@ -1441,12 +1442,15 @@ |
1441 | 1442 | bprm->cred = prepare_exec_creds(); |
1442 | 1443 | if (!bprm->cred) |
1443 | 1444 | goto out_unlock; |
1444 | - check_unsafe_exec(bprm); | |
1445 | 1445 | |
1446 | + retval = check_unsafe_exec(bprm); | |
1447 | + if (retval) | |
1448 | + goto out_unlock; | |
1449 | + | |
1446 | 1450 | file = open_exec(filename); |
1447 | 1451 | retval = PTR_ERR(file); |
1448 | 1452 | if (IS_ERR(file)) |
1449 | - goto out_unlock; | |
1453 | + goto out_unmark; | |
1450 | 1454 | |
1451 | 1455 | sched_exec(); |
1452 | 1456 | |
... | ... | @@ -1488,6 +1492,9 @@ |
1488 | 1492 | goto out; |
1489 | 1493 | |
1490 | 1494 | /* execve succeeded */ |
1495 | + write_lock(¤t->fs->lock); | |
1496 | + current->fs->in_exec = 0; | |
1497 | + write_unlock(¤t->fs->lock); | |
1491 | 1498 | current->in_execve = 0; |
1492 | 1499 | mutex_unlock(¤t->cred_exec_mutex); |
1493 | 1500 | acct_update_integrals(current); |
... | ... | @@ -1505,6 +1512,11 @@ |
1505 | 1512 | allow_write_access(bprm->file); |
1506 | 1513 | fput(bprm->file); |
1507 | 1514 | } |
1515 | + | |
1516 | +out_unmark: | |
1517 | + write_lock(¤t->fs->lock); | |
1518 | + current->fs->in_exec = 0; | |
1519 | + write_unlock(¤t->fs->lock); | |
1508 | 1520 | |
1509 | 1521 | out_unlock: |
1510 | 1522 | current->in_execve = 0; |
fs/exec.c
... | ... | @@ -1056,16 +1056,18 @@ |
1056 | 1056 | * - the caller must hold current->cred_exec_mutex to protect against |
1057 | 1057 | * PTRACE_ATTACH |
1058 | 1058 | */ |
1059 | -void check_unsafe_exec(struct linux_binprm *bprm) | |
1059 | +int check_unsafe_exec(struct linux_binprm *bprm) | |
1060 | 1060 | { |
1061 | 1061 | struct task_struct *p = current, *t; |
1062 | 1062 | unsigned long flags; |
1063 | 1063 | unsigned n_fs, n_sighand; |
1064 | + int res = 0; | |
1064 | 1065 | |
1065 | 1066 | bprm->unsafe = tracehook_unsafe_exec(p); |
1066 | 1067 | |
1067 | 1068 | n_fs = 1; |
1068 | 1069 | n_sighand = 1; |
1070 | + write_lock(&p->fs->lock); | |
1069 | 1071 | lock_task_sighand(p, &flags); |
1070 | 1072 | for (t = next_thread(p); t != p; t = next_thread(t)) { |
1071 | 1073 | if (t->fs == p->fs) |
1072 | 1074 | n_fs++; |
1073 | 1075 | |
... | ... | @@ -1073,11 +1075,19 @@ |
1073 | 1075 | n_sighand++; |
1074 | 1076 | } |
1075 | 1077 | |
1076 | - if (atomic_read(&p->fs->count) > n_fs || | |
1077 | - atomic_read(&p->sighand->count) > n_sighand) | |
1078 | + if (p->fs->users > n_fs || | |
1079 | + atomic_read(&p->sighand->count) > n_sighand) { | |
1078 | 1080 | bprm->unsafe |= LSM_UNSAFE_SHARE; |
1081 | + } else { | |
1082 | + if (p->fs->in_exec) | |
1083 | + res = -EAGAIN; | |
1084 | + p->fs->in_exec = 1; | |
1085 | + } | |
1079 | 1086 | |
1080 | 1087 | unlock_task_sighand(p, &flags); |
1088 | + write_unlock(&p->fs->lock); | |
1089 | + | |
1090 | + return res; | |
1081 | 1091 | } |
1082 | 1092 | |
1083 | 1093 | /* |
1084 | 1094 | |
1085 | 1095 | |
... | ... | @@ -1296,12 +1306,15 @@ |
1296 | 1306 | bprm->cred = prepare_exec_creds(); |
1297 | 1307 | if (!bprm->cred) |
1298 | 1308 | goto out_unlock; |
1299 | - check_unsafe_exec(bprm); | |
1300 | 1309 | |
1310 | + retval = check_unsafe_exec(bprm); | |
1311 | + if (retval) | |
1312 | + goto out_unlock; | |
1313 | + | |
1301 | 1314 | file = open_exec(filename); |
1302 | 1315 | retval = PTR_ERR(file); |
1303 | 1316 | if (IS_ERR(file)) |
1304 | - goto out_unlock; | |
1317 | + goto out_unmark; | |
1305 | 1318 | |
1306 | 1319 | sched_exec(); |
1307 | 1320 | |
... | ... | @@ -1344,6 +1357,9 @@ |
1344 | 1357 | goto out; |
1345 | 1358 | |
1346 | 1359 | /* execve succeeded */ |
1360 | + write_lock(¤t->fs->lock); | |
1361 | + current->fs->in_exec = 0; | |
1362 | + write_unlock(¤t->fs->lock); | |
1347 | 1363 | current->in_execve = 0; |
1348 | 1364 | mutex_unlock(¤t->cred_exec_mutex); |
1349 | 1365 | acct_update_integrals(current); |
... | ... | @@ -1361,6 +1377,11 @@ |
1361 | 1377 | allow_write_access(bprm->file); |
1362 | 1378 | fput(bprm->file); |
1363 | 1379 | } |
1380 | + | |
1381 | +out_unmark: | |
1382 | + write_lock(¤t->fs->lock); | |
1383 | + current->fs->in_exec = 0; | |
1384 | + write_unlock(¤t->fs->lock); | |
1364 | 1385 | |
1365 | 1386 | out_unlock: |
1366 | 1387 | current->in_execve = 0; |
fs/fs_struct.c
... | ... | @@ -72,25 +72,27 @@ |
72 | 72 | path_put(old_root); |
73 | 73 | } |
74 | 74 | |
75 | -void put_fs_struct(struct fs_struct *fs) | |
75 | +void free_fs_struct(struct fs_struct *fs) | |
76 | 76 | { |
77 | - /* No need to hold fs->lock if we are killing it */ | |
78 | - if (atomic_dec_and_test(&fs->count)) { | |
79 | - path_put(&fs->root); | |
80 | - path_put(&fs->pwd); | |
81 | - kmem_cache_free(fs_cachep, fs); | |
82 | - } | |
77 | + path_put(&fs->root); | |
78 | + path_put(&fs->pwd); | |
79 | + kmem_cache_free(fs_cachep, fs); | |
83 | 80 | } |
84 | 81 | |
85 | 82 | void exit_fs(struct task_struct *tsk) |
86 | 83 | { |
87 | - struct fs_struct * fs = tsk->fs; | |
84 | + struct fs_struct *fs = tsk->fs; | |
88 | 85 | |
89 | 86 | if (fs) { |
87 | + int kill; | |
90 | 88 | task_lock(tsk); |
89 | + write_lock(&fs->lock); | |
91 | 90 | tsk->fs = NULL; |
91 | + kill = !--fs->users; | |
92 | + write_unlock(&fs->lock); | |
92 | 93 | task_unlock(tsk); |
93 | - put_fs_struct(fs); | |
94 | + if (kill) | |
95 | + free_fs_struct(fs); | |
94 | 96 | } |
95 | 97 | } |
96 | 98 | |
... | ... | @@ -99,7 +101,8 @@ |
99 | 101 | struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); |
100 | 102 | /* We don't need to lock fs - think why ;-) */ |
101 | 103 | if (fs) { |
102 | - atomic_set(&fs->count, 1); | |
104 | + fs->users = 1; | |
105 | + fs->in_exec = 0; | |
103 | 106 | rwlock_init(&fs->lock); |
104 | 107 | fs->umask = old->umask; |
105 | 108 | read_lock(&old->lock); |
106 | 109 | |
107 | 110 | |
108 | 111 | |
109 | 112 | |
... | ... | @@ -114,29 +117,55 @@ |
114 | 117 | |
115 | 118 | int unshare_fs_struct(void) |
116 | 119 | { |
117 | - struct fs_struct *fsp = copy_fs_struct(current->fs); | |
118 | - if (!fsp) | |
120 | + struct fs_struct *fs = current->fs; | |
121 | + struct fs_struct *new_fs = copy_fs_struct(fs); | |
122 | + int kill; | |
123 | + | |
124 | + if (!new_fs) | |
119 | 125 | return -ENOMEM; |
120 | - exit_fs(current); | |
121 | - current->fs = fsp; | |
126 | + | |
127 | + task_lock(current); | |
128 | + write_lock(&fs->lock); | |
129 | + kill = !--fs->users; | |
130 | + current->fs = new_fs; | |
131 | + write_unlock(&fs->lock); | |
132 | + task_unlock(current); | |
133 | + | |
134 | + if (kill) | |
135 | + free_fs_struct(fs); | |
136 | + | |
122 | 137 | return 0; |
123 | 138 | } |
124 | 139 | EXPORT_SYMBOL_GPL(unshare_fs_struct); |
125 | 140 | |
126 | 141 | /* to be mentioned only in INIT_TASK */ |
127 | 142 | struct fs_struct init_fs = { |
128 | - .count = ATOMIC_INIT(1), | |
143 | + .users = 1, | |
129 | 144 | .lock = __RW_LOCK_UNLOCKED(init_fs.lock), |
130 | 145 | .umask = 0022, |
131 | 146 | }; |
132 | 147 | |
133 | 148 | void daemonize_fs_struct(void) |
134 | 149 | { |
135 | - struct fs_struct *fs; | |
150 | + struct fs_struct *fs = current->fs; | |
136 | 151 | |
137 | - exit_fs(current); /* current->fs->count--; */ | |
138 | - fs = &init_fs; | |
139 | - current->fs = fs; | |
140 | - atomic_inc(&fs->count); | |
152 | + if (fs) { | |
153 | + int kill; | |
154 | + | |
155 | + task_lock(current); | |
156 | + | |
157 | + write_lock(&init_fs.lock); | |
158 | + init_fs.users++; | |
159 | + write_unlock(&init_fs.lock); | |
160 | + | |
161 | + write_lock(&fs->lock); | |
162 | + current->fs = &init_fs; | |
163 | + kill = !--fs->users; | |
164 | + write_unlock(&fs->lock); | |
165 | + | |
166 | + task_unlock(current); | |
167 | + if (kill) | |
168 | + free_fs_struct(fs); | |
169 | + } | |
141 | 170 | } |
fs/internal.h
fs/proc/task_nommu.c
include/linux/fs_struct.h
... | ... | @@ -4,12 +4,10 @@ |
4 | 4 | #include <linux/path.h> |
5 | 5 | |
6 | 6 | struct fs_struct { |
7 | - atomic_t count; /* This usage count is used by check_unsafe_exec() for | |
8 | - * security checking purposes - therefore it may not be | |
9 | - * incremented, except by clone(CLONE_FS). | |
10 | - */ | |
7 | + int users; | |
11 | 8 | rwlock_t lock; |
12 | 9 | int umask; |
10 | + int in_exec; | |
13 | 11 | struct path root, pwd; |
14 | 12 | }; |
15 | 13 | |
... | ... | @@ -19,7 +17,7 @@ |
19 | 17 | extern void set_fs_root(struct fs_struct *, struct path *); |
20 | 18 | extern void set_fs_pwd(struct fs_struct *, struct path *); |
21 | 19 | extern struct fs_struct *copy_fs_struct(struct fs_struct *); |
22 | -extern void put_fs_struct(struct fs_struct *); | |
20 | +extern void free_fs_struct(struct fs_struct *); | |
23 | 21 | extern void daemonize_fs_struct(void); |
24 | 22 | extern int unshare_fs_struct(void); |
25 | 23 |
kernel/fork.c
... | ... | @@ -683,11 +683,19 @@ |
683 | 683 | |
684 | 684 | static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) |
685 | 685 | { |
686 | + struct fs_struct *fs = current->fs; | |
686 | 687 | if (clone_flags & CLONE_FS) { |
687 | - atomic_inc(¤t->fs->count); | |
688 | + /* tsk->fs is already what we want */ | |
689 | + write_lock(&fs->lock); | |
690 | + if (fs->in_exec) { | |
691 | + write_unlock(&fs->lock); | |
692 | + return -EAGAIN; | |
693 | + } | |
694 | + fs->users++; | |
695 | + write_unlock(&fs->lock); | |
688 | 696 | return 0; |
689 | 697 | } |
690 | - tsk->fs = copy_fs_struct(current->fs); | |
698 | + tsk->fs = copy_fs_struct(fs); | |
691 | 699 | if (!tsk->fs) |
692 | 700 | return -ENOMEM; |
693 | 701 | return 0; |
694 | 702 | |
... | ... | @@ -1518,13 +1526,17 @@ |
1518 | 1526 | { |
1519 | 1527 | struct fs_struct *fs = current->fs; |
1520 | 1528 | |
1521 | - if ((unshare_flags & CLONE_FS) && | |
1522 | - (fs && atomic_read(&fs->count) > 1)) { | |
1523 | - *new_fsp = copy_fs_struct(current->fs); | |
1524 | - if (!*new_fsp) | |
1525 | - return -ENOMEM; | |
1526 | - } | |
1529 | + if (!(unshare_flags & CLONE_FS) || !fs) | |
1530 | + return 0; | |
1527 | 1531 | |
1532 | + /* don't need lock here; in the worst case we'll do useless copy */ | |
1533 | + if (fs->users == 1) | |
1534 | + return 0; | |
1535 | + | |
1536 | + *new_fsp = copy_fs_struct(fs); | |
1537 | + if (!*new_fsp) | |
1538 | + return -ENOMEM; | |
1539 | + | |
1528 | 1540 | return 0; |
1529 | 1541 | } |
1530 | 1542 | |
1531 | 1543 | |
... | ... | @@ -1639,8 +1651,13 @@ |
1639 | 1651 | |
1640 | 1652 | if (new_fs) { |
1641 | 1653 | fs = current->fs; |
1654 | + write_lock(&fs->lock); | |
1642 | 1655 | current->fs = new_fs; |
1643 | - new_fs = fs; | |
1656 | + if (--fs->users) | |
1657 | + new_fs = NULL; | |
1658 | + else | |
1659 | + new_fs = fs; | |
1660 | + write_unlock(&fs->lock); | |
1644 | 1661 | } |
1645 | 1662 | |
1646 | 1663 | if (new_mm) { |
... | ... | @@ -1679,7 +1696,7 @@ |
1679 | 1696 | |
1680 | 1697 | bad_unshare_cleanup_fs: |
1681 | 1698 | if (new_fs) |
1682 | - put_fs_struct(new_fs); | |
1699 | + free_fs_struct(new_fs); | |
1683 | 1700 | |
1684 | 1701 | bad_unshare_cleanup_thread: |
1685 | 1702 | bad_unshare_out: |