Commit 8169d3005e3bae9bff40349d7caeac5938682297

Authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs fixes from Al Viro:
 "dcache fixes + kvfree() (uninlined, exported by mm/util.c) + posix_acl
  bugfix from hch"

The dcache fixes are for a subtle LRU list corruption bug reported by
Miklos Szeredi, where people inside IBM saw list corruptions with the
LTP/host01 test.

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  nick kvfree() from apparmor
  posix_acl: handle NULL ACL in posix_acl_equiv_mode
  dcache: don't need rcu in shrink_dentry_list()
  more graceful recovery in umount_collect()
  don't remove from shrink list in select_collect()
  dentry_kill(): don't try to remove from shrink list
  expand the call of dentry_lru_del() in dentry_kill()
  new helper: dentry_free()
  fold try_prune_one_dentry()
  fold d_kill() and d_free()
  fix races between __d_instantiate() and checks of dentry flags

Showing 8 changed files Side-by-side Diff

... ... @@ -246,16 +246,8 @@
246 246 kmem_cache_free(dentry_cache, dentry);
247 247 }
248 248  
249   -/*
250   - * no locks, please.
251   - */
252   -static void d_free(struct dentry *dentry)
  249 +static void dentry_free(struct dentry *dentry)
253 250 {
254   - BUG_ON((int)dentry->d_lockref.count > 0);
255   - this_cpu_dec(nr_dentry);
256   - if (dentry->d_op && dentry->d_op->d_release)
257   - dentry->d_op->d_release(dentry);
258   -
259 251 /* if dentry was never visible to RCU, immediate free is OK */
260 252 if (!(dentry->d_flags & DCACHE_RCUACCESS))
261 253 __d_free(&dentry->d_u.d_rcu);
262 254  
... ... @@ -403,57 +395,7 @@
403 395 d_lru_add(dentry);
404 396 }
405 397  
406   -/*
407   - * Remove a dentry with references from the LRU.
408   - *
409   - * If we are on the shrink list, then we can get to try_prune_one_dentry() and
410   - * lose our last reference through the parent walk. In this case, we need to
411   - * remove ourselves from the shrink list, not the LRU.
412   - */
413   -static void dentry_lru_del(struct dentry *dentry)
414   -{
415   - if (dentry->d_flags & DCACHE_LRU_LIST) {
416   - if (dentry->d_flags & DCACHE_SHRINK_LIST)
417   - return d_shrink_del(dentry);
418   - d_lru_del(dentry);
419   - }
420   -}
421   -
422 398 /**
423   - * d_kill - kill dentry and return parent
424   - * @dentry: dentry to kill
425   - * @parent: parent dentry
426   - *
427   - * The dentry must already be unhashed and removed from the LRU.
428   - *
429   - * If this is the root of the dentry tree, return NULL.
430   - *
431   - * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
432   - * d_kill.
433   - */
434   -static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
435   - __releases(dentry->d_lock)
436   - __releases(parent->d_lock)
437   - __releases(dentry->d_inode->i_lock)
438   -{
439   - list_del(&dentry->d_u.d_child);
440   - /*
441   - * Inform d_walk() that we are no longer attached to the
442   - * dentry tree
443   - */
444   - dentry->d_flags |= DCACHE_DENTRY_KILLED;
445   - if (parent)
446   - spin_unlock(&parent->d_lock);
447   - dentry_iput(dentry);
448   - /*
449   - * dentry_iput drops the locks, at which point nobody (except
450   - * transient RCU lookups) can reach this dentry.
451   - */
452   - d_free(dentry);
453   - return parent;
454   -}
455   -
456   -/**
457 399 * d_drop - drop a dentry
458 400 * @dentry: dentry to drop
459 401 *
460 402  
... ... @@ -510,8 +452,15 @@
510 452 __releases(dentry->d_lock)
511 453 {
512 454 struct inode *inode;
513   - struct dentry *parent;
  455 + struct dentry *parent = NULL;
  456 + bool can_free = true;
514 457  
  458 + if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
  459 + can_free = dentry->d_flags & DCACHE_MAY_FREE;
  460 + spin_unlock(&dentry->d_lock);
  461 + goto out;
  462 + }
  463 +
515 464 inode = dentry->d_inode;
516 465 if (inode && !spin_trylock(&inode->i_lock)) {
517 466 relock:
... ... @@ -521,9 +470,7 @@
521 470 }
522 471 return dentry; /* try again with same dentry */
523 472 }
524   - if (IS_ROOT(dentry))
525   - parent = NULL;
526   - else
  473 + if (!IS_ROOT(dentry))
527 474 parent = dentry->d_parent;
528 475 if (parent && !spin_trylock(&parent->d_lock)) {
529 476 if (inode)
530 477  
... ... @@ -543,10 +490,40 @@
543 490 if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry))
544 491 dentry->d_op->d_prune(dentry);
545 492  
546   - dentry_lru_del(dentry);
  493 + if (dentry->d_flags & DCACHE_LRU_LIST) {
  494 + if (!(dentry->d_flags & DCACHE_SHRINK_LIST))
  495 + d_lru_del(dentry);
  496 + }
547 497 /* if it was on the hash then remove it */
548 498 __d_drop(dentry);
549   - return d_kill(dentry, parent);
  499 + list_del(&dentry->d_u.d_child);
  500 + /*
  501 + * Inform d_walk() that we are no longer attached to the
  502 + * dentry tree
  503 + */
  504 + dentry->d_flags |= DCACHE_DENTRY_KILLED;
  505 + if (parent)
  506 + spin_unlock(&parent->d_lock);
  507 + dentry_iput(dentry);
  508 + /*
  509 + * dentry_iput drops the locks, at which point nobody (except
  510 + * transient RCU lookups) can reach this dentry.
  511 + */
  512 + BUG_ON((int)dentry->d_lockref.count > 0);
  513 + this_cpu_dec(nr_dentry);
  514 + if (dentry->d_op && dentry->d_op->d_release)
  515 + dentry->d_op->d_release(dentry);
  516 +
  517 + spin_lock(&dentry->d_lock);
  518 + if (dentry->d_flags & DCACHE_SHRINK_LIST) {
  519 + dentry->d_flags |= DCACHE_MAY_FREE;
  520 + can_free = false;
  521 + }
  522 + spin_unlock(&dentry->d_lock);
  523 +out:
  524 + if (likely(can_free))
  525 + dentry_free(dentry);
  526 + return parent;
550 527 }
551 528  
552 529 /*
553 530  
554 531  
555 532  
... ... @@ -815,65 +792,13 @@
815 792 }
816 793 EXPORT_SYMBOL(d_prune_aliases);
817 794  
818   -/*
819   - * Try to throw away a dentry - free the inode, dput the parent.
820   - * Requires dentry->d_lock is held, and dentry->d_count == 0.
821   - * Releases dentry->d_lock.
822   - *
823   - * This may fail if locks cannot be acquired no problem, just try again.
824   - */
825   -static struct dentry * try_prune_one_dentry(struct dentry *dentry)
826   - __releases(dentry->d_lock)
827   -{
828   - struct dentry *parent;
829   -
830   - parent = dentry_kill(dentry, 0);
831   - /*
832   - * If dentry_kill returns NULL, we have nothing more to do.
833   - * if it returns the same dentry, trylocks failed. In either
834   - * case, just loop again.
835   - *
836   - * Otherwise, we need to prune ancestors too. This is necessary
837   - * to prevent quadratic behavior of shrink_dcache_parent(), but
838   - * is also expected to be beneficial in reducing dentry cache
839   - * fragmentation.
840   - */
841   - if (!parent)
842   - return NULL;
843   - if (parent == dentry)
844   - return dentry;
845   -
846   - /* Prune ancestors. */
847   - dentry = parent;
848   - while (dentry) {
849   - if (lockref_put_or_lock(&dentry->d_lockref))
850   - return NULL;
851   - dentry = dentry_kill(dentry, 1);
852   - }
853   - return NULL;
854   -}
855   -
856 795 static void shrink_dentry_list(struct list_head *list)
857 796 {
858   - struct dentry *dentry;
  797 + struct dentry *dentry, *parent;
859 798  
860   - rcu_read_lock();
861   - for (;;) {
862   - dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
863   - if (&dentry->d_lru == list)
864   - break; /* empty */
865   -
866   - /*
867   - * Get the dentry lock, and re-verify that the dentry is
868   - * this on the shrinking list. If it is, we know that
869   - * DCACHE_SHRINK_LIST and DCACHE_LRU_LIST are set.
870   - */
  799 + while (!list_empty(list)) {
  800 + dentry = list_entry(list->prev, struct dentry, d_lru);
871 801 spin_lock(&dentry->d_lock);
872   - if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
873   - spin_unlock(&dentry->d_lock);
874   - continue;
875   - }
876   -
877 802 /*
878 803 * The dispose list is isolated and dentries are not accounted
879 804 * to the LRU here, so we can simply remove it from the list
880 805  
881 806  
882 807  
883 808  
884 809  
885 810  
886 811  
887 812  
... ... @@ -885,30 +810,38 @@
885 810 * We found an inuse dentry which was not removed from
886 811 * the LRU because of laziness during lookup. Do not free it.
887 812 */
888   - if (dentry->d_lockref.count) {
  813 + if ((int)dentry->d_lockref.count > 0) {
889 814 spin_unlock(&dentry->d_lock);
890 815 continue;
891 816 }
892   - rcu_read_unlock();
893 817  
  818 + parent = dentry_kill(dentry, 0);
894 819 /*
895   - * If 'try_to_prune()' returns a dentry, it will
896   - * be the same one we passed in, and d_lock will
897   - * have been held the whole time, so it will not
898   - * have been added to any other lists. We failed
899   - * to get the inode lock.
900   - *
901   - * We just add it back to the shrink list.
  820 + * If dentry_kill returns NULL, we have nothing more to do.
902 821 */
903   - dentry = try_prune_one_dentry(dentry);
  822 + if (!parent)
  823 + continue;
904 824  
905   - rcu_read_lock();
906   - if (dentry) {
  825 + if (unlikely(parent == dentry)) {
  826 + /*
  827 + * trylocks have failed and d_lock has been held the
  828 + * whole time, so it could not have been added to any
  829 + * other lists. Just add it back to the shrink list.
  830 + */
907 831 d_shrink_add(dentry, list);
908 832 spin_unlock(&dentry->d_lock);
  833 + continue;
909 834 }
  835 + /*
  836 + * We need to prune ancestors too. This is necessary to prevent
  837 + * quadratic behavior of shrink_dcache_parent(), but is also
  838 + * expected to be beneficial in reducing dentry cache
  839 + * fragmentation.
  840 + */
  841 + dentry = parent;
  842 + while (dentry && !lockref_put_or_lock(&dentry->d_lockref))
  843 + dentry = dentry_kill(dentry, 1);
910 844 }
911   - rcu_read_unlock();
912 845 }
913 846  
914 847 static enum lru_status
915 848  
916 849  
... ... @@ -1261,34 +1194,23 @@
1261 1194 if (data->start == dentry)
1262 1195 goto out;
1263 1196  
1264   - /*
1265   - * move only zero ref count dentries to the dispose list.
1266   - *
1267   - * Those which are presently on the shrink list, being processed
1268   - * by shrink_dentry_list(), shouldn't be moved. Otherwise the
1269   - * loop in shrink_dcache_parent() might not make any progress
1270   - * and loop forever.
1271   - */
1272   - if (dentry->d_lockref.count) {
1273   - dentry_lru_del(dentry);
1274   - } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1275   - /*
1276   - * We can't use d_lru_shrink_move() because we
1277   - * need to get the global LRU lock and do the
1278   - * LRU accounting.
1279   - */
1280   - d_lru_del(dentry);
1281   - d_shrink_add(dentry, &data->dispose);
  1197 + if (dentry->d_flags & DCACHE_SHRINK_LIST) {
1282 1198 data->found++;
1283   - ret = D_WALK_NORETRY;
  1199 + } else {
  1200 + if (dentry->d_flags & DCACHE_LRU_LIST)
  1201 + d_lru_del(dentry);
  1202 + if (!dentry->d_lockref.count) {
  1203 + d_shrink_add(dentry, &data->dispose);
  1204 + data->found++;
  1205 + }
1284 1206 }
1285 1207 /*
1286 1208 * We can return to the caller if we have found some (this
1287 1209 * ensures forward progress). We'll be coming back to find
1288 1210 * the rest.
1289 1211 */
1290   - if (data->found && need_resched())
1291   - ret = D_WALK_QUIT;
  1212 + if (!list_empty(&data->dispose))
  1213 + ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY;
1292 1214 out:
1293 1215 return ret;
1294 1216 }
1295 1217  
1296 1218  
1297 1219  
1298 1220  
1299 1221  
... ... @@ -1318,47 +1240,37 @@
1318 1240 }
1319 1241 EXPORT_SYMBOL(shrink_dcache_parent);
1320 1242  
1321   -static enum d_walk_ret umount_collect(void *_data, struct dentry *dentry)
  1243 +static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
1322 1244 {
1323   - struct select_data *data = _data;
1324   - enum d_walk_ret ret = D_WALK_CONTINUE;
  1245 + /* it has busy descendents; complain about those instead */
  1246 + if (!list_empty(&dentry->d_subdirs))
  1247 + return D_WALK_CONTINUE;
1325 1248  
1326   - if (dentry->d_lockref.count) {
1327   - dentry_lru_del(dentry);
1328   - if (likely(!list_empty(&dentry->d_subdirs)))
1329   - goto out;
1330   - if (dentry == data->start && dentry->d_lockref.count == 1)
1331   - goto out;
1332   - printk(KERN_ERR
1333   - "BUG: Dentry %p{i=%lx,n=%s}"
1334   - " still in use (%d)"
1335   - " [unmount of %s %s]\n",
  1249 + /* root with refcount 1 is fine */
  1250 + if (dentry == _data && dentry->d_lockref.count == 1)
  1251 + return D_WALK_CONTINUE;
  1252 +
  1253 + printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} "
  1254 + " still in use (%d) [unmount of %s %s]\n",
1336 1255 dentry,
1337 1256 dentry->d_inode ?
1338 1257 dentry->d_inode->i_ino : 0UL,
1339   - dentry->d_name.name,
  1258 + dentry,
1340 1259 dentry->d_lockref.count,
1341 1260 dentry->d_sb->s_type->name,
1342 1261 dentry->d_sb->s_id);
1343   - BUG();
1344   - } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1345   - /*
1346   - * We can't use d_lru_shrink_move() because we
1347   - * need to get the global LRU lock and do the
1348   - * LRU accounting.
1349   - */
1350   - if (dentry->d_flags & DCACHE_LRU_LIST)
1351   - d_lru_del(dentry);
1352   - d_shrink_add(dentry, &data->dispose);
1353   - data->found++;
1354   - ret = D_WALK_NORETRY;
1355   - }
1356   -out:
1357   - if (data->found && need_resched())
1358   - ret = D_WALK_QUIT;
1359   - return ret;
  1262 + WARN_ON(1);
  1263 + return D_WALK_CONTINUE;
1360 1264 }
1361 1265  
  1266 +static void do_one_tree(struct dentry *dentry)
  1267 +{
  1268 + shrink_dcache_parent(dentry);
  1269 + d_walk(dentry, dentry, umount_check, NULL);
  1270 + d_drop(dentry);
  1271 + dput(dentry);
  1272 +}
  1273 +
1362 1274 /*
1363 1275 * destroy the dentries attached to a superblock on unmounting
1364 1276 */
1365 1277  
1366 1278  
1367 1279  
... ... @@ -1366,40 +1278,15 @@
1366 1278 {
1367 1279 struct dentry *dentry;
1368 1280  
1369   - if (down_read_trylock(&sb->s_umount))
1370   - BUG();
  1281 + WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked");
1371 1282  
1372 1283 dentry = sb->s_root;
1373 1284 sb->s_root = NULL;
1374   - for (;;) {
1375   - struct select_data data;
  1285 + do_one_tree(dentry);
1376 1286  
1377   - INIT_LIST_HEAD(&data.dispose);
1378   - data.start = dentry;
1379   - data.found = 0;
1380   -
1381   - d_walk(dentry, &data, umount_collect, NULL);
1382   - if (!data.found)
1383   - break;
1384   -
1385   - shrink_dentry_list(&data.dispose);
1386   - cond_resched();
1387   - }
1388   - d_drop(dentry);
1389   - dput(dentry);
1390   -
1391 1287 while (!hlist_bl_empty(&sb->s_anon)) {
1392   - struct select_data data;
1393   - dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
1394   -
1395   - INIT_LIST_HEAD(&data.dispose);
1396   - data.start = NULL;
1397   - data.found = 0;
1398   -
1399   - d_walk(dentry, &data, umount_collect, NULL);
1400   - if (data.found)
1401   - shrink_dentry_list(&data.dispose);
1402   - cond_resched();
  1288 + dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash));
  1289 + do_one_tree(dentry);
1403 1290 }
1404 1291 }
1405 1292  
... ... @@ -1647,8 +1534,7 @@
1647 1534 unsigned add_flags = d_flags_for_inode(inode);
1648 1535  
1649 1536 spin_lock(&dentry->d_lock);
1650   - dentry->d_flags &= ~DCACHE_ENTRY_TYPE;
1651   - dentry->d_flags |= add_flags;
  1537 + __d_set_type(dentry, add_flags);
1652 1538 if (inode)
1653 1539 hlist_add_head(&dentry->d_alias, &inode->i_dentry);
1654 1540 dentry->d_inode = inode;
... ... @@ -1542,7 +1542,7 @@
1542 1542 inode = path->dentry->d_inode;
1543 1543 }
1544 1544 err = -ENOENT;
1545   - if (!inode)
  1545 + if (!inode || d_is_negative(path->dentry))
1546 1546 goto out_path_put;
1547 1547  
1548 1548 if (should_follow_link(path->dentry, follow)) {
... ... @@ -2249,7 +2249,7 @@
2249 2249 mutex_unlock(&dir->d_inode->i_mutex);
2250 2250  
2251 2251 done:
2252   - if (!dentry->d_inode) {
  2252 + if (!dentry->d_inode || d_is_negative(dentry)) {
2253 2253 error = -ENOENT;
2254 2254 dput(dentry);
2255 2255 goto out;
... ... @@ -2994,7 +2994,7 @@
2994 2994 finish_lookup:
2995 2995 /* we _can_ be in RCU mode here */
2996 2996 error = -ENOENT;
2997   - if (d_is_negative(path->dentry)) {
  2997 + if (!inode || d_is_negative(path->dentry)) {
2998 2998 path_to_nameidata(path, nd);
2999 2999 goto out;
3000 3000 }
... ... @@ -246,6 +246,12 @@
246 246 umode_t mode = 0;
247 247 int not_equiv = 0;
248 248  
  249 + /*
  250 + * A null ACL can always be presented as mode bits.
  251 + */
  252 + if (!acl)
  253 + return 0;
  254 +
249 255 FOREACH_ACL_ENTRY(pa, acl, pe) {
250 256 switch (pa->e_tag) {
251 257 case ACL_USER_OBJ:
include/linux/dcache.h
... ... @@ -221,6 +221,8 @@
221 221 #define DCACHE_SYMLINK_TYPE 0x00300000 /* Symlink */
222 222 #define DCACHE_FILE_TYPE 0x00400000 /* Other file type */
223 223  
  224 +#define DCACHE_MAY_FREE 0x00800000
  225 +
224 226 extern seqlock_t rename_lock;
225 227  
226 228 static inline int dname_external(const struct dentry *dentry)
... ... @@ -370,6 +370,8 @@
370 370 }
371 371 #endif
372 372  
  373 +extern void kvfree(const void *addr);
  374 +
373 375 static inline void compound_lock(struct page *page)
374 376 {
375 377 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
... ... @@ -10,6 +10,7 @@
10 10 #include <linux/swapops.h>
11 11 #include <linux/mman.h>
12 12 #include <linux/hugetlb.h>
  13 +#include <linux/vmalloc.h>
13 14  
14 15 #include <asm/uaccess.h>
15 16  
... ... @@ -386,6 +387,15 @@
386 387 return vm_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
387 388 }
388 389 EXPORT_SYMBOL(vm_mmap);
  390 +
  391 +void kvfree(const void *addr)
  392 +{
  393 + if (is_vmalloc_addr(addr))
  394 + vfree(addr);
  395 + else
  396 + kfree(addr);
  397 +}
  398 +EXPORT_SYMBOL(kvfree);
389 399  
390 400 struct address_space *page_mapping(struct page *page)
391 401 {
security/apparmor/include/apparmor.h
... ... @@ -66,7 +66,6 @@
66 66 char *aa_split_fqname(char *args, char **ns_name);
67 67 void aa_info_message(const char *str);
68 68 void *__aa_kvmalloc(size_t size, gfp_t flags);
69   -void kvfree(void *buffer);
70 69  
71 70 static inline void *kvmalloc(size_t size)
72 71 {
security/apparmor/lib.c
... ... @@ -104,18 +104,4 @@
104 104 }
105 105 return buffer;
106 106 }
107   -
108   -/**
109   - * kvfree - free an allocation do by kvmalloc
110   - * @buffer: buffer to free (MAYBE_NULL)
111   - *
112   - * Free a buffer allocated by kvmalloc
113   - */
114   -void kvfree(void *buffer)
115   -{
116   - if (is_vmalloc_addr(buffer))
117   - vfree(buffer);
118   - else
119   - kfree(buffer);
120   -}