Commit 4e717f5c1083995c334ced639cc77a75e9972567

Authored by Glauber Costa
Committed by Al Viro
1 parent 6a4f496fd2

list_lru: remove special case function list_lru_dispose_all.

The list_lru implementation has one function, list_lru_dispose_all, with
only one user (the dentry code).  At first, such function appears to make
sense because we are really not interested in the result of isolating each
dentry separately - all of them are going away anyway.  However, it's
implementation is buggy in the following way:

When we call list_lru_dispose_all in fs/dcache.c, we scan all dentries
marking them with DCACHE_SHRINK_LIST.  However, this is done without the
nlru->lock taken.  The imediate result of that is that someone else may
add or remove the dentry from the LRU at the same time.  When list_lru_del
happens in that scenario we will see an element that is not yet marked
with DCACHE_SHRINK_LIST (even though it will be in the future) and
obviously remove it from an lru where the element no longer is.  Since
list_lru_dispose_all will in effect count down nlru's nr_items and
list_lru_del will do the same, this will lead to an imbalance.

The solution for this would not be so simple: we can obviously just keep
the lru_lock taken, but then we have no guarantees that we will be able to
acquire the dentry lock (dentry->d_lock).  To properly solve this, we need
a communication mechanism between the lru and dentry code, so they can
coordinate this with each other.

Such mechanism already exists in the form of the list_lru_walk_cb
callback.  So it is possible to construct a dcache-side prune function
that does the right thing only by calling list_lru_walk in a loop until no
more dentries are available.

With only one user, plus the fact that a sane solution for the problem
would involve boucing between dcache and list_lru anyway, I see little
justification to keep the special case list_lru_dispose_all in tree.

Signed-off-by: Glauber Costa <glommer@openvz.org>
Cc: Michal Hocko <mhocko@suse.cz>
Acked-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Showing 3 changed files with 29 additions and 79 deletions Side-by-side Diff

... ... @@ -956,27 +956,29 @@
956 956 return freed;
957 957 }
958 958  
959   -/*
960   - * Mark all the dentries as on being the dispose list so we don't think they are
961   - * still on the LRU if we try to kill them from ascending the parent chain in
962   - * try_prune_one_dentry() rather than directly from the dispose list.
963   - */
964   -static void
965   -shrink_dcache_list(
966   - struct list_head *dispose)
  959 +static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
  960 + spinlock_t *lru_lock, void *arg)
967 961 {
968   - struct dentry *dentry;
  962 + struct list_head *freeable = arg;
  963 + struct dentry *dentry = container_of(item, struct dentry, d_lru);
969 964  
970   - rcu_read_lock();
971   - list_for_each_entry_rcu(dentry, dispose, d_lru) {
972   - spin_lock(&dentry->d_lock);
973   - dentry->d_flags |= DCACHE_SHRINK_LIST;
974   - spin_unlock(&dentry->d_lock);
975   - }
976   - rcu_read_unlock();
977   - shrink_dentry_list(dispose);
  965 + /*
  966 + * we are inverting the lru lock/dentry->d_lock here,
  967 + * so use a trylock. If we fail to get the lock, just skip
  968 + * it
  969 + */
  970 + if (!spin_trylock(&dentry->d_lock))
  971 + return LRU_SKIP;
  972 +
  973 + dentry->d_flags |= DCACHE_SHRINK_LIST;
  974 + list_move_tail(&dentry->d_lru, freeable);
  975 + this_cpu_dec(nr_dentry_unused);
  976 + spin_unlock(&dentry->d_lock);
  977 +
  978 + return LRU_REMOVED;
978 979 }
979 980  
  981 +
980 982 /**
981 983 * shrink_dcache_sb - shrink dcache for a superblock
982 984 * @sb: superblock
983 985  
... ... @@ -986,10 +988,17 @@
986 988 */
987 989 void shrink_dcache_sb(struct super_block *sb)
988 990 {
989   - long disposed;
  991 + long freed;
990 992  
991   - disposed = list_lru_dispose_all(&sb->s_dentry_lru, shrink_dcache_list);
992   - this_cpu_sub(nr_dentry_unused, disposed);
  993 + do {
  994 + LIST_HEAD(dispose);
  995 +
  996 + freed = list_lru_walk(&sb->s_dentry_lru,
  997 + dentry_lru_isolate_shrink, &dispose, UINT_MAX);
  998 +
  999 + this_cpu_sub(nr_dentry_unused, freed);
  1000 + shrink_dentry_list(&dispose);
  1001 + } while (freed > 0);
993 1002 }
994 1003 EXPORT_SYMBOL(shrink_dcache_sb);
995 1004  
include/linux/list_lru.h
... ... @@ -137,22 +137,5 @@
137 137 }
138 138 return isolated;
139 139 }
140   -
141   -typedef void (*list_lru_dispose_cb)(struct list_head *dispose_list);
142   -/**
143   - * list_lru_dispose_all: forceably flush all elements in an @lru
144   - * @lru: the lru pointer
145   - * @dispose: callback function to be called for each lru list.
146   - *
147   - * This function will forceably isolate all elements into the dispose list, and
148   - * call the @dispose callback to flush the list. Please note that the callback
149   - * should expect items in any state, clean or dirty, and be able to flush all of
150   - * them.
151   - *
152   - * Return value: how many objects were freed. It should be equal to all objects
153   - * in the list_lru.
154   - */
155   -unsigned long
156   -list_lru_dispose_all(struct list_lru *lru, list_lru_dispose_cb dispose);
157 140 #endif /* _LRU_LIST_H */
... ... @@ -112,48 +112,6 @@
112 112 }
113 113 EXPORT_SYMBOL_GPL(list_lru_walk_node);
114 114  
115   -static unsigned long list_lru_dispose_all_node(struct list_lru *lru, int nid,
116   - list_lru_dispose_cb dispose)
117   -{
118   - struct list_lru_node *nlru = &lru->node[nid];
119   - LIST_HEAD(dispose_list);
120   - unsigned long disposed = 0;
121   -
122   - spin_lock(&nlru->lock);
123   - while (!list_empty(&nlru->list)) {
124   - list_splice_init(&nlru->list, &dispose_list);
125   - disposed += nlru->nr_items;
126   - nlru->nr_items = 0;
127   - node_clear(nid, lru->active_nodes);
128   - spin_unlock(&nlru->lock);
129   -
130   - dispose(&dispose_list);
131   -
132   - spin_lock(&nlru->lock);
133   - }
134   - spin_unlock(&nlru->lock);
135   - return disposed;
136   -}
137   -
138   -unsigned long list_lru_dispose_all(struct list_lru *lru,
139   - list_lru_dispose_cb dispose)
140   -{
141   - unsigned long disposed;
142   - unsigned long total = 0;
143   - int nid;
144   -
145   - do {
146   - disposed = 0;
147   - for_each_node_mask(nid, lru->active_nodes) {
148   - disposed += list_lru_dispose_all_node(lru, nid,
149   - dispose);
150   - }
151   - total += disposed;
152   - } while (disposed != 0);
153   -
154   - return total;
155   -}
156   -
157 115 int list_lru_init(struct list_lru *lru)
158 116 {
159 117 int i;