Commit 2d37a72e406e226656ff9009bb7913e6ff9c3025

Authored by Tim Chen
Committed by Jiri Slaby
1 parent 2f11e3a821

fs/superblock: avoid locking counting inodes and dentries before reclaiming them

commit d23da150a37c9fe3cc83dbaf71b3e37fd434ed52 upstream.

Remove the call to grab_super_passive() from super_cache_count().  It
becomes a scalability bottleneck when multiple threads try to do memory
reclamation at the same time, e.g. when we are doing a large amount of
file reads and the page cache is under pressure.  The cached objects are
quickly reclaimed down to zero and we abort the cache_scan() reclaim,
but the counting still creates a logjam of threads acquiring the
sb_lock.
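
For context, the contention is in grab_super_passive() itself: every
counting thread has to take the global sb_lock, once to bump s_count and
again when the reference is dropped via put_super()/drop_super().  This
is the helper as it appears later in this file, with annotations added
here for illustration:

  bool grab_super_passive(struct super_block *sb)
  {
          spin_lock(&sb_lock);    /* global spinlock: every counting thread serializes here */
          if (hlist_unhashed(&sb->s_instances)) {
                  spin_unlock(&sb_lock);
                  return false;
          }

          sb->s_count++;
          spin_unlock(&sb_lock);

          if (down_read_trylock(&sb->s_umount)) {
                  if (sb->s_root && (sb->s_flags & MS_BORN))
                          return true;
                  up_read(&sb->s_umount);
          }

          put_super(sb);          /* failure path: takes sb_lock a second time */
          return false;
  }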

We hold the shrinker_rwsem, which ensures that the calls to
list_lru_count_node() and s_op->nr_cached_objects() are safe.  The
shrinker is now unregistered before ->kill_sb(), so the operation is
also safe while we are doing an unmount.
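
The ordering that makes this safe can be seen in
deactivate_locked_super() later in this file: unregister_shrinker()
takes shrinker_rwsem for write, so it waits for any in-flight
super_cache_count()/super_cache_scan() (which run with shrinker_rwsem
held for read) before ->kill_sb() tears the superblock down.  Annotated
excerpt:

  void deactivate_locked_super(struct super_block *s)
  {
          struct file_system_type *fs = s->s_type;
          if (atomic_dec_and_test(&s->s_active)) {
                  cleancache_invalidate_fs(s);
                  /* waits for shrinker_rwsem readers, i.e. any running
                   * super_cache_count()/super_cache_scan(), to finish */
                  unregister_shrinker(&s->s_shrink);
                  fs->kill_sb(s);

                  put_filesystem(fs);
                  put_super(s);
          } else {
                  up_write(&s->s_umount);
          }
  }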

The impact will depend heavily on the machine and the workload, but for
a small machine running postmark tuned to use 4x RAM size the results
were:

                                  3.15.0-rc5            3.15.0-rc5
                                     vanilla         shrinker-v1r1
Ops/sec Transactions         21.00 (  0.00%)       24.00 ( 14.29%)
Ops/sec FilesCreate          39.00 (  0.00%)       44.00 ( 12.82%)
Ops/sec CreateTransact       10.00 (  0.00%)       12.00 ( 20.00%)
Ops/sec FilesDeleted       6202.00 (  0.00%)     6202.00 (  0.00%)
Ops/sec DeleteTransact       11.00 (  0.00%)       12.00 (  9.09%)
Ops/sec DataRead/MB          25.97 (  0.00%)       29.10 ( 12.05%)
Ops/sec DataWrite/MB         49.99 (  0.00%)       56.02 ( 12.06%)

ffsb running in a configuration meant to simulate a mail server showed:

                                 3.15.0-rc5             3.15.0-rc5
                                    vanilla          shrinker-v1r1
Ops/sec readall           9402.63 (  0.00%)      9567.97 (  1.76%)
Ops/sec create            4695.45 (  0.00%)      4735.00 (  0.84%)
Ops/sec delete             173.72 (  0.00%)       179.83 (  3.52%)
Ops/sec Transactions     14271.80 (  0.00%)     14482.81 (  1.48%)
Ops/sec Read                37.00 (  0.00%)        37.60 (  1.62%)
Ops/sec Write               18.20 (  0.00%)        18.30 (  0.55%)

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Chinner <david@fromorbit.com>
Tested-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Cc: Bob Liu <bob.liu@oracle.com>
Cc: Jan Kara <jack@suse.cz>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>

Showing 1 changed file with 8 additions and 4 deletions

1 /* 1 /*
2 * linux/fs/super.c 2 * linux/fs/super.c
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * 5 *
6 * super.c contains code to handle: - mount structures 6 * super.c contains code to handle: - mount structures
7 * - super-block tables 7 * - super-block tables
8 * - filesystem drivers list 8 * - filesystem drivers list
9 * - mount system call 9 * - mount system call
10 * - umount system call 10 * - umount system call
11 * - ustat system call 11 * - ustat system call
12 * 12 *
13 * GK 2/5/95 - Changed to support mounting the root fs via NFS 13 * GK 2/5/95 - Changed to support mounting the root fs via NFS
14 * 14 *
15 * Added kerneld support: Jacques Gelinas and Bjorn Ekwall 15 * Added kerneld support: Jacques Gelinas and Bjorn Ekwall
16 * Added change_root: Werner Almesberger & Hans Lermen, Feb '96 16 * Added change_root: Werner Almesberger & Hans Lermen, Feb '96
17 * Added options to /proc/mounts: 17 * Added options to /proc/mounts:
18 * Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996. 18 * Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
19 * Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998 19 * Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998
20 * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000 20 * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000
21 */ 21 */
22 22
23 #include <linux/export.h> 23 #include <linux/export.h>
24 #include <linux/slab.h> 24 #include <linux/slab.h>
25 #include <linux/acct.h> 25 #include <linux/acct.h>
26 #include <linux/blkdev.h> 26 #include <linux/blkdev.h>
27 #include <linux/mount.h> 27 #include <linux/mount.h>
28 #include <linux/security.h> 28 #include <linux/security.h>
29 #include <linux/writeback.h> /* for the emergency remount stuff */ 29 #include <linux/writeback.h> /* for the emergency remount stuff */
30 #include <linux/idr.h> 30 #include <linux/idr.h>
31 #include <linux/mutex.h> 31 #include <linux/mutex.h>
32 #include <linux/backing-dev.h> 32 #include <linux/backing-dev.h>
33 #include <linux/rculist_bl.h> 33 #include <linux/rculist_bl.h>
34 #include <linux/cleancache.h> 34 #include <linux/cleancache.h>
35 #include <linux/fsnotify.h> 35 #include <linux/fsnotify.h>
36 #include <linux/lockdep.h> 36 #include <linux/lockdep.h>
37 #include "internal.h" 37 #include "internal.h"
38 38
39 39
40 LIST_HEAD(super_blocks); 40 LIST_HEAD(super_blocks);
41 DEFINE_SPINLOCK(sb_lock); 41 DEFINE_SPINLOCK(sb_lock);
42 42
43 static char *sb_writers_name[SB_FREEZE_LEVELS] = { 43 static char *sb_writers_name[SB_FREEZE_LEVELS] = {
44 "sb_writers", 44 "sb_writers",
45 "sb_pagefaults", 45 "sb_pagefaults",
46 "sb_internal", 46 "sb_internal",
47 }; 47 };
48 48
49 /* 49 /*
50 * One thing we have to be careful of with a per-sb shrinker is that we don't 50 * One thing we have to be careful of with a per-sb shrinker is that we don't
51 * drop the last active reference to the superblock from within the shrinker. 51 * drop the last active reference to the superblock from within the shrinker.
52 * If that happens we could trigger unregistering the shrinker from within the 52 * If that happens we could trigger unregistering the shrinker from within the
53 * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we 53 * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
54 * take a passive reference to the superblock to avoid this from occurring. 54 * take a passive reference to the superblock to avoid this from occurring.
55 */ 55 */
56 static unsigned long super_cache_scan(struct shrinker *shrink, 56 static unsigned long super_cache_scan(struct shrinker *shrink,
57 struct shrink_control *sc) 57 struct shrink_control *sc)
58 { 58 {
59 struct super_block *sb; 59 struct super_block *sb;
60 long fs_objects = 0; 60 long fs_objects = 0;
61 long total_objects; 61 long total_objects;
62 long freed = 0; 62 long freed = 0;
63 long dentries; 63 long dentries;
64 long inodes; 64 long inodes;
65 65
66 sb = container_of(shrink, struct super_block, s_shrink); 66 sb = container_of(shrink, struct super_block, s_shrink);
67 67
68 /* 68 /*
69 * Deadlock avoidance. We may hold various FS locks, and we don't want 69 * Deadlock avoidance. We may hold various FS locks, and we don't want
70 * to recurse into the FS that called us in clear_inode() and friends.. 70 * to recurse into the FS that called us in clear_inode() and friends..
71 */ 71 */
72 if (!(sc->gfp_mask & __GFP_FS)) 72 if (!(sc->gfp_mask & __GFP_FS))
73 return SHRINK_STOP; 73 return SHRINK_STOP;
74 74
75 if (!grab_super_passive(sb)) 75 if (!grab_super_passive(sb))
76 return SHRINK_STOP; 76 return SHRINK_STOP;
77 77
78 if (sb->s_op->nr_cached_objects) 78 if (sb->s_op->nr_cached_objects)
79 fs_objects = sb->s_op->nr_cached_objects(sb, sc->nid); 79 fs_objects = sb->s_op->nr_cached_objects(sb, sc->nid);
80 80
81 inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid); 81 inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid);
82 dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid); 82 dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid);
83 total_objects = dentries + inodes + fs_objects + 1; 83 total_objects = dentries + inodes + fs_objects + 1;
84 84
85 /* proportion the scan between the caches */ 85 /* proportion the scan between the caches */
86 dentries = mult_frac(sc->nr_to_scan, dentries, total_objects); 86 dentries = mult_frac(sc->nr_to_scan, dentries, total_objects);
87 inodes = mult_frac(sc->nr_to_scan, inodes, total_objects); 87 inodes = mult_frac(sc->nr_to_scan, inodes, total_objects);
88 88
89 /* 89 /*
90 * prune the dcache first as the icache is pinned by it, then 90 * prune the dcache first as the icache is pinned by it, then
91 * prune the icache, followed by the filesystem specific caches 91 * prune the icache, followed by the filesystem specific caches
92 */ 92 */
93 freed = prune_dcache_sb(sb, dentries, sc->nid); 93 freed = prune_dcache_sb(sb, dentries, sc->nid);
94 freed += prune_icache_sb(sb, inodes, sc->nid); 94 freed += prune_icache_sb(sb, inodes, sc->nid);
95 95
96 if (fs_objects) { 96 if (fs_objects) {
97 fs_objects = mult_frac(sc->nr_to_scan, fs_objects, 97 fs_objects = mult_frac(sc->nr_to_scan, fs_objects,
98 total_objects); 98 total_objects);
99 freed += sb->s_op->free_cached_objects(sb, fs_objects, 99 freed += sb->s_op->free_cached_objects(sb, fs_objects,
100 sc->nid); 100 sc->nid);
101 } 101 }
102 102
103 drop_super(sb); 103 drop_super(sb);
104 return freed; 104 return freed;
105 } 105 }
106 106
107 static unsigned long super_cache_count(struct shrinker *shrink, 107 static unsigned long super_cache_count(struct shrinker *shrink,
108 struct shrink_control *sc) 108 struct shrink_control *sc)
109 { 109 {
110 struct super_block *sb; 110 struct super_block *sb;
111 long total_objects = 0; 111 long total_objects = 0;
112 112
113 sb = container_of(shrink, struct super_block, s_shrink); 113 sb = container_of(shrink, struct super_block, s_shrink);
114 114
115 if (!grab_super_passive(sb)) 115 /*
116 return 0; 116 * Don't call grab_super_passive as it is a potential
117 117 * scalability bottleneck. The counts could get updated
118 * between super_cache_count and super_cache_scan anyway.
119 * Call to super_cache_count with shrinker_rwsem held
120 * ensures the safety of call to list_lru_count_node() and
121 * s_op->nr_cached_objects().
122 */
118 if (sb->s_op && sb->s_op->nr_cached_objects) 123 if (sb->s_op && sb->s_op->nr_cached_objects)
119 total_objects = sb->s_op->nr_cached_objects(sb, 124 total_objects = sb->s_op->nr_cached_objects(sb,
120 sc->nid); 125 sc->nid);
121 126
122 total_objects += list_lru_count_node(&sb->s_dentry_lru, 127 total_objects += list_lru_count_node(&sb->s_dentry_lru,
123 sc->nid); 128 sc->nid);
124 total_objects += list_lru_count_node(&sb->s_inode_lru, 129 total_objects += list_lru_count_node(&sb->s_inode_lru,
125 sc->nid); 130 sc->nid);
126 131
127 total_objects = vfs_pressure_ratio(total_objects); 132 total_objects = vfs_pressure_ratio(total_objects);
128 drop_super(sb);
129 return total_objects; 133 return total_objects;
130 } 134 }
131 135
132 static int init_sb_writers(struct super_block *s, struct file_system_type *type) 136 static int init_sb_writers(struct super_block *s, struct file_system_type *type)
133 { 137 {
134 int err; 138 int err;
135 int i; 139 int i;
136 140
137 for (i = 0; i < SB_FREEZE_LEVELS; i++) { 141 for (i = 0; i < SB_FREEZE_LEVELS; i++) {
138 err = percpu_counter_init(&s->s_writers.counter[i], 0); 142 err = percpu_counter_init(&s->s_writers.counter[i], 0);
139 if (err < 0) 143 if (err < 0)
140 goto err_out; 144 goto err_out;
141 lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], 145 lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
142 &type->s_writers_key[i], 0); 146 &type->s_writers_key[i], 0);
143 } 147 }
144 init_waitqueue_head(&s->s_writers.wait); 148 init_waitqueue_head(&s->s_writers.wait);
145 init_waitqueue_head(&s->s_writers.wait_unfrozen); 149 init_waitqueue_head(&s->s_writers.wait_unfrozen);
146 return 0; 150 return 0;
147 err_out: 151 err_out:
148 while (--i >= 0) 152 while (--i >= 0)
149 percpu_counter_destroy(&s->s_writers.counter[i]); 153 percpu_counter_destroy(&s->s_writers.counter[i]);
150 return err; 154 return err;
151 } 155 }
152 156
153 static void destroy_sb_writers(struct super_block *s) 157 static void destroy_sb_writers(struct super_block *s)
154 { 158 {
155 int i; 159 int i;
156 160
157 for (i = 0; i < SB_FREEZE_LEVELS; i++) 161 for (i = 0; i < SB_FREEZE_LEVELS; i++)
158 percpu_counter_destroy(&s->s_writers.counter[i]); 162 percpu_counter_destroy(&s->s_writers.counter[i]);
159 } 163 }
160 164
161 /** 165 /**
162 * alloc_super - create new superblock 166 * alloc_super - create new superblock
163 * @type: filesystem type superblock should belong to 167 * @type: filesystem type superblock should belong to
164 * @flags: the mount flags 168 * @flags: the mount flags
165 * 169 *
166 * Allocates and initializes a new &struct super_block. alloc_super() 170 * Allocates and initializes a new &struct super_block. alloc_super()
167 * returns a pointer new superblock or %NULL if allocation had failed. 171 * returns a pointer new superblock or %NULL if allocation had failed.
168 */ 172 */
169 static struct super_block *alloc_super(struct file_system_type *type, int flags) 173 static struct super_block *alloc_super(struct file_system_type *type, int flags)
170 { 174 {
171 struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); 175 struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
172 static const struct super_operations default_op; 176 static const struct super_operations default_op;
173 177
174 if (s) { 178 if (s) {
175 if (security_sb_alloc(s)) 179 if (security_sb_alloc(s))
176 goto out_free_sb; 180 goto out_free_sb;
177 181
178 #ifdef CONFIG_SMP 182 #ifdef CONFIG_SMP
179 s->s_files = alloc_percpu(struct list_head); 183 s->s_files = alloc_percpu(struct list_head);
180 if (!s->s_files) 184 if (!s->s_files)
181 goto err_out; 185 goto err_out;
182 else { 186 else {
183 int i; 187 int i;
184 188
185 for_each_possible_cpu(i) 189 for_each_possible_cpu(i)
186 INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); 190 INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
187 } 191 }
188 #else 192 #else
189 INIT_LIST_HEAD(&s->s_files); 193 INIT_LIST_HEAD(&s->s_files);
190 #endif 194 #endif
191 if (init_sb_writers(s, type)) 195 if (init_sb_writers(s, type))
192 goto err_out; 196 goto err_out;
193 s->s_flags = flags; 197 s->s_flags = flags;
194 s->s_bdi = &default_backing_dev_info; 198 s->s_bdi = &default_backing_dev_info;
195 INIT_HLIST_NODE(&s->s_instances); 199 INIT_HLIST_NODE(&s->s_instances);
196 INIT_HLIST_BL_HEAD(&s->s_anon); 200 INIT_HLIST_BL_HEAD(&s->s_anon);
197 INIT_LIST_HEAD(&s->s_inodes); 201 INIT_LIST_HEAD(&s->s_inodes);
198 202
199 if (list_lru_init(&s->s_dentry_lru)) 203 if (list_lru_init(&s->s_dentry_lru))
200 goto err_out; 204 goto err_out;
201 if (list_lru_init(&s->s_inode_lru)) 205 if (list_lru_init(&s->s_inode_lru))
202 goto err_out_dentry_lru; 206 goto err_out_dentry_lru;
203 207
204 INIT_LIST_HEAD(&s->s_mounts); 208 INIT_LIST_HEAD(&s->s_mounts);
205 init_rwsem(&s->s_umount); 209 init_rwsem(&s->s_umount);
206 lockdep_set_class(&s->s_umount, &type->s_umount_key); 210 lockdep_set_class(&s->s_umount, &type->s_umount_key);
207 /* 211 /*
208 * sget() can have s_umount recursion. 212 * sget() can have s_umount recursion.
209 * 213 *
210 * When it cannot find a suitable sb, it allocates a new 214 * When it cannot find a suitable sb, it allocates a new
211 * one (this one), and tries again to find a suitable old 215 * one (this one), and tries again to find a suitable old
212 * one. 216 * one.
213 * 217 *
214 * In case that succeeds, it will acquire the s_umount 218 * In case that succeeds, it will acquire the s_umount
215 * lock of the old one. Since these are clearly distrinct 219 * lock of the old one. Since these are clearly distrinct
216 * locks, and this object isn't exposed yet, there's no 220 * locks, and this object isn't exposed yet, there's no
217 * risk of deadlocks. 221 * risk of deadlocks.
218 * 222 *
219 * Annotate this by putting this lock in a different 223 * Annotate this by putting this lock in a different
220 * subclass. 224 * subclass.
221 */ 225 */
222 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); 226 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
223 s->s_count = 1; 227 s->s_count = 1;
224 atomic_set(&s->s_active, 1); 228 atomic_set(&s->s_active, 1);
225 mutex_init(&s->s_vfs_rename_mutex); 229 mutex_init(&s->s_vfs_rename_mutex);
226 lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); 230 lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
227 mutex_init(&s->s_dquot.dqio_mutex); 231 mutex_init(&s->s_dquot.dqio_mutex);
228 mutex_init(&s->s_dquot.dqonoff_mutex); 232 mutex_init(&s->s_dquot.dqonoff_mutex);
229 init_rwsem(&s->s_dquot.dqptr_sem); 233 init_rwsem(&s->s_dquot.dqptr_sem);
230 s->s_maxbytes = MAX_NON_LFS; 234 s->s_maxbytes = MAX_NON_LFS;
231 s->s_op = &default_op; 235 s->s_op = &default_op;
232 s->s_time_gran = 1000000000; 236 s->s_time_gran = 1000000000;
233 s->cleancache_poolid = -1; 237 s->cleancache_poolid = -1;
234 238
235 s->s_shrink.seeks = DEFAULT_SEEKS; 239 s->s_shrink.seeks = DEFAULT_SEEKS;
236 s->s_shrink.scan_objects = super_cache_scan; 240 s->s_shrink.scan_objects = super_cache_scan;
237 s->s_shrink.count_objects = super_cache_count; 241 s->s_shrink.count_objects = super_cache_count;
238 s->s_shrink.batch = 1024; 242 s->s_shrink.batch = 1024;
239 s->s_shrink.flags = SHRINKER_NUMA_AWARE; 243 s->s_shrink.flags = SHRINKER_NUMA_AWARE;
240 } 244 }
241 out: 245 out:
242 return s; 246 return s;
243 247
244 err_out_dentry_lru: 248 err_out_dentry_lru:
245 list_lru_destroy(&s->s_dentry_lru); 249 list_lru_destroy(&s->s_dentry_lru);
246 err_out: 250 err_out:
247 security_sb_free(s); 251 security_sb_free(s);
248 #ifdef CONFIG_SMP 252 #ifdef CONFIG_SMP
249 if (s->s_files) 253 if (s->s_files)
250 free_percpu(s->s_files); 254 free_percpu(s->s_files);
251 #endif 255 #endif
252 destroy_sb_writers(s); 256 destroy_sb_writers(s);
253 out_free_sb: 257 out_free_sb:
254 kfree(s); 258 kfree(s);
255 s = NULL; 259 s = NULL;
256 goto out; 260 goto out;
257 } 261 }
258 262
259 /** 263 /**
260 * destroy_super - frees a superblock 264 * destroy_super - frees a superblock
261 * @s: superblock to free 265 * @s: superblock to free
262 * 266 *
263 * Frees a superblock. 267 * Frees a superblock.
264 */ 268 */
265 static inline void destroy_super(struct super_block *s) 269 static inline void destroy_super(struct super_block *s)
266 { 270 {
267 list_lru_destroy(&s->s_dentry_lru); 271 list_lru_destroy(&s->s_dentry_lru);
268 list_lru_destroy(&s->s_inode_lru); 272 list_lru_destroy(&s->s_inode_lru);
269 #ifdef CONFIG_SMP 273 #ifdef CONFIG_SMP
270 free_percpu(s->s_files); 274 free_percpu(s->s_files);
271 #endif 275 #endif
272 destroy_sb_writers(s); 276 destroy_sb_writers(s);
273 security_sb_free(s); 277 security_sb_free(s);
274 WARN_ON(!list_empty(&s->s_mounts)); 278 WARN_ON(!list_empty(&s->s_mounts));
275 kfree(s->s_subtype); 279 kfree(s->s_subtype);
276 kfree(s->s_options); 280 kfree(s->s_options);
277 kfree(s); 281 kfree(s);
278 } 282 }
279 283
280 /* Superblock refcounting */ 284 /* Superblock refcounting */
281 285
282 /* 286 /*
283 * Drop a superblock's refcount. The caller must hold sb_lock. 287 * Drop a superblock's refcount. The caller must hold sb_lock.
284 */ 288 */
285 static void __put_super(struct super_block *sb) 289 static void __put_super(struct super_block *sb)
286 { 290 {
287 if (!--sb->s_count) { 291 if (!--sb->s_count) {
288 list_del_init(&sb->s_list); 292 list_del_init(&sb->s_list);
289 destroy_super(sb); 293 destroy_super(sb);
290 } 294 }
291 } 295 }
292 296
293 /** 297 /**
294 * put_super - drop a temporary reference to superblock 298 * put_super - drop a temporary reference to superblock
295 * @sb: superblock in question 299 * @sb: superblock in question
296 * 300 *
297 * Drops a temporary reference, frees superblock if there's no 301 * Drops a temporary reference, frees superblock if there's no
298 * references left. 302 * references left.
299 */ 303 */
300 static void put_super(struct super_block *sb) 304 static void put_super(struct super_block *sb)
301 { 305 {
302 spin_lock(&sb_lock); 306 spin_lock(&sb_lock);
303 __put_super(sb); 307 __put_super(sb);
304 spin_unlock(&sb_lock); 308 spin_unlock(&sb_lock);
305 } 309 }
306 310
307 311
308 /** 312 /**
309 * deactivate_locked_super - drop an active reference to superblock 313 * deactivate_locked_super - drop an active reference to superblock
310 * @s: superblock to deactivate 314 * @s: superblock to deactivate
311 * 315 *
312 * Drops an active reference to superblock, converting it into a temprory 316 * Drops an active reference to superblock, converting it into a temprory
313 * one if there is no other active references left. In that case we 317 * one if there is no other active references left. In that case we
314 * tell fs driver to shut it down and drop the temporary reference we 318 * tell fs driver to shut it down and drop the temporary reference we
315 * had just acquired. 319 * had just acquired.
316 * 320 *
317 * Caller holds exclusive lock on superblock; that lock is released. 321 * Caller holds exclusive lock on superblock; that lock is released.
318 */ 322 */
319 void deactivate_locked_super(struct super_block *s) 323 void deactivate_locked_super(struct super_block *s)
320 { 324 {
321 struct file_system_type *fs = s->s_type; 325 struct file_system_type *fs = s->s_type;
322 if (atomic_dec_and_test(&s->s_active)) { 326 if (atomic_dec_and_test(&s->s_active)) {
323 cleancache_invalidate_fs(s); 327 cleancache_invalidate_fs(s);
324 unregister_shrinker(&s->s_shrink); 328 unregister_shrinker(&s->s_shrink);
325 fs->kill_sb(s); 329 fs->kill_sb(s);
326 330
327 put_filesystem(fs); 331 put_filesystem(fs);
328 put_super(s); 332 put_super(s);
329 } else { 333 } else {
330 up_write(&s->s_umount); 334 up_write(&s->s_umount);
331 } 335 }
332 } 336 }
333 337
334 EXPORT_SYMBOL(deactivate_locked_super); 338 EXPORT_SYMBOL(deactivate_locked_super);
335 339
336 /** 340 /**
337 * deactivate_super - drop an active reference to superblock 341 * deactivate_super - drop an active reference to superblock
338 * @s: superblock to deactivate 342 * @s: superblock to deactivate
339 * 343 *
340 * Variant of deactivate_locked_super(), except that superblock is *not* 344 * Variant of deactivate_locked_super(), except that superblock is *not*
341 * locked by caller. If we are going to drop the final active reference, 345 * locked by caller. If we are going to drop the final active reference,
342 * lock will be acquired prior to that. 346 * lock will be acquired prior to that.
343 */ 347 */
344 void deactivate_super(struct super_block *s) 348 void deactivate_super(struct super_block *s)
345 { 349 {
346 if (!atomic_add_unless(&s->s_active, -1, 1)) { 350 if (!atomic_add_unless(&s->s_active, -1, 1)) {
347 down_write(&s->s_umount); 351 down_write(&s->s_umount);
348 deactivate_locked_super(s); 352 deactivate_locked_super(s);
349 } 353 }
350 } 354 }
351 355
352 EXPORT_SYMBOL(deactivate_super); 356 EXPORT_SYMBOL(deactivate_super);
353 357
354 /** 358 /**
355 * grab_super - acquire an active reference 359 * grab_super - acquire an active reference
356 * @s: reference we are trying to make active 360 * @s: reference we are trying to make active
357 * 361 *
358 * Tries to acquire an active reference. grab_super() is used when we 362 * Tries to acquire an active reference. grab_super() is used when we
359 * had just found a superblock in super_blocks or fs_type->fs_supers 363 * had just found a superblock in super_blocks or fs_type->fs_supers
360 * and want to turn it into a full-blown active reference. grab_super() 364 * and want to turn it into a full-blown active reference. grab_super()
361 * is called with sb_lock held and drops it. Returns 1 in case of 365 * is called with sb_lock held and drops it. Returns 1 in case of
362 * success, 0 if we had failed (superblock contents was already dead or 366 * success, 0 if we had failed (superblock contents was already dead or
363 * dying when grab_super() had been called). Note that this is only 367 * dying when grab_super() had been called). Note that this is only
364 * called for superblocks not in rundown mode (== ones still on ->fs_supers 368 * called for superblocks not in rundown mode (== ones still on ->fs_supers
365 * of their type), so increment of ->s_count is OK here. 369 * of their type), so increment of ->s_count is OK here.
366 */ 370 */
367 static int grab_super(struct super_block *s) __releases(sb_lock) 371 static int grab_super(struct super_block *s) __releases(sb_lock)
368 { 372 {
369 s->s_count++; 373 s->s_count++;
370 spin_unlock(&sb_lock); 374 spin_unlock(&sb_lock);
371 down_write(&s->s_umount); 375 down_write(&s->s_umount);
372 if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) { 376 if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) {
373 put_super(s); 377 put_super(s);
374 return 1; 378 return 1;
375 } 379 }
376 up_write(&s->s_umount); 380 up_write(&s->s_umount);
377 put_super(s); 381 put_super(s);
378 return 0; 382 return 0;
379 } 383 }
380 384
381 /* 385 /*
382 * grab_super_passive - acquire a passive reference 386 * grab_super_passive - acquire a passive reference
383 * @sb: reference we are trying to grab 387 * @sb: reference we are trying to grab
384 * 388 *
385 * Tries to acquire a passive reference. This is used in places where we 389 * Tries to acquire a passive reference. This is used in places where we
386 * cannot take an active reference but we need to ensure that the 390 * cannot take an active reference but we need to ensure that the
387 * superblock does not go away while we are working on it. It returns 391 * superblock does not go away while we are working on it. It returns
388 * false if a reference was not gained, and returns true with the s_umount 392 * false if a reference was not gained, and returns true with the s_umount
389 * lock held in read mode if a reference is gained. On successful return, 393 * lock held in read mode if a reference is gained. On successful return,
390 * the caller must drop the s_umount lock and the passive reference when 394 * the caller must drop the s_umount lock and the passive reference when
391 * done. 395 * done.
392 */ 396 */
393 bool grab_super_passive(struct super_block *sb) 397 bool grab_super_passive(struct super_block *sb)
394 { 398 {
395 spin_lock(&sb_lock); 399 spin_lock(&sb_lock);
396 if (hlist_unhashed(&sb->s_instances)) { 400 if (hlist_unhashed(&sb->s_instances)) {
397 spin_unlock(&sb_lock); 401 spin_unlock(&sb_lock);
398 return false; 402 return false;
399 } 403 }
400 404
401 sb->s_count++; 405 sb->s_count++;
402 spin_unlock(&sb_lock); 406 spin_unlock(&sb_lock);
403 407
404 if (down_read_trylock(&sb->s_umount)) { 408 if (down_read_trylock(&sb->s_umount)) {
405 if (sb->s_root && (sb->s_flags & MS_BORN)) 409 if (sb->s_root && (sb->s_flags & MS_BORN))
406 return true; 410 return true;
407 up_read(&sb->s_umount); 411 up_read(&sb->s_umount);
408 } 412 }
409 413
410 put_super(sb); 414 put_super(sb);
411 return false; 415 return false;
412 } 416 }
413 417
414 /** 418 /**
415 * generic_shutdown_super - common helper for ->kill_sb() 419 * generic_shutdown_super - common helper for ->kill_sb()
416 * @sb: superblock to kill 420 * @sb: superblock to kill
417 * 421 *
418 * generic_shutdown_super() does all fs-independent work on superblock 422 * generic_shutdown_super() does all fs-independent work on superblock
419 * shutdown. Typical ->kill_sb() should pick all fs-specific objects 423 * shutdown. Typical ->kill_sb() should pick all fs-specific objects
420 * that need destruction out of superblock, call generic_shutdown_super() 424 * that need destruction out of superblock, call generic_shutdown_super()
421 * and release aforementioned objects. Note: dentries and inodes _are_ 425 * and release aforementioned objects. Note: dentries and inodes _are_
422 * taken care of and do not need specific handling. 426 * taken care of and do not need specific handling.
423 * 427 *
424 * Upon calling this function, the filesystem may no longer alter or 428 * Upon calling this function, the filesystem may no longer alter or
425 * rearrange the set of dentries belonging to this super_block, nor may it 429 * rearrange the set of dentries belonging to this super_block, nor may it
426 * change the attachments of dentries to inodes. 430 * change the attachments of dentries to inodes.
427 */ 431 */
428 void generic_shutdown_super(struct super_block *sb) 432 void generic_shutdown_super(struct super_block *sb)
429 { 433 {
430 const struct super_operations *sop = sb->s_op; 434 const struct super_operations *sop = sb->s_op;
431 435
432 if (sb->s_root) { 436 if (sb->s_root) {
433 shrink_dcache_for_umount(sb); 437 shrink_dcache_for_umount(sb);
434 sync_filesystem(sb); 438 sync_filesystem(sb);
435 sb->s_flags &= ~MS_ACTIVE; 439 sb->s_flags &= ~MS_ACTIVE;
436 440
437 fsnotify_unmount_inodes(&sb->s_inodes); 441 fsnotify_unmount_inodes(&sb->s_inodes);
438 442
439 evict_inodes(sb); 443 evict_inodes(sb);
440 444
441 if (sb->s_dio_done_wq) { 445 if (sb->s_dio_done_wq) {
442 destroy_workqueue(sb->s_dio_done_wq); 446 destroy_workqueue(sb->s_dio_done_wq);
443 sb->s_dio_done_wq = NULL; 447 sb->s_dio_done_wq = NULL;
444 } 448 }
445 449
446 if (sop->put_super) 450 if (sop->put_super)
447 sop->put_super(sb); 451 sop->put_super(sb);
448 452
449 if (!list_empty(&sb->s_inodes)) { 453 if (!list_empty(&sb->s_inodes)) {
450 printk("VFS: Busy inodes after unmount of %s. " 454 printk("VFS: Busy inodes after unmount of %s. "
451 "Self-destruct in 5 seconds. Have a nice day...\n", 455 "Self-destruct in 5 seconds. Have a nice day...\n",
452 sb->s_id); 456 sb->s_id);
453 } 457 }
454 } 458 }
455 spin_lock(&sb_lock); 459 spin_lock(&sb_lock);
456 /* should be initialized for __put_super_and_need_restart() */ 460 /* should be initialized for __put_super_and_need_restart() */
457 hlist_del_init(&sb->s_instances); 461 hlist_del_init(&sb->s_instances);
458 spin_unlock(&sb_lock); 462 spin_unlock(&sb_lock);
459 up_write(&sb->s_umount); 463 up_write(&sb->s_umount);
460 } 464 }
461 465
462 EXPORT_SYMBOL(generic_shutdown_super); 466 EXPORT_SYMBOL(generic_shutdown_super);
463 467
464 /** 468 /**
465 * sget - find or create a superblock 469 * sget - find or create a superblock
466 * @type: filesystem type superblock should belong to 470 * @type: filesystem type superblock should belong to
467 * @test: comparison callback 471 * @test: comparison callback
468 * @set: setup callback 472 * @set: setup callback
469 * @flags: mount flags 473 * @flags: mount flags
470 * @data: argument to each of them 474 * @data: argument to each of them
471 */ 475 */
472 struct super_block *sget(struct file_system_type *type, 476 struct super_block *sget(struct file_system_type *type,
473 int (*test)(struct super_block *,void *), 477 int (*test)(struct super_block *,void *),
474 int (*set)(struct super_block *,void *), 478 int (*set)(struct super_block *,void *),
475 int flags, 479 int flags,
476 void *data) 480 void *data)
477 { 481 {
478 struct super_block *s = NULL; 482 struct super_block *s = NULL;
479 struct super_block *old; 483 struct super_block *old;
480 int err; 484 int err;
481 485
482 retry: 486 retry:
483 spin_lock(&sb_lock); 487 spin_lock(&sb_lock);
484 if (test) { 488 if (test) {
485 hlist_for_each_entry(old, &type->fs_supers, s_instances) { 489 hlist_for_each_entry(old, &type->fs_supers, s_instances) {
486 if (!test(old, data)) 490 if (!test(old, data))
487 continue; 491 continue;
488 if (!grab_super(old)) 492 if (!grab_super(old))
489 goto retry; 493 goto retry;
490 if (s) { 494 if (s) {
491 up_write(&s->s_umount); 495 up_write(&s->s_umount);
492 destroy_super(s); 496 destroy_super(s);
493 s = NULL; 497 s = NULL;
494 } 498 }
495 return old; 499 return old;
496 } 500 }
497 } 501 }
498 if (!s) { 502 if (!s) {
499 spin_unlock(&sb_lock); 503 spin_unlock(&sb_lock);
500 s = alloc_super(type, flags); 504 s = alloc_super(type, flags);
501 if (!s) 505 if (!s)
502 return ERR_PTR(-ENOMEM); 506 return ERR_PTR(-ENOMEM);
503 goto retry; 507 goto retry;
504 } 508 }
505 509
506 err = set(s, data); 510 err = set(s, data);
507 if (err) { 511 if (err) {
508 spin_unlock(&sb_lock); 512 spin_unlock(&sb_lock);
509 up_write(&s->s_umount); 513 up_write(&s->s_umount);
510 destroy_super(s); 514 destroy_super(s);
511 return ERR_PTR(err); 515 return ERR_PTR(err);
512 } 516 }
513 s->s_type = type; 517 s->s_type = type;
514 strlcpy(s->s_id, type->name, sizeof(s->s_id)); 518 strlcpy(s->s_id, type->name, sizeof(s->s_id));
515 list_add_tail(&s->s_list, &super_blocks); 519 list_add_tail(&s->s_list, &super_blocks);
516 hlist_add_head(&s->s_instances, &type->fs_supers); 520 hlist_add_head(&s->s_instances, &type->fs_supers);
517 spin_unlock(&sb_lock); 521 spin_unlock(&sb_lock);
518 get_filesystem(type); 522 get_filesystem(type);
519 register_shrinker(&s->s_shrink); 523 register_shrinker(&s->s_shrink);
520 return s; 524 return s;
521 } 525 }
522 526
523 EXPORT_SYMBOL(sget); 527 EXPORT_SYMBOL(sget);
524 528
525 void drop_super(struct super_block *sb) 529 void drop_super(struct super_block *sb)
526 { 530 {
527 up_read(&sb->s_umount); 531 up_read(&sb->s_umount);
528 put_super(sb); 532 put_super(sb);
529 } 533 }
530 534
531 EXPORT_SYMBOL(drop_super); 535 EXPORT_SYMBOL(drop_super);
532 536
533 /** 537 /**
534 * iterate_supers - call function for all active superblocks 538 * iterate_supers - call function for all active superblocks
535 * @f: function to call 539 * @f: function to call
536 * @arg: argument to pass to it 540 * @arg: argument to pass to it
537 * 541 *
538 * Scans the superblock list and calls given function, passing it 542 * Scans the superblock list and calls given function, passing it
539 * locked superblock and given argument. 543 * locked superblock and given argument.
540 */ 544 */
541 void iterate_supers(void (*f)(struct super_block *, void *), void *arg) 545 void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
542 { 546 {
543 struct super_block *sb, *p = NULL; 547 struct super_block *sb, *p = NULL;
544 548
545 spin_lock(&sb_lock); 549 spin_lock(&sb_lock);
546 list_for_each_entry(sb, &super_blocks, s_list) { 550 list_for_each_entry(sb, &super_blocks, s_list) {
547 if (hlist_unhashed(&sb->s_instances)) 551 if (hlist_unhashed(&sb->s_instances))
548 continue; 552 continue;
549 sb->s_count++; 553 sb->s_count++;
550 spin_unlock(&sb_lock); 554 spin_unlock(&sb_lock);
551 555
552 down_read(&sb->s_umount); 556 down_read(&sb->s_umount);
553 if (sb->s_root && (sb->s_flags & MS_BORN)) 557 if (sb->s_root && (sb->s_flags & MS_BORN))
554 f(sb, arg); 558 f(sb, arg);
555 up_read(&sb->s_umount); 559 up_read(&sb->s_umount);
556 560
557 spin_lock(&sb_lock); 561 spin_lock(&sb_lock);
558 if (p) 562 if (p)
559 __put_super(p); 563 __put_super(p);
560 p = sb; 564 p = sb;
561 } 565 }
562 if (p) 566 if (p)
563 __put_super(p); 567 __put_super(p);
564 spin_unlock(&sb_lock); 568 spin_unlock(&sb_lock);
565 } 569 }
566 570
567 /** 571 /**
568 * iterate_supers_type - call function for superblocks of given type 572 * iterate_supers_type - call function for superblocks of given type
569 * @type: fs type 573 * @type: fs type
570 * @f: function to call 574 * @f: function to call
571 * @arg: argument to pass to it 575 * @arg: argument to pass to it
572 * 576 *
573 * Scans the superblock list and calls given function, passing it 577 * Scans the superblock list and calls given function, passing it
574 * locked superblock and given argument. 578 * locked superblock and given argument.
575 */ 579 */
576 void iterate_supers_type(struct file_system_type *type, 580 void iterate_supers_type(struct file_system_type *type,
577 void (*f)(struct super_block *, void *), void *arg) 581 void (*f)(struct super_block *, void *), void *arg)
578 { 582 {
579 struct super_block *sb, *p = NULL; 583 struct super_block *sb, *p = NULL;
580 584
581 spin_lock(&sb_lock); 585 spin_lock(&sb_lock);
582 hlist_for_each_entry(sb, &type->fs_supers, s_instances) { 586 hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
583 sb->s_count++; 587 sb->s_count++;
584 spin_unlock(&sb_lock); 588 spin_unlock(&sb_lock);
585 589
586 down_read(&sb->s_umount); 590 down_read(&sb->s_umount);
587 if (sb->s_root && (sb->s_flags & MS_BORN)) 591 if (sb->s_root && (sb->s_flags & MS_BORN))
588 f(sb, arg); 592 f(sb, arg);
589 up_read(&sb->s_umount); 593 up_read(&sb->s_umount);
590 594
591 spin_lock(&sb_lock); 595 spin_lock(&sb_lock);
592 if (p) 596 if (p)
593 __put_super(p); 597 __put_super(p);
594 p = sb; 598 p = sb;
595 } 599 }
596 if (p) 600 if (p)
597 __put_super(p); 601 __put_super(p);
598 spin_unlock(&sb_lock); 602 spin_unlock(&sb_lock);
599 } 603 }
600 604
601 EXPORT_SYMBOL(iterate_supers_type); 605 EXPORT_SYMBOL(iterate_supers_type);
602 606
603 /** 607 /**
604 * get_super - get the superblock of a device 608 * get_super - get the superblock of a device
605 * @bdev: device to get the superblock for 609 * @bdev: device to get the superblock for
606 * 610 *
607 * Scans the superblock list and finds the superblock of the file system 611 * Scans the superblock list and finds the superblock of the file system
608 * mounted on the device given. %NULL is returned if no match is found. 612 * mounted on the device given. %NULL is returned if no match is found.
609 */ 613 */
610 614
611 struct super_block *get_super(struct block_device *bdev) 615 struct super_block *get_super(struct block_device *bdev)
612 { 616 {
613 struct super_block *sb; 617 struct super_block *sb;
614 618
615 if (!bdev) 619 if (!bdev)
616 return NULL; 620 return NULL;
617 621
618 spin_lock(&sb_lock); 622 spin_lock(&sb_lock);
619 rescan: 623 rescan:
620 list_for_each_entry(sb, &super_blocks, s_list) { 624 list_for_each_entry(sb, &super_blocks, s_list) {
621 if (hlist_unhashed(&sb->s_instances)) 625 if (hlist_unhashed(&sb->s_instances))
622 continue; 626 continue;
623 if (sb->s_bdev == bdev) { 627 if (sb->s_bdev == bdev) {
624 sb->s_count++; 628 sb->s_count++;
625 spin_unlock(&sb_lock); 629 spin_unlock(&sb_lock);
626 down_read(&sb->s_umount); 630 down_read(&sb->s_umount);
627 /* still alive? */ 631 /* still alive? */
628 if (sb->s_root && (sb->s_flags & MS_BORN)) 632 if (sb->s_root && (sb->s_flags & MS_BORN))
629 return sb; 633 return sb;
630 up_read(&sb->s_umount); 634 up_read(&sb->s_umount);
631 /* nope, got unmounted */ 635 /* nope, got unmounted */
632 spin_lock(&sb_lock); 636 spin_lock(&sb_lock);
633 __put_super(sb); 637 __put_super(sb);
634 goto rescan; 638 goto rescan;
635 } 639 }
636 } 640 }
637 spin_unlock(&sb_lock); 641 spin_unlock(&sb_lock);
638 return NULL; 642 return NULL;
639 } 643 }
640 644
641 EXPORT_SYMBOL(get_super); 645 EXPORT_SYMBOL(get_super);
642 646
643 /** 647 /**
644 * get_super_thawed - get thawed superblock of a device 648 * get_super_thawed - get thawed superblock of a device
645 * @bdev: device to get the superblock for 649 * @bdev: device to get the superblock for
646 * 650 *
647 * Scans the superblock list and finds the superblock of the file system 651 * Scans the superblock list and finds the superblock of the file system
648 * mounted on the device. The superblock is returned once it is thawed 652 * mounted on the device. The superblock is returned once it is thawed
649 * (or immediately if it was not frozen). %NULL is returned if no match 653 * (or immediately if it was not frozen). %NULL is returned if no match
650 * is found. 654 * is found.
651 */ 655 */
652 struct super_block *get_super_thawed(struct block_device *bdev) 656 struct super_block *get_super_thawed(struct block_device *bdev)
653 { 657 {
654 while (1) { 658 while (1) {
655 struct super_block *s = get_super(bdev); 659 struct super_block *s = get_super(bdev);
656 if (!s || s->s_writers.frozen == SB_UNFROZEN) 660 if (!s || s->s_writers.frozen == SB_UNFROZEN)
657 return s; 661 return s;
658 up_read(&s->s_umount); 662 up_read(&s->s_umount);
659 wait_event(s->s_writers.wait_unfrozen, 663 wait_event(s->s_writers.wait_unfrozen,
660 s->s_writers.frozen == SB_UNFROZEN); 664 s->s_writers.frozen == SB_UNFROZEN);
661 put_super(s); 665 put_super(s);
662 } 666 }
663 } 667 }
664 EXPORT_SYMBOL(get_super_thawed); 668 EXPORT_SYMBOL(get_super_thawed);
665 669
666 /** 670 /**
667 * get_active_super - get an active reference to the superblock of a device 671 * get_active_super - get an active reference to the superblock of a device
668 * @bdev: device to get the superblock for 672 * @bdev: device to get the superblock for
669 * 673 *
670 * Scans the superblock list and finds the superblock of the file system 674 * Scans the superblock list and finds the superblock of the file system
671 * mounted on the device given. Returns the superblock with an active 675 * mounted on the device given. Returns the superblock with an active
672 * reference or %NULL if none was found. 676 * reference or %NULL if none was found.
673 */ 677 */
674 struct super_block *get_active_super(struct block_device *bdev) 678 struct super_block *get_active_super(struct block_device *bdev)
675 { 679 {
676 struct super_block *sb; 680 struct super_block *sb;
677 681
678 if (!bdev) 682 if (!bdev)
679 return NULL; 683 return NULL;
680 684
681 restart: 685 restart:
682 spin_lock(&sb_lock); 686 spin_lock(&sb_lock);
683 list_for_each_entry(sb, &super_blocks, s_list) { 687 list_for_each_entry(sb, &super_blocks, s_list) {
684 if (hlist_unhashed(&sb->s_instances)) 688 if (hlist_unhashed(&sb->s_instances))
685 continue; 689 continue;
686 if (sb->s_bdev == bdev) { 690 if (sb->s_bdev == bdev) {
687 if (!grab_super(sb)) 691 if (!grab_super(sb))
688 goto restart; 692 goto restart;
689 up_write(&sb->s_umount); 693 up_write(&sb->s_umount);
690 return sb; 694 return sb;
691 } 695 }
692 } 696 }
693 spin_unlock(&sb_lock); 697 spin_unlock(&sb_lock);
694 return NULL; 698 return NULL;
695 } 699 }
696 700
697 struct super_block *user_get_super(dev_t dev) 701 struct super_block *user_get_super(dev_t dev)
698 { 702 {
699 struct super_block *sb; 703 struct super_block *sb;
700 704
701 spin_lock(&sb_lock); 705 spin_lock(&sb_lock);
702 rescan: 706 rescan:
703 list_for_each_entry(sb, &super_blocks, s_list) { 707 list_for_each_entry(sb, &super_blocks, s_list) {
704 if (hlist_unhashed(&sb->s_instances)) 708 if (hlist_unhashed(&sb->s_instances))
705 continue; 709 continue;
706 if (sb->s_dev == dev) { 710 if (sb->s_dev == dev) {
707 sb->s_count++; 711 sb->s_count++;
708 spin_unlock(&sb_lock); 712 spin_unlock(&sb_lock);
709 down_read(&sb->s_umount); 713 down_read(&sb->s_umount);
710 /* still alive? */ 714 /* still alive? */
711 if (sb->s_root && (sb->s_flags & MS_BORN)) 715 if (sb->s_root && (sb->s_flags & MS_BORN))
712 return sb; 716 return sb;
713 up_read(&sb->s_umount); 717 up_read(&sb->s_umount);
714 /* nope, got unmounted */ 718 /* nope, got unmounted */
715 spin_lock(&sb_lock); 719 spin_lock(&sb_lock);
716 __put_super(sb); 720 __put_super(sb);
717 goto rescan; 721 goto rescan;
718 } 722 }
719 } 723 }
720 spin_unlock(&sb_lock); 724 spin_unlock(&sb_lock);
721 return NULL; 725 return NULL;
722 } 726 }
723 727
724 /** 728 /**
725 * do_remount_sb - asks filesystem to change mount options. 729 * do_remount_sb - asks filesystem to change mount options.
726 * @sb: superblock in question 730 * @sb: superblock in question
727 * @flags: numeric part of options 731 * @flags: numeric part of options
728 * @data: the rest of options 732 * @data: the rest of options
729 * @force: whether or not to force the change 733 * @force: whether or not to force the change
730 * 734 *
731 * Alters the mount options of a mounted file system. 735 * Alters the mount options of a mounted file system.
732 */ 736 */
733 int do_remount_sb(struct super_block *sb, int flags, void *data, int force) 737 int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
734 { 738 {
735 int retval; 739 int retval;
736 int remount_ro; 740 int remount_ro;
737 741
738 if (sb->s_writers.frozen != SB_UNFROZEN) 742 if (sb->s_writers.frozen != SB_UNFROZEN)
739 return -EBUSY; 743 return -EBUSY;
740 744
741 #ifdef CONFIG_BLOCK 745 #ifdef CONFIG_BLOCK
742 if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev)) 746 if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
743 return -EACCES; 747 return -EACCES;
744 #endif 748 #endif
745 749
746 if (flags & MS_RDONLY) 750 if (flags & MS_RDONLY)
747 acct_auto_close(sb); 751 acct_auto_close(sb);
748 shrink_dcache_sb(sb); 752 shrink_dcache_sb(sb);
749 sync_filesystem(sb); 753 sync_filesystem(sb);
750 754
751 remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); 755 remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);
752 756
753 /* If we are remounting RDONLY and current sb is read/write, 757 /* If we are remounting RDONLY and current sb is read/write,
754 make sure there are no rw files opened */ 758 make sure there are no rw files opened */
755 if (remount_ro) { 759 if (remount_ro) {
756 if (force) { 760 if (force) {
757 mark_files_ro(sb); 761 mark_files_ro(sb);
758 } else { 762 } else {
759 retval = sb_prepare_remount_readonly(sb); 763 retval = sb_prepare_remount_readonly(sb);
760 if (retval) 764 if (retval)
761 return retval; 765 return retval;
762 } 766 }
763 } 767 }
764 768
765 if (sb->s_op->remount_fs) { 769 if (sb->s_op->remount_fs) {
766 retval = sb->s_op->remount_fs(sb, &flags, data); 770 retval = sb->s_op->remount_fs(sb, &flags, data);
767 if (retval) { 771 if (retval) {
768 if (!force) 772 if (!force)
769 goto cancel_readonly; 773 goto cancel_readonly;
770 /* If forced remount, go ahead despite any errors */ 774 /* If forced remount, go ahead despite any errors */
771 WARN(1, "forced remount of a %s fs returned %i\n", 775 WARN(1, "forced remount of a %s fs returned %i\n",
772 sb->s_type->name, retval); 776 sb->s_type->name, retval);
773 } 777 }
774 } 778 }
775 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); 779 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
776 /* Needs to be ordered wrt mnt_is_readonly() */ 780 /* Needs to be ordered wrt mnt_is_readonly() */
777 smp_wmb(); 781 smp_wmb();
778 sb->s_readonly_remount = 0; 782 sb->s_readonly_remount = 0;
779 783
780 /* 784 /*
781 * Some filesystems modify their metadata via some other path than the 785 * Some filesystems modify their metadata via some other path than the
782 * bdev buffer cache (eg. use a private mapping, or directories in 786 * bdev buffer cache (eg. use a private mapping, or directories in
783 * pagecache, etc). Also file data modifications go via their own 787 * pagecache, etc). Also file data modifications go via their own
784 * mappings. So If we try to mount readonly then copy the filesystem 788 * mappings. So If we try to mount readonly then copy the filesystem
785 * from bdev, we could get stale data, so invalidate it to give a best 789 * from bdev, we could get stale data, so invalidate it to give a best
786 * effort at coherency. 790 * effort at coherency.
787 */ 791 */
788 if (remount_ro && sb->s_bdev) 792 if (remount_ro && sb->s_bdev)
789 invalidate_bdev(sb->s_bdev); 793 invalidate_bdev(sb->s_bdev);
790 return 0; 794 return 0;
791 795
792 cancel_readonly: 796 cancel_readonly:
793 sb->s_readonly_remount = 0; 797 sb->s_readonly_remount = 0;
794 return retval; 798 return retval;
795 } 799 }
796 800
797 static void do_emergency_remount(struct work_struct *work) 801 static void do_emergency_remount(struct work_struct *work)
798 { 802 {
799 struct super_block *sb, *p = NULL; 803 struct super_block *sb, *p = NULL;
800 804
801 spin_lock(&sb_lock); 805 spin_lock(&sb_lock);
802 list_for_each_entry(sb, &super_blocks, s_list) { 806 list_for_each_entry(sb, &super_blocks, s_list) {
803 if (hlist_unhashed(&sb->s_instances)) 807 if (hlist_unhashed(&sb->s_instances))
804 continue; 808 continue;
805 sb->s_count++; 809 sb->s_count++;
806 spin_unlock(&sb_lock); 810 spin_unlock(&sb_lock);
807 down_write(&sb->s_umount); 811 down_write(&sb->s_umount);
808 if (sb->s_root && sb->s_bdev && (sb->s_flags & MS_BORN) && 812 if (sb->s_root && sb->s_bdev && (sb->s_flags & MS_BORN) &&
809 !(sb->s_flags & MS_RDONLY)) { 813 !(sb->s_flags & MS_RDONLY)) {
810 /* 814 /*
811 * What lock protects sb->s_flags?? 815 * What lock protects sb->s_flags??
812 */ 816 */
813 do_remount_sb(sb, MS_RDONLY, NULL, 1); 817 do_remount_sb(sb, MS_RDONLY, NULL, 1);
814 } 818 }
815 up_write(&sb->s_umount); 819 up_write(&sb->s_umount);
816 spin_lock(&sb_lock); 820 spin_lock(&sb_lock);
817 if (p) 821 if (p)
818 __put_super(p); 822 __put_super(p);
819 p = sb; 823 p = sb;
820 } 824 }
821 if (p) 825 if (p)
822 __put_super(p); 826 __put_super(p);
823 spin_unlock(&sb_lock); 827 spin_unlock(&sb_lock);
824 kfree(work); 828 kfree(work);
825 printk("Emergency Remount complete\n"); 829 printk("Emergency Remount complete\n");
826 } 830 }
827 831
828 void emergency_remount(void) 832 void emergency_remount(void)
829 { 833 {
830 struct work_struct *work; 834 struct work_struct *work;
831 835
832 work = kmalloc(sizeof(*work), GFP_ATOMIC); 836 work = kmalloc(sizeof(*work), GFP_ATOMIC);
833 if (work) { 837 if (work) {
834 INIT_WORK(work, do_emergency_remount); 838 INIT_WORK(work, do_emergency_remount);
835 schedule_work(work); 839 schedule_work(work);
836 } 840 }
837 } 841 }
838 842
839 /* 843 /*
840 * Unnamed block devices are dummy devices used by virtual 844 * Unnamed block devices are dummy devices used by virtual
841 * filesystems which don't use real block-devices. -- jrs 845 * filesystems which don't use real block-devices. -- jrs
842 */ 846 */
843 847
844 static DEFINE_IDA(unnamed_dev_ida); 848 static DEFINE_IDA(unnamed_dev_ida);
845 static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ 849 static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
846 /* Many userspace utilities consider an FSID of 0 invalid. 850 /* Many userspace utilities consider an FSID of 0 invalid.
847 * Always return at least 1 from get_anon_bdev. 851 * Always return at least 1 from get_anon_bdev.
848 */ 852 */
849 static int unnamed_dev_start = 1; 853 static int unnamed_dev_start = 1;
850 854
851 int get_anon_bdev(dev_t *p) 855 int get_anon_bdev(dev_t *p)
852 { 856 {
853 int dev; 857 int dev;
854 int error; 858 int error;
855 859
856 retry: 860 retry:
857 if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0) 861 if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0)
858 return -ENOMEM; 862 return -ENOMEM;
859 spin_lock(&unnamed_dev_lock); 863 spin_lock(&unnamed_dev_lock);
860 error = ida_get_new_above(&unnamed_dev_ida, unnamed_dev_start, &dev); 864 error = ida_get_new_above(&unnamed_dev_ida, unnamed_dev_start, &dev);
861 if (!error) 865 if (!error)
862 unnamed_dev_start = dev + 1; 866 unnamed_dev_start = dev + 1;
863 spin_unlock(&unnamed_dev_lock); 867 spin_unlock(&unnamed_dev_lock);
864 if (error == -EAGAIN) 868 if (error == -EAGAIN)
865 /* We raced and lost with another CPU. */ 869 /* We raced and lost with another CPU. */
866 goto retry; 870 goto retry;
867 else if (error) 871 else if (error)
868 return -EAGAIN; 872 return -EAGAIN;
869 873
870 if (dev == (1 << MINORBITS)) { 874 if (dev == (1 << MINORBITS)) {
871 spin_lock(&unnamed_dev_lock); 875 spin_lock(&unnamed_dev_lock);
872 ida_remove(&unnamed_dev_ida, dev); 876 ida_remove(&unnamed_dev_ida, dev);
873 if (unnamed_dev_start > dev) 877 if (unnamed_dev_start > dev)
874 unnamed_dev_start = dev; 878 unnamed_dev_start = dev;
875 spin_unlock(&unnamed_dev_lock); 879 spin_unlock(&unnamed_dev_lock);
876 return -EMFILE; 880 return -EMFILE;
877 } 881 }
878 *p = MKDEV(0, dev & MINORMASK); 882 *p = MKDEV(0, dev & MINORMASK);
879 return 0; 883 return 0;
880 } 884 }
881 EXPORT_SYMBOL(get_anon_bdev); 885 EXPORT_SYMBOL(get_anon_bdev);
882 886
883 void free_anon_bdev(dev_t dev) 887 void free_anon_bdev(dev_t dev)
884 { 888 {
885 int slot = MINOR(dev); 889 int slot = MINOR(dev);
886 spin_lock(&unnamed_dev_lock); 890 spin_lock(&unnamed_dev_lock);
887 ida_remove(&unnamed_dev_ida, slot); 891 ida_remove(&unnamed_dev_ida, slot);
888 if (slot < unnamed_dev_start) 892 if (slot < unnamed_dev_start)
889 unnamed_dev_start = slot; 893 unnamed_dev_start = slot;
890 spin_unlock(&unnamed_dev_lock); 894 spin_unlock(&unnamed_dev_lock);
891 } 895 }
892 EXPORT_SYMBOL(free_anon_bdev); 896 EXPORT_SYMBOL(free_anon_bdev);
893 897
894 int set_anon_super(struct super_block *s, void *data) 898 int set_anon_super(struct super_block *s, void *data)
895 { 899 {
896 int error = get_anon_bdev(&s->s_dev); 900 int error = get_anon_bdev(&s->s_dev);
897 if (!error) 901 if (!error)
898 s->s_bdi = &noop_backing_dev_info; 902 s->s_bdi = &noop_backing_dev_info;
899 return error; 903 return error;
900 } 904 }
901 905
902 EXPORT_SYMBOL(set_anon_super); 906 EXPORT_SYMBOL(set_anon_super);
903 907
904 void kill_anon_super(struct super_block *sb) 908 void kill_anon_super(struct super_block *sb)
905 { 909 {
906 dev_t dev = sb->s_dev; 910 dev_t dev = sb->s_dev;
907 generic_shutdown_super(sb); 911 generic_shutdown_super(sb);
908 free_anon_bdev(dev); 912 free_anon_bdev(dev);
909 } 913 }
910 914
911 EXPORT_SYMBOL(kill_anon_super); 915 EXPORT_SYMBOL(kill_anon_super);
912 916
913 void kill_litter_super(struct super_block *sb) 917 void kill_litter_super(struct super_block *sb)
914 { 918 {
915 if (sb->s_root) 919 if (sb->s_root)
916 d_genocide(sb->s_root); 920 d_genocide(sb->s_root);
917 kill_anon_super(sb); 921 kill_anon_super(sb);
918 } 922 }
919 923
920 EXPORT_SYMBOL(kill_litter_super); 924 EXPORT_SYMBOL(kill_litter_super);
921 925
922 static int ns_test_super(struct super_block *sb, void *data) 926 static int ns_test_super(struct super_block *sb, void *data)
923 { 927 {
924 return sb->s_fs_info == data; 928 return sb->s_fs_info == data;
925 } 929 }
926 930
927 static int ns_set_super(struct super_block *sb, void *data) 931 static int ns_set_super(struct super_block *sb, void *data)
928 { 932 {
929 sb->s_fs_info = data; 933 sb->s_fs_info = data;
930 return set_anon_super(sb, NULL); 934 return set_anon_super(sb, NULL);
931 } 935 }
932 936
933 struct dentry *mount_ns(struct file_system_type *fs_type, int flags, 937 struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
934 void *data, int (*fill_super)(struct super_block *, void *, int)) 938 void *data, int (*fill_super)(struct super_block *, void *, int))
935 { 939 {
936 struct super_block *sb; 940 struct super_block *sb;
937 941
938 sb = sget(fs_type, ns_test_super, ns_set_super, flags, data); 942 sb = sget(fs_type, ns_test_super, ns_set_super, flags, data);
939 if (IS_ERR(sb)) 943 if (IS_ERR(sb))
940 return ERR_CAST(sb); 944 return ERR_CAST(sb);
941 945
942 if (!sb->s_root) { 946 if (!sb->s_root) {
943 int err; 947 int err;
944 err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); 948 err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
945 if (err) { 949 if (err) {
946 deactivate_locked_super(sb); 950 deactivate_locked_super(sb);
947 return ERR_PTR(err); 951 return ERR_PTR(err);
948 } 952 }
949 953
950 sb->s_flags |= MS_ACTIVE; 954 sb->s_flags |= MS_ACTIVE;
951 } 955 }
952 956
953 return dget(sb->s_root); 957 return dget(sb->s_root);
954 } 958 }
955 959
956 EXPORT_SYMBOL(mount_ns); 960 EXPORT_SYMBOL(mount_ns);

#ifdef CONFIG_BLOCK
static int set_bdev_super(struct super_block *s, void *data)
{
        s->s_bdev = data;
        s->s_dev = s->s_bdev->bd_dev;

        /*
         * We set the bdi here to the queue backing, file systems can
         * overwrite this in ->fill_super()
         */
        s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info;
        return 0;
}

static int test_bdev_super(struct super_block *s, void *data)
{
        return (void *)s->s_bdev == data;
}

struct dentry *mount_bdev(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data,
        int (*fill_super)(struct super_block *, void *, int))
{
        struct block_device *bdev;
        struct super_block *s;
        fmode_t mode = FMODE_READ | FMODE_EXCL;
        int error = 0;

        if (!(flags & MS_RDONLY))
                mode |= FMODE_WRITE;

        bdev = blkdev_get_by_path(dev_name, mode, fs_type);
        if (IS_ERR(bdev))
                return ERR_CAST(bdev);

        /*
         * once the super is inserted into the list by sget, s_umount
         * will protect the lockfs code from trying to start a snapshot
         * while we are mounting
         */
        mutex_lock(&bdev->bd_fsfreeze_mutex);
        if (bdev->bd_fsfreeze_count > 0) {
                mutex_unlock(&bdev->bd_fsfreeze_mutex);
                error = -EBUSY;
                goto error_bdev;
        }
        s = sget(fs_type, test_bdev_super, set_bdev_super, flags | MS_NOSEC,
                 bdev);
        mutex_unlock(&bdev->bd_fsfreeze_mutex);
        if (IS_ERR(s))
                goto error_s;

        if (s->s_root) {
                if ((flags ^ s->s_flags) & MS_RDONLY) {
                        deactivate_locked_super(s);
                        error = -EBUSY;
                        goto error_bdev;
                }

                /*
                 * s_umount nests inside bd_mutex during
                 * __invalidate_device().  blkdev_put() acquires
                 * bd_mutex and can't be called under s_umount.  Drop
                 * s_umount temporarily.  This is safe as we're
                 * holding an active reference.
                 */
                up_write(&s->s_umount);
                blkdev_put(bdev, mode);
                down_write(&s->s_umount);
        } else {
                char b[BDEVNAME_SIZE];

                s->s_mode = mode;
                strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
                sb_set_blocksize(s, block_size(bdev));
                error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
                if (error) {
                        deactivate_locked_super(s);
                        goto error;
                }

                s->s_flags |= MS_ACTIVE;
                bdev->bd_super = s;
        }

        return dget(s->s_root);

error_s:
        error = PTR_ERR(s);
error_bdev:
        blkdev_put(bdev, mode);
error:
        return ERR_PTR(error);
}
EXPORT_SYMBOL(mount_bdev);

void kill_block_super(struct super_block *sb)
{
        struct block_device *bdev = sb->s_bdev;
        fmode_t mode = sb->s_mode;

        bdev->bd_super = NULL;
        generic_shutdown_super(sb);
        sync_blockdev(bdev);
        WARN_ON_ONCE(!(mode & FMODE_EXCL));
        blkdev_put(bdev, mode | FMODE_EXCL);
}

EXPORT_SYMBOL(kill_block_super);
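Note: mount_bdev() and kill_block_super() form the usual .mount/.kill_sb pair for an on-disk filesystem; the FMODE_EXCL taken at blkdev_get_by_path() is what the WARN_ON_ONCE() above checks on the way out. A sketch of the wiring, roughly as ext2 does it (the examplefs_* names are placeholders):

static struct dentry *examplefs_mount(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data)
{
        return mount_bdev(fs_type, flags, dev_name, data, examplefs_fill_super);
}

static struct file_system_type examplefs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "examplefs",
        .mount          = examplefs_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("examplefs");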
#endif

struct dentry *mount_nodev(struct file_system_type *fs_type,
        int flags, void *data,
        int (*fill_super)(struct super_block *, void *, int))
{
        int error;
        struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL);

        if (IS_ERR(s))
                return ERR_CAST(s);

        error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
        if (error) {
                deactivate_locked_super(s);
                return ERR_PTR(error);
        }
        s->s_flags |= MS_ACTIVE;
        return dget(s->s_root);
}
EXPORT_SYMBOL(mount_nodev);
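Note: with a NULL test function, sget() never matches an existing superblock, so mount_nodev() gives every mount its own independent instance. ramfs is the classic user, pairing it with kill_litter_super() from the top of this hunk (wrapped to free its private data first) so the pinned dentry tree is torn down at umount. A sketch under those assumptions:

static struct dentry *examplefs_mount(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data)
{
        /* fresh superblock on every mount; nothing is shared */
        return mount_nodev(fs_type, flags, data, examplefs_fill_super);
}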

static int compare_single(struct super_block *s, void *p)
{
        return 1;
}

struct dentry *mount_single(struct file_system_type *fs_type,
        int flags, void *data,
        int (*fill_super)(struct super_block *, void *, int))
{
        struct super_block *s;
        int error;

        s = sget(fs_type, compare_single, set_anon_super, flags, NULL);
        if (IS_ERR(s))
                return ERR_CAST(s);
        if (!s->s_root) {
                error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
                if (error) {
                        deactivate_locked_super(s);
                        return ERR_PTR(error);
                }
                s->s_flags |= MS_ACTIVE;
        } else {
                do_remount_sb(s, flags, data, 0);
        }
        return dget(s->s_root);
}
EXPORT_SYMBOL(mount_single);
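Note: compare_single() returning 1 unconditionally means mount_single() hands every caller the same superblock — the first mount fills it, later mounts only do_remount_sb() with the new flags and data. debugfs and securityfs mount this way; the caller side is the same one-liner shape (hypothetical names again):

static struct dentry *examplefs_mount(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data)
{
        /* all mounts share one superblock; repeat mounts just remount it */
        return mount_single(fs_type, flags, data, examplefs_fill_super);
}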

struct dentry *
mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
{
        struct dentry *root;
        struct super_block *sb;
        char *secdata = NULL;
        int error = -ENOMEM;

        if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
                secdata = alloc_secdata();
                if (!secdata)
                        goto out;

                error = security_sb_copy_data(data, secdata);
                if (error)
                        goto out_free_secdata;
        }

        root = type->mount(type, flags, name, data);
        if (IS_ERR(root)) {
                error = PTR_ERR(root);
                goto out_free_secdata;
        }
        sb = root->d_sb;
        BUG_ON(!sb);
        WARN_ON(!sb->s_bdi);
        WARN_ON(sb->s_bdi == &default_backing_dev_info);
        sb->s_flags |= MS_BORN;

        error = security_sb_kern_mount(sb, flags, secdata);
        if (error)
                goto out_sb;

        /*
         * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
         * but s_maxbytes was an unsigned long long for many releases. Throw
         * this warning for a little while to try and catch filesystems that
         * violate this rule.
         */
        WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
                "negative value (%lld)\n", type->name, sb->s_maxbytes);

        up_write(&sb->s_umount);
        free_secdata(secdata);
        return root;
out_sb:
        dput(root);
        deactivate_locked_super(sb);
out_free_secdata:
        free_secdata(secdata);
out:
        return ERR_PTR(error);
}
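Note: the WARN() above polices one contract a fill_super callback must honor — s_maxbytes must stay non-negative (at most MAX_LFS_FILESIZE). A minimal in-memory fill_super sketch for the hypothetical examplefs used above (inode ops and error paths trimmed; not a complete filesystem):

static const struct super_operations examplefs_sops = {
        .statfs         = simple_statfs,
        .drop_inode     = generic_delete_inode,
};

static int examplefs_fill_super(struct super_block *sb, void *data, int silent)
{
        struct inode *root;

        sb->s_maxbytes          = MAX_LFS_FILESIZE;     /* never negative */
        sb->s_blocksize         = PAGE_CACHE_SIZE;
        sb->s_blocksize_bits    = PAGE_CACHE_SHIFT;
        sb->s_magic             = 0x6578616d;           /* placeholder magic */
        sb->s_op                = &examplefs_sops;

        root = new_inode(sb);
        if (!root)
                return -ENOMEM;
        root->i_ino = 1;
        root->i_mode = S_IFDIR | 0755;
        root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;

        sb->s_root = d_make_root(root); /* consumes root even on failure */
        if (!sb->s_root)
                return -ENOMEM;
        return 0;
}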

/*
 * This is an internal function, please use sb_end_{write,pagefault,intwrite}
 * instead.
 */
void __sb_end_write(struct super_block *sb, int level)
{
        percpu_counter_dec(&sb->s_writers.counter[level-1]);
        /*
         * Make sure s_writers are updated before we wake up waiters in
         * freeze_super().
         */
        smp_mb();
        if (waitqueue_active(&sb->s_writers.wait))
                wake_up(&sb->s_writers.wait);
        rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
}
EXPORT_SYMBOL(__sb_end_write);

#ifdef CONFIG_LOCKDEP
/*
 * We want lockdep to tell us about possible deadlocks with freezing but
 * it's a bit tricky to properly instrument it. Getting freeze protection
 * works like taking a read lock but there are subtle problems. XFS for
 * example gets freeze protection on internal level twice in some cases,
 * which is OK only because we already hold a freeze protection also on a
 * higher level. Due to these cases we have to tell lockdep we are doing
 * trylock when we already hold a freeze protection for a higher freeze
 * level.
 */
static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
                                unsigned long ip)
{
        int i;

        if (!trylock) {
                for (i = 0; i < level - 1; i++)
                        if (lock_is_held(&sb->s_writers.lock_map[i])) {
                                trylock = true;
                                break;
                        }
        }
        rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
}
#endif

/*
 * This is an internal function, please use sb_start_{write,pagefault,intwrite}
 * instead.
 */
int __sb_start_write(struct super_block *sb, int level, bool wait)
{
retry:
        if (unlikely(sb->s_writers.frozen >= level)) {
                if (!wait)
                        return 0;
                wait_event(sb->s_writers.wait_unfrozen,
                           sb->s_writers.frozen < level);
        }

#ifdef CONFIG_LOCKDEP
        acquire_freeze_lock(sb, level, !wait, _RET_IP_);
#endif
        percpu_counter_inc(&sb->s_writers.counter[level-1]);
        /*
         * Make sure counter is updated before we check for frozen.
         * freeze_super() first sets frozen and then checks the counter.
         */
        smp_mb();
        if (unlikely(sb->s_writers.frozen >= level)) {
                __sb_end_write(sb, level);
                goto retry;
        }
        return 1;
}
EXPORT_SYMBOL(__sb_start_write);
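Note: nothing outside the VFS calls these two directly; the sb_start_write()/sb_end_write() (and _pagefault/_intwrite) wrappers in include/linux/fs.h supply the freeze level. The usual pairing around a modification looks like this (do_the_update() is a hypothetical helper):

static int examplefs_modify(struct inode *inode)
{
        int err;

        sb_start_write(inode->i_sb);    /* blocks while frozen >= SB_FREEZE_WRITE */
        err = do_the_update(inode);
        sb_end_write(inode->i_sb);      /* may wake a waiting freeze_super() */
        return err;
}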

/**
 * sb_wait_write - wait until all writers to given file system finish
 * @sb: the super for which we wait
 * @level: type of writers we wait for (normal vs page fault)
 *
 * This function waits until there are no writers of given type to given file
 * system. Caller of this function should make sure there can be no new writers
 * of type @level before calling this function. Otherwise this function can
 * livelock.
 */
static void sb_wait_write(struct super_block *sb, int level)
{
        s64 writers;

        /*
         * We just cycle through lockdep here so that it does not complain
         * about returning to userspace with a lock held
         */
        rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_);
        rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_);

        do {
                DEFINE_WAIT(wait);

                /*
                 * We use a barrier in prepare_to_wait() to separate setting
                 * of frozen and checking of the counter
                 */
                prepare_to_wait(&sb->s_writers.wait, &wait,
                                TASK_UNINTERRUPTIBLE);

                writers = percpu_counter_sum(&sb->s_writers.counter[level-1]);
                if (writers)
                        schedule();

                finish_wait(&sb->s_writers.wait, &wait);
        } while (writers);
}

/**
 * freeze_super - lock the filesystem and force it into a consistent state
 * @sb: the super to lock
 *
 * Syncs the super to make sure the filesystem is consistent and calls the fs's
 * freeze_fs. Subsequent calls to this without first thawing the fs will return
 * -EBUSY.
 *
 * During this function, sb->s_writers.frozen goes through these values:
 *
 * SB_UNFROZEN: File system is normal, all writes progress as usual.
 *
 * SB_FREEZE_WRITE: The file system is in the process of being frozen.  New
 * writes should be blocked, though page faults are still allowed. We wait for
 * all writes to complete and then proceed to the next stage.
 *
 * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked
 * but internal fs threads can still modify the filesystem (although they
 * should not dirty new pages or inodes), writeback can run etc. After waiting
 * for all running page faults we sync the filesystem which will clean all
 * dirty pages and inodes (no new dirty pages or inodes can be created when
 * sync is running).
 *
 * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs
 * modification are blocked (e.g. XFS preallocation truncation on inode
 * reclaim). This is usually implemented by blocking new transactions for
 * filesystems that have them and need this additional guard. After all
 * internal writers are finished we call ->freeze_fs() to finish filesystem
 * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is
 * mostly auxiliary for filesystems to verify they do not modify frozen fs.
 *
 * sb->s_writers.frozen is protected by sb->s_umount.
 */
int freeze_super(struct super_block *sb)
{
        int ret;

        atomic_inc(&sb->s_active);
        down_write(&sb->s_umount);
        if (sb->s_writers.frozen != SB_UNFROZEN) {
                deactivate_locked_super(sb);
                return -EBUSY;
        }

        if (!(sb->s_flags & MS_BORN)) {
                up_write(&sb->s_umount);
                return 0;       /* sic - it's "nothing to do" */
        }

        if (sb->s_flags & MS_RDONLY) {
                /* Nothing to do really... */
                sb->s_writers.frozen = SB_FREEZE_COMPLETE;
                up_write(&sb->s_umount);
                return 0;
        }

        /* From now on, no new normal writers can start */
        sb->s_writers.frozen = SB_FREEZE_WRITE;
        smp_wmb();

        /* Release s_umount to preserve sb_start_write -> s_umount ordering */
        up_write(&sb->s_umount);

        sb_wait_write(sb, SB_FREEZE_WRITE);

        /* Now we go and block page faults... */
        down_write(&sb->s_umount);
        sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
        smp_wmb();

        sb_wait_write(sb, SB_FREEZE_PAGEFAULT);

        /* All writers are done so after syncing there won't be dirty data */
        sync_filesystem(sb);

        /* Now wait for internal filesystem counter */
        sb->s_writers.frozen = SB_FREEZE_FS;
        smp_wmb();
        sb_wait_write(sb, SB_FREEZE_FS);

        if (sb->s_op->freeze_fs) {
                ret = sb->s_op->freeze_fs(sb);
                if (ret) {
                        printk(KERN_ERR
                                "VFS:Filesystem freeze failed\n");
                        sb->s_writers.frozen = SB_UNFROZEN;
                        smp_wmb();
                        wake_up(&sb->s_writers.wait_unfrozen);
                        deactivate_locked_super(sb);
                        return ret;
                }
        }
        /*
         * This is just for debugging purposes so that fs can warn if it
         * sees write activity when frozen is set to SB_FREEZE_COMPLETE.
         */
        sb->s_writers.frozen = SB_FREEZE_COMPLETE;
        up_write(&sb->s_umount);
        return 0;
}
EXPORT_SYMBOL(freeze_super);
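Note: a filesystem hooks the final SB_FREEZE_FS stage through ->freeze_fs/->unfreeze_fs in its super_operations; journalled filesystems typically park the journal there. A sketch (examplefs_quiesce()/examplefs_resume() are hypothetical stand-ins for the fs-private work):

static int examplefs_freeze_fs(struct super_block *sb)
{
        /* flush and park private state; no further writes may be issued */
        return examplefs_quiesce(sb);
}

static int examplefs_unfreeze_fs(struct super_block *sb)
{
        return examplefs_resume(sb);
}

static const struct super_operations examplefs_sops = {
        /* ... other ops ... */
        .freeze_fs      = examplefs_freeze_fs,
        .unfreeze_fs    = examplefs_unfreeze_fs,
};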

/**
 * thaw_super -- unlock filesystem
 * @sb: the super to thaw
 *
 * Unlocks the filesystem and marks it writeable again after freeze_super().
 */
int thaw_super(struct super_block *sb)
{
        int error;

        down_write(&sb->s_umount);
        if (sb->s_writers.frozen == SB_UNFROZEN) {
                up_write(&sb->s_umount);
                return -EINVAL;
        }

        if (sb->s_flags & MS_RDONLY)
                goto out;

        if (sb->s_op->unfreeze_fs) {
                error = sb->s_op->unfreeze_fs(sb);
                if (error) {
                        printk(KERN_ERR
                                "VFS:Filesystem thaw failed\n");
                        up_write(&sb->s_umount);
                        return error;
                }
        }

out:
        sb->s_writers.frozen = SB_UNFROZEN;
        smp_wmb();
        wake_up(&sb->s_writers.wait_unfrozen);
        deactivate_locked_super(sb);

        return 0;
}
EXPORT_SYMBOL(thaw_super);
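Note: userspace reaches this pair through the FIFREEZE/FITHAW ioctls, which fs/ioctl.c routes to freeze_super() and thaw_super() (CAP_SYS_ADMIN required); fsfreeze(8) is the usual front end. A minimal illustration against an assumed mount point /mnt/example:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(void)
{
        int fd = open("/mnt/example", O_RDONLY);        /* any object on the fs */

        if (fd < 0 || ioctl(fd, FIFREEZE, 0) != 0)      /* -> freeze_super() */
                perror("freeze");
        else if (ioctl(fd, FITHAW, 0) != 0)             /* -> thaw_super() */
                perror("thaw");
        if (fd >= 0)
                close(fd);
        return 0;
}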