Commit 2f11e3a821aff1a50c8ebc2fadbeb22c1ea1adbd
Committed by: Jiri Slaby
1 parent: 94109cd2a5
Exists in: ti-linux-3.12.y and in 2 other branches
fs/superblock: unregister sb shrinker before ->kill_sb()
commit 28f2cd4f6da24a1aa06c226618ed5ad69e13df64 upstream.

This series is aimed at regressions noticed during reclaim activity. The
first two patches are shrinker patches that were posted ages ago but never
merged for reasons that are unclear to me. I'm posting them again to see
if there was a reason they were dropped or if they just got lost. Dave?
Tim? The last patch adjusts proportional reclaim. Yuanhan Liu, can you
retest the vm scalability test cases on a larger machine? Hugh, does this
work for you on the memcg test cases?

Based on ext4, I get the following results but unfortunately my larger
test machines are all unavailable so this is based on a relatively small
machine.

postmark
                                  3.15.0-rc5            3.15.0-rc5
                                     vanilla       proportion-v1r4
Ops/sec  Transactions         21.00 (  0.00%)       25.00 ( 19.05%)
Ops/sec  FilesCreate           39.00 (  0.00%)       45.00 ( 15.38%)
Ops/sec  CreateTransact        10.00 (  0.00%)       12.00 ( 20.00%)
Ops/sec  FilesDeleted        6202.00 (  0.00%)     6202.00 (  0.00%)
Ops/sec  DeleteTransact        11.00 (  0.00%)       12.00 (  9.09%)
Ops/sec  DataRead/MB           25.97 (  0.00%)       30.02 ( 15.59%)
Ops/sec  DataWrite/MB          49.99 (  0.00%)       57.78 ( 15.58%)

ffsb (mail server simulator)
                                 3.15.0-rc5            3.15.0-rc5
                                    vanilla       proportion-v1r4
Ops/sec  readall            9402.63 (  0.00%)     9805.74 (  4.29%)
Ops/sec  create             4695.45 (  0.00%)     4781.39 (  1.83%)
Ops/sec  delete              173.72 (  0.00%)      177.23 (  2.02%)
Ops/sec  Transactions      14271.80 (  0.00%)    14764.37 (  3.45%)
Ops/sec  Read                 37.00 (  0.00%)       38.50 (  4.05%)
Ops/sec  Write                18.20 (  0.00%)       18.50 (  1.65%)

dd of a large file
                               3.15.0-rc5            3.15.0-rc5
                                  vanilla       proportion-v1r4
WallTime DownloadTar       75.00 (  0.00%)       61.00 ( 18.67%)
WallTime DD               423.00 (  0.00%)      401.00 (  5.20%)
WallTime Delete             2.00 (  0.00%)        5.00 (-150.00%)

stutter (times mmap latency during large amounts of IO)
                              3.15.0-rc5            3.15.0-rc5
                                 vanilla       proportion-v1r4
Unit >5ms Delays   80252.0000 (  0.00%)   81523.0000 ( -1.58%)
Unit Mmap min          8.2118 (  0.00%)       8.3206 ( -1.33%)
Unit Mmap mean        17.4614 (  0.00%)      17.2868 (  1.00%)
Unit Mmap stddev      24.9059 (  0.00%)      34.6771 (-39.23%)
Unit Mmap max       2811.6433 (  0.00%)    2645.1398 (  5.92%)
Unit Mmap 90%         20.5098 (  0.00%)      18.3105 ( 10.72%)
Unit Mmap 93%         22.9180 (  0.00%)      20.1751 ( 11.97%)
Unit Mmap 95%         25.2114 (  0.00%)      22.4988 ( 10.76%)
Unit Mmap 99%         46.1430 (  0.00%)      43.5952 (  5.52%)
Unit Ideal Tput       85.2623 (  0.00%)      78.8906 (  7.47%)
Unit Tput min         44.0666 (  0.00%)      43.9609 (  0.24%)
Unit Tput mean        45.5646 (  0.00%)      45.2009 (  0.80%)
Unit Tput stddev       0.9318 (  0.00%)       1.1084 (-18.95%)
Unit Tput max         46.7375 (  0.00%)      46.7539 ( -0.04%)

This patch (of 3):

We would like to unregister the sb shrinker before ->kill_sb(). This will
allow cached objects to be counted without a call to grab_super_passive()
to update the ref count on the sb. We want to avoid locking during memory
reclamation, especially when we are skipping memory reclaim because we are
out of cached objects.

This is safe because grab_super_passive does a try-lock on sb->s_umount
now, and so if we are in the unmount process, it won't ever block. That
means what used to be a deadlock and races we were avoiding by using
grab_super_passive() is now:

	shrinker			umount

	down_read(shrinker_rwsem)
					down_write(sb->s_umount)
					shrinker_unregister
					  down_write(shrinker_rwsem)
					    <blocks>
	grab_super_passive(sb)
	  down_read_trylock(sb->s_umount)
	    <fails>
	<shrinker aborts>
	....
	<shrinkers finish running>
	up_read(shrinker_rwsem)
					    <unblocks>
					    <removes shrinker>
					  up_write(shrinker_rwsem)
					->kill_sb()
	....

So it is safe to deregister the shrinker before ->kill_sb().
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Chinner <david@fromorbit.com>
Tested-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Cc: Bob Liu <bob.liu@oracle.com>
Cc: Jan Kara <jack@suse.cz>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Showing 1 changed file with 1 addition and 3 deletions
fs/super.c
1 | /* | 1 | /* |
2 | * linux/fs/super.c | 2 | * linux/fs/super.c |
3 | * | 3 | * |
4 | * Copyright (C) 1991, 1992 Linus Torvalds | 4 | * Copyright (C) 1991, 1992 Linus Torvalds |
5 | * | 5 | * |
6 | * super.c contains code to handle: - mount structures | 6 | * super.c contains code to handle: - mount structures |
7 | * - super-block tables | 7 | * - super-block tables |
8 | * - filesystem drivers list | 8 | * - filesystem drivers list |
9 | * - mount system call | 9 | * - mount system call |
10 | * - umount system call | 10 | * - umount system call |
11 | * - ustat system call | 11 | * - ustat system call |
12 | * | 12 | * |
13 | * GK 2/5/95 - Changed to support mounting the root fs via NFS | 13 | * GK 2/5/95 - Changed to support mounting the root fs via NFS |
14 | * | 14 | * |
15 | * Added kerneld support: Jacques Gelinas and Bjorn Ekwall | 15 | * Added kerneld support: Jacques Gelinas and Bjorn Ekwall |
16 | * Added change_root: Werner Almesberger & Hans Lermen, Feb '96 | 16 | * Added change_root: Werner Almesberger & Hans Lermen, Feb '96 |
17 | * Added options to /proc/mounts: | 17 | * Added options to /proc/mounts: |
18 | * Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996. | 18 | * Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996. |
19 | * Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998 | 19 | * Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998 |
20 | * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000 | 20 | * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000 |
21 | */ | 21 | */ |
22 | 22 | ||
23 | #include <linux/export.h> | 23 | #include <linux/export.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/acct.h> | 25 | #include <linux/acct.h> |
26 | #include <linux/blkdev.h> | 26 | #include <linux/blkdev.h> |
27 | #include <linux/mount.h> | 27 | #include <linux/mount.h> |
28 | #include <linux/security.h> | 28 | #include <linux/security.h> |
29 | #include <linux/writeback.h> /* for the emergency remount stuff */ | 29 | #include <linux/writeback.h> /* for the emergency remount stuff */ |
30 | #include <linux/idr.h> | 30 | #include <linux/idr.h> |
31 | #include <linux/mutex.h> | 31 | #include <linux/mutex.h> |
32 | #include <linux/backing-dev.h> | 32 | #include <linux/backing-dev.h> |
33 | #include <linux/rculist_bl.h> | 33 | #include <linux/rculist_bl.h> |
34 | #include <linux/cleancache.h> | 34 | #include <linux/cleancache.h> |
35 | #include <linux/fsnotify.h> | 35 | #include <linux/fsnotify.h> |
36 | #include <linux/lockdep.h> | 36 | #include <linux/lockdep.h> |
37 | #include "internal.h" | 37 | #include "internal.h" |
38 | 38 | ||
39 | 39 | ||
40 | LIST_HEAD(super_blocks); | 40 | LIST_HEAD(super_blocks); |
41 | DEFINE_SPINLOCK(sb_lock); | 41 | DEFINE_SPINLOCK(sb_lock); |
42 | 42 | ||
43 | static char *sb_writers_name[SB_FREEZE_LEVELS] = { | 43 | static char *sb_writers_name[SB_FREEZE_LEVELS] = { |
44 | "sb_writers", | 44 | "sb_writers", |
45 | "sb_pagefaults", | 45 | "sb_pagefaults", |
46 | "sb_internal", | 46 | "sb_internal", |
47 | }; | 47 | }; |
48 | 48 | ||
49 | /* | 49 | /* |
50 | * One thing we have to be careful of with a per-sb shrinker is that we don't | 50 | * One thing we have to be careful of with a per-sb shrinker is that we don't |
51 | * drop the last active reference to the superblock from within the shrinker. | 51 | * drop the last active reference to the superblock from within the shrinker. |
52 | * If that happens we could trigger unregistering the shrinker from within the | 52 | * If that happens we could trigger unregistering the shrinker from within the |
53 | * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we | 53 | * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we |
54 | * take a passive reference to the superblock to avoid this from occurring. | 54 | * take a passive reference to the superblock to avoid this from occurring. |
55 | */ | 55 | */ |
56 | static unsigned long super_cache_scan(struct shrinker *shrink, | 56 | static unsigned long super_cache_scan(struct shrinker *shrink, |
57 | struct shrink_control *sc) | 57 | struct shrink_control *sc) |
58 | { | 58 | { |
59 | struct super_block *sb; | 59 | struct super_block *sb; |
60 | long fs_objects = 0; | 60 | long fs_objects = 0; |
61 | long total_objects; | 61 | long total_objects; |
62 | long freed = 0; | 62 | long freed = 0; |
63 | long dentries; | 63 | long dentries; |
64 | long inodes; | 64 | long inodes; |
65 | 65 | ||
66 | sb = container_of(shrink, struct super_block, s_shrink); | 66 | sb = container_of(shrink, struct super_block, s_shrink); |
67 | 67 | ||
68 | /* | 68 | /* |
69 | * Deadlock avoidance. We may hold various FS locks, and we don't want | 69 | * Deadlock avoidance. We may hold various FS locks, and we don't want |
70 | * to recurse into the FS that called us in clear_inode() and friends.. | 70 | * to recurse into the FS that called us in clear_inode() and friends.. |
71 | */ | 71 | */ |
72 | if (!(sc->gfp_mask & __GFP_FS)) | 72 | if (!(sc->gfp_mask & __GFP_FS)) |
73 | return SHRINK_STOP; | 73 | return SHRINK_STOP; |
74 | 74 | ||
75 | if (!grab_super_passive(sb)) | 75 | if (!grab_super_passive(sb)) |
76 | return SHRINK_STOP; | 76 | return SHRINK_STOP; |
77 | 77 | ||
78 | if (sb->s_op->nr_cached_objects) | 78 | if (sb->s_op->nr_cached_objects) |
79 | fs_objects = sb->s_op->nr_cached_objects(sb, sc->nid); | 79 | fs_objects = sb->s_op->nr_cached_objects(sb, sc->nid); |
80 | 80 | ||
81 | inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid); | 81 | inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid); |
82 | dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid); | 82 | dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid); |
83 | total_objects = dentries + inodes + fs_objects + 1; | 83 | total_objects = dentries + inodes + fs_objects + 1; |
84 | 84 | ||
85 | /* proportion the scan between the caches */ | 85 | /* proportion the scan between the caches */ |
86 | dentries = mult_frac(sc->nr_to_scan, dentries, total_objects); | 86 | dentries = mult_frac(sc->nr_to_scan, dentries, total_objects); |
87 | inodes = mult_frac(sc->nr_to_scan, inodes, total_objects); | 87 | inodes = mult_frac(sc->nr_to_scan, inodes, total_objects); |
88 | 88 | ||
89 | /* | 89 | /* |
90 | * prune the dcache first as the icache is pinned by it, then | 90 | * prune the dcache first as the icache is pinned by it, then |
91 | * prune the icache, followed by the filesystem specific caches | 91 | * prune the icache, followed by the filesystem specific caches |
92 | */ | 92 | */ |
93 | freed = prune_dcache_sb(sb, dentries, sc->nid); | 93 | freed = prune_dcache_sb(sb, dentries, sc->nid); |
94 | freed += prune_icache_sb(sb, inodes, sc->nid); | 94 | freed += prune_icache_sb(sb, inodes, sc->nid); |
95 | 95 | ||
96 | if (fs_objects) { | 96 | if (fs_objects) { |
97 | fs_objects = mult_frac(sc->nr_to_scan, fs_objects, | 97 | fs_objects = mult_frac(sc->nr_to_scan, fs_objects, |
98 | total_objects); | 98 | total_objects); |
99 | freed += sb->s_op->free_cached_objects(sb, fs_objects, | 99 | freed += sb->s_op->free_cached_objects(sb, fs_objects, |
100 | sc->nid); | 100 | sc->nid); |
101 | } | 101 | } |
102 | 102 | ||
103 | drop_super(sb); | 103 | drop_super(sb); |
104 | return freed; | 104 | return freed; |
105 | } | 105 | } |
106 | 106 | ||
107 | static unsigned long super_cache_count(struct shrinker *shrink, | 107 | static unsigned long super_cache_count(struct shrinker *shrink, |
108 | struct shrink_control *sc) | 108 | struct shrink_control *sc) |
109 | { | 109 | { |
110 | struct super_block *sb; | 110 | struct super_block *sb; |
111 | long total_objects = 0; | 111 | long total_objects = 0; |
112 | 112 | ||
113 | sb = container_of(shrink, struct super_block, s_shrink); | 113 | sb = container_of(shrink, struct super_block, s_shrink); |
114 | 114 | ||
115 | if (!grab_super_passive(sb)) | 115 | if (!grab_super_passive(sb)) |
116 | return 0; | 116 | return 0; |
117 | 117 | ||
118 | if (sb->s_op && sb->s_op->nr_cached_objects) | 118 | if (sb->s_op && sb->s_op->nr_cached_objects) |
119 | total_objects = sb->s_op->nr_cached_objects(sb, | 119 | total_objects = sb->s_op->nr_cached_objects(sb, |
120 | sc->nid); | 120 | sc->nid); |
121 | 121 | ||
122 | total_objects += list_lru_count_node(&sb->s_dentry_lru, | 122 | total_objects += list_lru_count_node(&sb->s_dentry_lru, |
123 | sc->nid); | 123 | sc->nid); |
124 | total_objects += list_lru_count_node(&sb->s_inode_lru, | 124 | total_objects += list_lru_count_node(&sb->s_inode_lru, |
125 | sc->nid); | 125 | sc->nid); |
126 | 126 | ||
127 | total_objects = vfs_pressure_ratio(total_objects); | 127 | total_objects = vfs_pressure_ratio(total_objects); |
128 | drop_super(sb); | 128 | drop_super(sb); |
129 | return total_objects; | 129 | return total_objects; |
130 | } | 130 | } |
131 | 131 | ||
132 | static int init_sb_writers(struct super_block *s, struct file_system_type *type) | 132 | static int init_sb_writers(struct super_block *s, struct file_system_type *type) |
133 | { | 133 | { |
134 | int err; | 134 | int err; |
135 | int i; | 135 | int i; |
136 | 136 | ||
137 | for (i = 0; i < SB_FREEZE_LEVELS; i++) { | 137 | for (i = 0; i < SB_FREEZE_LEVELS; i++) { |
138 | err = percpu_counter_init(&s->s_writers.counter[i], 0); | 138 | err = percpu_counter_init(&s->s_writers.counter[i], 0); |
139 | if (err < 0) | 139 | if (err < 0) |
140 | goto err_out; | 140 | goto err_out; |
141 | lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], | 141 | lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], |
142 | &type->s_writers_key[i], 0); | 142 | &type->s_writers_key[i], 0); |
143 | } | 143 | } |
144 | init_waitqueue_head(&s->s_writers.wait); | 144 | init_waitqueue_head(&s->s_writers.wait); |
145 | init_waitqueue_head(&s->s_writers.wait_unfrozen); | 145 | init_waitqueue_head(&s->s_writers.wait_unfrozen); |
146 | return 0; | 146 | return 0; |
147 | err_out: | 147 | err_out: |
148 | while (--i >= 0) | 148 | while (--i >= 0) |
149 | percpu_counter_destroy(&s->s_writers.counter[i]); | 149 | percpu_counter_destroy(&s->s_writers.counter[i]); |
150 | return err; | 150 | return err; |
151 | } | 151 | } |
152 | 152 | ||
153 | static void destroy_sb_writers(struct super_block *s) | 153 | static void destroy_sb_writers(struct super_block *s) |
154 | { | 154 | { |
155 | int i; | 155 | int i; |
156 | 156 | ||
157 | for (i = 0; i < SB_FREEZE_LEVELS; i++) | 157 | for (i = 0; i < SB_FREEZE_LEVELS; i++) |
158 | percpu_counter_destroy(&s->s_writers.counter[i]); | 158 | percpu_counter_destroy(&s->s_writers.counter[i]); |
159 | } | 159 | } |
160 | 160 | ||
161 | /** | 161 | /** |
162 | * alloc_super - create new superblock | 162 | * alloc_super - create new superblock |
163 | * @type: filesystem type superblock should belong to | 163 | * @type: filesystem type superblock should belong to |
164 | * @flags: the mount flags | 164 | * @flags: the mount flags |
165 | * | 165 | * |
166 | * Allocates and initializes a new &struct super_block. alloc_super() | 166 | * Allocates and initializes a new &struct super_block. alloc_super() |
167 | * returns a pointer new superblock or %NULL if allocation had failed. | 167 | * returns a pointer new superblock or %NULL if allocation had failed. |
168 | */ | 168 | */ |
169 | static struct super_block *alloc_super(struct file_system_type *type, int flags) | 169 | static struct super_block *alloc_super(struct file_system_type *type, int flags) |
170 | { | 170 | { |
171 | struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); | 171 | struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); |
172 | static const struct super_operations default_op; | 172 | static const struct super_operations default_op; |
173 | 173 | ||
174 | if (s) { | 174 | if (s) { |
175 | if (security_sb_alloc(s)) | 175 | if (security_sb_alloc(s)) |
176 | goto out_free_sb; | 176 | goto out_free_sb; |
177 | 177 | ||
178 | #ifdef CONFIG_SMP | 178 | #ifdef CONFIG_SMP |
179 | s->s_files = alloc_percpu(struct list_head); | 179 | s->s_files = alloc_percpu(struct list_head); |
180 | if (!s->s_files) | 180 | if (!s->s_files) |
181 | goto err_out; | 181 | goto err_out; |
182 | else { | 182 | else { |
183 | int i; | 183 | int i; |
184 | 184 | ||
185 | for_each_possible_cpu(i) | 185 | for_each_possible_cpu(i) |
186 | INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); | 186 | INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); |
187 | } | 187 | } |
188 | #else | 188 | #else |
189 | INIT_LIST_HEAD(&s->s_files); | 189 | INIT_LIST_HEAD(&s->s_files); |
190 | #endif | 190 | #endif |
191 | if (init_sb_writers(s, type)) | 191 | if (init_sb_writers(s, type)) |
192 | goto err_out; | 192 | goto err_out; |
193 | s->s_flags = flags; | 193 | s->s_flags = flags; |
194 | s->s_bdi = &default_backing_dev_info; | 194 | s->s_bdi = &default_backing_dev_info; |
195 | INIT_HLIST_NODE(&s->s_instances); | 195 | INIT_HLIST_NODE(&s->s_instances); |
196 | INIT_HLIST_BL_HEAD(&s->s_anon); | 196 | INIT_HLIST_BL_HEAD(&s->s_anon); |
197 | INIT_LIST_HEAD(&s->s_inodes); | 197 | INIT_LIST_HEAD(&s->s_inodes); |
198 | 198 | ||
199 | if (list_lru_init(&s->s_dentry_lru)) | 199 | if (list_lru_init(&s->s_dentry_lru)) |
200 | goto err_out; | 200 | goto err_out; |
201 | if (list_lru_init(&s->s_inode_lru)) | 201 | if (list_lru_init(&s->s_inode_lru)) |
202 | goto err_out_dentry_lru; | 202 | goto err_out_dentry_lru; |
203 | 203 | ||
204 | INIT_LIST_HEAD(&s->s_mounts); | 204 | INIT_LIST_HEAD(&s->s_mounts); |
205 | init_rwsem(&s->s_umount); | 205 | init_rwsem(&s->s_umount); |
206 | lockdep_set_class(&s->s_umount, &type->s_umount_key); | 206 | lockdep_set_class(&s->s_umount, &type->s_umount_key); |
207 | /* | 207 | /* |
208 | * sget() can have s_umount recursion. | 208 | * sget() can have s_umount recursion. |
209 | * | 209 | * |
210 | * When it cannot find a suitable sb, it allocates a new | 210 | * When it cannot find a suitable sb, it allocates a new |
211 | * one (this one), and tries again to find a suitable old | 211 | * one (this one), and tries again to find a suitable old |
212 | * one. | 212 | * one. |
213 | * | 213 | * |
214 | * In case that succeeds, it will acquire the s_umount | 214 | * In case that succeeds, it will acquire the s_umount |
215 | * lock of the old one. Since these are clearly distrinct | 215 | * lock of the old one. Since these are clearly distrinct |
216 | * locks, and this object isn't exposed yet, there's no | 216 | * locks, and this object isn't exposed yet, there's no |
217 | * risk of deadlocks. | 217 | * risk of deadlocks. |
218 | * | 218 | * |
219 | * Annotate this by putting this lock in a different | 219 | * Annotate this by putting this lock in a different |
220 | * subclass. | 220 | * subclass. |
221 | */ | 221 | */ |
222 | down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); | 222 | down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); |
223 | s->s_count = 1; | 223 | s->s_count = 1; |
224 | atomic_set(&s->s_active, 1); | 224 | atomic_set(&s->s_active, 1); |
225 | mutex_init(&s->s_vfs_rename_mutex); | 225 | mutex_init(&s->s_vfs_rename_mutex); |
226 | lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); | 226 | lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); |
227 | mutex_init(&s->s_dquot.dqio_mutex); | 227 | mutex_init(&s->s_dquot.dqio_mutex); |
228 | mutex_init(&s->s_dquot.dqonoff_mutex); | 228 | mutex_init(&s->s_dquot.dqonoff_mutex); |
229 | init_rwsem(&s->s_dquot.dqptr_sem); | 229 | init_rwsem(&s->s_dquot.dqptr_sem); |
230 | s->s_maxbytes = MAX_NON_LFS; | 230 | s->s_maxbytes = MAX_NON_LFS; |
231 | s->s_op = &default_op; | 231 | s->s_op = &default_op; |
232 | s->s_time_gran = 1000000000; | 232 | s->s_time_gran = 1000000000; |
233 | s->cleancache_poolid = -1; | 233 | s->cleancache_poolid = -1; |
234 | 234 | ||
235 | s->s_shrink.seeks = DEFAULT_SEEKS; | 235 | s->s_shrink.seeks = DEFAULT_SEEKS; |
236 | s->s_shrink.scan_objects = super_cache_scan; | 236 | s->s_shrink.scan_objects = super_cache_scan; |
237 | s->s_shrink.count_objects = super_cache_count; | 237 | s->s_shrink.count_objects = super_cache_count; |
238 | s->s_shrink.batch = 1024; | 238 | s->s_shrink.batch = 1024; |
239 | s->s_shrink.flags = SHRINKER_NUMA_AWARE; | 239 | s->s_shrink.flags = SHRINKER_NUMA_AWARE; |
240 | } | 240 | } |
241 | out: | 241 | out: |
242 | return s; | 242 | return s; |
243 | 243 | ||
244 | err_out_dentry_lru: | 244 | err_out_dentry_lru: |
245 | list_lru_destroy(&s->s_dentry_lru); | 245 | list_lru_destroy(&s->s_dentry_lru); |
246 | err_out: | 246 | err_out: |
247 | security_sb_free(s); | 247 | security_sb_free(s); |
248 | #ifdef CONFIG_SMP | 248 | #ifdef CONFIG_SMP |
249 | if (s->s_files) | 249 | if (s->s_files) |
250 | free_percpu(s->s_files); | 250 | free_percpu(s->s_files); |
251 | #endif | 251 | #endif |
252 | destroy_sb_writers(s); | 252 | destroy_sb_writers(s); |
253 | out_free_sb: | 253 | out_free_sb: |
254 | kfree(s); | 254 | kfree(s); |
255 | s = NULL; | 255 | s = NULL; |
256 | goto out; | 256 | goto out; |
257 | } | 257 | } |
258 | 258 | ||
259 | /** | 259 | /** |
260 | * destroy_super - frees a superblock | 260 | * destroy_super - frees a superblock |
261 | * @s: superblock to free | 261 | * @s: superblock to free |
262 | * | 262 | * |
263 | * Frees a superblock. | 263 | * Frees a superblock. |
264 | */ | 264 | */ |
265 | static inline void destroy_super(struct super_block *s) | 265 | static inline void destroy_super(struct super_block *s) |
266 | { | 266 | { |
267 | list_lru_destroy(&s->s_dentry_lru); | 267 | list_lru_destroy(&s->s_dentry_lru); |
268 | list_lru_destroy(&s->s_inode_lru); | 268 | list_lru_destroy(&s->s_inode_lru); |
269 | #ifdef CONFIG_SMP | 269 | #ifdef CONFIG_SMP |
270 | free_percpu(s->s_files); | 270 | free_percpu(s->s_files); |
271 | #endif | 271 | #endif |
272 | destroy_sb_writers(s); | 272 | destroy_sb_writers(s); |
273 | security_sb_free(s); | 273 | security_sb_free(s); |
274 | WARN_ON(!list_empty(&s->s_mounts)); | 274 | WARN_ON(!list_empty(&s->s_mounts)); |
275 | kfree(s->s_subtype); | 275 | kfree(s->s_subtype); |
276 | kfree(s->s_options); | 276 | kfree(s->s_options); |
277 | kfree(s); | 277 | kfree(s); |
278 | } | 278 | } |
279 | 279 | ||
280 | /* Superblock refcounting */ | 280 | /* Superblock refcounting */ |
281 | 281 | ||
282 | /* | 282 | /* |
283 | * Drop a superblock's refcount. The caller must hold sb_lock. | 283 | * Drop a superblock's refcount. The caller must hold sb_lock. |
284 | */ | 284 | */ |
285 | static void __put_super(struct super_block *sb) | 285 | static void __put_super(struct super_block *sb) |
286 | { | 286 | { |
287 | if (!--sb->s_count) { | 287 | if (!--sb->s_count) { |
288 | list_del_init(&sb->s_list); | 288 | list_del_init(&sb->s_list); |
289 | destroy_super(sb); | 289 | destroy_super(sb); |
290 | } | 290 | } |
291 | } | 291 | } |
292 | 292 | ||
293 | /** | 293 | /** |
294 | * put_super - drop a temporary reference to superblock | 294 | * put_super - drop a temporary reference to superblock |
295 | * @sb: superblock in question | 295 | * @sb: superblock in question |
296 | * | 296 | * |
297 | * Drops a temporary reference, frees superblock if there's no | 297 | * Drops a temporary reference, frees superblock if there's no |
298 | * references left. | 298 | * references left. |
299 | */ | 299 | */ |
300 | static void put_super(struct super_block *sb) | 300 | static void put_super(struct super_block *sb) |
301 | { | 301 | { |
302 | spin_lock(&sb_lock); | 302 | spin_lock(&sb_lock); |
303 | __put_super(sb); | 303 | __put_super(sb); |
304 | spin_unlock(&sb_lock); | 304 | spin_unlock(&sb_lock); |
305 | } | 305 | } |
306 | 306 | ||
307 | 307 | ||
308 | /** | 308 | /** |
309 | * deactivate_locked_super - drop an active reference to superblock | 309 | * deactivate_locked_super - drop an active reference to superblock |
310 | * @s: superblock to deactivate | 310 | * @s: superblock to deactivate |
311 | * | 311 | * |
312 | * Drops an active reference to superblock, converting it into a temprory | 312 | * Drops an active reference to superblock, converting it into a temprory |
313 | * one if there is no other active references left. In that case we | 313 | * one if there is no other active references left. In that case we |
314 | * tell fs driver to shut it down and drop the temporary reference we | 314 | * tell fs driver to shut it down and drop the temporary reference we |
315 | * had just acquired. | 315 | * had just acquired. |
316 | * | 316 | * |
317 | * Caller holds exclusive lock on superblock; that lock is released. | 317 | * Caller holds exclusive lock on superblock; that lock is released. |
318 | */ | 318 | */ |
319 | void deactivate_locked_super(struct super_block *s) | 319 | void deactivate_locked_super(struct super_block *s) |
320 | { | 320 | { |
321 | struct file_system_type *fs = s->s_type; | 321 | struct file_system_type *fs = s->s_type; |
322 | if (atomic_dec_and_test(&s->s_active)) { | 322 | if (atomic_dec_and_test(&s->s_active)) { |
323 | cleancache_invalidate_fs(s); | 323 | cleancache_invalidate_fs(s); |
324 | fs->kill_sb(s); | ||
325 | |||
326 | /* caches are now gone, we can safely kill the shrinker now */ | ||
327 | unregister_shrinker(&s->s_shrink); | 324 | unregister_shrinker(&s->s_shrink); |
325 | fs->kill_sb(s); | ||
328 | 326 | ||
329 | put_filesystem(fs); | 327 | put_filesystem(fs); |
330 | put_super(s); | 328 | put_super(s); |
331 | } else { | 329 | } else { |
332 | up_write(&s->s_umount); | 330 | up_write(&s->s_umount); |
333 | } | 331 | } |
334 | } | 332 | } |
335 | 333 | ||
336 | EXPORT_SYMBOL(deactivate_locked_super); | 334 | EXPORT_SYMBOL(deactivate_locked_super); |
337 | 335 | ||
338 | /** | 336 | /** |
339 | * deactivate_super - drop an active reference to superblock | 337 | * deactivate_super - drop an active reference to superblock |
340 | * @s: superblock to deactivate | 338 | * @s: superblock to deactivate |
341 | * | 339 | * |
342 | * Variant of deactivate_locked_super(), except that superblock is *not* | 340 | * Variant of deactivate_locked_super(), except that superblock is *not* |
343 | * locked by caller. If we are going to drop the final active reference, | 341 | * locked by caller. If we are going to drop the final active reference, |
344 | * lock will be acquired prior to that. | 342 | * lock will be acquired prior to that. |
345 | */ | 343 | */ |
346 | void deactivate_super(struct super_block *s) | 344 | void deactivate_super(struct super_block *s) |
347 | { | 345 | { |
348 | if (!atomic_add_unless(&s->s_active, -1, 1)) { | 346 | if (!atomic_add_unless(&s->s_active, -1, 1)) { |
349 | down_write(&s->s_umount); | 347 | down_write(&s->s_umount); |
350 | deactivate_locked_super(s); | 348 | deactivate_locked_super(s); |
351 | } | 349 | } |
352 | } | 350 | } |
353 | 351 | ||
354 | EXPORT_SYMBOL(deactivate_super); | 352 | EXPORT_SYMBOL(deactivate_super); |
355 | 353 | ||
356 | /** | 354 | /** |
357 | * grab_super - acquire an active reference | 355 | * grab_super - acquire an active reference |
358 | * @s: reference we are trying to make active | 356 | * @s: reference we are trying to make active |
359 | * | 357 | * |
360 | * Tries to acquire an active reference. grab_super() is used when we | 358 | * Tries to acquire an active reference. grab_super() is used when we |
361 | * had just found a superblock in super_blocks or fs_type->fs_supers | 359 | * had just found a superblock in super_blocks or fs_type->fs_supers |
362 | * and want to turn it into a full-blown active reference. grab_super() | 360 | * and want to turn it into a full-blown active reference. grab_super() |
363 | * is called with sb_lock held and drops it. Returns 1 in case of | 361 | * is called with sb_lock held and drops it. Returns 1 in case of |
364 | * success, 0 if we had failed (superblock contents was already dead or | 362 | * success, 0 if we had failed (superblock contents was already dead or |
365 | * dying when grab_super() had been called). Note that this is only | 363 | * dying when grab_super() had been called). Note that this is only |
366 | * called for superblocks not in rundown mode (== ones still on ->fs_supers | 364 | * called for superblocks not in rundown mode (== ones still on ->fs_supers |
367 | * of their type), so increment of ->s_count is OK here. | 365 | * of their type), so increment of ->s_count is OK here. |
368 | */ | 366 | */ |
369 | static int grab_super(struct super_block *s) __releases(sb_lock) | 367 | static int grab_super(struct super_block *s) __releases(sb_lock) |
370 | { | 368 | { |
371 | s->s_count++; | 369 | s->s_count++; |
372 | spin_unlock(&sb_lock); | 370 | spin_unlock(&sb_lock); |
373 | down_write(&s->s_umount); | 371 | down_write(&s->s_umount); |
374 | if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) { | 372 | if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) { |
375 | put_super(s); | 373 | put_super(s); |
376 | return 1; | 374 | return 1; |
377 | } | 375 | } |
378 | up_write(&s->s_umount); | 376 | up_write(&s->s_umount); |
379 | put_super(s); | 377 | put_super(s); |
380 | return 0; | 378 | return 0; |
381 | } | 379 | } |
382 | 380 | ||
383 | /* | 381 | /* |
384 | * grab_super_passive - acquire a passive reference | 382 | * grab_super_passive - acquire a passive reference |
385 | * @sb: reference we are trying to grab | 383 | * @sb: reference we are trying to grab |
386 | * | 384 | * |
387 | * Tries to acquire a passive reference. This is used in places where we | 385 | * Tries to acquire a passive reference. This is used in places where we |
388 | * cannot take an active reference but we need to ensure that the | 386 | * cannot take an active reference but we need to ensure that the |
389 | * superblock does not go away while we are working on it. It returns | 387 | * superblock does not go away while we are working on it. It returns |
390 | * false if a reference was not gained, and returns true with the s_umount | 388 | * false if a reference was not gained, and returns true with the s_umount |
391 | * lock held in read mode if a reference is gained. On successful return, | 389 | * lock held in read mode if a reference is gained. On successful return, |
392 | * the caller must drop the s_umount lock and the passive reference when | 390 | * the caller must drop the s_umount lock and the passive reference when |
393 | * done. | 391 | * done. |
394 | */ | 392 | */ |
395 | bool grab_super_passive(struct super_block *sb) | 393 | bool grab_super_passive(struct super_block *sb) |
396 | { | 394 | { |
397 | spin_lock(&sb_lock); | 395 | spin_lock(&sb_lock); |
398 | if (hlist_unhashed(&sb->s_instances)) { | 396 | if (hlist_unhashed(&sb->s_instances)) { |
399 | spin_unlock(&sb_lock); | 397 | spin_unlock(&sb_lock); |
400 | return false; | 398 | return false; |
401 | } | 399 | } |
402 | 400 | ||
403 | sb->s_count++; | 401 | sb->s_count++; |
404 | spin_unlock(&sb_lock); | 402 | spin_unlock(&sb_lock); |
405 | 403 | ||
406 | if (down_read_trylock(&sb->s_umount)) { | 404 | if (down_read_trylock(&sb->s_umount)) { |
407 | if (sb->s_root && (sb->s_flags & MS_BORN)) | 405 | if (sb->s_root && (sb->s_flags & MS_BORN)) |
408 | return true; | 406 | return true; |
409 | up_read(&sb->s_umount); | 407 | up_read(&sb->s_umount); |
410 | } | 408 | } |
411 | 409 | ||
412 | put_super(sb); | 410 | put_super(sb); |
413 | return false; | 411 | return false; |
414 | } | 412 | } |
415 | 413 | ||
/**
 * generic_shutdown_super - common helper for ->kill_sb()
 * @sb: superblock to kill
 *
 * generic_shutdown_super() does all fs-independent work on superblock
 * shutdown. Typical ->kill_sb() should pick all fs-specific objects
 * that need destruction out of superblock, call generic_shutdown_super()
 * and release aforementioned objects. Note: dentries and inodes _are_
 * taken care of and do not need specific handling.
 *
 * Upon calling this function, the filesystem may no longer alter or
 * rearrange the set of dentries belonging to this super_block, nor may it
 * change the attachments of dentries to inodes.
 */
void generic_shutdown_super(struct super_block *sb)
{
	const struct super_operations *sop = sb->s_op;

	if (sb->s_root) {
		shrink_dcache_for_umount(sb);
		sync_filesystem(sb);
		sb->s_flags &= ~MS_ACTIVE;

		fsnotify_unmount_inodes(&sb->s_inodes);

		evict_inodes(sb);

		if (sb->s_dio_done_wq) {
			destroy_workqueue(sb->s_dio_done_wq);
			sb->s_dio_done_wq = NULL;
		}

		if (sop->put_super)
			sop->put_super(sb);

		if (!list_empty(&sb->s_inodes)) {
			printk("VFS: Busy inodes after unmount of %s. "
			   "Self-destruct in 5 seconds. Have a nice day...\n",
			   sb->s_id);
		}
	}
	spin_lock(&sb_lock);
	/* should be initialized for __put_super_and_need_restart() */
	hlist_del_init(&sb->s_instances);
	spin_unlock(&sb_lock);
	up_write(&sb->s_umount);
}

EXPORT_SYMBOL(generic_shutdown_super);

/**
 * sget - find or create a superblock
 * @type: filesystem type superblock should belong to
 * @test: comparison callback
 * @set: setup callback
 * @flags: mount flags
 * @data: argument to each of them
 */
struct super_block *sget(struct file_system_type *type,
			int (*test)(struct super_block *,void *),
			int (*set)(struct super_block *,void *),
			int flags,
			void *data)
{
	struct super_block *s = NULL;
	struct super_block *old;
	int err;

retry:
	spin_lock(&sb_lock);
	if (test) {
		hlist_for_each_entry(old, &type->fs_supers, s_instances) {
			if (!test(old, data))
				continue;
			if (!grab_super(old))
				goto retry;
			if (s) {
				up_write(&s->s_umount);
				destroy_super(s);
				s = NULL;
			}
			return old;
		}
	}
	if (!s) {
		spin_unlock(&sb_lock);
		s = alloc_super(type, flags);
		if (!s)
			return ERR_PTR(-ENOMEM);
		goto retry;
	}

	err = set(s, data);
	if (err) {
		spin_unlock(&sb_lock);
		up_write(&s->s_umount);
		destroy_super(s);
		return ERR_PTR(err);
	}
	s->s_type = type;
	strlcpy(s->s_id, type->name, sizeof(s->s_id));
	list_add_tail(&s->s_list, &super_blocks);
	hlist_add_head(&s->s_instances, &type->fs_supers);
	spin_unlock(&sb_lock);
	get_filesystem(type);
	register_shrinker(&s->s_shrink);
	return s;
}

EXPORT_SYMBOL(sget);

void drop_super(struct super_block *sb)
{
	up_read(&sb->s_umount);
	put_super(sb);
}

EXPORT_SYMBOL(drop_super);

/**
 * iterate_supers - call function for all active superblocks
 * @f: function to call
 * @arg: argument to pass to it
 *
 * Scans the superblock list and calls given function, passing it
 * locked superblock and given argument.
 */
void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		sb->s_count++;
		spin_unlock(&sb_lock);

		down_read(&sb->s_umount);
		if (sb->s_root && (sb->s_flags & MS_BORN))
			f(sb, arg);
		up_read(&sb->s_umount);

		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}

/**
 * iterate_supers_type - call function for superblocks of given type
 * @type: fs type
 * @f: function to call
 * @arg: argument to pass to it
 *
 * Scans the superblock list and calls given function, passing it
 * locked superblock and given argument.
 */
void iterate_supers_type(struct file_system_type *type,
	void (*f)(struct super_block *, void *), void *arg)
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
		sb->s_count++;
		spin_unlock(&sb_lock);

		down_read(&sb->s_umount);
		if (sb->s_root && (sb->s_flags & MS_BORN))
			f(sb, arg);
		up_read(&sb->s_umount);

		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}

EXPORT_SYMBOL(iterate_supers_type);

/**
 * get_super - get the superblock of a device
 * @bdev: device to get the superblock for
 *
 * Scans the superblock list and finds the superblock of the file system
 * mounted on the device given. %NULL is returned if no match is found.
 */

struct super_block *get_super(struct block_device *bdev)
{
	struct super_block *sb;

	if (!bdev)
		return NULL;

	spin_lock(&sb_lock);
rescan:
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		if (sb->s_bdev == bdev) {
			sb->s_count++;
			spin_unlock(&sb_lock);
			down_read(&sb->s_umount);
			/* still alive? */
			if (sb->s_root && (sb->s_flags & MS_BORN))
				return sb;
			up_read(&sb->s_umount);
			/* nope, got unmounted */
			spin_lock(&sb_lock);
			__put_super(sb);
			goto rescan;
		}
	}
	spin_unlock(&sb_lock);
	return NULL;
}

EXPORT_SYMBOL(get_super);

/**
 * get_super_thawed - get thawed superblock of a device
 * @bdev: device to get the superblock for
 *
 * Scans the superblock list and finds the superblock of the file system
 * mounted on the device. The superblock is returned once it is thawed
 * (or immediately if it was not frozen). %NULL is returned if no match
 * is found.
 */
struct super_block *get_super_thawed(struct block_device *bdev)
{
	while (1) {
		struct super_block *s = get_super(bdev);
		if (!s || s->s_writers.frozen == SB_UNFROZEN)
			return s;
		up_read(&s->s_umount);
		wait_event(s->s_writers.wait_unfrozen,
			   s->s_writers.frozen == SB_UNFROZEN);
		put_super(s);
	}
}
EXPORT_SYMBOL(get_super_thawed);

/**
 * get_active_super - get an active reference to the superblock of a device
 * @bdev: device to get the superblock for
 *
 * Scans the superblock list and finds the superblock of the file system
 * mounted on the device given. Returns the superblock with an active
 * reference or %NULL if none was found.
 */
struct super_block *get_active_super(struct block_device *bdev)
{
	struct super_block *sb;

	if (!bdev)
		return NULL;

restart:
	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		if (sb->s_bdev == bdev) {
			if (!grab_super(sb))
				goto restart;
			up_write(&sb->s_umount);
			return sb;
		}
	}
	spin_unlock(&sb_lock);
	return NULL;
}

struct super_block *user_get_super(dev_t dev)
{
	struct super_block *sb;

	spin_lock(&sb_lock);
rescan:
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		if (sb->s_dev == dev) {
			sb->s_count++;
			spin_unlock(&sb_lock);
			down_read(&sb->s_umount);
			/* still alive? */
			if (sb->s_root && (sb->s_flags & MS_BORN))
				return sb;
			up_read(&sb->s_umount);
			/* nope, got unmounted */
			spin_lock(&sb_lock);
			__put_super(sb);
			goto rescan;
		}
	}
	spin_unlock(&sb_lock);
	return NULL;
}

/**
 * do_remount_sb - asks filesystem to change mount options.
 * @sb: superblock in question
 * @flags: numeric part of options
 * @data: the rest of options
 * @force: whether or not to force the change
 *
 * Alters the mount options of a mounted file system.
 */
int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
{
	int retval;
	int remount_ro;

	if (sb->s_writers.frozen != SB_UNFROZEN)
		return -EBUSY;

#ifdef CONFIG_BLOCK
	if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
		return -EACCES;
#endif

	if (flags & MS_RDONLY)
		acct_auto_close(sb);
	shrink_dcache_sb(sb);
	sync_filesystem(sb);

	remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);

	/* If we are remounting RDONLY and current sb is read/write,
	   make sure there are no rw files opened */
	if (remount_ro) {
		if (force) {
			mark_files_ro(sb);
		} else {
			retval = sb_prepare_remount_readonly(sb);
			if (retval)
				return retval;
		}
	}

	if (sb->s_op->remount_fs) {
		retval = sb->s_op->remount_fs(sb, &flags, data);
		if (retval) {
			if (!force)
				goto cancel_readonly;
			/* If forced remount, go ahead despite any errors */
			WARN(1, "forced remount of a %s fs returned %i\n",
			     sb->s_type->name, retval);
		}
	}
	sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
	/* Needs to be ordered wrt mnt_is_readonly() */
	smp_wmb();
	sb->s_readonly_remount = 0;

	/*
	 * Some filesystems modify their metadata via some other path than the
	 * bdev buffer cache (eg. use a private mapping, or directories in
	 * pagecache, etc). Also file data modifications go via their own
	 * mappings. So if we try to mount readonly then copy the filesystem
	 * from bdev, we could get stale data, so invalidate it to give a best
	 * effort at coherency.
	 */
	if (remount_ro && sb->s_bdev)
		invalidate_bdev(sb->s_bdev);
	return 0;

cancel_readonly:
	sb->s_readonly_remount = 0;
	return retval;
}

static void do_emergency_remount(struct work_struct *work)
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		sb->s_count++;
		spin_unlock(&sb_lock);
		down_write(&sb->s_umount);
		if (sb->s_root && sb->s_bdev && (sb->s_flags & MS_BORN) &&
		    !(sb->s_flags & MS_RDONLY)) {
			/*
			 * What lock protects sb->s_flags??
			 */
			do_remount_sb(sb, MS_RDONLY, NULL, 1);
		}
		up_write(&sb->s_umount);
		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
	kfree(work);
	printk("Emergency Remount complete\n");
}

void emergency_remount(void)
{
	struct work_struct *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (work) {
		INIT_WORK(work, do_emergency_remount);
		schedule_work(work);
	}
}

/*
 * Unnamed block devices are dummy devices used by virtual
 * filesystems which don't use real block-devices. -- jrs
 */

static DEFINE_IDA(unnamed_dev_ida);
static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
/* Many userspace utilities consider an FSID of 0 invalid.
 * Always return at least 1 from get_anon_bdev.
 */
static int unnamed_dev_start = 1;

int get_anon_bdev(dev_t *p)
{
	int dev;
	int error;

retry:
	if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0)
		return -ENOMEM;
	spin_lock(&unnamed_dev_lock);
	error = ida_get_new_above(&unnamed_dev_ida, unnamed_dev_start, &dev);
	if (!error)
		unnamed_dev_start = dev + 1;
	spin_unlock(&unnamed_dev_lock);
	if (error == -EAGAIN)
		/* We raced and lost with another CPU. */
		goto retry;
	else if (error)
		return -EAGAIN;

	if (dev == (1 << MINORBITS)) {
		spin_lock(&unnamed_dev_lock);
		ida_remove(&unnamed_dev_ida, dev);
		if (unnamed_dev_start > dev)
			unnamed_dev_start = dev;
		spin_unlock(&unnamed_dev_lock);
		return -EMFILE;
	}
	*p = MKDEV(0, dev & MINORMASK);
	return 0;
}
EXPORT_SYMBOL(get_anon_bdev);

void free_anon_bdev(dev_t dev)
{
	int slot = MINOR(dev);
	spin_lock(&unnamed_dev_lock);
	ida_remove(&unnamed_dev_ida, slot);
	if (slot < unnamed_dev_start)
		unnamed_dev_start = slot;
	spin_unlock(&unnamed_dev_lock);
}
EXPORT_SYMBOL(free_anon_bdev);

int set_anon_super(struct super_block *s, void *data)
{
	int error = get_anon_bdev(&s->s_dev);
	if (!error)
		s->s_bdi = &noop_backing_dev_info;
	return error;
}

EXPORT_SYMBOL(set_anon_super);

void kill_anon_super(struct super_block *sb)
{
	dev_t dev = sb->s_dev;
	generic_shutdown_super(sb);
	free_anon_bdev(dev);
}

EXPORT_SYMBOL(kill_anon_super);

void kill_litter_super(struct super_block *sb)
{
	if (sb->s_root)
		d_genocide(sb->s_root);
	kill_anon_super(sb);
}

EXPORT_SYMBOL(kill_litter_super);

static int ns_test_super(struct super_block *sb, void *data)
{
	return sb->s_fs_info == data;
}

static int ns_set_super(struct super_block *sb, void *data)
{
	sb->s_fs_info = data;
	return set_anon_super(sb, NULL);
}

struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
	void *data, int (*fill_super)(struct super_block *, void *, int))
{
	struct super_block *sb;

	sb = sget(fs_type, ns_test_super, ns_set_super, flags, data);
	if (IS_ERR(sb))
		return ERR_CAST(sb);

	if (!sb->s_root) {
		int err;
		err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
		if (err) {
			deactivate_locked_super(sb);
			return ERR_PTR(err);
		}

		sb->s_flags |= MS_ACTIVE;
	}

	return dget(sb->s_root);
}

EXPORT_SYMBOL(mount_ns);

#ifdef CONFIG_BLOCK
static int set_bdev_super(struct super_block *s, void *data)
{
	s->s_bdev = data;
	s->s_dev = s->s_bdev->bd_dev;

	/*
	 * We set the bdi here to the queue backing, file systems can
	 * overwrite this in ->fill_super()
	 */
	s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info;
	return 0;
}

static int test_bdev_super(struct super_block *s, void *data)
{
	return (void *)s->s_bdev == data;
}

struct dentry *mount_bdev(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	struct block_device *bdev;
	struct super_block *s;
	fmode_t mode = FMODE_READ | FMODE_EXCL;
	int error = 0;

	if (!(flags & MS_RDONLY))
		mode |= FMODE_WRITE;

	bdev = blkdev_get_by_path(dev_name, mode, fs_type);
	if (IS_ERR(bdev))
		return ERR_CAST(bdev);

	/*
	 * once the super is inserted into the list by sget, s_umount
	 * will protect the lockfs code from trying to start a snapshot
	 * while we are mounting
	 */
	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (bdev->bd_fsfreeze_count > 0) {
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		error = -EBUSY;
		goto error_bdev;
	}
	s = sget(fs_type, test_bdev_super, set_bdev_super, flags | MS_NOSEC,
		 bdev);
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	if (IS_ERR(s))
		goto error_s;

	if (s->s_root) {
		if ((flags ^ s->s_flags) & MS_RDONLY) {
			deactivate_locked_super(s);
			error = -EBUSY;
			goto error_bdev;
		}

		/*
		 * s_umount nests inside bd_mutex during
		 * __invalidate_device().  blkdev_put() acquires
		 * bd_mutex and can't be called under s_umount.  Drop
		 * s_umount temporarily.  This is safe as we're
		 * holding an active reference.
		 */
		up_write(&s->s_umount);
		blkdev_put(bdev, mode);
		down_write(&s->s_umount);
	} else {
		char b[BDEVNAME_SIZE];

		s->s_mode = mode;
		strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
		sb_set_blocksize(s, block_size(bdev));
		error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
		if (error) {
			deactivate_locked_super(s);
			goto error;
		}

		s->s_flags |= MS_ACTIVE;
		bdev->bd_super = s;
	}

	return dget(s->s_root);

error_s:
	error = PTR_ERR(s);
error_bdev:
	blkdev_put(bdev, mode);
error:
	return ERR_PTR(error);
}
EXPORT_SYMBOL(mount_bdev);

void kill_block_super(struct super_block *sb)
{
	struct block_device *bdev = sb->s_bdev;
	fmode_t mode = sb->s_mode;

	bdev->bd_super = NULL;
	generic_shutdown_super(sb);
	sync_blockdev(bdev);
	WARN_ON_ONCE(!(mode & FMODE_EXCL));
	blkdev_put(bdev, mode | FMODE_EXCL);
}

EXPORT_SYMBOL(kill_block_super);
#endif

struct dentry *mount_nodev(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	int error;
	struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL);

	if (IS_ERR(s))
		return ERR_CAST(s);

	error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
	if (error) {
		deactivate_locked_super(s);
		return ERR_PTR(error);
	}
	s->s_flags |= MS_ACTIVE;
	return dget(s->s_root);
}
EXPORT_SYMBOL(mount_nodev);

static int compare_single(struct super_block *s, void *p)
{
	return 1;
}

struct dentry *mount_single(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	struct super_block *s;
	int error;

	s = sget(fs_type, compare_single, set_anon_super, flags, NULL);
	if (IS_ERR(s))
		return ERR_CAST(s);
	if (!s->s_root) {
		error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
		if (error) {
			deactivate_locked_super(s);
			return ERR_PTR(error);
		}
		s->s_flags |= MS_ACTIVE;
	} else {
		do_remount_sb(s, flags, data, 0);
	}
	return dget(s->s_root);
}
EXPORT_SYMBOL(mount_single);

struct dentry *
mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct dentry *root;
	struct super_block *sb;
	char *secdata = NULL;
	int error = -ENOMEM;

	if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
		secdata = alloc_secdata();
		if (!secdata)
			goto out;

		error = security_sb_copy_data(data, secdata);
		if (error)
			goto out_free_secdata;
	}

	root = type->mount(type, flags, name, data);
	if (IS_ERR(root)) {
		error = PTR_ERR(root);
		goto out_free_secdata;
	}
	sb = root->d_sb;
	BUG_ON(!sb);
	WARN_ON(!sb->s_bdi);
	WARN_ON(sb->s_bdi == &default_backing_dev_info);
	sb->s_flags |= MS_BORN;

	error = security_sb_kern_mount(sb, flags, secdata);
	if (error)
		goto out_sb;

	/*
	 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
	 * but s_maxbytes was an unsigned long long for many releases. Throw
	 * this warning for a little while to try and catch filesystems that
	 * violate this rule.
	 */
	WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
		"negative value (%lld)\n", type->name, sb->s_maxbytes);

	up_write(&sb->s_umount);
	free_secdata(secdata);
	return root;
out_sb:
	dput(root);
	deactivate_locked_super(sb);
out_free_secdata:
	free_secdata(secdata);
out:
	return ERR_PTR(error);
}

/*
 * This is an internal function, please use sb_end_{write,pagefault,intwrite}
 * instead.
 */
void __sb_end_write(struct super_block *sb, int level)
{
	percpu_counter_dec(&sb->s_writers.counter[level-1]);
	/*
	 * Make sure s_writers are updated before we wake up waiters in
	 * freeze_super().
	 */
	smp_mb();
	if (waitqueue_active(&sb->s_writers.wait))
		wake_up(&sb->s_writers.wait);
	rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
}
EXPORT_SYMBOL(__sb_end_write);

#ifdef CONFIG_LOCKDEP
/*
 * We want lockdep to tell us about possible deadlocks with freezing but
 * it's a bit tricky to properly instrument it. Getting freeze protection
 * works as getting a read lock but there are subtle problems. XFS for example
 * gets freeze protection on internal level twice in some cases, which is OK
 * only because we already hold a freeze protection also on higher level. Due
 * to these cases we have to tell lockdep we are doing trylock when we
 * already hold a freeze protection for a higher freeze level.
 */
static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
				unsigned long ip)
{
	int i;

	if (!trylock) {
		for (i = 0; i < level - 1; i++)
			if (lock_is_held(&sb->s_writers.lock_map[i])) {
				trylock = true;
				break;
			}
	}
	rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
}
#endif

/*
 * This is an internal function, please use sb_start_{write,pagefault,intwrite}
 * instead.
 */
int __sb_start_write(struct super_block *sb, int level, bool wait)
{
retry:
	if (unlikely(sb->s_writers.frozen >= level)) {
		if (!wait)
			return 0;
		wait_event(sb->s_writers.wait_unfrozen,
			   sb->s_writers.frozen < level);
	}

#ifdef CONFIG_LOCKDEP
	acquire_freeze_lock(sb, level, !wait, _RET_IP_);
#endif
	percpu_counter_inc(&sb->s_writers.counter[level-1]);
	/*
	 * Make sure counter is updated before we check for frozen.
	 * freeze_super() first sets frozen and then checks the counter.
	 */
	smp_mb();
	if (unlikely(sb->s_writers.frozen >= level)) {
		__sb_end_write(sb, level);
		goto retry;
	}
	return 1;
}
EXPORT_SYMBOL(__sb_start_write);

/**
 * sb_wait_write - wait until all writers to given file system finish
 * @sb: the super for which we wait
 * @level: type of writers we wait for (normal vs page fault)
 *
 * This function waits until there are no writers of given type to given file
 * system. Caller of this function should make sure there can be no new writers
 * of type @level before calling this function. Otherwise this function can
 * livelock.
 */
static void sb_wait_write(struct super_block *sb, int level)
{
	s64 writers;

	/*
	 * We just cycle through lockdep here so that it does not complain
	 * about returning with the lock to userspace
	 */
	rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_);
	rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_);

	do {
		DEFINE_WAIT(wait);

		/*
		 * We use a barrier in prepare_to_wait() to separate setting
		 * of frozen and checking of the counter
		 */
		prepare_to_wait(&sb->s_writers.wait, &wait,
				TASK_UNINTERRUPTIBLE);

		writers = percpu_counter_sum(&sb->s_writers.counter[level-1]);
		if (writers)
			schedule();

		finish_wait(&sb->s_writers.wait, &wait);
	} while (writers);
}

/**
 * freeze_super - lock the filesystem and force it into a consistent state
 * @sb: the super to lock
 *
 * Syncs the super to make sure the filesystem is consistent and calls the fs's
 * freeze_fs. Subsequent calls to this without first thawing the fs will return
 * -EBUSY.
 *
 * During this function, sb->s_writers.frozen goes through these values:
 *
 * SB_UNFROZEN: File system is normal, all writes progress as usual.
 *
 * SB_FREEZE_WRITE: The file system is in the process of being frozen. New
 * writes should be blocked, though page faults are still allowed. We wait for
 * all writes to complete and then proceed to the next stage.
 *
 * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked
 * but internal fs threads can still modify the filesystem (although they
 * should not dirty new pages or inodes), writeback can run etc. After waiting
 * for all running page faults we sync the filesystem which will clean all
 * dirty pages and inodes (no new dirty pages or inodes can be created when
 * sync is running).
 *
 * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs
 * modification are blocked (e.g. XFS preallocation truncation on inode
 * reclaim). This is usually implemented by blocking new transactions for
 * filesystems that have them and need this additional guard. After all
 * internal writers are finished we call ->freeze_fs() to finish filesystem
 * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is
 * mostly auxiliary for filesystems to verify they do not modify frozen fs.
 *
 * sb->s_writers.frozen is protected by sb->s_umount.
 */
int freeze_super(struct super_block *sb)
{
	int ret;

	atomic_inc(&sb->s_active);
	down_write(&sb->s_umount);
	if (sb->s_writers.frozen != SB_UNFROZEN) {
		deactivate_locked_super(sb);
		return -EBUSY;
	}

	if (!(sb->s_flags & MS_BORN)) {
		up_write(&sb->s_umount);
		return 0;	/* sic - it's "nothing to do" */
	}

	if (sb->s_flags & MS_RDONLY) {
		/* Nothing to do really... */
		sb->s_writers.frozen = SB_FREEZE_COMPLETE;
		up_write(&sb->s_umount);
		return 0;
	}

	/* From now on, no new normal writers can start */
	sb->s_writers.frozen = SB_FREEZE_WRITE;
	smp_wmb();

	/* Release s_umount to preserve sb_start_write -> s_umount ordering */
	up_write(&sb->s_umount);

	sb_wait_write(sb, SB_FREEZE_WRITE);

	/* Now we go and block page faults... */
	down_write(&sb->s_umount);
	sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
	smp_wmb();

	sb_wait_write(sb, SB_FREEZE_PAGEFAULT);

	/* All writers are done so after syncing there won't be dirty data */
	sync_filesystem(sb);

	/* Now wait for internal filesystem counter */
	sb->s_writers.frozen = SB_FREEZE_FS;
	smp_wmb();
	sb_wait_write(sb, SB_FREEZE_FS);

	if (sb->s_op->freeze_fs) {
		ret = sb->s_op->freeze_fs(sb);
		if (ret) {
			printk(KERN_ERR
				"VFS:Filesystem freeze failed\n");
			sb->s_writers.frozen = SB_UNFROZEN;
			smp_wmb();
			wake_up(&sb->s_writers.wait_unfrozen);
			deactivate_locked_super(sb);
			return ret;
		}
	}
	/*
	 * This is just for debugging purposes so that fs can warn if it
	 * sees write activity when frozen is set to SB_FREEZE_COMPLETE.
	 */
	sb->s_writers.frozen = SB_FREEZE_COMPLETE;
	up_write(&sb->s_umount);
	return 0;
}
EXPORT_SYMBOL(freeze_super);

/**
 * thaw_super -- unlock filesystem
 * @sb: the super to thaw
 *
 * Unlocks the filesystem and marks it writeable again after freeze_super().
 */
int thaw_super(struct super_block *sb)
{
	int error;

	down_write(&sb->s_umount);
	if (sb->s_writers.frozen == SB_UNFROZEN) {
		up_write(&sb->s_umount);
		return -EINVAL;
	}

	if (sb->s_flags & MS_RDONLY)
		goto out;

	if (sb->s_op->unfreeze_fs) {
		error = sb->s_op->unfreeze_fs(sb);
		if (error) {
			printk(KERN_ERR
				"VFS:Filesystem thaw failed\n");
			up_write(&sb->s_umount);
			return error;
		}
	}

out:
	sb->s_writers.frozen = SB_UNFROZEN;
	smp_wmb();
	wake_up(&sb->s_writers.wait_unfrozen);
	deactivate_locked_super(sb);

	return 0;
}
EXPORT_SYMBOL(thaw_super);