Blame view

fs/ext4/mmp.c 10 KB
c5e06d101   Johann Lombardi   ext4: add support...
1
2
3
4
5
6
7
  #include <linux/fs.h>
  #include <linux/random.h>
  #include <linux/buffer_head.h>
  #include <linux/utsname.h>
  #include <linux/kthread.h>
  
  #include "ext4.h"
5c359a47e   Darrick J. Wong   ext4: add checksu...
8
  /* Checksumming functions */
171a7f21a   Dmitry Monakhov   ext4: fix big-end...
9
  static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
5c359a47e   Darrick J. Wong   ext4: add checksu...
10
11
12
13
14
15
16
17
18
  {
  	struct ext4_sb_info *sbi = EXT4_SB(sb);
  	int offset = offsetof(struct mmp_struct, mmp_checksum);
  	__u32 csum;
  
  	csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);
  
  	return cpu_to_le32(csum);
  }
c197855ea   Stephen Hemminger   ext4: make local ...
19
  static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
5c359a47e   Darrick J. Wong   ext4: add checksu...
20
  {
9aa5d32ba   Dmitry Monakhov   ext4: Replace ope...
21
  	if (!ext4_has_metadata_csum(sb))
5c359a47e   Darrick J. Wong   ext4: add checksu...
22
23
24
25
  		return 1;
  
  	return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
  }
c197855ea   Stephen Hemminger   ext4: make local ...
26
  static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
5c359a47e   Darrick J. Wong   ext4: add checksu...
27
  {
9aa5d32ba   Dmitry Monakhov   ext4: Replace ope...
28
  	if (!ext4_has_metadata_csum(sb))
5c359a47e   Darrick J. Wong   ext4: add checksu...
29
30
31
32
  		return;
  
  	mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
  }
c5e06d101   Johann Lombardi   ext4: add support...
33
34
35
36
  /*
   * Write the MMP block using WRITE_SYNC to try to get the block on-disk
   * faster.
   */
5c359a47e   Darrick J. Wong   ext4: add checksu...
37
  static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
c5e06d101   Johann Lombardi   ext4: add support...
38
  {
5c359a47e   Darrick J. Wong   ext4: add checksu...
39
  	struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
8e8ad8a57   Jan Kara   ext4: Convert to ...
40
41
42
43
44
  	/*
  	 * We protect against freezing so that we don't create dirty buffers
  	 * on frozen filesystem.
  	 */
  	sb_start_write(sb);
5c359a47e   Darrick J. Wong   ext4: add checksu...
45
  	ext4_mmp_csum_set(sb, mmp);
c5e06d101   Johann Lombardi   ext4: add support...
46
47
48
49
  	mark_buffer_dirty(bh);
  	lock_buffer(bh);
  	bh->b_end_io = end_buffer_write_sync;
  	get_bh(bh);
2a222ca99   Mike Christie   fs: have submit_b...
50
  	submit_bh(REQ_OP_WRITE, WRITE_SYNC | REQ_META | REQ_PRIO, bh);
c5e06d101   Johann Lombardi   ext4: add support...
51
  	wait_on_buffer(bh);
8e8ad8a57   Jan Kara   ext4: Convert to ...
52
  	sb_end_write(sb);
c5e06d101   Johann Lombardi   ext4: add support...
53
54
55
56
57
58
59
60
61
62
63
64
65
66
  	if (unlikely(!buffer_uptodate(bh)))
  		return 1;
  
  	return 0;
  }
  
  /*
   * Read the MMP block. It _must_ be read from disk and hence we clear the
   * uptodate flag on the buffer.
   */
  /*
   * @sb:        filesystem the MMP block belongs to
   * @bh:        in/out buffer head; if *bh is NULL one is obtained with
   *             sb_getblk(), otherwise the existing buffer is re-read
   * @mmp_block: block number of the MMP block
   *
   * Returns 0 on success.  On failure returns -ENOMEM, -EIO, -EFSCORRUPTED
   * (bad magic) or -EFSBADCRC (bad checksum); in that case the buffer is
   * released, *bh is set to NULL and a warning is logged.
   */
  static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
  			  ext4_fsblk_t mmp_block)
  {
  	struct mmp_struct *mmp;
  	int err;

  	/*
  	 * This would be sb_bread(sb, mmp_block), except we need to be sure
  	 * that the MD RAID device cache has been bypassed, and that the read
  	 * is not blocked in the elevator.
  	 */
  	if (*bh) {
  		/* Drop the uptodate flag so the data is fetched again. */
  		clear_buffer_uptodate(*bh);
  	} else {
  		*bh = sb_getblk(sb, mmp_block);
  		if (!*bh) {
  			err = -ENOMEM;
  			goto fail;
  		}
  	}

  	get_bh(*bh);
  	lock_buffer(*bh);
  	(*bh)->b_end_io = end_buffer_read_sync;
  	submit_bh(REQ_OP_READ, READ_SYNC | REQ_META | REQ_PRIO, *bh);
  	wait_on_buffer(*bh);

  	if (!buffer_uptodate(*bh)) {
  		err = -EIO;
  		goto fail;
  	}

  	/* Validate what was read: magic number first, then checksum. */
  	mmp = (struct mmp_struct *)(*bh)->b_data;
  	if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
  		err = -EFSCORRUPTED;
  	else if (!ext4_mmp_csum_verify(sb, mmp))
  		err = -EFSBADCRC;
  	else
  		return 0;

  fail:
  	brelse(*bh);
  	*bh = NULL;
  	ext4_warning(sb, "Error %d while reading MMP block %llu",
  		     err, mmp_block);
  	return err;
  }
  
  /*
   * Dump as much information as possible to help the admin.
   */
  void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
  		    const char *function, unsigned int line, const char *msg)
  {
da0b5e40a   Dan Carpenter   ext4: silence a f...
116
  	__ext4_warning(sb, function, line, "%s", msg);
c5e06d101   Johann Lombardi   ext4: add support...
117
118
  	__ext4_warning(sb, function, line,
  		       "MMP failure info: last update time: %llu, last update "
8d2ae1cbe   Jakub Wilk   ext4: remove trai...
119
  		       "node: %s, last update device: %s",
c5e06d101   Johann Lombardi   ext4: add support...
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
  		       (long long unsigned int) le64_to_cpu(mmp->mmp_time),
  		       mmp->mmp_nodename, mmp->mmp_bdevname);
  }
  
  /*
   * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
   */
  static int kmmpd(void *data)
  {
  	struct super_block *sb = ((struct mmpd_data *) data)->sb;
  	struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
  	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
  	struct mmp_struct *mmp;
  	ext4_fsblk_t mmp_block;
  	u32 seq = 0;
  	unsigned long failed_writes = 0;
  	int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
  	unsigned mmp_check_interval;
  	unsigned long last_update_time;
  	unsigned long diff;
  	int retval;
  
  	mmp_block = le64_to_cpu(es->s_mmp_block);
  	mmp = (struct mmp_struct *)(bh->b_data);
  	mmp->mmp_time = cpu_to_le64(get_seconds());
  	/*
  	 * Start with the higher mmp_check_interval and reduce it if
  	 * the MMP block is being updated on time.
  	 */
  	mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
  				 EXT4_MMP_MIN_CHECK_INTERVAL);
  	mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
  	bdevname(bh->b_bdev, mmp->mmp_bdevname);
215fc6af7   Nikitas Angelinas   ext4: MMP: kmmpd ...
153
  	memcpy(mmp->mmp_nodename, init_utsname()->nodename,
c5e06d101   Johann Lombardi   ext4: add support...
154
155
156
157
158
159
160
161
162
  	       sizeof(mmp->mmp_nodename));
  
  	while (!kthread_should_stop()) {
  		if (++seq > EXT4_MMP_SEQ_MAX)
  			seq = 1;
  
  		mmp->mmp_seq = cpu_to_le32(seq);
  		mmp->mmp_time = cpu_to_le64(get_seconds());
  		last_update_time = jiffies;
5c359a47e   Darrick J. Wong   ext4: add checksu...
163
  		retval = write_mmp_block(sb, bh);
c5e06d101   Johann Lombardi   ext4: add support...
164
165
166
167
  		/*
  		 * Don't spew too many error messages. Print one every
  		 * (s_mmp_update_interval * 60) seconds.
  		 */
bdfc230f3   Nikitas Angelinas   ext4: MMP: fix er...
168
169
170
  		if (retval) {
  			if ((failed_writes % 60) == 0)
  				ext4_error(sb, "Error writing to MMP block");
c5e06d101   Johann Lombardi   ext4: add support...
171
172
173
174
175
176
177
  			failed_writes++;
  		}
  
  		if (!(le32_to_cpu(es->s_feature_incompat) &
  		    EXT4_FEATURE_INCOMPAT_MMP)) {
  			ext4_warning(sb, "kmmpd being stopped since MMP feature"
  				     " has been disabled.");
030468867   vikram.jadhav07   ext4: clean up er...
178
  			goto exit_thread;
c5e06d101   Johann Lombardi   ext4: add support...
179
180
181
182
183
  		}
  
  		if (sb->s_flags & MS_RDONLY) {
  			ext4_warning(sb, "kmmpd being stopped since filesystem "
  				     "has been remounted as readonly.");
030468867   vikram.jadhav07   ext4: clean up er...
184
  			goto exit_thread;
c5e06d101   Johann Lombardi   ext4: add support...
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
  		}
  
  		diff = jiffies - last_update_time;
  		if (diff < mmp_update_interval * HZ)
  			schedule_timeout_interruptible(mmp_update_interval *
  						       HZ - diff);
  
  		/*
  		 * We need to make sure that more than mmp_check_interval
  		 * seconds have not passed since writing. If that has happened
  		 * we need to check if the MMP block is as we left it.
  		 */
  		diff = jiffies - last_update_time;
  		if (diff > mmp_check_interval * HZ) {
  			struct buffer_head *bh_check = NULL;
  			struct mmp_struct *mmp_check;
  
  			retval = read_mmp_block(sb, &bh_check, mmp_block);
  			if (retval) {
  				ext4_error(sb, "error reading MMP data: %d",
  					   retval);
030468867   vikram.jadhav07   ext4: clean up er...
206
  				goto exit_thread;
c5e06d101   Johann Lombardi   ext4: add support...
207
208
209
210
211
212
213
214
215
216
217
  			}
  
  			mmp_check = (struct mmp_struct *)(bh_check->b_data);
  			if (mmp->mmp_seq != mmp_check->mmp_seq ||
  			    memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
  				   sizeof(mmp->mmp_nodename))) {
  				dump_mmp_msg(sb, mmp_check,
  					     "Error while updating MMP info. "
  					     "The filesystem seems to have been"
  					     " multiply mounted.");
  				ext4_error(sb, "abort");
030468867   vikram.jadhav07   ext4: clean up er...
218
219
220
  				put_bh(bh_check);
  				retval = -EBUSY;
  				goto exit_thread;
c5e06d101   Johann Lombardi   ext4: add support...
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
  			}
  			put_bh(bh_check);
  		}
  
  		 /*
  		 * Adjust the mmp_check_interval depending on how much time
  		 * it took for the MMP block to be written.
  		 */
  		mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
  					     EXT4_MMP_MAX_CHECK_INTERVAL),
  					 EXT4_MMP_MIN_CHECK_INTERVAL);
  		mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
  	}
  
  	/*
  	 * Unmount seems to be clean.
  	 */
  	mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
  	mmp->mmp_time = cpu_to_le64(get_seconds());
5c359a47e   Darrick J. Wong   ext4: add checksu...
240
  	retval = write_mmp_block(sb, bh);
c5e06d101   Johann Lombardi   ext4: add support...
241

030468867   vikram.jadhav07   ext4: clean up er...
242
243
  exit_thread:
  	EXT4_SB(sb)->s_mmp_tsk = NULL;
c5e06d101   Johann Lombardi   ext4: add support...
244
245
246
247
248
249
250
251
252
253
254
255
256
257
  	kfree(data);
  	brelse(bh);
  	return retval;
  }
  
  /*
   * Get a random new sequence number but make sure it is not greater than
   * EXT4_MMP_SEQ_MAX.
   */
  /*
   * Draw random 32-bit values until one is not greater than
   * EXT4_MMP_SEQ_MAX, and return it.
   */
  static unsigned int mmp_new_seq(void)
  {
  	for (;;) {
  		u32 candidate = prandom_u32();

  		if (candidate <= EXT4_MMP_SEQ_MAX)
  			return candidate;
  	}
  }
  
  /*
   * Protect the filesystem from being mounted more than once.
   */
  /*
   * @sb:        filesystem being mounted
   * @mmp_block: block number of the MMP block, taken from the superblock
   *
   * Reads the MMP block, waits to see whether another node is updating it,
   * claims it with a fresh random sequence number, waits and re-checks, and
   * finally starts the kmmpd thread that keeps the block updated.
   *
   * Returns 0 when the filesystem may be mounted and kmmpd is running, or 1
   * on any failure (invalid block number, I/O error, fsck in progress,
   * another node active, interrupted wait, allocation or thread failure).
   */
  int ext4_multi_mount_protect(struct super_block *sb,
  				    ext4_fsblk_t mmp_block)
  {
  	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
  	struct buffer_head *bh = NULL;
  	struct mmp_struct *mmp = NULL;
  	struct mmpd_data *mmpd_data;
  	u32 seq;
  	unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
  	unsigned int wait_time = 0;
  	int retval;

  	if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
  	    mmp_block >= ext4_blocks_count(es)) {
  		ext4_warning(sb, "Invalid MMP block in superblock");
  		goto failed;
  	}

  	/* On failure read_mmp_block() leaves bh NULL, so "failed" is safe. */
  	retval = read_mmp_block(sb, &bh, mmp_block);
  	if (retval)
  		goto failed;

  	mmp = (struct mmp_struct *)(bh->b_data);

  	if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
  		mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;

  	/*
  	 * If check_interval in MMP block is larger, use that instead of
  	 * update_interval from the superblock.
  	 */
  	if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
  		mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);

  	seq = le32_to_cpu(mmp->mmp_seq);
  	if (seq == EXT4_MMP_SEQ_CLEAN)
  		goto skip;

  	if (seq == EXT4_MMP_SEQ_FSCK) {
  		dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
  		goto failed;
  	}

  	wait_time = min(mmp_check_interval * 2 + 1,
  			mmp_check_interval + 60);

  	/* Print MMP interval if more than 20 secs. */
  	if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
  		ext4_warning(sb, "MMP interval %u higher than expected, please"
  			     " wait.\n", wait_time * 2);

  	/* A non-zero return means the sleep was cut short. */
  	if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
  		ext4_warning(sb, "MMP startup interrupted, failing mount\n");
  		goto failed;
  	}

  	/* The sequence must not have moved while we were waiting. */
  	retval = read_mmp_block(sb, &bh, mmp_block);
  	if (retval)
  		goto failed;
  	mmp = (struct mmp_struct *)(bh->b_data);
  	if (seq != le32_to_cpu(mmp->mmp_seq)) {
  		dump_mmp_msg(sb, mmp,
  			     "Device is already active on another node.");
  		goto failed;
  	}

  skip:
  	/*
  	 * write a new random sequence number.
  	 */
  	seq = mmp_new_seq();
  	mmp->mmp_seq = cpu_to_le32(seq);

  	retval = write_mmp_block(sb, bh);
  	if (retval)
  		goto failed;

  	/*
  	 * wait for MMP interval and check mmp_seq.
  	 */
  	if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
  		ext4_warning(sb, "MMP startup interrupted, failing mount");
  		goto failed;
  	}

  	retval = read_mmp_block(sb, &bh, mmp_block);
  	if (retval)
  		goto failed;
  	mmp = (struct mmp_struct *)(bh->b_data);
  	if (seq != le32_to_cpu(mmp->mmp_seq)) {
  		dump_mmp_msg(sb, mmp,
  			     "Device is already active on another node.");
  		goto failed;
  	}

  	mmpd_data = kmalloc(sizeof(*mmpd_data), GFP_KERNEL);
  	if (!mmpd_data) {
  		ext4_warning(sb, "not enough memory for mmpd_data");
  		goto failed;
  	}
  	mmpd_data->sb = sb;
  	mmpd_data->bh = bh;

  	/*
  	 * Start a kernel thread to update the MMP block periodically.
  	 * On success kmmpd frees mmpd_data and releases bh when it exits.
  	 */
  	EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
  					     bdevname(bh->b_bdev,
  						      mmp->mmp_bdevname));
  	if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
  		EXT4_SB(sb)->s_mmp_tsk = NULL;
  		kfree(mmpd_data);
  		ext4_warning(sb, "Unable to create kmmpd thread for %s.",
  			     sb->s_id);
  		goto failed;
  	}

  	return 0;

  failed:
  	brelse(bh);
  	return 1;
  }