Blame view

fs/ocfs2/buffer_head_io.c 10.3 KB
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
  /* -*- mode: c; c-basic-offset: 8; -*-
   * vim: noexpandtab sw=8 ts=8 sts=0:
   *
   * io.c
   *
   * Buffer cache handling
   *
   * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
   *
   * This program is free software; you can redistribute it and/or
   * modify it under the terms of the GNU General Public
   * License as published by the Free Software Foundation; either
   * version 2 of the License, or (at your option) any later version.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * General Public License for more details.
   *
   * You should have received a copy of the GNU General Public
   * License along with this program; if not, write to the
   * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   * Boston, MA 021110-1307, USA.
   */
  
  #include <linux/fs.h>
  #include <linux/types.h>
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
28
29
30
31
32
33
34
35
36
37
  #include <linux/highmem.h>
  
  #include <cluster/masklog.h>
  
  #include "ocfs2.h"
  
  #include "alloc.h"
  #include "inode.h"
  #include "journal.h"
  #include "uptodate.h"
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
38
  #include "buffer_head_io.h"
15057e981   Tao Ma   ocfs2: Remove mas...
39
  #include "ocfs2_trace.h"
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
40

970e4936d   Joel Becker   ocfs2: Validate m...
41
42
43
  /*
   * Bits on bh->b_state used by ocfs2.
   *
b86c86fa1   Mark Fasheh   ocfs2: Use BH_JBD...
44
   * These MUST be after the JBD2 bits.  Hence, we use BH_JBDPrivateStart.
970e4936d   Joel Becker   ocfs2: Validate m...
45
46
   */
  /*
   * ocfs2-private flag bits stored in bh->b_state.  Numbering starts at
   * BH_JBDPrivateStart.
   *
   * BH_NeedsValidate is set by ocfs2_read_blocks() when it submits a read
   * for which a validate callback was supplied, and is cleared again just
   * before that callback is invoked on the completed buffer.
   */
  enum ocfs2_state_bits {
b86c86fa1   Mark Fasheh   ocfs2: Use BH_JBD...
47
  	BH_NeedsValidate = BH_JBDPrivateStart,
970e4936d   Joel Becker   ocfs2: Validate m...
48
49
50
51
  };
  
  /*
   * Expand the magic b_state functions.  This file uses the resulting
   * set_buffer_needs_validate(), clear_buffer_needs_validate() and
   * buffer_needs_validate() accessors for the BH_NeedsValidate bit.
   */
  BUFFER_FNS(NeedsValidate, needs_validate);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
52
  int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
8cb471e8f   Joel Becker   ocfs2: Take the i...
53
  		      struct ocfs2_caching_info *ci)
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
54
55
  {
  	int ret = 0;
15057e981   Tao Ma   ocfs2: Remove mas...
56
  	trace_ocfs2_write_block((unsigned long long)bh->b_blocknr, ci);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
57
58
59
60
61
62
63
64
65
  
  	BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO);
  	BUG_ON(buffer_jbd(bh));
  
  	/* No need to check for a soft readonly file system here. non
  	 * journalled writes are only ever done on system files which
  	 * can get modified during recovery even if read-only. */
  	if (ocfs2_is_hard_readonly(osb)) {
  		ret = -EROFS;
c1e8d35ef   Tao Ma   ocfs2: Remove EXI...
66
  		mlog_errno(ret);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
67
68
  		goto out;
  	}
8cb471e8f   Joel Becker   ocfs2: Take the i...
69
  	ocfs2_metadata_cache_io_lock(ci);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
70
71
72
73
74
75
  
  	lock_buffer(bh);
  	set_buffer_uptodate(bh);
  
  	/* remove from dirty list before I/O. */
  	clear_buffer_dirty(bh);
da1e90985   Joel Becker   ocfs2: Separate o...
76
  	get_bh(bh); /* for end_buffer_write_sync() */
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
77
78
79
80
81
82
  	bh->b_end_io = end_buffer_write_sync;
  	submit_bh(WRITE, bh);
  
  	wait_on_buffer(bh);
  
  	if (buffer_uptodate(bh)) {
8cb471e8f   Joel Becker   ocfs2: Take the i...
83
  		ocfs2_set_buffer_uptodate(ci, bh);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
84
85
86
87
88
  	} else {
  		/* We don't need to remove the clustered uptodate
  		 * information for this bh as it's not marked locally
  		 * uptodate. */
  		ret = -EIO;
c1e8d35ef   Tao Ma   ocfs2: Remove EXI...
89
  		mlog_errno(ret);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
90
  	}
8cb471e8f   Joel Becker   ocfs2: Take the i...
91
  	ocfs2_metadata_cache_io_unlock(ci);
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
92
  out:
ccd979bdb   Mark Fasheh   [PATCH] OCFS2: Th...
93
94
  	return ret;
  }
da1e90985   Joel Becker   ocfs2: Separate o...
95
96
97
98
99
100
  /*
   * Read @nr blocks straight from disk, bypassing the ocfs2 metadata
   * cache, and wait for the reads to finish.
   *
   * @osb:   mounted ocfs2 instance; osb->sb is used to look up buffers.
   * @block: block number used for the first buffer allocated here.  It is
   *         only advanced when an entry is allocated by this function, so
   *         entries that arrive non-NULL do not consume a block number.
   * @nr:    number of entries in @bhs.  Zero is a successful no-op.
   * @bhs:   array of @nr buffer heads.  NULL entries are filled in with
   *         sb_getblk(); non-NULL entries are re-read in place.
   *
   * Buffers owned by JBD and dirty buffers are skipped rather than read
   * (the dirty case is logged).
   *
   * Returns 0 on success.  On -EIO every buffer that is not uptodate
   * after its read has been released with put_bh() and its slot set to
   * NULL; the remaining entries stay with the caller.  On -ENOMEM
   * (sb_getblk() failed) the reads already submitted for earlier entries
   * are waited for, with the same per-buffer cleanup, before returning,
   * so no I/O issued here is still in flight.
   */
  int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
  			   unsigned int nr, struct buffer_head *bhs[])
  {
  	int status = 0;
  	unsigned int i;
  	unsigned int prepared;
  	struct buffer_head *bh;

  	trace_ocfs2_read_blocks_sync((unsigned long long)block, nr);

  	if (!nr)
  		goto bail;

  	for (i = 0 ; i < nr ; i++) {
  		if (bhs[i] == NULL) {
  			bhs[i] = sb_getblk(osb->sb, block++);
  			if (bhs[i] == NULL) {
  				status = -ENOMEM;
  				mlog_errno(status);
  				/* Reads for bhs[0..i-1] may already be
  				 * in flight, so do not return yet: drop
  				 * out to the completion loop and wait
  				 * for them first. */
  				break;
  			}
  		}
  		bh = bhs[i];

  		if (buffer_jbd(bh)) {
  			trace_ocfs2_read_blocks_sync_jbd(
  					(unsigned long long)bh->b_blocknr);
  			continue;
  		}

  		if (buffer_dirty(bh)) {
  			/* This should probably be a BUG, or
  			 * at least return an error. */
  			mlog(ML_ERROR,
  			     "trying to sync read a dirty "
  			     "buffer! (blocknr = %llu), skipping\n",
  			     (unsigned long long)bh->b_blocknr);
  			continue;
  		}

  		lock_buffer(bh);
  		if (buffer_jbd(bh)) {
  			mlog(ML_ERROR,
  			     "block %llu had the JBD bit set "
  			     "while I was in lock_buffer!",
  			     (unsigned long long)bh->b_blocknr);
  			BUG();
  		}

  		clear_buffer_uptodate(bh);
  		get_bh(bh); /* for end_buffer_read_sync() */
  		bh->b_end_io = end_buffer_read_sync;
  		submit_bh(READ, bh);
  	}

  	/* Number of leading entries that were actually looked at: nr on
  	 * the normal path, or the index of the failed allocation. */
  	prepared = i;

  	for (i = prepared; i > 0; i--) {
  		bh = bhs[i - 1];

  		/* No need to wait on the buffer if it's managed by JBD. */
  		if (!buffer_jbd(bh))
  			wait_on_buffer(bh);

  		if (!buffer_uptodate(bh)) {
  			/* Record the failure and keep looping so the
  			 * other buffers are cleaned up too.  An earlier
  			 * error (-ENOMEM) is not overwritten. */
  			if (!status)
  				status = -EIO;
  			put_bh(bh);
  			bhs[i - 1] = NULL;
  		}
  	}

  bail:
  	return status;
  }
8cb471e8f   Joel Becker   ocfs2: Take the i...
168
  /*
   * Read @nr metadata blocks through the metadata cache @ci.
   *
   * @ci:       metadata cache the blocks belong to (must not be NULL).
   *            Its io lock is held while buffers are submitted and
   *            completed.
   * @block:    block number used for the first buffer allocated here; it
   *            is only advanced when an entry is allocated by this
   *            function.
   * @nr:       number of entries in @bhs.  Negative is -EINVAL, zero is a
   *            successful no-op.
   * @bhs:      array of @nr buffer heads (must not be NULL).  NULL entries
   *            are filled in with sb_getblk().
   * @flags:    OCFS2_BH_IGNORE_CACHE forces a read from disk;
   *            OCFS2_BH_READAHEAD submits reads without waiting for them.
   *            Passing both is a BUG().
   * @validate: optional callback run on every buffer that was read from
   *            disk by a non-readahead call, once the read has completed.
   *
   * Returns 0 on success, -EINVAL for bad arguments, -ENOMEM if a buffer
   * could not be obtained, -EIO if a buffer is not uptodate after its
   * read, or the non-zero value returned by @validate.  For -EIO and
   * validate failures the offending buffer is released with put_bh() and
   * its slot in @bhs set to NULL.  Once an error has been recorded it is
   * never reset to 0 by a later buffer that completes successfully.
   */
  int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
  		      struct buffer_head *bhs[], int flags,
  		      int (*validate)(struct super_block *sb,
  				      struct buffer_head *bh))
  {
  	int status = 0;
  	int i, ignore_cache = 0;
  	struct buffer_head *bh;
  	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);

  	trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags);

  	BUG_ON(!ci);
  	BUG_ON((flags & OCFS2_BH_READAHEAD) &&
  	       (flags & OCFS2_BH_IGNORE_CACHE));

  	if (bhs == NULL) {
  		status = -EINVAL;
  		mlog_errno(status);
  		goto bail;
  	}

  	if (nr < 0) {
  		mlog(ML_ERROR, "asked to read %d blocks!\n", nr);
  		status = -EINVAL;
  		mlog_errno(status);
  		goto bail;
  	}

  	if (nr == 0) {
  		status = 0;
  		goto bail;
  	}

  	ocfs2_metadata_cache_io_lock(ci);
  	for (i = 0 ; i < nr ; i++) {
  		if (bhs[i] == NULL) {
  			bhs[i] = sb_getblk(sb, block++);
  			if (bhs[i] == NULL) {
  				ocfs2_metadata_cache_io_unlock(ci);
  				status = -ENOMEM;
  				mlog_errno(status);
  				goto bail;
  			}
  		}
  		bh = bhs[i];
  		ignore_cache = (flags & OCFS2_BH_IGNORE_CACHE);

  		/* There are three read-ahead cases here which we need to
  		 * be concerned with. All three assume a buffer has
  		 * previously been submitted with OCFS2_BH_READAHEAD
  		 * and it hasn't yet completed I/O.
  		 *
  		 * 1) The current request is sync to disk. This rarely
  		 *    happens these days, and never when performance
  		 *    matters - the code can just wait on the buffer
  		 *    lock and re-submit.
  		 *
  		 * 2) The current request is cached, but not
  		 *    readahead. ocfs2_buffer_uptodate() will return
  		 *    false anyway, so we'll wind up waiting on the
  		 *    buffer lock to do I/O. We re-check the request
  		 *    after getting the lock to avoid a re-submit.
  		 *
  		 * 3) The current request is readahead (and so must
  		 *    also be a caching one). We short circuit if the
  		 *    buffer is locked (under I/O) and if it's in the
  		 *    uptodate cache. The re-check from #2 catches the
  		 *    case that the previous read-ahead completes just
  		 *    before our is-it-in-flight check.
  		 */

  		if (!ignore_cache && !ocfs2_buffer_uptodate(ci, bh)) {
  			trace_ocfs2_read_blocks_from_disk(
  			     (unsigned long long)bh->b_blocknr,
  			     (unsigned long long)ocfs2_metadata_cache_owner(ci));
  			/* We're using ignore_cache here to say
  			 * "go to disk" */
  			ignore_cache = 1;
  		}

  		trace_ocfs2_read_blocks_bh((unsigned long long)bh->b_blocknr,
  			ignore_cache, buffer_jbd(bh), buffer_dirty(bh));

  		/* Buffers owned by the journal are never read here. */
  		if (buffer_jbd(bh)) {
  			continue;
  		}

  		if (ignore_cache) {
  			if (buffer_dirty(bh)) {
  				/* This should probably be a BUG, or
  				 * at least return an error. */
  				continue;
  			}

  			/* A read-ahead request was made - if the
  			 * buffer is already under read-ahead from a
  			 * previously submitted request then we are
  			 * done here. */
  			if ((flags & OCFS2_BH_READAHEAD)
  			    && ocfs2_buffer_read_ahead(ci, bh))
  				continue;

  			lock_buffer(bh);
  			if (buffer_jbd(bh)) {
  #ifdef CATCH_BH_JBD_RACES
  				mlog(ML_ERROR, "block %llu had the JBD bit set "
  					       "while I was in lock_buffer!",
  				     (unsigned long long)bh->b_blocknr);
  				BUG();
  #else
  				unlock_buffer(bh);
  				continue;
  #endif
  			}

  			/* Re-check ocfs2_buffer_uptodate() as a
  			 * previously read-ahead buffer may have
  			 * completed I/O while we were waiting for the
  			 * buffer lock. */
  			if (!(flags & OCFS2_BH_IGNORE_CACHE)
  			    && !(flags & OCFS2_BH_READAHEAD)
  			    && ocfs2_buffer_uptodate(ci, bh)) {
  				unlock_buffer(bh);
  				continue;
  			}

  			clear_buffer_uptodate(bh);
  			get_bh(bh); /* for end_buffer_read_sync() */
  			/* Remember that the completed read still has to be
  			 * run through the caller's validate callback. */
  			if (validate)
  				set_buffer_needs_validate(bh);
  			bh->b_end_io = end_buffer_read_sync;
  			submit_bh(READ, bh);
  			continue;
  		}
  	}

  	status = 0;

  	for (i = (nr - 1); i >= 0; i--) {
  		bh = bhs[i];

  		if (!(flags & OCFS2_BH_READAHEAD)) {
  			/* We know this can't have changed as we hold the
  			 * owner sem. Avoid doing any work on the bh if the
  			 * journal has it. */
  			if (!buffer_jbd(bh))
  				wait_on_buffer(bh);

  			if (!buffer_uptodate(bh)) {
  				/* Status won't be cleared from here on out,
  				 * so we can safely record this and loop back
  				 * to cleanup the other buffers. Don't need to
  				 * remove the clustered uptodate information
  				 * for this bh as it's not marked locally
  				 * uptodate. */
  				status = -EIO;
  				put_bh(bh);
  				bhs[i] = NULL;
  				continue;
  			}

  			if (buffer_needs_validate(bh)) {
  				int verr;

  				/* We never set NeedsValidate if the
  				 * buffer was held by the journal, so
  				 * that better not have changed */
  				BUG_ON(buffer_jbd(bh));
  				clear_buffer_needs_validate(bh);

  				/* Go through a local so that a buffer
  				 * which validates cleanly cannot wipe
  				 * out an error already recorded for a
  				 * higher-indexed buffer. */
  				verr = validate(sb, bh);
  				if (verr) {
  					status = verr;
  					put_bh(bh);
  					bhs[i] = NULL;
  					continue;
  				}
  			}
  		}

  		/* Always set the buffer in the cache, even if it was
  		 * a forced read, or read-ahead which hasn't yet
  		 * completed. */
  		ocfs2_set_buffer_uptodate(ci, bh);
  	}
  	ocfs2_metadata_cache_io_unlock(ci);

  	/* Note: block has been advanced past every buffer allocated
  	 * above, so this traces the post-increment value. */
  	trace_ocfs2_read_blocks_end((unsigned long long)block, nr,
  				    flags, ignore_cache);

  bail:

  	return status;
  }
d659072f7   Tao Ma   [PATCH 1/2] ocfs2...
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
  
  /*
   * Sanity check that blkno names the primary super block or one of the
   * OCFS2_MAX_BACKUP_SUPERBLOCKS backup super block locations reported by
   * ocfs2_backup_super_blkno().  Returns quietly on a match and BUG()s
   * for any other block number.
   */
  static void ocfs2_check_super_or_backup(struct super_block *sb,
  					sector_t blkno)
  {
  	int idx = 0;
  	u64 candidate;

  	if (blkno == OCFS2_SUPER_BLOCK_BLKNO)
  		return;

  	while (idx < OCFS2_MAX_BACKUP_SUPERBLOCKS) {
  		candidate = ocfs2_backup_super_blkno(sb, idx);
  		if (candidate == blkno)
  			return;
  		idx++;
  	}

  	/* Neither the primary super block nor any backup slot. */
  	BUG();
  }
  
  /*
   * Write super block and backups doesn't need to collaborate with journal,
8cb471e8f   Joel Becker   ocfs2: Take the i...
371
   * so we don't need to lock ip_io_mutex and ci doesn't need to be passed
d659072f7   Tao Ma   [PATCH 1/2] ocfs2...
372
373
374
375
376
377
   * into this function.
   */
  int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
  				struct buffer_head *bh)
  {
  	int ret = 0;
a42ab8e1a   Joel Becker   ocfs2: Compute me...
378
  	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
d659072f7   Tao Ma   [PATCH 1/2] ocfs2...
379

d659072f7   Tao Ma   [PATCH 1/2] ocfs2...
380
381
382
383
384
  	BUG_ON(buffer_jbd(bh));
  	ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr);
  
  	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) {
  		ret = -EROFS;
c1e8d35ef   Tao Ma   ocfs2: Remove EXI...
385
  		mlog_errno(ret);
d659072f7   Tao Ma   [PATCH 1/2] ocfs2...
386
387
388
389
390
391
392
393
394
395
396
  		goto out;
  	}
  
  	lock_buffer(bh);
  	set_buffer_uptodate(bh);
  
  	/* remove from dirty list before I/O. */
  	clear_buffer_dirty(bh);
  
  	get_bh(bh); /* for end_buffer_write_sync() */
  	bh->b_end_io = end_buffer_write_sync;
a42ab8e1a   Joel Becker   ocfs2: Compute me...
397
  	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check);
d659072f7   Tao Ma   [PATCH 1/2] ocfs2...
398
399
400
401
402
403
  	submit_bh(WRITE, bh);
  
  	wait_on_buffer(bh);
  
  	if (!buffer_uptodate(bh)) {
  		ret = -EIO;
c1e8d35ef   Tao Ma   ocfs2: Remove EXI...
404
  		mlog_errno(ret);
d659072f7   Tao Ma   [PATCH 1/2] ocfs2...
405
406
407
  	}
  
  out:
d659072f7   Tao Ma   [PATCH 1/2] ocfs2...
408
409
  	return ret;
  }