Blame view

fs/nfs/blocklayout/blocklayout.c 28.1 KB
155e7524f   Fred Isaman   pnfsblock: add bl...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
  /*
   *  linux/fs/nfs/blocklayout/blocklayout.c
   *
   *  Module for the NFSv4.1 pNFS block layout driver.
   *
   *  Copyright (c) 2006 The Regents of the University of Michigan.
   *  All rights reserved.
   *
   *  Andy Adamson <andros@citi.umich.edu>
   *  Fred Isaman <iisaman@umich.edu>
   *
   * permission is granted to use, copy, create derivative works and
   * redistribute this software and such derivative works for any purpose,
   * so long as the name of the university of michigan is not used in
   * any advertising or publicity pertaining to the use or distribution
   * of this software without specific, written prior authorization.  if
   * the above copyright notice or any other identification of the
   * university of michigan is included in any copy of any portion of
   * this software, then the disclaimer below must also be included.
   *
   * this software is provided as is, without representation from the
   * university of michigan as to its fitness for any purpose, and without
   * warranty by the university of michigan of any kind, either express
   * or implied, including without limitation the implied warranties of
   * merchantability and fitness for a particular purpose.  the regents
   * of the university of michigan shall not be liable for any damages,
   * including special, indirect, incidental, or consequential damages,
   * with respect to any claim arising out or in connection with the use
   * of the software, even if it has been or is hereafter advised of the
   * possibility of such damages.
   */
9549ec01b   Fred Isaman   pnfsblock: bl_rea...
32

155e7524f   Fred Isaman   pnfsblock: add bl...
33
34
  #include <linux/module.h>
  #include <linux/init.h>
fe0a9b740   Jim Rees   pnfsblock: add de...
35
36
  #include <linux/mount.h>
  #include <linux/namei.h>
9549ec01b   Fred Isaman   pnfsblock: bl_rea...
37
  #include <linux/bio.h>		/* struct bio */
71cdd40fd   Peng Tao   pnfsblock: write_...
38
  #include <linux/buffer_head.h>	/* various write calls */
88c9e4219   Heiko Carstens   nfs: add missing ...
39
  #include <linux/prefetch.h>
155e7524f   Fred Isaman   pnfsblock: add bl...
40
41
42
43
44
45
46
47
  
  #include "blocklayout.h"
  
  /* Debug facility for this file.
   * NOTE(review): presumably this selects the class under which the
   * dprintk() calls below are reported -- confirm against the NFS/sunrpc
   * debug headers.
   */
  #define NFSDBG_FACILITY	NFSDBG_PNFS_LD
  
  /* Module metadata. */
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
  MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");
fe0a9b740   Jim Rees   pnfsblock: add de...
48
49
  /* Non-static globals; neither is referenced in the visible part of this
   * file.
   * NOTE(review): judging by the names only, bl_device_pipe is presumably
   * the dentry of the device upcall pipe and bl_wq a wait queue used with
   * it -- confirm against their users before relying on this.
   */
  struct dentry *bl_device_pipe;
  wait_queue_head_t bl_wq;
9549ec01b   Fred Isaman   pnfsblock: bl_rea...
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
  /* Debug helper: dump the address and a selection of page flags of @page
   * through dprintk(), one flag per line, followed by an empty line.
   */
  static void print_page(struct page *page)
  {
  	dprintk("PRINTPAGE page %p\n", page);
  	dprintk("\tPagePrivate %d\n", PagePrivate(page));
  	dprintk("\tPageUptodate %d\n", PageUptodate(page));
  	dprintk("\tPageError %d\n", PageError(page));
  	dprintk("\tPageDirty %d\n", PageDirty(page));
  	dprintk("\tPageReferenced %d\n", PageReferenced(page));
  	dprintk("\tPageLocked %d\n", PageLocked(page));
  	dprintk("\tPageWriteback %d\n", PageWriteback(page));
  	dprintk("\tPageMappedToDisk %d\n", PageMappedToDisk(page));
  	dprintk("\n");
  }
  
  /* Decide whether the page at @isect, which lies in extent @be, has to be
   * initialized by the caller instead of being read from the device.
   *
   * Returns 1 for a NONE_DATA extent, 0 for any state other than
   * INVALID_DATA, and for an INVALID_DATA extent 1 exactly when
   * bl_is_sector_init() reports the sector as not yet initialized.
   */
  static int is_hole(struct pnfs_block_extent *be, sector_t isect)
  {
  	switch (be->be_state) {
  	case PNFS_BLOCK_NONE_DATA:
  		return 1;
  	case PNFS_BLOCK_INVALID_DATA:
  		return !bl_is_sector_init(be->be_inval, isect);
  	default:
  		return 0;
  	}
  }
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
86
87
88
89
90
  /* Given the be associated with isect, determine if page data can be
   * written to disk.
   *
   * Returns non-zero when the extent state is READWRITE_DATA or
   * INVALID_DATA.  @isect is currently not consulted.
   */
  static int is_writable(struct pnfs_block_extent *be, sector_t isect)
  {
  	return (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
  		be->be_state == PNFS_BLOCK_INVALID_DATA);
  }
9549ec01b   Fred Isaman   pnfsblock: bl_rea...
94
95
96
97
98
  /* The data we are handed might be spread across several bios.  We need
   * to track when the last one is finished.
   */
  struct parallel_io {
  	/* Dropping the last reference runs destroy_parallel() */
  	struct kref refcnt;
  	/* Called from destroy_parallel() as pnfs_callback(data, bse_count) */
  	void (*pnfs_callback) (void *data, int num_se);
  	/* Opaque cookie supplied to alloc_parallel() */
  	void *data;
  	/* Incremented by bl_write_pagelist() after each successful
  	 * bl_push_one_short_extent()
  	 */
  	int bse_count;
  };
  
  /* Allocate a parallel_io tracking structure for @data.
   *
   * The refcount is initialised by kref_init() and bse_count starts at 0.
   * pnfs_callback is NOT set here; the caller must assign it before the
   * last reference can be dropped.
   *
   * Returns the new structure, or NULL if kmalloc() fails.
   */
  static inline struct parallel_io *alloc_parallel(void *data)
  {
  	struct parallel_io *rv;
  
  	rv  = kmalloc(sizeof(*rv), GFP_NOFS);
  	if (rv) {
  		rv->data = data;
  		kref_init(&rv->refcnt);
  		rv->bse_count = 0;
  	}
  	return rv;
  }
  
  /* Take an additional reference on the parallel_io @p. */
  static inline void get_parallel(struct parallel_io *p)
  {
  	struct kref *ref = &p->refcnt;
  
  	kref_get(ref);
  }
  
  static void destroy_parallel(struct kref *kref)
  {
  	struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
  
  	dprintk("%s enter
  ", __func__);
7c5465d6c   Peng Tao   pnfsblock: alloc ...
128
  	p->pnfs_callback(p->data, p->bse_count);
9549ec01b   Fred Isaman   pnfsblock: bl_rea...
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
  	kfree(p);
  }
  
  /* Drop one reference on @p; destroy_parallel() runs when it was the
   * last one.
   */
  static inline void put_parallel(struct parallel_io *p)
  {
  	struct kref *ref = &p->refcnt;
  
  	kref_put(ref, destroy_parallel);
  }
  
  /* Submit @bio (if any) for @rw I/O.
   *
   * A reference is taken on the parallel_io stored in bio->bi_private
   * before the bio is handed to submit_bio().  A NULL @bio is a no-op.
   *
   * Always returns NULL, so callers can write "bio = bl_submit_bio(...)"
   * to forget the submitted bio.
   */
  static struct bio *
  bl_submit_bio(int rw, struct bio *bio)
  {
  	if (bio) {
  		get_parallel(bio->bi_private);
  		dprintk("%s submitting %s bio %u@%llu\n", __func__,
  			rw == READ ? "read" : "write",
  			bio->bi_size, (unsigned long long)bio->bi_sector);
  		submit_bio(rw, bio);
  	}
  	return NULL;
  }
  
  /* Allocate a bio with room for up to @npg pages (capped at
   * BIO_MAX_PAGES) and initialise it for I/O at file sector @isect,
   * translated into extent @be.
   *
   * If the first allocation fails and the current task has PF_MEMALLOC
   * set, the allocation is retried with @npg halved each time until it
   * succeeds or @npg reaches 0.
   *
   * Returns the bio, or NULL if no allocation succeeded.
   */
  static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
  				     struct pnfs_block_extent *be,
  				     void (*end_io)(struct bio *, int err),
  				     struct parallel_io *par)
  {
  	struct bio *bio;
  
  	npg = min(npg, BIO_MAX_PAGES);
  	bio = bio_alloc(GFP_NOIO, npg);
  	if (!bio && (current->flags & PF_MEMALLOC)) {
  		while (!bio && (npg /= 2))
  			bio = bio_alloc(GFP_NOIO, npg);
  	}
  
  	if (bio) {
  		/* file offset -> offset inside the extent -> volume offset */
  		bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
  		bio->bi_bdev = be->be_mdev;
  		bio->bi_end_io = end_io;
  		bio->bi_private = par;
  	}
  	return bio;
  }
  
  /* Append @page to @bio, allocating a bio first when @bio is NULL.
   *
   * When bio_add_page() accepts less than a full page, the current bio is
   * submitted and the page is retried on a freshly allocated one.
   *
   * Returns the bio now holding the page, or ERR_PTR(-ENOMEM) if a bio
   * could not be allocated.
   */
  static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
  				      sector_t isect, struct page *page,
  				      struct pnfs_block_extent *be,
  				      void (*end_io)(struct bio *, int err),
  				      struct parallel_io *par)
  {
  	for (;;) {
  		if (!bio) {
  			bio = bl_alloc_init_bio(npg, isect, be, end_io, par);
  			if (!bio)
  				return ERR_PTR(-ENOMEM);
  		}
  		if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) >=
  		    PAGE_CACHE_SIZE)
  			return bio;
  		/* Did not fit: flush what we have and start a new bio */
  		bio = bl_submit_bio(rw, bio);
  	}
  }
9549ec01b   Fred Isaman   pnfsblock: bl_rea...
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
  /* This is basically copied from mpage_end_io_read
   *
   * Walks the bio's pages from last to first; when the bio completed with
   * BIO_UPTODATE set, each page is marked uptodate.  On failure the read
   * is flagged with -EIO (unless an error is already recorded) and the
   * layout segment is marked failed.  Finally the bio and one parallel_io
   * reference are released.
   */
  static void bl_end_io_read(struct bio *bio, int err)
  {
  	struct parallel_io *par = bio->bi_private;
  	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
  	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
  	struct nfs_read_data *rdata = (struct nfs_read_data *)par->data;
  
  	do {
  		struct page *page = bvec->bv_page;
  
  		/* Prefetch the flags of the next page to be handled */
  		if (--bvec >= bio->bi_io_vec)
  			prefetchw(&bvec->bv_page->flags);
  		if (uptodate)
  			SetPageUptodate(page);
  	} while (bvec >= bio->bi_io_vec);
  	if (!uptodate) {
  		/* Keep the first error that was recorded */
  		if (!rdata->pnfs_error)
  			rdata->pnfs_error = -EIO;
  		pnfs_set_lo_fail(rdata->lseg);
  	}
  	bio_put(bio);
  	put_parallel(par);
  }
  
  /* Work function scheduled by bl_end_par_io_read(): recover the
   * nfs_read_data that embeds @work and report read completion through
   * pnfs_ld_read_done().
   */
  static void bl_read_cleanup(struct work_struct *work)
  {
  	struct rpc_task *task;
  	struct nfs_read_data *rdata;
  	dprintk("%s enter\n", __func__);
  	task = container_of(work, struct rpc_task, u.tk_work);
  	rdata = container_of(task, struct nfs_read_data, task);
  	pnfs_ld_read_done(rdata);
  }
  
  static void
7c5465d6c   Peng Tao   pnfsblock: alloc ...
228
  bl_end_par_io_read(void *data, int unused)
9549ec01b   Fred Isaman   pnfsblock: bl_rea...
229
230
  {
  	struct nfs_read_data *rdata = data;
82b906d65   Peng Tao   pnfsblock: set re...
231
  	rdata->task.tk_status = rdata->pnfs_error;
9549ec01b   Fred Isaman   pnfsblock: bl_rea...
232
233
234
  	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
  	schedule_work(&rdata->task.u.tk_work);
  }
155e7524f   Fred Isaman   pnfsblock: add bl...
235
236
237
  static enum pnfs_try_status
  bl_read_pagelist(struct nfs_read_data *rdata)
  {
9549ec01b   Fred Isaman   pnfsblock: bl_rea...
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
  	int i, hole;
  	struct bio *bio = NULL;
  	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
  	sector_t isect, extent_length = 0;
  	struct parallel_io *par;
  	loff_t f_offset = rdata->args.offset;
  	size_t count = rdata->args.count;
  	struct page **pages = rdata->args.pages;
  	int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
  
  	dprintk("%s enter nr_pages %u offset %lld count %Zd
  ", __func__,
  	       rdata->npages, f_offset, count);
  
  	par = alloc_parallel(rdata);
  	if (!par)
  		goto use_mds;
9549ec01b   Fred Isaman   pnfsblock: bl_rea...
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
  	par->pnfs_callback = bl_end_par_io_read;
  	/* At this point, we can no longer jump to use_mds */
  
  	isect = (sector_t) (f_offset >> SECTOR_SHIFT);
  	/* Code assumes extents are page-aligned */
  	for (i = pg_index; i < rdata->npages; i++) {
  		if (!extent_length) {
  			/* We've used up the previous extent */
  			bl_put_extent(be);
  			bl_put_extent(cow_read);
  			bio = bl_submit_bio(READ, bio);
  			/* Get the next one */
  			be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg),
  					     isect, &cow_read);
  			if (!be) {
  				rdata->pnfs_error = -EIO;
  				goto out;
  			}
  			extent_length = be->be_length -
  				(isect - be->be_f_offset);
  			if (cow_read) {
  				sector_t cow_length = cow_read->be_length -
  					(isect - cow_read->be_f_offset);
  				extent_length = min(extent_length, cow_length);
  			}
  		}
  		hole = is_hole(be, isect);
  		if (hole && !cow_read) {
  			bio = bl_submit_bio(READ, bio);
  			/* Fill hole w/ zeroes w/o accessing device */
  			dprintk("%s Zeroing page for hole
  ", __func__);
  			zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
  			print_page(pages[i]);
  			SetPageUptodate(pages[i]);
  		} else {
  			struct pnfs_block_extent *be_read;
  
  			be_read = (hole && cow_read) ? cow_read : be;
  			bio = bl_add_page_to_bio(bio, rdata->npages - i, READ,
  						 isect, pages[i], be_read,
  						 bl_end_io_read, par);
  			if (IS_ERR(bio)) {
  				rdata->pnfs_error = PTR_ERR(bio);
e6d05a757   Peng Tao   pnfsblock: fix NU...
299
  				bio = NULL;
9549ec01b   Fred Isaman   pnfsblock: bl_rea...
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
  				goto out;
  			}
  		}
  		isect += PAGE_CACHE_SECTORS;
  		extent_length -= PAGE_CACHE_SECTORS;
  	}
  	if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) {
  		rdata->res.eof = 1;
  		rdata->res.count = rdata->inode->i_size - f_offset;
  	} else {
  		rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
  	}
  out:
  	bl_put_extent(be);
  	bl_put_extent(cow_read);
  	bl_submit_bio(READ, bio);
  	put_parallel(par);
  	return PNFS_ATTEMPTED;
  
   use_mds:
  	dprintk("Giving up and using normal NFS
  ");
155e7524f   Fred Isaman   pnfsblock: add bl...
322
323
  	return PNFS_NOT_ATTEMPTED;
  }
31e6306a4   Fred Isaman   pnfsblock: note w...
324
325
326
327
328
  /* Walk the extents of @bl that cover the byte range
   * [@offset, @offset + @count), rounded outwards to page boundaries, and
   * for every INVALID_DATA extent pass the covered part to
   * bl_mark_for_commit() together with a short extent popped from the
   * extent's be_inval.
   *
   * Does nothing when @count is 0.  BUGs if no extent covers a sector in
   * the range or if no short extent can be popped.
   */
  static void mark_extents_written(struct pnfs_block_layout *bl,
  				 __u64 offset, __u32 count)
  {
  	sector_t isect, end;
  	struct pnfs_block_extent *be;
  	struct pnfs_block_short_extent *se;
  
  	dprintk("%s(%llu, %u)\n", __func__, offset, count);
  	if (count == 0)
  		return;
  	isect = (offset & (long)(PAGE_CACHE_MASK)) >> SECTOR_SHIFT;
  	end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK);
  	end >>= SECTOR_SHIFT;
  	while (isect < end) {
  		sector_t len;
  		be = bl_find_get_extent(bl, isect, NULL);
  		BUG_ON(!be); /* FIXME */
  		/* Part of the range that lies inside this extent */
  		len = min(end, be->be_f_offset + be->be_length) - isect;
  		if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
  			se = bl_pop_one_short_extent(be->be_inval);
  			BUG_ON(!se);
  			bl_mark_for_commit(be, isect, len, se);
  		}
  		isect += len;
  		bl_put_extent(be);
  	}
  }
71cdd40fd   Peng Tao   pnfsblock: write_...
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
  static void bl_end_io_write_zero(struct bio *bio, int err)
  {
  	struct parallel_io *par = bio->bi_private;
  	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
  	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
  	struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
  
  	do {
  		struct page *page = bvec->bv_page;
  
  		if (--bvec >= bio->bi_io_vec)
  			prefetchw(&bvec->bv_page->flags);
  		/* This is the zeroing page we added */
  		end_page_writeback(page);
  		page_cache_release(page);
  	} while (bvec >= bio->bi_io_vec);
7c5465d6c   Peng Tao   pnfsblock: alloc ...
368
369
  
  	if (unlikely(!uptodate)) {
71cdd40fd   Peng Tao   pnfsblock: write_...
370
371
  		if (!wdata->pnfs_error)
  			wdata->pnfs_error = -EIO;
1b0ae0687   Peng Tao   pnfs: make _set_l...
372
  		pnfs_set_lo_fail(wdata->lseg);
71cdd40fd   Peng Tao   pnfsblock: write_...
373
374
375
376
  	}
  	bio_put(bio);
  	put_parallel(par);
  }
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
377
378
379
380
381
382
383
384
385
  /* bio completion handler for the caller-supplied pages written by
   * bl_write_pagelist().
   *
   * If the bio did not complete uptodate, records -EIO (unless an error
   * is already set) and marks the layout segment failed.  Releases the
   * bio and one parallel_io reference.
   */
  static void bl_end_io_write(struct bio *bio, int err)
  {
  	struct parallel_io *par = bio->bi_private;
  	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
  	struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
  
  	if (!uptodate) {
  		if (!wdata->pnfs_error)
  			wdata->pnfs_error = -EIO;
  		pnfs_set_lo_fail(wdata->lseg);
  	}
  	bio_put(bio);
  	put_parallel(par);
  }
  
  /* Function scheduled for call during bl_end_par_io_write,
   * it marks sectors as written and extends the commitlist.
   */
  static void bl_write_cleanup(struct work_struct *work)
  {
  	struct rpc_task *task;
  	struct nfs_write_data *wdata;
  	dprintk("%s enter
  ", __func__);
  	task = container_of(work, struct rpc_task, u.tk_work);
  	wdata = container_of(task, struct nfs_write_data, task);
7c5465d6c   Peng Tao   pnfsblock: alloc ...
403
  	if (likely(!wdata->pnfs_error)) {
31e6306a4   Fred Isaman   pnfsblock: note w...
404
  		/* Marks for LAYOUTCOMMIT */
31e6306a4   Fred Isaman   pnfsblock: note w...
405
406
407
  		mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
  				     wdata->args.offset, wdata->args.count);
  	}
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
408
409
410
411
  	pnfs_ld_write_done(wdata);
  }
  
  /* Called when last of bios associated with a bl_write_pagelist call finishes */
7c5465d6c   Peng Tao   pnfsblock: alloc ...
412
  static void bl_end_par_io_write(void *data, int num_se)
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
413
414
  {
  	struct nfs_write_data *wdata = data;
7c5465d6c   Peng Tao   pnfsblock: alloc ...
415
416
417
418
  	if (unlikely(wdata->pnfs_error)) {
  		bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
  					num_se);
  	}
82b906d65   Peng Tao   pnfsblock: set re...
419
  	wdata->task.tk_status = wdata->pnfs_error;
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
420
421
422
423
  	wdata->verf.committed = NFS_FILE_SYNC;
  	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
  	schedule_work(&wdata->task.u.tk_work);
  }
71cdd40fd   Peng Tao   pnfsblock: write_...
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
  /* FIXME STUB - should mark the intersection of layout and page as bad so
   * that it is not used again.  Currently does nothing.
   */
  static void mark_bad_read(void)
  {
  }
  
  /*
   * map_block:  map a requested I/O block (isect) into an offset in the LVM
   * block_device
   *
   * Marks @bh mapped, points it at the extent's device and computes
   * b_blocknr from the volume sector, scaled by the device inode's block
   * size (i_blkbits).
   */
  static void
  map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be)
  {
  	dprintk("%s enter be=%p\n", __func__, be);
  
  	set_buffer_mapped(bh);
  	bh->b_bdev = be->be_mdev;
  	bh->b_blocknr = (isect - be->be_f_offset + be->be_v_offset) >>
  	    (be->be_mdev->bd_inode->i_blkbits - SECTOR_SHIFT);
  
  	dprintk("%s isect %llu, bh->b_blocknr %ld, using bsize %Zd\n",
  		__func__, (unsigned long long)isect, (long)bh->b_blocknr,
  		bh->b_size);
  	return;
  }
  
  /* Given an unmapped page, zero it or read in page for COW, page is locked
   * by caller.
   */
  static int
  init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read)
  {
  	struct buffer_head *bh = NULL;
  	int ret = 0;
  	sector_t isect;
  
  	dprintk("%s enter, %p
  ", __func__, page);
  	BUG_ON(PageUptodate(page));
  	if (!cow_read) {
  		zero_user_segment(page, 0, PAGE_SIZE);
  		SetPageUptodate(page);
  		goto cleanup;
  	}
  
  	bh = alloc_page_buffers(page, PAGE_CACHE_SIZE, 0);
  	if (!bh) {
  		ret = -ENOMEM;
  		goto cleanup;
  	}
  
  	isect = (sector_t) page->index << PAGE_CACHE_SECTOR_SHIFT;
  	map_block(bh, isect, cow_read);
  	if (!bh_uptodate_or_lock(bh))
  		ret = bh_submit_read(bh);
  	if (ret)
  		goto cleanup;
  	SetPageUptodate(page);
  
  cleanup:
  	bl_put_extent(cow_read);
  	if (bh)
  		free_buffer_head(bh);
  	if (ret) {
  		/* Need to mark layout with bad read...should now
  		 * just use nfs4 for reads and writes.
  		 */
  		mark_bad_read();
  	}
  	return ret;
  }
72c508879   Peng Tao   pnfsblock: move f...
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
  /* Find or create a zeroing page marked being writeback.
   * Return ERR_PTR on error, NULL to indicate skip this page and page itself
   * to indicate write out.
   *
   * The page is looked up unlocked first; if it is neither dirty nor under
   * writeback it is locked and the flags are checked a second time.  A
   * page that is not uptodate is initialised through
   * init_page_for_write().  On success the page is returned unlocked, with
   * writeback set and a reference held.
   *
   * NOTE(review): the return value of init_page_for_write() is ignored.
   */
  static struct page *
  bl_find_get_zeroing_page(struct inode *inode, pgoff_t index,
  			struct pnfs_block_extent *cow_read)
  {
  	struct page *page;
  	int locked = 0;
  	page = find_get_page(inode->i_mapping, index);
  	if (page)
  		goto check_page;
  
  	page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
  	if (unlikely(!page)) {
  		dprintk("%s oom\n", __func__);
  		return ERR_PTR(-ENOMEM);
  	}
  	locked = 1;
  
  check_page:
  	/* PageDirty: Other will write this out
  	 * PageWriteback: Other is writing this out
  	 * PageUptodate: It was read before
  	 */
  	if (PageDirty(page) || PageWriteback(page)) {
  		print_page(page);
  		if (locked)
  			unlock_page(page);
  		page_cache_release(page);
  		return NULL;
  	}
  
  	if (!locked) {
  		/* Re-run the checks above now that we hold the page lock */
  		lock_page(page);
  		locked = 1;
  		goto check_page;
  	}
  	if (!PageUptodate(page)) {
  		/* New page, readin or zero it */
  		init_page_for_write(page, cow_read);
  	}
  	set_page_writeback(page);
  	unlock_page(page);
  
  	return page;
  }
155e7524f   Fred Isaman   pnfsblock: add bl...
548
  static enum pnfs_try_status
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
549
  bl_write_pagelist(struct nfs_write_data *wdata, int sync)
155e7524f   Fred Isaman   pnfsblock: add bl...
550
  {
71cdd40fd   Peng Tao   pnfsblock: write_...
551
  	int i, ret, npg_zero, pg_index, last = 0;
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
552
  	struct bio *bio = NULL;
71cdd40fd   Peng Tao   pnfsblock: write_...
553
554
  	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
  	sector_t isect, last_isect = 0, extent_length = 0;
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
555
556
557
558
  	struct parallel_io *par;
  	loff_t offset = wdata->args.offset;
  	size_t count = wdata->args.count;
  	struct page **pages = wdata->args.pages;
71cdd40fd   Peng Tao   pnfsblock: write_...
559
560
561
562
563
  	struct page *page;
  	pgoff_t index;
  	u64 temp;
  	int npg_per_block =
  	    NFS_SERVER(wdata->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
564
565
566
567
  
  	dprintk("%s enter, %Zu@%lld
  ", __func__, count, offset);
  	/* At this point, wdata->pages is a (sequential) list of nfs_pages.
71cdd40fd   Peng Tao   pnfsblock: write_...
568
569
  	 * We want to write each, and if there is an error set pnfs_error
  	 * to have it redone using nfs.
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
570
571
572
  	 */
  	par = alloc_parallel(wdata);
  	if (!par)
7c5465d6c   Peng Tao   pnfsblock: alloc ...
573
  		goto out_mds;
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
574
575
576
577
  	par->pnfs_callback = bl_end_par_io_write;
  	/* At this point, have to be more careful with error handling */
  
  	isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
71cdd40fd   Peng Tao   pnfsblock: write_...
578
579
580
581
  	be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
  	if (!be || !is_writable(be, isect)) {
  		dprintk("%s no matching extents!
  ", __func__);
7c5465d6c   Peng Tao   pnfsblock: alloc ...
582
  		goto out_mds;
71cdd40fd   Peng Tao   pnfsblock: write_...
583
584
585
586
  	}
  
  	/* First page inside INVALID extent */
  	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
7c5465d6c   Peng Tao   pnfsblock: alloc ...
587
588
589
590
  		if (likely(!bl_push_one_short_extent(be->be_inval)))
  			par->bse_count++;
  		else
  			goto out_mds;
71cdd40fd   Peng Tao   pnfsblock: write_...
591
592
593
594
595
596
597
598
599
600
  		temp = offset >> PAGE_CACHE_SHIFT;
  		npg_zero = do_div(temp, npg_per_block);
  		isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) &
  				     (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
  		extent_length = be->be_length - (isect - be->be_f_offset);
  
  fill_invalid_ext:
  		dprintk("%s need to zero %d pages
  ", __func__, npg_zero);
  		for (;npg_zero > 0; npg_zero--) {
754227451   Peng Tao   pnfsblock: fix wr...
601
602
603
604
605
606
  			if (bl_is_sector_init(be->be_inval, isect)) {
  				dprintk("isect %llu already init
  ",
  					(unsigned long long)isect);
  				goto next_page;
  			}
71cdd40fd   Peng Tao   pnfsblock: write_...
607
608
609
610
611
612
  			/* page ref released in bl_end_io_write_zero */
  			index = isect >> PAGE_CACHE_SECTOR_SHIFT;
  			dprintk("%s zero %dth page: index %lu isect %llu
  ",
  				__func__, npg_zero, index,
  				(unsigned long long)isect);
72c508879   Peng Tao   pnfsblock: move f...
613
614
615
616
  			page = bl_find_get_zeroing_page(wdata->inode, index,
  							cow_read);
  			if (unlikely(IS_ERR(page))) {
  				wdata->pnfs_error = PTR_ERR(page);
71cdd40fd   Peng Tao   pnfsblock: write_...
617
  				goto out;
72c508879   Peng Tao   pnfsblock: move f...
618
  			} else if (page == NULL)
71cdd40fd   Peng Tao   pnfsblock: write_...
619
  				goto next_page;
71cdd40fd   Peng Tao   pnfsblock: write_...
620
621
  
  			ret = bl_mark_sectors_init(be->be_inval, isect,
60c52e3a7   Peng Tao   pnfsblock: cleanu...
622
  						       PAGE_CACHE_SECTORS);
71cdd40fd   Peng Tao   pnfsblock: write_...
623
624
625
626
627
628
629
630
631
  			if (unlikely(ret)) {
  				dprintk("%s bl_mark_sectors_init fail %d
  ",
  					__func__, ret);
  				end_page_writeback(page);
  				page_cache_release(page);
  				wdata->pnfs_error = ret;
  				goto out;
  			}
7c5465d6c   Peng Tao   pnfsblock: alloc ...
632
633
634
635
636
637
638
639
640
641
642
643
  			if (likely(!bl_push_one_short_extent(be->be_inval)))
  				par->bse_count++;
  			else {
  				end_page_writeback(page);
  				page_cache_release(page);
  				wdata->pnfs_error = -ENOMEM;
  				goto out;
  			}
  			/* FIXME: This should be done in bi_end_io */
  			mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
  					     page->index << PAGE_CACHE_SHIFT,
  					     PAGE_CACHE_SIZE);
71cdd40fd   Peng Tao   pnfsblock: write_...
644
645
646
647
648
  			bio = bl_add_page_to_bio(bio, npg_zero, WRITE,
  						 isect, page, be,
  						 bl_end_io_write_zero, par);
  			if (IS_ERR(bio)) {
  				wdata->pnfs_error = PTR_ERR(bio);
e6d05a757   Peng Tao   pnfsblock: fix NU...
649
  				bio = NULL;
71cdd40fd   Peng Tao   pnfsblock: write_...
650
651
  				goto out;
  			}
71cdd40fd   Peng Tao   pnfsblock: write_...
652
653
654
655
656
657
658
659
660
661
662
663
  next_page:
  			isect += PAGE_CACHE_SECTORS;
  			extent_length -= PAGE_CACHE_SECTORS;
  		}
  		if (last)
  			goto write_done;
  	}
  	bio = bl_submit_bio(WRITE, bio);
  
  	/* Middle pages */
  	pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
  	for (i = pg_index; i < wdata->npages; i++) {
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
664
665
666
667
668
669
670
671
  		if (!extent_length) {
  			/* We've used up the previous extent */
  			bl_put_extent(be);
  			bio = bl_submit_bio(WRITE, bio);
  			/* Get the next one */
  			be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg),
  					     isect, NULL);
  			if (!be || !is_writable(be, isect)) {
71cdd40fd   Peng Tao   pnfsblock: write_...
672
  				wdata->pnfs_error = -EINVAL;
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
673
674
  				goto out;
  			}
7c5465d6c   Peng Tao   pnfsblock: alloc ...
675
676
677
678
679
680
681
682
683
  			if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
  				if (likely(!bl_push_one_short_extent(
  								be->be_inval)))
  					par->bse_count++;
  				else {
  					wdata->pnfs_error = -ENOMEM;
  					goto out;
  				}
  			}
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
684
  			extent_length = be->be_length -
71cdd40fd   Peng Tao   pnfsblock: write_...
685
  			    (isect - be->be_f_offset);
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
686
  		}
71cdd40fd   Peng Tao   pnfsblock: write_...
687
688
  		if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
  			ret = bl_mark_sectors_init(be->be_inval, isect,
60c52e3a7   Peng Tao   pnfsblock: cleanu...
689
  						       PAGE_CACHE_SECTORS);
71cdd40fd   Peng Tao   pnfsblock: write_...
690
691
692
693
694
695
  			if (unlikely(ret)) {
  				dprintk("%s bl_mark_sectors_init fail %d
  ",
  					__func__, ret);
  				wdata->pnfs_error = ret;
  				goto out;
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
696
  			}
71cdd40fd   Peng Tao   pnfsblock: write_...
697
698
699
700
701
702
  		}
  		bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE,
  					 isect, pages[i], be,
  					 bl_end_io_write, par);
  		if (IS_ERR(bio)) {
  			wdata->pnfs_error = PTR_ERR(bio);
e6d05a757   Peng Tao   pnfsblock: fix NU...
703
  			bio = NULL;
71cdd40fd   Peng Tao   pnfsblock: write_...
704
  			goto out;
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
705
706
  		}
  		isect += PAGE_CACHE_SECTORS;
71cdd40fd   Peng Tao   pnfsblock: write_...
707
  		last_isect = isect;
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
708
709
  		extent_length -= PAGE_CACHE_SECTORS;
  	}
71cdd40fd   Peng Tao   pnfsblock: write_...
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
  
  	/* Last page inside INVALID extent */
  	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
  		bio = bl_submit_bio(WRITE, bio);
  		temp = last_isect >> PAGE_CACHE_SECTOR_SHIFT;
  		npg_zero = npg_per_block - do_div(temp, npg_per_block);
  		if (npg_zero < npg_per_block) {
  			last = 1;
  			goto fill_invalid_ext;
  		}
  	}
  
  write_done:
  	wdata->res.count = (last_isect << SECTOR_SHIFT) - (offset);
  	if (count < wdata->res.count) {
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
725
  		wdata->res.count = count;
71cdd40fd   Peng Tao   pnfsblock: write_...
726
  	}
650e2d39b   Fred Isaman   pnfsblock: bl_wri...
727
728
729
730
731
  out:
  	bl_put_extent(be);
  	bl_submit_bio(WRITE, bio);
  	put_parallel(par);
  	return PNFS_ATTEMPTED;
7c5465d6c   Peng Tao   pnfsblock: alloc ...
732
733
734
735
  out_mds:
  	bl_put_extent(be);
  	kfree(par);
  	return PNFS_NOT_ATTEMPTED;
155e7524f   Fred Isaman   pnfsblock: add bl...
736
  }
9e6929699   Fred Isaman   pnfsblock: basic ...
737
  /* FIXME - range ignored */
155e7524f   Fred Isaman   pnfsblock: add bl...
738
  static void
9e6929699   Fred Isaman   pnfsblock: basic ...
739
  release_extents(struct pnfs_block_layout *bl, struct pnfs_layout_range *range)
155e7524f   Fred Isaman   pnfsblock: add bl...
740
  {
9e6929699   Fred Isaman   pnfsblock: basic ...
741
742
743
744
745
746
747
748
749
750
751
752
753
754
  	int i;
  	struct pnfs_block_extent *be;
  
  	spin_lock(&bl->bl_ext_lock);
  	for (i = 0; i < EXTENT_LISTS; i++) {
  		while (!list_empty(&bl->bl_extents[i])) {
  			be = list_first_entry(&bl->bl_extents[i],
  					      struct pnfs_block_extent,
  					      be_node);
  			list_del(&be->be_node);
  			bl_put_extent(be);
  		}
  	}
  	spin_unlock(&bl->bl_ext_lock);
155e7524f   Fred Isaman   pnfsblock: add bl...
755
  }
155e7524f   Fred Isaman   pnfsblock: add bl...
756
757
758
  static void
  release_inval_marks(struct pnfs_inval_markings *marks)
  {
c1c2a4cd3   Fred Isaman   pnfsblock: add ex...
759
  	struct pnfs_inval_tracking *pos, *temp;
7c5465d6c   Peng Tao   pnfsblock: alloc ...
760
  	struct pnfs_block_short_extent *se, *stemp;
c1c2a4cd3   Fred Isaman   pnfsblock: add ex...
761
762
763
764
765
  
  	list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
  		list_del(&pos->it_link);
  		kfree(pos);
  	}
7c5465d6c   Peng Tao   pnfsblock: alloc ...
766
767
768
769
770
  
  	list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) {
  		list_del(&se->bse_node);
  		kfree(se);
  	}
155e7524f   Fred Isaman   pnfsblock: add bl...
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
  	return;
  }
  
  static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo)
  {
  	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
  
  	dprintk("%s enter
  ", __func__);
  	release_extents(bl, NULL);
  	release_inval_marks(&bl->bl_inval);
  	kfree(bl);
  }
  
  /* Allocate and initialise a zeroed pnfs_block_layout for @inode.
   *
   * bl_blocksize is the server's pnfs_blksize converted from bytes to
   * sectors.
   *
   * Returns a pointer to the embedded generic layout header, or NULL if
   * the allocation fails.
   */
  static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
  						   gfp_t gfp_flags)
  {
  	struct pnfs_block_layout *bl;
  
  	dprintk("%s enter\n", __func__);
  	bl = kzalloc(sizeof(*bl), gfp_flags);
  	if (!bl)
  		return NULL;
  	spin_lock_init(&bl->bl_ext_lock);
  	INIT_LIST_HEAD(&bl->bl_extents[0]);
  	INIT_LIST_HEAD(&bl->bl_extents[1]);
  	INIT_LIST_HEAD(&bl->bl_commit);
  	INIT_LIST_HEAD(&bl->bl_committing);
  	bl->bl_count = 0;
  	bl->bl_blocksize = NFS_SERVER(inode)->pnfs_blksize >> SECTOR_SHIFT;
  	BL_INIT_INVAL_MARKS(&bl->bl_inval, bl->bl_blocksize);
  	return &bl->bl_layout;
  }
a60d2ebd9   Fred Isaman   pnfsblock: lseg a...
805
  /*
   * Free a layout segment previously returned by bl_alloc_lseg().
   *
   * Only the segment structure itself is released here.
   */
  static void bl_free_lseg(struct pnfs_layout_segment *lseg)
  {
  	dprintk("%s enter\n", __func__);
  	kfree(lseg);
  }
a60d2ebd9   Fred Isaman   pnfsblock: lseg a...
811
812
813
814
815
816
  /* We pretty much ignore lseg, and store all data layout wide, so we
   * can correctly merge.
   *
   * @lo:        layout header the LAYOUTGET result is processed into.
   * @lgr:       LAYOUTGET result handed to nfs4_blk_process_layoutget().
   * @gfp_flags: allocation flags for the segment and for result processing.
   *
   * Returns the newly allocated (zeroed) segment on success, ERR_PTR(-ENOMEM)
   * if the segment cannot be allocated, or ERR_PTR(status) if processing the
   * LAYOUTGET result fails.
   */
  static struct pnfs_layout_segment *bl_alloc_lseg(struct pnfs_layout_hdr *lo,
  						 struct nfs4_layoutget_res *lgr,
  						 gfp_t gfp_flags)
  {
  	struct pnfs_layout_segment *lseg;
  	int status;

  	dprintk("%s enter\n", __func__);
  	lseg = kzalloc(sizeof(*lseg), gfp_flags);
  	if (!lseg)
  		return ERR_PTR(-ENOMEM);
  	status = nfs4_blk_process_layoutget(lo, lgr, gfp_flags);
  	if (status) {
  		/* We don't want to call the full-blown bl_free_lseg,
  		 * since on error extents were not touched.
  		 */
  		kfree(lseg);
  		return ERR_PTR(status);
  	}
  	return lseg;
  }
  
  /*
   * Layout driver hook: encode the block-layout part of a LAYOUTCOMMIT.
   *
   * Thin wrapper that forwards to encode_pnfs_block_layoutupdate() with the
   * block layout that encloses @lo.
   */
  static void
  bl_encode_layoutcommit(struct pnfs_layout_hdr *lo, struct xdr_stream *xdr,
  		       const struct nfs4_layoutcommit_args *arg)
  {
  	dprintk("%s enter\n", __func__);
  	encode_pnfs_block_layoutupdate(BLK_LO2EXT(lo), xdr, arg);
  }
  
  /*
   * Layout driver hook: clean up after a LAYOUTCOMMIT has completed.
   *
   * Looks up the layout attached to the inode named in @lcdata's arguments and
   * forwards to clean_pnfs_block_layoutupdate() together with the result
   * status of the operation.
   */
  static void
  bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata)
  {
  	struct pnfs_layout_hdr *lo = NFS_I(lcdata->args.inode)->layout;

  	dprintk("%s enter\n", __func__);
  	clean_pnfs_block_layoutupdate(BLK_LO2EXT(lo), &lcdata->args,
  				      lcdata->res.status);
  }
2f9fd1826   Fred Isaman   pnfsblock: call a...
855
856
857
  static void free_blk_mountid(struct block_mount_id *mid)
  {
  	if (mid) {
93a3844ee   Peng Tao   pnfsblock: don't ...
858
859
860
861
  		struct pnfs_block_dev *dev, *tmp;
  
  		/* No need to take bm_lock as we are last user freeing bm_devlist */
  		list_for_each_entry_safe(dev, tmp, &mid->bm_devlist, bm_node) {
2f9fd1826   Fred Isaman   pnfsblock: call a...
862
863
864
  			list_del(&dev->bm_node);
  			bl_free_block_dev(dev);
  		}
2f9fd1826   Fred Isaman   pnfsblock: call a...
865
866
867
868
869
870
871
872
873
874
875
876
  		kfree(mid);
  	}
  }
  
  /* This is mostly copied from the filelayout's get_device_info function.
   * It seems much of this should be at the generic pnfs level.
   */
  static struct pnfs_block_dev *
  nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
  			struct nfs4_deviceid *d_id)
  {
  	struct pnfs_device *dev;
516f2e24f   Jim Rees   pnfsblock: fix re...
877
  	struct pnfs_block_dev *rv;
2f9fd1826   Fred Isaman   pnfsblock: call a...
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
  	u32 max_resp_sz;
  	int max_pages;
  	struct page **pages = NULL;
  	int i, rc;
  
  	/*
  	 * Use the session max response size as the basis for setting
  	 * GETDEVICEINFO's maxcount
  	 */
  	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
  	max_pages = max_resp_sz >> PAGE_SHIFT;
  	dprintk("%s max_resp_sz %u max_pages %d
  ",
  		__func__, max_resp_sz, max_pages);
  
  	dev = kmalloc(sizeof(*dev), GFP_NOFS);
  	if (!dev) {
  		dprintk("%s kmalloc failed
  ", __func__);
516f2e24f   Jim Rees   pnfsblock: fix re...
897
  		return ERR_PTR(-ENOMEM);
2f9fd1826   Fred Isaman   pnfsblock: call a...
898
899
900
901
902
  	}
  
  	pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS);
  	if (pages == NULL) {
  		kfree(dev);
516f2e24f   Jim Rees   pnfsblock: fix re...
903
  		return ERR_PTR(-ENOMEM);
2f9fd1826   Fred Isaman   pnfsblock: call a...
904
905
906
  	}
  	for (i = 0; i < max_pages; i++) {
  		pages[i] = alloc_page(GFP_NOFS);
516f2e24f   Jim Rees   pnfsblock: fix re...
907
908
  		if (!pages[i]) {
  			rv = ERR_PTR(-ENOMEM);
2f9fd1826   Fred Isaman   pnfsblock: call a...
909
  			goto out_free;
516f2e24f   Jim Rees   pnfsblock: fix re...
910
  		}
2f9fd1826   Fred Isaman   pnfsblock: call a...
911
912
913
914
915
916
917
918
919
920
921
922
923
924
  	}
  
  	memcpy(&dev->dev_id, d_id, sizeof(*d_id));
  	dev->layout_type = LAYOUT_BLOCK_VOLUME;
  	dev->pages = pages;
  	dev->pgbase = 0;
  	dev->pglen = PAGE_SIZE * max_pages;
  	dev->mincount = 0;
  
  	dprintk("%s: dev_id: %s
  ", __func__, dev->dev_id.data);
  	rc = nfs4_proc_getdeviceinfo(server, dev);
  	dprintk("%s getdevice info returns %d
  ", __func__, rc);
516f2e24f   Jim Rees   pnfsblock: fix re...
925
926
  	if (rc) {
  		rv = ERR_PTR(rc);
2f9fd1826   Fred Isaman   pnfsblock: call a...
927
  		goto out_free;
516f2e24f   Jim Rees   pnfsblock: fix re...
928
  	}
2f9fd1826   Fred Isaman   pnfsblock: call a...
929
930
931
932
933
934
935
936
937
  
  	rv = nfs4_blk_decode_device(server, dev);
   out_free:
  	for (i = 0; i < max_pages; i++)
  		__free_page(pages[i]);
  	kfree(pages);
  	kfree(dev);
  	return rv;
  }
155e7524f   Fred Isaman   pnfsblock: add bl...
938
939
940
  static int
  bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
  {
2f9fd1826   Fred Isaman   pnfsblock: call a...
941
942
943
944
  	struct block_mount_id *b_mt_id = NULL;
  	struct pnfs_devicelist *dlist = NULL;
  	struct pnfs_block_dev *bdev;
  	LIST_HEAD(block_disklist);
516f2e24f   Jim Rees   pnfsblock: fix re...
945
  	int status, i;
2f9fd1826   Fred Isaman   pnfsblock: call a...
946

155e7524f   Fred Isaman   pnfsblock: add bl...
947
948
  	dprintk("%s enter
  ", __func__);
2f9fd1826   Fred Isaman   pnfsblock: call a...
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
  
  	if (server->pnfs_blksize == 0) {
  		dprintk("%s Server did not return blksize
  ", __func__);
  		return -EINVAL;
  	}
  	b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_NOFS);
  	if (!b_mt_id) {
  		status = -ENOMEM;
  		goto out_error;
  	}
  	/* Initialize nfs4 block layout mount id */
  	spin_lock_init(&b_mt_id->bm_lock);
  	INIT_LIST_HEAD(&b_mt_id->bm_devlist);
  
  	dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_NOFS);
  	if (!dlist) {
  		status = -ENOMEM;
  		goto out_error;
  	}
  	dlist->eof = 0;
  	while (!dlist->eof) {
  		status = nfs4_proc_getdevicelist(server, fh, dlist);
  		if (status)
  			goto out_error;
  		dprintk("%s GETDEVICELIST numdevs=%i, eof=%i
  ",
  			__func__, dlist->num_devs, dlist->eof);
  		for (i = 0; i < dlist->num_devs; i++) {
  			bdev = nfs4_blk_get_deviceinfo(server, fh,
  						       &dlist->dev_id[i]);
516f2e24f   Jim Rees   pnfsblock: fix re...
980
981
  			if (IS_ERR(bdev)) {
  				status = PTR_ERR(bdev);
2f9fd1826   Fred Isaman   pnfsblock: call a...
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
  				goto out_error;
  			}
  			spin_lock(&b_mt_id->bm_lock);
  			list_add(&bdev->bm_node, &b_mt_id->bm_devlist);
  			spin_unlock(&b_mt_id->bm_lock);
  		}
  	}
  	dprintk("%s SUCCESS
  ", __func__);
  	server->pnfs_ld_data = b_mt_id;
  
   out_return:
  	kfree(dlist);
  	return status;
  
   out_error:
  	free_blk_mountid(b_mt_id);
  	goto out_return;
155e7524f   Fred Isaman   pnfsblock: add bl...
1000
1001
1002
1003
1004
  }
  
  static int
  bl_clear_layoutdriver(struct nfs_server *server)
  {
2f9fd1826   Fred Isaman   pnfsblock: call a...
1005
  	struct block_mount_id *b_mt_id = server->pnfs_ld_data;
155e7524f   Fred Isaman   pnfsblock: add bl...
1006
1007
  	dprintk("%s enter
  ", __func__);
2f9fd1826   Fred Isaman   pnfsblock: call a...
1008
1009
1010
  	free_blk_mountid(b_mt_id);
  	dprintk("%s RETURNS
  ", __func__);
155e7524f   Fred Isaman   pnfsblock: add bl...
1011
1012
  	return 0;
  }
e9643fe80   Benny Halevy   pnfsblock: use pa...
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
  /* Page I/O operations for reads: all three hooks use the generic pNFS helpers. */
  static const struct nfs_pageio_ops bl_pg_read_ops = {
  	.pg_init	= pnfs_generic_pg_init_read,
  	.pg_test	= pnfs_generic_pg_test,
  	.pg_doio	= pnfs_generic_pg_readpages,
  };
  
  /* Page I/O operations for writes: all three hooks use the generic pNFS helpers. */
  static const struct nfs_pageio_ops bl_pg_write_ops = {
  	.pg_init	= pnfs_generic_pg_init_write,
  	.pg_test	= pnfs_generic_pg_test,
  	.pg_doio	= pnfs_generic_pg_writepages,
  };
155e7524f   Fred Isaman   pnfsblock: add bl...
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
  static struct pnfs_layoutdriver_type blocklayout_type = {
  	.id				= LAYOUT_BLOCK_VOLUME,
  	.name				= "LAYOUT_BLOCK_VOLUME",
  	.read_pagelist			= bl_read_pagelist,
  	.write_pagelist			= bl_write_pagelist,
  	.alloc_layout_hdr		= bl_alloc_layout_hdr,
  	.free_layout_hdr		= bl_free_layout_hdr,
  	.alloc_lseg			= bl_alloc_lseg,
  	.free_lseg			= bl_free_lseg,
  	.encode_layoutcommit		= bl_encode_layoutcommit,
  	.cleanup_layoutcommit		= bl_cleanup_layoutcommit,
  	.set_layoutdriver		= bl_set_layoutdriver,
  	.clear_layoutdriver		= bl_clear_layoutdriver,
e9643fe80   Benny Halevy   pnfsblock: use pa...
1037
1038
  	.pg_read_ops			= &bl_pg_read_ops,
  	.pg_write_ops			= &bl_pg_write_ops,
155e7524f   Fred Isaman   pnfsblock: add bl...
1039
  };
fe0a9b740   Jim Rees   pnfsblock: add de...
1040
  static const struct rpc_pipe_ops bl_upcall_ops = {
c1225158a   Peng Tao   SUNRPC/NFS: make ...
1041
  	.upcall		= rpc_pipe_generic_upcall,
fe0a9b740   Jim Rees   pnfsblock: add de...
1042
1043
1044
  	.downcall	= bl_pipe_downcall,
  	.destroy_msg	= bl_pipe_destroy_msg,
  };
155e7524f   Fred Isaman   pnfsblock: add bl...
1045
1046
  /*
   * Module entry point.
   *
   * Registers the block layout driver, initialises the bl_wq wait queue,
   * takes a reference on the rpc mount, looks up NFS_PIPE_DIRNAME beneath it
   * and creates the "blocklayout" pipe there.
   *
   * Returns 0 on success.  On failure the steps already taken are undone in
   * reverse order and the error code is returned.  On success the rpc mount
   * reference is kept; nfs4blocklayout_exit() drops it.
   */
  static int __init nfs4blocklayout_init(void)
  {
  	struct vfsmount *mnt;
  	struct path path;
  	int ret;

  	dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);

  	ret = pnfs_register_layoutdriver(&blocklayout_type);
  	if (ret)
  		goto out;

  	init_waitqueue_head(&bl_wq);

  	mnt = rpc_get_mount();
  	if (IS_ERR(mnt)) {
  		ret = PTR_ERR(mnt);
  		goto out_remove;
  	}

  	ret = vfs_path_lookup(mnt->mnt_root,
  			      mnt,
  			      NFS_PIPE_DIRNAME, 0, &path);
  	if (ret)
  		goto out_putrpc;

  	bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL,
  				    &bl_upcall_ops, 0);
  	/* The looked-up path is released whether or not rpc_mkpipe() succeeded. */
  	path_put(&path);
  	if (IS_ERR(bl_device_pipe)) {
  		ret = PTR_ERR(bl_device_pipe);
  		goto out_putrpc;
  	}
  out:
  	return ret;

  out_putrpc:
  	rpc_put_mount();
  out_remove:
  	pnfs_unregister_layoutdriver(&blocklayout_type);
  	return ret;
  }
  
  /*
   * Module exit point.
   *
   * Unregisters the layout driver, unlinks the "blocklayout" pipe and drops
   * the rpc mount reference taken in nfs4blocklayout_init().
   */
  static void __exit nfs4blocklayout_exit(void)
  {
  	dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
  	       __func__);

  	pnfs_unregister_layoutdriver(&blocklayout_type);
  	rpc_unlink(bl_device_pipe);
  	rpc_put_mount();
  }
  
  /*
   * Module alias string.
   * NOTE(review): presumably lets the pNFS core load this module on demand by
   * layout type number; confirm against the code that requests the module.
   */
  MODULE_ALIAS("nfs-layouttype4-3");
  
  /* Hook the init and exit functions defined above into module load/unload. */
  module_init(nfs4blocklayout_init);
  module_exit(nfs4blocklayout_exit);