Commit 6736c047995c560b73f3860095c631456b0bbea8

Authored by Linus Torvalds

Merge branch 'nfs-for-3.2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

* 'nfs-for-3.2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (25 commits)
  nfs: set vs_hidden on nfs4_callback_version4 (try #2)
  pnfs-obj: Support for RAID5 read-4-write interface.
  pnfs-obj: move to ore 03: Remove old raid engine
  pnfs-obj: move to ore 02: move to ORE
  pnfs-obj: move to ore 01: ore_layout & ore_components
  pnfs-obj: Rename objlayout_io_state => objlayout_io_res
  pnfs-obj: Get rid of objlayout_{alloc,free}_io_state
  pnfs-obj: Return PNFS_NOT_ATTEMPTED in case of read/write_pagelist
  pnfs-obj: Remove redundant EOF from objlayout_io_state
  nfs: Remove unused variable from write.c
  nfs: Fix unused variable warning from file.c
  NFS: Remove no-op less-than-zero checks on unsigned variables.
  NFS: Clean up nfs4_xdr_dec_secinfo()
  NFS: Fix documenting comment for nfs_create_request()
  NFS4: fix cb_recallany decode error
  nfs4: serialize layoutcommit
  SUNRPC: remove rpcbind clients destruction on module cleanup
  SUNRPC: remove rpcbind clients creation during service registering
  NFSd: call svc rpcbind cleanup explicitly
  SUNRPC: cleanup service destruction
  ...

Showing 20 changed files Side-by-side Diff

... ... @@ -5,7 +5,7 @@
5 5 # selected by any of the users.
6 6 config ORE
7 7 tristate
8   - depends on EXOFS_FS
  8 + depends on EXOFS_FS || PNFS_OBJLAYOUT
9 9 select ASYNC_XOR
10 10 default SCSI_OSD_ULD
11 11  
fs/nfs/callback_xdr.c
... ... @@ -488,17 +488,18 @@
488 488 struct xdr_stream *xdr,
489 489 struct cb_recallanyargs *args)
490 490 {
491   - __be32 *p;
  491 + uint32_t bitmap[2];
  492 + __be32 *p, status;
492 493  
493 494 args->craa_addr = svc_addr(rqstp);
494 495 p = read_buf(xdr, 4);
495 496 if (unlikely(p == NULL))
496 497 return htonl(NFS4ERR_BADXDR);
497 498 args->craa_objs_to_keep = ntohl(*p++);
498   - p = read_buf(xdr, 4);
499   - if (unlikely(p == NULL))
500   - return htonl(NFS4ERR_BADXDR);
501   - args->craa_type_mask = ntohl(*p);
  499 + status = decode_bitmap(xdr, bitmap);
  500 + if (unlikely(status))
  501 + return status;
  502 + args->craa_type_mask = bitmap[0];
502 503  
503 504 return 0;
504 505 }
... ... @@ -986,5 +987,6 @@
986 987 .vs_proc = nfs4_callback_procedures1,
987 988 .vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
988 989 .vs_dispatch = NULL,
  990 + .vs_hidden = 1,
989 991 };
... ... @@ -137,11 +137,9 @@
137 137 static int
138 138 nfs_file_release(struct inode *inode, struct file *filp)
139 139 {
140   - struct dentry *dentry = filp->f_path.dentry;
141   -
142 140 dprintk("NFS: release(%s/%s)\n",
143   - dentry->d_parent->d_name.name,
144   - dentry->d_name.name);
  141 + filp->f_path.dentry->d_parent->d_name.name,
  142 + filp->f_path.dentry->d_name.name);
145 143  
146 144 nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
147 145 return nfs_release(inode, filp);
148 146  
... ... @@ -228,14 +226,13 @@
228 226 struct dentry * dentry = iocb->ki_filp->f_path.dentry;
229 227 struct inode * inode = dentry->d_inode;
230 228 ssize_t result;
231   - size_t count = iov_length(iov, nr_segs);
232 229  
233 230 if (iocb->ki_filp->f_flags & O_DIRECT)
234 231 return nfs_file_direct_read(iocb, iov, nr_segs, pos);
235 232  
236 233 dprintk("NFS: read(%s/%s, %lu@%lu)\n",
237 234 dentry->d_parent->d_name.name, dentry->d_name.name,
238   - (unsigned long) count, (unsigned long) pos);
  235 + (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos);
239 236  
240 237 result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
241 238 if (!result) {
fs/nfs/nfs4filelayout.c
... ... @@ -449,9 +449,8 @@
449 449  
450 450 fl->dsaddr = dsaddr;
451 451  
452   - if (fl->first_stripe_index < 0 ||
453   - fl->first_stripe_index >= dsaddr->stripe_count) {
454   - dprintk("%s Bad first_stripe_index %d\n",
  452 + if (fl->first_stripe_index >= dsaddr->stripe_count) {
  453 + dprintk("%s Bad first_stripe_index %u\n",
455 454 __func__, fl->first_stripe_index);
456 455 goto out_put;
457 456 }
... ... @@ -552,7 +551,7 @@
552 551  
553 552 /* Note that a zero value for num_fh is legal for STRIPE_SPARSE.
554 553 * Futher checking is done in filelayout_check_layout */
555   - if (fl->num_fh < 0 || fl->num_fh >
  554 + if (fl->num_fh >
556 555 max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT))
557 556 goto out_err;
558 557  
... ... @@ -5950,6 +5950,7 @@
5950 5950 {
5951 5951 struct nfs4_layoutcommit_data *data = calldata;
5952 5952 struct pnfs_layout_segment *lseg, *tmp;
  5953 + unsigned long *bitlock = &NFS_I(data->args.inode)->flags;
5953 5954  
5954 5955 pnfs_cleanup_layoutcommit(data);
5955 5956 /* Matched by references in pnfs_set_layoutcommit */
... ... @@ -5959,6 +5960,11 @@
5959 5960 &lseg->pls_flags))
5960 5961 put_lseg(lseg);
5961 5962 }
  5963 +
  5964 + clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
  5965 + smp_mb__after_clear_bit();
  5966 + wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
  5967 +
5962 5968 put_rpccred(data->cred);
5963 5969 kfree(data);
5964 5970 }
... ... @@ -6602,8 +6602,6 @@
6602 6602 if (status)
6603 6603 goto out;
6604 6604 status = decode_secinfo(xdr, res);
6605   - if (status)
6606   - goto out;
6607 6605 out:
6608 6606 return status;
6609 6607 }
fs/nfs/objlayout/objio_osd.c
Changes suppressed. Click to show
... ... @@ -38,21 +38,15 @@
38 38 */
39 39  
40 40 #include <linux/module.h>
41   -#include <scsi/osd_initiator.h>
  41 +#include <scsi/osd_ore.h>
42 42  
43 43 #include "objlayout.h"
44 44  
45 45 #define NFSDBG_FACILITY NFSDBG_PNFS_LD
46 46  
47   -#define _LLU(x) ((unsigned long long)x)
48   -
49   -enum { BIO_MAX_PAGES_KMALLOC =
50   - (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
51   -};
52   -
53 47 struct objio_dev_ent {
54 48 struct nfs4_deviceid_node id_node;
55   - struct osd_dev *od;
  49 + struct ore_dev od;
56 50 };
57 51  
58 52 static void
... ... @@ -60,8 +54,8 @@
60 54 {
61 55 struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node);
62 56  
63   - dprintk("%s: free od=%p\n", __func__, de->od);
64   - osduld_put_device(de->od);
  57 + dprintk("%s: free od=%p\n", __func__, de->od.od);
  58 + osduld_put_device(de->od.od);
65 59 kfree(de);
66 60 }
67 61  
68 62  
... ... @@ -98,12 +92,12 @@
98 92 nfss->pnfs_curr_ld,
99 93 nfss->nfs_client,
100 94 d_id);
101   - de->od = od;
  95 + de->od.od = od;
102 96  
103 97 d = nfs4_insert_deviceid_node(&de->id_node);
104 98 n = container_of(d, struct objio_dev_ent, id_node);
105 99 if (n != de) {
106   - dprintk("%s: Race with other n->od=%p\n", __func__, n->od);
  100 + dprintk("%s: Race with other n->od=%p\n", __func__, n->od.od);
107 101 objio_free_deviceid_node(&de->id_node);
108 102 de = n;
109 103 }
110 104  
... ... @@ -111,28 +105,11 @@
111 105 return de;
112 106 }
113 107  
114   -struct caps_buffers {
115   - u8 caps_key[OSD_CRYPTO_KEYID_SIZE];
116   - u8 creds[OSD_CAP_LEN];
117   -};
118   -
119 108 struct objio_segment {
120 109 struct pnfs_layout_segment lseg;
121 110  
122   - struct pnfs_osd_object_cred *comps;
123   -
124   - unsigned mirrors_p1;
125   - unsigned stripe_unit;
126   - unsigned group_width; /* Data stripe_units without integrity comps */
127   - u64 group_depth;
128   - unsigned group_count;
129   -
130   - unsigned max_io_size;
131   -
132   - unsigned comps_index;
133   - unsigned num_comps;
134   - /* variable length */
135   - struct objio_dev_ent *ods[];
  111 + struct ore_layout layout;
  112 + struct ore_components oc;
136 113 };
137 114  
138 115 static inline struct objio_segment *
139 116  
140 117  
141 118  
142 119  
143 120  
144 121  
145 122  
146 123  
... ... @@ -141,59 +118,44 @@
141 118 return container_of(lseg, struct objio_segment, lseg);
142 119 }
143 120  
144   -struct objio_state;
145   -typedef ssize_t (*objio_done_fn)(struct objio_state *ios);
146   -
147 121 struct objio_state {
148 122 /* Generic layer */
149   - struct objlayout_io_state ol_state;
  123 + struct objlayout_io_res oir;
150 124  
151   - struct objio_segment *layout;
152   -
153   - struct kref kref;
154   - objio_done_fn done;
155   - void *private;
156   -
157   - unsigned long length;
158   - unsigned numdevs; /* Actually used devs in this IO */
159   - /* A per-device variable array of size numdevs */
160   - struct _objio_per_comp {
161   - struct bio *bio;
162   - struct osd_request *or;
163   - unsigned long length;
164   - u64 offset;
165   - unsigned dev;
166   - } per_dev[];
  125 + bool sync;
  126 + /*FIXME: Support for extra_bytes at ore_get_rw_state() */
  127 + struct ore_io_state *ios;
167 128 };
168 129  
169 130 /* Send and wait for a get_device_info of devices in the layout,
170 131 then look them up with the osd_initiator library */
171   -static struct objio_dev_ent *_device_lookup(struct pnfs_layout_hdr *pnfslay,
172   - struct objio_segment *objio_seg, unsigned comp,
173   - gfp_t gfp_flags)
  132 +static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
  133 + struct objio_segment *objio_seg, unsigned c, struct nfs4_deviceid *d_id,
  134 + gfp_t gfp_flags)
174 135 {
175 136 struct pnfs_osd_deviceaddr *deviceaddr;
176   - struct nfs4_deviceid *d_id;
177 137 struct objio_dev_ent *ode;
178 138 struct osd_dev *od;
179 139 struct osd_dev_info odi;
180 140 int err;
181 141  
182   - d_id = &objio_seg->comps[comp].oc_object_id.oid_device_id;
183   -
184 142 ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id);
185   - if (ode)
186   - return ode;
  143 + if (ode) {
  144 + objio_seg->oc.ods[c] = &ode->od; /* must use container_of */
  145 + return 0;
  146 + }
187 147  
188 148 err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags);
189 149 if (unlikely(err)) {
190 150 dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n",
191 151 __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err);
192   - return ERR_PTR(err);
  152 + return err;
193 153 }
194 154  
195 155 odi.systemid_len = deviceaddr->oda_systemid.len;
196 156 if (odi.systemid_len > sizeof(odi.systemid)) {
  157 + dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n",
  158 + __func__, sizeof(odi.systemid));
197 159 err = -EINVAL;
198 160 goto out;
199 161 } else if (odi.systemid_len)
200 162  
201 163  
202 164  
203 165  
204 166  
205 167  
206 168  
207 169  
208 170  
209 171  
210 172  
211 173  
212 174  
213 175  
... ... @@ -218,96 +180,53 @@
218 180  
219 181 ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od,
220 182 gfp_flags);
221   -
  183 + objio_seg->oc.ods[c] = &ode->od; /* must use container_of */
  184 + dprintk("Adding new dev_id(%llx:%llx)\n",
  185 + _DEVID_LO(d_id), _DEVID_HI(d_id));
222 186 out:
223   - dprintk("%s: return=%d\n", __func__, err);
224 187 objlayout_put_deviceinfo(deviceaddr);
225   - return err ? ERR_PTR(err) : ode;
  188 + return err;
226 189 }
227 190  
228   -static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
229   - struct objio_segment *objio_seg,
230   - gfp_t gfp_flags)
  191 +static void copy_single_comp(struct ore_components *oc, unsigned c,
  192 + struct pnfs_osd_object_cred *src_comp)
231 193 {
232   - unsigned i;
233   - int err;
  194 + struct ore_comp *ocomp = &oc->comps[c];
234 195  
235   - /* lookup all devices */
236   - for (i = 0; i < objio_seg->num_comps; i++) {
237   - struct objio_dev_ent *ode;
  196 + WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */
  197 + WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred));
238 198  
239   - ode = _device_lookup(pnfslay, objio_seg, i, gfp_flags);
240   - if (unlikely(IS_ERR(ode))) {
241   - err = PTR_ERR(ode);
242   - goto out;
243   - }
244   - objio_seg->ods[i] = ode;
245   - }
246   - err = 0;
  199 + ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id;
  200 + ocomp->obj.id = src_comp->oc_object_id.oid_object_id;
247 201  
248   -out:
249   - dprintk("%s: return=%d\n", __func__, err);
250   - return err;
  202 + memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
251 203 }
252 204  
253   -static int _verify_data_map(struct pnfs_osd_layout *layout)
  205 +int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
  206 + struct objio_segment **pseg)
254 207 {
255   - struct pnfs_osd_data_map *data_map = &layout->olo_map;
256   - u64 stripe_length;
257   - u32 group_width;
  208 + struct __alloc_objio_segment {
  209 + struct objio_segment olseg;
  210 + struct ore_dev *ods[numdevs];
  211 + struct ore_comp comps[numdevs];
  212 + } *aolseg;
258 213  
259   -/* FIXME: Only raid0 for now. if not go through MDS */
260   - if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) {
261   - printk(KERN_ERR "Only RAID_0 for now\n");
262   - return -ENOTSUPP;
  214 + aolseg = kzalloc(sizeof(*aolseg), gfp_flags);
  215 + if (unlikely(!aolseg)) {
  216 + dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__,
  217 + numdevs, sizeof(*aolseg));
  218 + return -ENOMEM;
263 219 }
264   - if (0 != (data_map->odm_num_comps % (data_map->odm_mirror_cnt + 1))) {
265   - printk(KERN_ERR "Data Map wrong, num_comps=%u mirrors=%u\n",
266   - data_map->odm_num_comps, data_map->odm_mirror_cnt);
267   - return -EINVAL;
268   - }
269 220  
270   - if (data_map->odm_group_width)
271   - group_width = data_map->odm_group_width;
272   - else
273   - group_width = data_map->odm_num_comps /
274   - (data_map->odm_mirror_cnt + 1);
  221 + aolseg->olseg.oc.numdevs = numdevs;
  222 + aolseg->olseg.oc.single_comp = EC_MULTPLE_COMPS;
  223 + aolseg->olseg.oc.comps = aolseg->comps;
  224 + aolseg->olseg.oc.ods = aolseg->ods;
275 225  
276   - stripe_length = (u64)data_map->odm_stripe_unit * group_width;
277   - if (stripe_length >= (1ULL << 32)) {
278   - printk(KERN_ERR "Total Stripe length(0x%llx)"
279   - " >= 32bit is not supported\n", _LLU(stripe_length));
280   - return -ENOTSUPP;
281   - }
282   -
283   - if (0 != (data_map->odm_stripe_unit & ~PAGE_MASK)) {
284   - printk(KERN_ERR "Stripe Unit(0x%llx)"
285   - " must be Multples of PAGE_SIZE(0x%lx)\n",
286   - _LLU(data_map->odm_stripe_unit), PAGE_SIZE);
287   - return -ENOTSUPP;
288   - }
289   -
  226 + *pseg = &aolseg->olseg;
290 227 return 0;
291 228 }
292 229  
293   -static void copy_single_comp(struct pnfs_osd_object_cred *cur_comp,
294   - struct pnfs_osd_object_cred *src_comp,
295   - struct caps_buffers *caps_p)
296   -{
297   - WARN_ON(src_comp->oc_cap_key.cred_len > sizeof(caps_p->caps_key));
298   - WARN_ON(src_comp->oc_cap.cred_len > sizeof(caps_p->creds));
299   -
300   - *cur_comp = *src_comp;
301   -
302   - memcpy(caps_p->caps_key, src_comp->oc_cap_key.cred,
303   - sizeof(caps_p->caps_key));
304   - cur_comp->oc_cap_key.cred = caps_p->caps_key;
305   -
306   - memcpy(caps_p->creds, src_comp->oc_cap.cred,
307   - sizeof(caps_p->creds));
308   - cur_comp->oc_cap.cred = caps_p->creds;
309   -}
310   -
311 230 int objio_alloc_lseg(struct pnfs_layout_segment **outp,
312 231 struct pnfs_layout_hdr *pnfslay,
313 232 struct pnfs_layout_range *range,
314 233  
315 234  
316 235  
317 236  
318 237  
... ... @@ -317,60 +236,44 @@
317 236 struct objio_segment *objio_seg;
318 237 struct pnfs_osd_xdr_decode_layout_iter iter;
319 238 struct pnfs_osd_layout layout;
320   - struct pnfs_osd_object_cred *cur_comp, src_comp;
321   - struct caps_buffers *caps_p;
  239 + struct pnfs_osd_object_cred src_comp;
  240 + unsigned cur_comp;
322 241 int err;
323 242  
324 243 err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr);
325 244 if (unlikely(err))
326 245 return err;
327 246  
328   - err = _verify_data_map(&layout);
  247 + err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg);
329 248 if (unlikely(err))
330 249 return err;
331 250  
332   - objio_seg = kzalloc(sizeof(*objio_seg) +
333   - sizeof(objio_seg->ods[0]) * layout.olo_num_comps +
334   - sizeof(*objio_seg->comps) * layout.olo_num_comps +
335   - sizeof(struct caps_buffers) * layout.olo_num_comps,
336   - gfp_flags);
337   - if (!objio_seg)
338   - return -ENOMEM;
  251 + objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit;
  252 + objio_seg->layout.group_width = layout.olo_map.odm_group_width;
  253 + objio_seg->layout.group_depth = layout.olo_map.odm_group_depth;
  254 + objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1;
  255 + objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm;
339 256  
340   - objio_seg->comps = (void *)(objio_seg->ods + layout.olo_num_comps);
341   - cur_comp = objio_seg->comps;
342   - caps_p = (void *)(cur_comp + layout.olo_num_comps);
343   - while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err))
344   - copy_single_comp(cur_comp++, &src_comp, caps_p++);
  257 + err = ore_verify_layout(layout.olo_map.odm_num_comps,
  258 + &objio_seg->layout);
345 259 if (unlikely(err))
346 260 goto err;
347 261  
348   - objio_seg->num_comps = layout.olo_num_comps;
349   - objio_seg->comps_index = layout.olo_comps_index;
350   - err = objio_devices_lookup(pnfslay, objio_seg, gfp_flags);
351   - if (err)
  262 + objio_seg->oc.first_dev = layout.olo_comps_index;
  263 + cur_comp = 0;
  264 + while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) {
  265 + copy_single_comp(&objio_seg->oc, cur_comp, &src_comp);
  266 + err = objio_devices_lookup(pnfslay, objio_seg, cur_comp,
  267 + &src_comp.oc_object_id.oid_device_id,
  268 + gfp_flags);
  269 + if (err)
  270 + goto err;
  271 + ++cur_comp;
  272 + }
  273 + /* pnfs_osd_xdr_decode_layout_comp returns false on error */
  274 + if (unlikely(err))
352 275 goto err;
353 276  
354   - objio_seg->mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1;
355   - objio_seg->stripe_unit = layout.olo_map.odm_stripe_unit;
356   - if (layout.olo_map.odm_group_width) {
357   - objio_seg->group_width = layout.olo_map.odm_group_width;
358   - objio_seg->group_depth = layout.olo_map.odm_group_depth;
359   - objio_seg->group_count = layout.olo_map.odm_num_comps /
360   - objio_seg->mirrors_p1 /
361   - objio_seg->group_width;
362   - } else {
363   - objio_seg->group_width = layout.olo_map.odm_num_comps /
364   - objio_seg->mirrors_p1;
365   - objio_seg->group_depth = -1;
366   - objio_seg->group_count = 1;
367   - }
368   -
369   - /* Cache this calculation it will hit for every page */
370   - objio_seg->max_io_size = (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE -
371   - objio_seg->stripe_unit) *
372   - objio_seg->group_width;
373   -
374 277 *outp = &objio_seg->lseg;
375 278 return 0;
376 279  
377 280  
378 281  
379 282  
380 283  
381 284  
382 285  
383 286  
384 287  
385 288  
... ... @@ -386,43 +289,63 @@
386 289 int i;
387 290 struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
388 291  
389   - for (i = 0; i < objio_seg->num_comps; i++) {
390   - if (!objio_seg->ods[i])
  292 + for (i = 0; i < objio_seg->oc.numdevs; i++) {
  293 + struct ore_dev *od = objio_seg->oc.ods[i];
  294 + struct objio_dev_ent *ode;
  295 +
  296 + if (!od)
391 297 break;
392   - nfs4_put_deviceid_node(&objio_seg->ods[i]->id_node);
  298 + ode = container_of(od, typeof(*ode), od);
  299 + nfs4_put_deviceid_node(&ode->id_node);
393 300 }
394 301 kfree(objio_seg);
395 302 }
396 303  
397   -int objio_alloc_io_state(struct pnfs_layout_segment *lseg,
398   - struct objlayout_io_state **outp,
399   - gfp_t gfp_flags)
  304 +static int
  305 +objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading,
  306 + struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase,
  307 + loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags,
  308 + struct objio_state **outp)
400 309 {
401 310 struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
402   - struct objio_state *ios;
403   - const unsigned first_size = sizeof(*ios) +
404   - objio_seg->num_comps * sizeof(ios->per_dev[0]);
405   - const unsigned sec_size = objio_seg->num_comps *
406   - sizeof(ios->ol_state.ioerrs[0]);
  311 + struct ore_io_state *ios;
  312 + int ret;
  313 + struct __alloc_objio_state {
  314 + struct objio_state objios;
  315 + struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs];
  316 + } *aos;
407 317  
408   - ios = kzalloc(first_size + sec_size, gfp_flags);
409   - if (unlikely(!ios))
  318 + aos = kzalloc(sizeof(*aos), gfp_flags);
  319 + if (unlikely(!aos))
410 320 return -ENOMEM;
411 321  
412   - ios->layout = objio_seg;
413   - ios->ol_state.ioerrs = ((void *)ios) + first_size;
414   - ios->ol_state.num_comps = objio_seg->num_comps;
  322 + objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs,
  323 + aos->ioerrs, rpcdata, pnfs_layout_type);
415 324  
416   - *outp = &ios->ol_state;
  325 + ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading,
  326 + offset, count, &ios);
  327 + if (unlikely(ret)) {
  328 + kfree(aos);
  329 + return ret;
  330 + }
  331 +
  332 + ios->pages = pages;
  333 + ios->pgbase = pgbase;
  334 + ios->private = aos;
  335 + BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT);
  336 +
  337 + aos->objios.sync = 0;
  338 + aos->objios.ios = ios;
  339 + *outp = &aos->objios;
417 340 return 0;
418 341 }
419 342  
420   -void objio_free_io_state(struct objlayout_io_state *ol_state)
  343 +void objio_free_result(struct objlayout_io_res *oir)
421 344 {
422   - struct objio_state *ios = container_of(ol_state, struct objio_state,
423   - ol_state);
  345 + struct objio_state *objios = container_of(oir, struct objio_state, oir);
424 346  
425   - kfree(ios);
  347 + ore_put_io_state(objios->ios);
  348 + kfree(objios);
426 349 }
427 350  
428 351 enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
429 352  
430 353  
431 354  
432 355  
433 356  
434 357  
435 358  
436 359  
437 360  
438 361  
439 362  
440 363  
441 364  
442 365  
443 366  
444 367  
445 368  
446 369  
447 370  
448 371  
449 372  
450 373  
451 374  
452 375  
453 376  
454 377  
455 378  
456 379  
457 380  
458 381  
459 382  
460 383  
461 384  
... ... @@ -455,539 +378,152 @@
455 378 }
456 379 }
457 380  
458   -static void _clear_bio(struct bio *bio)
  381 +static void __on_dev_error(struct ore_io_state *ios,
  382 + struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep,
  383 + u64 dev_offset, u64 dev_len)
459 384 {
460   - struct bio_vec *bv;
461   - unsigned i;
  385 + struct objio_state *objios = ios->private;
  386 + struct pnfs_osd_objid pooid;
  387 + struct objio_dev_ent *ode = container_of(od, typeof(*ode), od);
  388 + /* FIXME: what to do with more-then-one-group layouts. We need to
  389 + * translate from ore_io_state index to oc->comps index
  390 + */
  391 + unsigned comp = dev_index;
462 392  
463   - __bio_for_each_segment(bv, bio, i, 0) {
464   - unsigned this_count = bv->bv_len;
  393 + pooid.oid_device_id = ode->id_node.deviceid;
  394 + pooid.oid_partition_id = ios->oc->comps[comp].obj.partition;
  395 + pooid.oid_object_id = ios->oc->comps[comp].obj.id;
465 396  
466   - if (likely(PAGE_SIZE == this_count))
467   - clear_highpage(bv->bv_page);
468   - else
469   - zero_user(bv->bv_page, bv->bv_offset, this_count);
470   - }
  397 + objlayout_io_set_result(&objios->oir, comp,
  398 + &pooid, osd_pri_2_pnfs_err(oep),
  399 + dev_offset, dev_len, !ios->reading);
471 400 }
472 401  
473   -static int _io_check(struct objio_state *ios, bool is_write)
474   -{
475   - enum osd_err_priority oep = OSD_ERR_PRI_NO_ERROR;
476   - int lin_ret = 0;
477   - int i;
478   -
479   - for (i = 0; i < ios->numdevs; i++) {
480   - struct osd_sense_info osi;
481   - struct osd_request *or = ios->per_dev[i].or;
482   - int ret;
483   -
484   - if (!or)
485   - continue;
486   -
487   - ret = osd_req_decode_sense(or, &osi);
488   - if (likely(!ret))
489   - continue;
490   -
491   - if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
492   - /* start read offset passed endof file */
493   - BUG_ON(is_write);
494   - _clear_bio(ios->per_dev[i].bio);
495   - dprintk("%s: start read offset passed end of file "
496   - "offset=0x%llx, length=0x%lx\n", __func__,
497   - _LLU(ios->per_dev[i].offset),
498   - ios->per_dev[i].length);
499   -
500   - continue; /* we recovered */
501   - }
502   - objlayout_io_set_result(&ios->ol_state, i,
503   - &ios->layout->comps[i].oc_object_id,
504   - osd_pri_2_pnfs_err(osi.osd_err_pri),
505   - ios->per_dev[i].offset,
506   - ios->per_dev[i].length,
507   - is_write);
508   -
509   - if (osi.osd_err_pri >= oep) {
510   - oep = osi.osd_err_pri;
511   - lin_ret = ret;
512   - }
513   - }
514   -
515   - return lin_ret;
516   -}
517   -
518 402 /*
519   - * Common IO state helpers.
520   - */
521   -static void _io_free(struct objio_state *ios)
522   -{
523   - unsigned i;
524   -
525   - for (i = 0; i < ios->numdevs; i++) {
526   - struct _objio_per_comp *per_dev = &ios->per_dev[i];
527   -
528   - if (per_dev->or) {
529   - osd_end_request(per_dev->or);
530   - per_dev->or = NULL;
531   - }
532   -
533   - if (per_dev->bio) {
534   - bio_put(per_dev->bio);
535   - per_dev->bio = NULL;
536   - }
537   - }
538   -}
539   -
540   -struct osd_dev *_io_od(struct objio_state *ios, unsigned dev)
541   -{
542   - unsigned min_dev = ios->layout->comps_index;
543   - unsigned max_dev = min_dev + ios->layout->num_comps;
544   -
545   - BUG_ON(dev < min_dev || max_dev <= dev);
546   - return ios->layout->ods[dev - min_dev]->od;
547   -}
548   -
549   -struct _striping_info {
550   - u64 obj_offset;
551   - u64 group_length;
552   - unsigned dev;
553   - unsigned unit_off;
554   -};
555   -
556   -static void _calc_stripe_info(struct objio_state *ios, u64 file_offset,
557   - struct _striping_info *si)
558   -{
559   - u32 stripe_unit = ios->layout->stripe_unit;
560   - u32 group_width = ios->layout->group_width;
561   - u64 group_depth = ios->layout->group_depth;
562   - u32 U = stripe_unit * group_width;
563   -
564   - u64 T = U * group_depth;
565   - u64 S = T * ios->layout->group_count;
566   - u64 M = div64_u64(file_offset, S);
567   -
568   - /*
569   - G = (L - (M * S)) / T
570   - H = (L - (M * S)) % T
571   - */
572   - u64 LmodU = file_offset - M * S;
573   - u32 G = div64_u64(LmodU, T);
574   - u64 H = LmodU - G * T;
575   -
576   - u32 N = div_u64(H, U);
577   -
578   - div_u64_rem(file_offset, stripe_unit, &si->unit_off);
579   - si->obj_offset = si->unit_off + (N * stripe_unit) +
580   - (M * group_depth * stripe_unit);
581   -
582   - /* "H - (N * U)" is just "H % U" so it's bound to u32 */
583   - si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
584   - si->dev *= ios->layout->mirrors_p1;
585   -
586   - si->group_length = T - H;
587   -}
588   -
589   -static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg,
590   - unsigned pgbase, struct _objio_per_comp *per_dev, int len,
591   - gfp_t gfp_flags)
592   -{
593   - unsigned pg = *cur_pg;
594   - int cur_len = len;
595   - struct request_queue *q =
596   - osd_request_queue(_io_od(ios, per_dev->dev));
597   -
598   - if (per_dev->bio == NULL) {
599   - unsigned pages_in_stripe = ios->layout->group_width *
600   - (ios->layout->stripe_unit / PAGE_SIZE);
601   - unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) /
602   - ios->layout->group_width;
603   -
604   - if (BIO_MAX_PAGES_KMALLOC < bio_size)
605   - bio_size = BIO_MAX_PAGES_KMALLOC;
606   -
607   - per_dev->bio = bio_kmalloc(gfp_flags, bio_size);
608   - if (unlikely(!per_dev->bio)) {
609   - dprintk("Faild to allocate BIO size=%u\n", bio_size);
610   - return -ENOMEM;
611   - }
612   - }
613   -
614   - while (cur_len > 0) {
615   - unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
616   - unsigned added_len;
617   -
618   - BUG_ON(ios->ol_state.nr_pages <= pg);
619   - cur_len -= pglen;
620   -
621   - added_len = bio_add_pc_page(q, per_dev->bio,
622   - ios->ol_state.pages[pg], pglen, pgbase);
623   - if (unlikely(pglen != added_len))
624   - return -ENOMEM;
625   - pgbase = 0;
626   - ++pg;
627   - }
628   - BUG_ON(cur_len);
629   -
630   - per_dev->length += len;
631   - *cur_pg = pg;
632   - return 0;
633   -}
634   -
635   -static int _prepare_one_group(struct objio_state *ios, u64 length,
636   - struct _striping_info *si, unsigned *last_pg,
637   - gfp_t gfp_flags)
638   -{
639   - unsigned stripe_unit = ios->layout->stripe_unit;
640   - unsigned mirrors_p1 = ios->layout->mirrors_p1;
641   - unsigned devs_in_group = ios->layout->group_width * mirrors_p1;
642   - unsigned dev = si->dev;
643   - unsigned first_dev = dev - (dev % devs_in_group);
644   - unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
645   - unsigned cur_pg = *last_pg;
646   - int ret = 0;
647   -
648   - while (length) {
649   - struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev];
650   - unsigned cur_len, page_off = 0;
651   -
652   - if (!per_dev->length) {
653   - per_dev->dev = dev;
654   - if (dev < si->dev) {
655   - per_dev->offset = si->obj_offset + stripe_unit -
656   - si->unit_off;
657   - cur_len = stripe_unit;
658   - } else if (dev == si->dev) {
659   - per_dev->offset = si->obj_offset;
660   - cur_len = stripe_unit - si->unit_off;
661   - page_off = si->unit_off & ~PAGE_MASK;
662   - BUG_ON(page_off &&
663   - (page_off != ios->ol_state.pgbase));
664   - } else { /* dev > si->dev */
665   - per_dev->offset = si->obj_offset - si->unit_off;
666   - cur_len = stripe_unit;
667   - }
668   -
669   - if (max_comp < dev - first_dev)
670   - max_comp = dev - first_dev;
671   - } else {
672   - cur_len = stripe_unit;
673   - }
674   - if (cur_len >= length)
675   - cur_len = length;
676   -
677   - ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev,
678   - cur_len, gfp_flags);
679   - if (unlikely(ret))
680   - goto out;
681   -
682   - dev += mirrors_p1;
683   - dev = (dev % devs_in_group) + first_dev;
684   -
685   - length -= cur_len;
686   - ios->length += cur_len;
687   - }
688   -out:
689   - ios->numdevs = max_comp + mirrors_p1;
690   - *last_pg = cur_pg;
691   - return ret;
692   -}
693   -
694   -static int _io_rw_pagelist(struct objio_state *ios, gfp_t gfp_flags)
695   -{
696   - u64 length = ios->ol_state.count;
697   - u64 offset = ios->ol_state.offset;
698   - struct _striping_info si;
699   - unsigned last_pg = 0;
700   - int ret = 0;
701   -
702   - while (length) {
703   - _calc_stripe_info(ios, offset, &si);
704   -
705   - if (length < si.group_length)
706   - si.group_length = length;
707   -
708   - ret = _prepare_one_group(ios, si.group_length, &si, &last_pg, gfp_flags);
709   - if (unlikely(ret))
710   - goto out;
711   -
712   - offset += si.group_length;
713   - length -= si.group_length;
714   - }
715   -
716   -out:
717   - if (!ios->length)
718   - return ret;
719   -
720   - return 0;
721   -}
722   -
723   -static ssize_t _sync_done(struct objio_state *ios)
724   -{
725   - struct completion *waiting = ios->private;
726   -
727   - complete(waiting);
728   - return 0;
729   -}
730   -
731   -static void _last_io(struct kref *kref)
732   -{
733   - struct objio_state *ios = container_of(kref, struct objio_state, kref);
734   -
735   - ios->done(ios);
736   -}
737   -
738   -static void _done_io(struct osd_request *or, void *p)
739   -{
740   - struct objio_state *ios = p;
741   -
742   - kref_put(&ios->kref, _last_io);
743   -}
744   -
745   -static ssize_t _io_exec(struct objio_state *ios)
746   -{
747   - DECLARE_COMPLETION_ONSTACK(wait);
748   - ssize_t status = 0; /* sync status */
749   - unsigned i;
750   - objio_done_fn saved_done_fn = ios->done;
751   - bool sync = ios->ol_state.sync;
752   -
753   - if (sync) {
754   - ios->done = _sync_done;
755   - ios->private = &wait;
756   - }
757   -
758   - kref_init(&ios->kref);
759   -
760   - for (i = 0; i < ios->numdevs; i++) {
761   - struct osd_request *or = ios->per_dev[i].or;
762   -
763   - if (!or)
764   - continue;
765   -
766   - kref_get(&ios->kref);
767   - osd_execute_request_async(or, _done_io, ios);
768   - }
769   -
770   - kref_put(&ios->kref, _last_io);
771   -
772   - if (sync) {
773   - wait_for_completion(&wait);
774   - status = saved_done_fn(ios);
775   - }
776   -
777   - return status;
778   -}
779   -
780   -/*
781 403 * read
782 404 */
783   -static ssize_t _read_done(struct objio_state *ios)
  405 +static void _read_done(struct ore_io_state *ios, void *private)
784 406 {
  407 + struct objio_state *objios = private;
785 408 ssize_t status;
786   - int ret = _io_check(ios, false);
  409 + int ret = ore_check_io(ios, &__on_dev_error);
787 410  
788   - _io_free(ios);
  411 + /* FIXME: _io_free(ios) can we dealocate the libosd resources; */
789 412  
790 413 if (likely(!ret))
791 414 status = ios->length;
792 415 else
793 416 status = ret;
794 417  
795   - objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync);
796   - return status;
  418 + objlayout_read_done(&objios->oir, status, objios->sync);
797 419 }
798 420  
799   -static int _read_mirrors(struct objio_state *ios, unsigned cur_comp)
  421 +int objio_read_pagelist(struct nfs_read_data *rdata)
800 422 {
801   - struct osd_request *or = NULL;
802   - struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
803   - unsigned dev = per_dev->dev;
804   - struct pnfs_osd_object_cred *cred =
805   - &ios->layout->comps[cur_comp];
806   - struct osd_obj_id obj = {
807   - .partition = cred->oc_object_id.oid_partition_id,
808   - .id = cred->oc_object_id.oid_object_id,
809   - };
  423 + struct objio_state *objios;
810 424 int ret;
811 425  
812   - or = osd_start_request(_io_od(ios, dev), GFP_KERNEL);
813   - if (unlikely(!or)) {
814   - ret = -ENOMEM;
815   - goto err;
816   - }
817   - per_dev->or = or;
818   -
819   - osd_req_read(or, &obj, per_dev->offset, per_dev->bio, per_dev->length);
820   -
821   - ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL);
822   - if (ret) {
823   - dprintk("%s: Faild to osd_finalize_request() => %d\n",
824   - __func__, ret);
825   - goto err;
826   - }
827   -
828   - dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
829   - __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
830   - per_dev->length);
831   -
832   -err:
833   - return ret;
834   -}
835   -
836   -static ssize_t _read_exec(struct objio_state *ios)
837   -{
838   - unsigned i;
839   - int ret;
840   -
841   - for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
842   - if (!ios->per_dev[i].length)
843   - continue;
844   - ret = _read_mirrors(ios, i);
845   - if (unlikely(ret))
846   - goto err;
847   - }
848   -
849   - ios->done = _read_done;
850   - return _io_exec(ios); /* In sync mode exec returns the io status */
851   -
852   -err:
853   - _io_free(ios);
854   - return ret;
855   -}
856   -
857   -ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state)
858   -{
859   - struct objio_state *ios = container_of(ol_state, struct objio_state,
860   - ol_state);
861   - int ret;
862   -
863   - ret = _io_rw_pagelist(ios, GFP_KERNEL);
  426 + ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true,
  427 + rdata->lseg, rdata->args.pages, rdata->args.pgbase,
  428 + rdata->args.offset, rdata->args.count, rdata,
  429 + GFP_KERNEL, &objios);
864 430 if (unlikely(ret))
865 431 return ret;
866 432  
867   - return _read_exec(ios);
  433 + objios->ios->done = _read_done;
  434 + dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
  435 + rdata->args.offset, rdata->args.count);
  436 + return ore_read(objios->ios);
868 437 }
869 438  
870 439 /*
871 440 * write
872 441 */
873   -static ssize_t _write_done(struct objio_state *ios)
  442 +static void _write_done(struct ore_io_state *ios, void *private)
874 443 {
  444 + struct objio_state *objios = private;
875 445 ssize_t status;
876   - int ret = _io_check(ios, true);
  446 + int ret = ore_check_io(ios, &__on_dev_error);
877 447  
878   - _io_free(ios);
  448 + /* FIXME: _io_free(ios) can we dealocate the libosd resources; */
879 449  
880 450 if (likely(!ret)) {
881 451 /* FIXME: should be based on the OSD's persistence model
882 452 * See OSD2r05 Section 4.13 Data persistence model */
883   - ios->ol_state.committed = NFS_FILE_SYNC;
  453 + objios->oir.committed = NFS_FILE_SYNC;
884 454 status = ios->length;
885 455 } else {
886 456 status = ret;
887 457 }
888 458  
889   - objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync);
890   - return status;
  459 + objlayout_write_done(&objios->oir, status, objios->sync);
891 460 }
892 461  
893   -static int _write_mirrors(struct objio_state *ios, unsigned cur_comp)
  462 +static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
894 463 {
895   - struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp];
896   - unsigned dev = ios->per_dev[cur_comp].dev;
897   - unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
898   - int ret;
  464 + struct objio_state *objios = priv;
  465 + struct nfs_write_data *wdata = objios->oir.rpcdata;
  466 + pgoff_t index = offset / PAGE_SIZE;
  467 + struct page *page = find_get_page(wdata->inode->i_mapping, index);
899 468  
900   - for (; cur_comp < last_comp; ++cur_comp, ++dev) {
901   - struct osd_request *or = NULL;
902   - struct pnfs_osd_object_cred *cred =
903   - &ios->layout->comps[cur_comp];
904   - struct osd_obj_id obj = {
905   - .partition = cred->oc_object_id.oid_partition_id,
906   - .id = cred->oc_object_id.oid_object_id,
907   - };
908   - struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
909   - struct bio *bio;
910   -
911   - or = osd_start_request(_io_od(ios, dev), GFP_NOFS);
912   - if (unlikely(!or)) {
913   - ret = -ENOMEM;
914   - goto err;
  469 + if (!page) {
  470 + page = find_or_create_page(wdata->inode->i_mapping,
  471 + index, GFP_NOFS);
  472 + if (unlikely(!page)) {
  473 + dprintk("%s: grab_cache_page Failed index=0x%lx\n",
  474 + __func__, index);
  475 + return NULL;
915 476 }
916   - per_dev->or = or;
917   -
918   - if (per_dev != master_dev) {
919   - bio = bio_kmalloc(GFP_NOFS,
920   - master_dev->bio->bi_max_vecs);
921   - if (unlikely(!bio)) {
922   - dprintk("Faild to allocate BIO size=%u\n",
923   - master_dev->bio->bi_max_vecs);
924   - ret = -ENOMEM;
925   - goto err;
926   - }
927   -
928   - __bio_clone(bio, master_dev->bio);
929   - bio->bi_bdev = NULL;
930   - bio->bi_next = NULL;
931   - per_dev->bio = bio;
932   - per_dev->dev = dev;
933   - per_dev->length = master_dev->length;
934   - per_dev->offset = master_dev->offset;
935   - } else {
936   - bio = master_dev->bio;
937   - bio->bi_rw |= REQ_WRITE;
938   - }
939   -
940   - osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length);
941   -
942   - ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL);
943   - if (ret) {
944   - dprintk("%s: Faild to osd_finalize_request() => %d\n",
945   - __func__, ret);
946   - goto err;
947   - }
948   -
949   - dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
950   - __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
951   - per_dev->length);
  477 + unlock_page(page);
952 478 }
  479 + if (PageDirty(page) || PageWriteback(page))
  480 + *uptodate = true;
  481 + else
  482 + *uptodate = PageUptodate(page);
  483 + dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate);
  484 + return page;
  485 +}
953 486  
954   -err:
955   - return ret;
  487 +static void __r4w_put_page(void *priv, struct page *page)
  488 +{
  489 + dprintk("%s: index=0x%lx\n", __func__, page->index);
  490 + page_cache_release(page);
  491 + return;
956 492 }
957 493  
958   -static ssize_t _write_exec(struct objio_state *ios)
  494 +static const struct _ore_r4w_op _r4w_op = {
  495 + .get_page = &__r4w_get_page,
  496 + .put_page = &__r4w_put_page,
  497 +};
  498 +
  499 +int objio_write_pagelist(struct nfs_write_data *wdata, int how)
959 500 {
960   - unsigned i;
  501 + struct objio_state *objios;
961 502 int ret;
962 503  
963   - for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
964   - if (!ios->per_dev[i].length)
965   - continue;
966   - ret = _write_mirrors(ios, i);
967   - if (unlikely(ret))
968   - goto err;
969   - }
  504 + ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false,
  505 + wdata->lseg, wdata->args.pages, wdata->args.pgbase,
  506 + wdata->args.offset, wdata->args.count, wdata, GFP_NOFS,
  507 + &objios);
  508 + if (unlikely(ret))
  509 + return ret;
970 510  
971   - ios->done = _write_done;
972   - return _io_exec(ios); /* In sync mode exec returns the io->status */
  511 + objios->sync = 0 != (how & FLUSH_SYNC);
  512 + objios->ios->r4w = &_r4w_op;
973 513  
974   -err:
975   - _io_free(ios);
976   - return ret;
977   -}
  514 + if (!objios->sync)
  515 + objios->ios->done = _write_done;
978 516  
979   -ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable)
980   -{
981   - struct objio_state *ios = container_of(ol_state, struct objio_state,
982   - ol_state);
983   - int ret;
984   -
985   - /* TODO: ios->stable = stable; */
986   - ret = _io_rw_pagelist(ios, GFP_NOFS);
  517 + dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
  518 + wdata->args.offset, wdata->args.count);
  519 + ret = ore_write(objios->ios);
987 520 if (unlikely(ret))
988 521 return ret;
989 522  
990   - return _write_exec(ios);
  523 + if (objios->sync)
  524 + _write_done(objios->ios, objios);
  525 +
  526 + return 0;
991 527 }
992 528  
993 529 static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
... ... @@ -997,7 +533,7 @@
997 533 return false;
998 534  
999 535 return pgio->pg_count + req->wb_bytes <=
1000   - OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
  536 + OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
1001 537 }
1002 538  
1003 539 static const struct nfs_pageio_ops objio_pg_read_ops = {
fs/nfs/objlayout/objlayout.c
... ... @@ -156,77 +156,39 @@
156 156 return end > start ? end - 1 : NFS4_MAX_UINT64;
157 157 }
158 158  
159   -static struct objlayout_io_state *
160   -objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type,
161   - struct page **pages,
162   - unsigned pgbase,
163   - loff_t offset,
164   - size_t count,
165   - struct pnfs_layout_segment *lseg,
166   - void *rpcdata,
167   - gfp_t gfp_flags)
  159 +void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
  160 + struct page ***p_pages, unsigned *p_pgbase,
  161 + u64 offset, unsigned long count)
168 162 {
169   - struct objlayout_io_state *state;
170 163 u64 lseg_end_offset;
171 164  
172   - dprintk("%s: allocating io_state\n", __func__);
173   - if (objio_alloc_io_state(lseg, &state, gfp_flags))
174   - return NULL;
175   -
176 165 BUG_ON(offset < lseg->pls_range.offset);
177 166 lseg_end_offset = end_offset(lseg->pls_range.offset,
178 167 lseg->pls_range.length);
179 168 BUG_ON(offset >= lseg_end_offset);
180   - if (offset + count > lseg_end_offset) {
181   - count = lseg->pls_range.length -
182   - (offset - lseg->pls_range.offset);
183   - dprintk("%s: truncated count %Zd\n", __func__, count);
184   - }
  169 + WARN_ON(offset + count > lseg_end_offset);
185 170  
186   - if (pgbase > PAGE_SIZE) {
187   - pages += pgbase >> PAGE_SHIFT;
188   - pgbase &= ~PAGE_MASK;
  171 + if (*p_pgbase > PAGE_SIZE) {
  172 + dprintk("%s: pgbase(0x%x) > PAGE_SIZE\n", __func__, *p_pgbase);
  173 + *p_pages += *p_pgbase >> PAGE_SHIFT;
  174 + *p_pgbase &= ~PAGE_MASK;
189 175 }
190   -
191   - INIT_LIST_HEAD(&state->err_list);
192   - state->lseg = lseg;
193   - state->rpcdata = rpcdata;
194   - state->pages = pages;
195   - state->pgbase = pgbase;
196   - state->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
197   - state->offset = offset;
198   - state->count = count;
199   - state->sync = 0;
200   -
201   - return state;
202 176 }
203 177  
204   -static void
205   -objlayout_free_io_state(struct objlayout_io_state *state)
206   -{
207   - dprintk("%s: freeing io_state\n", __func__);
208   - if (unlikely(!state))
209   - return;
210   -
211   - objio_free_io_state(state);
212   -}
213   -
214 178 /*
215 179 * I/O done common code
216 180 */
217 181 static void
218   -objlayout_iodone(struct objlayout_io_state *state)
  182 +objlayout_iodone(struct objlayout_io_res *oir)
219 183 {
220   - dprintk("%s: state %p status\n", __func__, state);
221   -
222   - if (likely(state->status >= 0)) {
223   - objlayout_free_io_state(state);
  184 + if (likely(oir->status >= 0)) {
  185 + objio_free_result(oir);
224 186 } else {
225   - struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout);
  187 + struct objlayout *objlay = oir->objlay;
226 188  
227 189 spin_lock(&objlay->lock);
228 190 objlay->delta_space_valid = OBJ_DSU_INVALID;
229   - list_add(&objlay->err_list, &state->err_list);
  191 + list_add(&objlay->err_list, &oir->err_list);
230 192 spin_unlock(&objlay->lock);
231 193 }
232 194 }
233 195  
234 196  
... ... @@ -238,13 +200,13 @@
238 200 * the error for later reporting at layout-return.
239 201 */
240 202 void
241   -objlayout_io_set_result(struct objlayout_io_state *state, unsigned index,
  203 +objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
242 204 struct pnfs_osd_objid *pooid, int osd_error,
243 205 u64 offset, u64 length, bool is_write)
244 206 {
245   - struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index];
  207 + struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[index];
246 208  
247   - BUG_ON(index >= state->num_comps);
  209 + BUG_ON(index >= oir->num_comps);
248 210 if (osd_error) {
249 211 ioerr->oer_component = *pooid;
250 212 ioerr->oer_comp_offset = offset;
251 213  
252 214  
253 215  
254 216  
... ... @@ -285,22 +247,19 @@
285 247 }
286 248  
287 249 void
288   -objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync)
  250 +objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
289 251 {
290   - int eof = state->eof;
291   - struct nfs_read_data *rdata;
  252 + struct nfs_read_data *rdata = oir->rpcdata;
292 253  
293   - state->status = status;
294   - dprintk("%s: Begin status=%zd eof=%d\n", __func__, status, eof);
295   - rdata = state->rpcdata;
296   - rdata->task.tk_status = status;
297   - if (status >= 0) {
  254 + oir->status = rdata->task.tk_status = status;
  255 + if (status >= 0)
298 256 rdata->res.count = status;
299   - rdata->res.eof = eof;
300   - }
301   - objlayout_iodone(state);
302   - /* must not use state after this point */
  257 + objlayout_iodone(oir);
  258 + /* must not use oir after this point */
303 259  
  260 + dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
  261 + status, rdata->res.eof, sync);
  262 +
304 263 if (sync)
305 264 pnfs_ld_read_done(rdata);
306 265 else {
307 266  
308 267  
309 268  
310 269  
311 270  
312 271  
313 272  
... ... @@ -317,40 +276,36 @@
317 276 {
318 277 loff_t offset = rdata->args.offset;
319 278 size_t count = rdata->args.count;
320   - struct objlayout_io_state *state;
321   - ssize_t status = 0;
  279 + int err;
322 280 loff_t eof;
323 281  
324   - dprintk("%s: Begin inode %p offset %llu count %d\n",
325   - __func__, rdata->inode, offset, (int)count);
326   -
327 282 eof = i_size_read(rdata->inode);
328 283 if (unlikely(offset + count > eof)) {
329 284 if (offset >= eof) {
330   - status = 0;
  285 + err = 0;
331 286 rdata->res.count = 0;
332 287 rdata->res.eof = 1;
  288 + /*FIXME: do we need to call pnfs_ld_read_done() */
333 289 goto out;
334 290 }
335 291 count = eof - offset;
336 292 }
337 293  
338   - state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout,
339   - rdata->args.pages, rdata->args.pgbase,
340   - offset, count,
341   - rdata->lseg, rdata,
342   - GFP_KERNEL);
343   - if (unlikely(!state)) {
344   - status = -ENOMEM;
345   - goto out;
346   - }
  294 + rdata->res.eof = (offset + count) >= eof;
  295 + _fix_verify_io_params(rdata->lseg, &rdata->args.pages,
  296 + &rdata->args.pgbase,
  297 + rdata->args.offset, rdata->args.count);
347 298  
348   - state->eof = state->offset + state->count >= eof;
  299 + dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
  300 + __func__, rdata->inode->i_ino, offset, count, rdata->res.eof);
349 301  
350   - status = objio_read_pagelist(state);
  302 + err = objio_read_pagelist(rdata);
351 303 out:
352   - dprintk("%s: Return status %Zd\n", __func__, status);
353   - rdata->pnfs_error = status;
  304 + if (unlikely(err)) {
  305 + rdata->pnfs_error = err;
  306 + dprintk("%s: Returned Error %d\n", __func__, err);
  307 + return PNFS_NOT_ATTEMPTED;
  308 + }
354 309 return PNFS_ATTEMPTED;
355 310 }
356 311  
357 312  
358 313  
359 314  
360 315  
... ... @@ -371,27 +326,21 @@
371 326 }
372 327  
373 328 void
374   -objlayout_write_done(struct objlayout_io_state *state, ssize_t status,
375   - bool sync)
  329 +objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
376 330 {
377   - struct nfs_write_data *wdata;
  331 + struct nfs_write_data *wdata = oir->rpcdata;
378 332  
379   - dprintk("%s: Begin\n", __func__);
380   - wdata = state->rpcdata;
381   - state->status = status;
382   - wdata->task.tk_status = status;
  333 + oir->status = wdata->task.tk_status = status;
383 334 if (status >= 0) {
384 335 wdata->res.count = status;
385   - wdata->verf.committed = state->committed;
386   - dprintk("%s: Return status %d committed %d\n",
387   - __func__, wdata->task.tk_status,
388   - wdata->verf.committed);
389   - } else
390   - dprintk("%s: Return status %d\n",
391   - __func__, wdata->task.tk_status);
392   - objlayout_iodone(state);
393   - /* must not use state after this point */
  336 + wdata->verf.committed = oir->committed;
  337 + }
  338 + objlayout_iodone(oir);
  339 + /* must not use oir after this point */
394 340  
  341 + dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
  342 + status, wdata->verf.committed, sync);
  343 +
395 344 if (sync)
396 345 pnfs_ld_write_done(wdata);
397 346 else {
398 347  
399 348  
400 349  
... ... @@ -407,30 +356,18 @@
407 356 objlayout_write_pagelist(struct nfs_write_data *wdata,
408 357 int how)
409 358 {
410   - struct objlayout_io_state *state;
411   - ssize_t status;
  359 + int err;
412 360  
413   - dprintk("%s: Begin inode %p offset %llu count %u\n",
414   - __func__, wdata->inode, wdata->args.offset, wdata->args.count);
  361 + _fix_verify_io_params(wdata->lseg, &wdata->args.pages,
  362 + &wdata->args.pgbase,
  363 + wdata->args.offset, wdata->args.count);
415 364  
416   - state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout,
417   - wdata->args.pages,
418   - wdata->args.pgbase,
419   - wdata->args.offset,
420   - wdata->args.count,
421   - wdata->lseg, wdata,
422   - GFP_NOFS);
423   - if (unlikely(!state)) {
424   - status = -ENOMEM;
425   - goto out;
  365 + err = objio_write_pagelist(wdata, how);
  366 + if (unlikely(err)) {
  367 + wdata->pnfs_error = err;
  368 + dprintk("%s: Returned Error %d\n", __func__, err);
  369 + return PNFS_NOT_ATTEMPTED;
426 370 }
427   -
428   - state->sync = how & FLUSH_SYNC;
429   -
430   - status = objio_write_pagelist(state, how & FLUSH_STABLE);
431   - out:
432   - dprintk("%s: Return status %Zd\n", __func__, status);
433   - wdata->pnfs_error = status;
434 371 return PNFS_ATTEMPTED;
435 372 }
436 373  
437 374  
438 375  
... ... @@ -537,14 +474,14 @@
537 474 static void
538 475 encode_accumulated_error(struct objlayout *objlay, __be32 *p)
539 476 {
540   - struct objlayout_io_state *state, *tmp;
  477 + struct objlayout_io_res *oir, *tmp;
541 478 struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
542 479  
543   - list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
  480 + list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
544 481 unsigned i;
545 482  
546   - for (i = 0; i < state->num_comps; i++) {
547   - struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
  483 + for (i = 0; i < oir->num_comps; i++) {
  484 + struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
548 485  
549 486 if (!ioerr->oer_errno)
550 487 continue;
... ... @@ -563,8 +500,8 @@
563 500  
564 501 merge_ioerr(&accumulated_err, ioerr);
565 502 }
566   - list_del(&state->err_list);
567   - objlayout_free_io_state(state);
  503 + list_del(&oir->err_list);
  504 + objio_free_result(oir);
568 505 }
569 506  
570 507 pnfs_osd_xdr_encode_ioerr(p, &accumulated_err);
... ... @@ -576,7 +513,7 @@
576 513 const struct nfs4_layoutreturn_args *args)
577 514 {
578 515 struct objlayout *objlay = OBJLAYOUT(pnfslay);
579   - struct objlayout_io_state *state, *tmp;
  516 + struct objlayout_io_res *oir, *tmp;
580 517 __be32 *start;
581 518  
582 519 dprintk("%s: Begin\n", __func__);
583 520  
... ... @@ -585,13 +522,13 @@
585 522  
586 523 spin_lock(&objlay->lock);
587 524  
588   - list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
  525 + list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
589 526 __be32 *last_xdr = NULL, *p;
590 527 unsigned i;
591 528 int res = 0;
592 529  
593   - for (i = 0; i < state->num_comps; i++) {
594   - struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
  530 + for (i = 0; i < oir->num_comps; i++) {
  531 + struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
595 532  
596 533 if (!ioerr->oer_errno)
597 534 continue;
... ... @@ -615,7 +552,7 @@
615 552 }
616 553  
617 554 last_xdr = p;
618   - pnfs_osd_xdr_encode_ioerr(p, &state->ioerrs[i]);
  555 + pnfs_osd_xdr_encode_ioerr(p, &oir->ioerrs[i]);
619 556 }
620 557  
621 558 /* TODO: use xdr_write_pages */
... ... @@ -631,8 +568,8 @@
631 568 encode_accumulated_error(objlay, last_xdr);
632 569 goto loop_done;
633 570 }
634   - list_del(&state->err_list);
635   - objlayout_free_io_state(state);
  571 + list_del(&oir->err_list);
  572 + objio_free_result(oir);
636 573 }
637 574 loop_done:
638 575 spin_unlock(&objlay->lock);
fs/nfs/objlayout/objlayout.h
... ... @@ -74,19 +74,11 @@
74 74 * per-I/O operation state
75 75 * embedded in objects provider io_state data structure
76 76 */
77   -struct objlayout_io_state {
78   - struct pnfs_layout_segment *lseg;
  77 +struct objlayout_io_res {
  78 + struct objlayout *objlay;
79 79  
80   - struct page **pages;
81   - unsigned pgbase;
82   - unsigned nr_pages;
83   - unsigned long count;
84   - loff_t offset;
85   - bool sync;
86   -
87 80 void *rpcdata;
88 81 int status; /* res */
89   - int eof; /* res */
90 82 int committed; /* res */
91 83  
92 84 /* Error reporting (layout_return) */
... ... @@ -100,6 +92,18 @@
100 92 struct pnfs_osd_ioerr *ioerrs;
101 93 };
102 94  
  95 +static inline
  96 +void objlayout_init_ioerrs(struct objlayout_io_res *oir, unsigned num_comps,
  97 + struct pnfs_osd_ioerr *ioerrs, void *rpcdata,
  98 + struct pnfs_layout_hdr *pnfs_layout_type)
  99 +{
  100 + oir->objlay = OBJLAYOUT(pnfs_layout_type);
  101 + oir->rpcdata = rpcdata;
  102 + INIT_LIST_HEAD(&oir->err_list);
  103 + oir->num_comps = num_comps;
  104 + oir->ioerrs = ioerrs;
  105 +}
  106 +
103 107 /*
104 108 * Raid engine I/O API
105 109 */
106 110  
107 111  
108 112  
109 113  
... ... @@ -110,28 +114,24 @@
110 114 gfp_t gfp_flags);
111 115 extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
112 116  
113   -extern int objio_alloc_io_state(
114   - struct pnfs_layout_segment *lseg,
115   - struct objlayout_io_state **outp,
116   - gfp_t gfp_flags);
117   -extern void objio_free_io_state(struct objlayout_io_state *state);
  117 +/* objio_free_result will free these @oir structs recieved from
  118 + * objlayout_{read,write}_done
  119 + */
  120 +extern void objio_free_result(struct objlayout_io_res *oir);
118 121  
119   -extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state);
120   -extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state,
121   - bool stable);
  122 +extern int objio_read_pagelist(struct nfs_read_data *rdata);
  123 +extern int objio_write_pagelist(struct nfs_write_data *wdata, int how);
122 124  
123 125 /*
124 126 * callback API
125 127 */
126   -extern void objlayout_io_set_result(struct objlayout_io_state *state,
  128 +extern void objlayout_io_set_result(struct objlayout_io_res *oir,
127 129 unsigned index, struct pnfs_osd_objid *pooid,
128 130 int osd_error, u64 offset, u64 length, bool is_write);
129 131  
130 132 static inline void
131   -objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used)
  133 +objlayout_add_delta_space_used(struct objlayout *objlay, s64 space_used)
132 134 {
133   - struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout);
134   -
135 135 /* If one of the I/Os errored out and the delta_space_used was
136 136 * invalid we render the complete report as invalid. Protocol mandate
137 137 * the DSU be accurate or not reported.
138 138  
... ... @@ -144,9 +144,9 @@
144 144 spin_unlock(&objlay->lock);
145 145 }
146 146  
147   -extern void objlayout_read_done(struct objlayout_io_state *state,
  147 +extern void objlayout_read_done(struct objlayout_io_res *oir,
148 148 ssize_t status, bool sync);
149   -extern void objlayout_write_done(struct objlayout_io_state *state,
  149 +extern void objlayout_write_done(struct objlayout_io_res *oir,
150 150 ssize_t status, bool sync);
151 151  
152 152 extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
... ... @@ -41,7 +41,7 @@
41 41  
42 42 /**
43 43 * nfs_create_request - Create an NFS read/write request.
44   - * @file: file descriptor to use
  44 + * @ctx: open context to use
45 45 * @inode: inode to which the request is attached
46 46 * @page: page to write
47 47 * @offset: starting offset within the page for the write
... ... @@ -1443,17 +1443,31 @@
1443 1443 /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
1444 1444 data = kzalloc(sizeof(*data), GFP_NOFS);
1445 1445 if (!data) {
1446   - mark_inode_dirty_sync(inode);
1447 1446 status = -ENOMEM;
1448 1447 goto out;
1449 1448 }
1450 1449  
  1450 + if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
  1451 + goto out_free;
  1452 +
  1453 + if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
  1454 + if (!sync) {
  1455 + status = -EAGAIN;
  1456 + goto out_free;
  1457 + }
  1458 + status = wait_on_bit_lock(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING,
  1459 + nfs_wait_bit_killable, TASK_KILLABLE);
  1460 + if (status)
  1461 + goto out_free;
  1462 + }
  1463 +
1451 1464 INIT_LIST_HEAD(&data->lseg_list);
1452 1465 spin_lock(&inode->i_lock);
1453 1466 if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
  1467 + clear_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags);
1454 1468 spin_unlock(&inode->i_lock);
1455   - kfree(data);
1456   - goto out;
  1469 + wake_up_bit(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING);
  1470 + goto out_free;
1457 1471 }
1458 1472  
1459 1473 pnfs_list_write_lseg(inode, &data->lseg_list);
1460 1474  
... ... @@ -1475,7 +1489,12 @@
1475 1489  
1476 1490 status = nfs4_proc_layoutcommit(data, sync);
1477 1491 out:
  1492 + if (status)
  1493 + mark_inode_dirty_sync(inode);
1478 1494 dprintk("<-- %s status %d\n", __func__, status);
1479 1495 return status;
  1496 +out_free:
  1497 + kfree(data);
  1498 + goto out;
1480 1499 }
... ... @@ -1243,7 +1243,6 @@
1243 1243 {
1244 1244 struct nfs_writeargs *argp = &data->args;
1245 1245 struct nfs_writeres *resp = &data->res;
1246   - struct nfs_server *server = NFS_SERVER(data->inode);
1247 1246 int status;
1248 1247  
1249 1248 dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
... ... @@ -1277,7 +1276,7 @@
1277 1276 if (time_before(complain, jiffies)) {
1278 1277 dprintk("NFS: faulty NFS server %s:"
1279 1278 " (committed = %d) != (stable = %d)\n",
1280   - server->nfs_client->cl_hostname,
  1279 + NFS_SERVER(data->inode)->nfs_client->cl_hostname,
1281 1280 resp->verf->committed, argp->stable);
1282 1281 complain = jiffies + 300 * HZ;
1283 1282 }
... ... @@ -256,6 +256,8 @@
256 256 nfsd_serv = NULL;
257 257 nfsd_shutdown();
258 258  
  259 + svc_rpcb_cleanup(serv);
  260 +
259 261 printk(KERN_WARNING "nfsd: last server has exited, flushing export "
260 262 "cache\n");
261 263 nfsd_export_flush();
include/linux/nfs_fs.h
... ... @@ -229,6 +229,7 @@
229 229 #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */
230 230 #define NFS_INO_PNFS_COMMIT (8) /* use pnfs code for commit */
231 231 #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */
  232 +#define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */
232 233  
233 234 static inline struct nfs_inode *NFS_I(const struct inode *inode)
234 235 {
include/linux/sunrpc/clnt.h
... ... @@ -136,6 +136,8 @@
136 136 void rpc_release_client(struct rpc_clnt *);
137 137 void rpc_task_release_client(struct rpc_task *);
138 138  
  139 +int rpcb_create_local(void);
  140 +void rpcb_put_local(void);
139 141 int rpcb_register(u32, u32, int, unsigned short);
140 142 int rpcb_v4_register(const u32 program, const u32 version,
141 143 const struct sockaddr *address,
include/linux/sunrpc/svc.h
... ... @@ -413,6 +413,7 @@
413 413 /*
414 414 * Function prototypes.
415 415 */
  416 +void svc_rpcb_cleanup(struct svc_serv *serv);
416 417 struct svc_serv *svc_create(struct svc_program *, unsigned int,
417 418 void (*shutdown)(struct svc_serv *));
418 419 struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
net/sunrpc/auth_unix.c
... ... @@ -129,6 +129,9 @@
129 129 for (i = 0; i < groups ; i++)
130 130 if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i))
131 131 return 0;
  132 + if (groups < NFS_NGROUPS &&
  133 + cred->uc_gids[groups] != NOGROUP)
  134 + return 0;
132 135 return 1;
133 136 }
134 137  
net/sunrpc/rpcb_clnt.c
... ... @@ -114,6 +114,9 @@
114 114 static struct rpc_clnt * rpcb_local_clnt;
115 115 static struct rpc_clnt * rpcb_local_clnt4;
116 116  
  117 +DEFINE_SPINLOCK(rpcb_clnt_lock);
  118 +unsigned int rpcb_users;
  119 +
117 120 struct rpcbind_args {
118 121 struct rpc_xprt * r_xprt;
119 122  
... ... @@ -161,6 +164,56 @@
161 164 kfree(map);
162 165 }
163 166  
  167 +static int rpcb_get_local(void)
  168 +{
  169 + int cnt;
  170 +
  171 + spin_lock(&rpcb_clnt_lock);
  172 + if (rpcb_users)
  173 + rpcb_users++;
  174 + cnt = rpcb_users;
  175 + spin_unlock(&rpcb_clnt_lock);
  176 +
  177 + return cnt;
  178 +}
  179 +
  180 +void rpcb_put_local(void)
  181 +{
  182 + struct rpc_clnt *clnt = rpcb_local_clnt;
  183 + struct rpc_clnt *clnt4 = rpcb_local_clnt4;
  184 + int shutdown;
  185 +
  186 + spin_lock(&rpcb_clnt_lock);
  187 + if (--rpcb_users == 0) {
  188 + rpcb_local_clnt = NULL;
  189 + rpcb_local_clnt4 = NULL;
  190 + }
  191 + shutdown = !rpcb_users;
  192 + spin_unlock(&rpcb_clnt_lock);
  193 +
  194 + if (shutdown) {
  195 + /*
  196 + * cleanup_rpcb_clnt - remove xprtsock's sysctls, unregister
  197 + */
  198 + if (clnt4)
  199 + rpc_shutdown_client(clnt4);
  200 + if (clnt)
  201 + rpc_shutdown_client(clnt);
  202 + }
  203 +}
  204 +
  205 +static void rpcb_set_local(struct rpc_clnt *clnt, struct rpc_clnt *clnt4)
  206 +{
  207 + /* Protected by rpcb_create_local_mutex */
  208 + rpcb_local_clnt = clnt;
  209 + rpcb_local_clnt4 = clnt4;
  210 + smp_wmb();
  211 + rpcb_users = 1;
  212 + dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: "
  213 + "%p, rpcb_local_clnt4: %p)\n", rpcb_local_clnt,
  214 + rpcb_local_clnt4);
  215 +}
  216 +
164 217 /*
165 218 * Returns zero on success, otherwise a negative errno value
166 219 * is returned.
... ... @@ -205,9 +258,7 @@
205 258 clnt4 = NULL;
206 259 }
207 260  
208   - /* Protected by rpcb_create_local_mutex */
209   - rpcb_local_clnt = clnt;
210   - rpcb_local_clnt4 = clnt4;
  261 + rpcb_set_local(clnt, clnt4);
211 262  
212 263 out:
213 264 return result;
... ... @@ -259,9 +310,7 @@
259 310 clnt4 = NULL;
260 311 }
261 312  
262   - /* Protected by rpcb_create_local_mutex */
263   - rpcb_local_clnt = clnt;
264   - rpcb_local_clnt4 = clnt4;
  313 + rpcb_set_local(clnt, clnt4);
265 314  
266 315 out:
267 316 return result;
268 317  
269 318  
... ... @@ -271,16 +320,16 @@
271 320 * Returns zero on success, otherwise a negative errno value
272 321 * is returned.
273 322 */
274   -static int rpcb_create_local(void)
  323 +int rpcb_create_local(void)
275 324 {
276 325 static DEFINE_MUTEX(rpcb_create_local_mutex);
277 326 int result = 0;
278 327  
279   - if (rpcb_local_clnt)
  328 + if (rpcb_get_local())
280 329 return result;
281 330  
282 331 mutex_lock(&rpcb_create_local_mutex);
283   - if (rpcb_local_clnt)
  332 + if (rpcb_get_local())
284 333 goto out;
285 334  
286 335 if (rpcb_create_local_unix() != 0)
287 336  
... ... @@ -382,12 +431,7 @@
382 431 struct rpc_message msg = {
383 432 .rpc_argp = &map,
384 433 };
385   - int error;
386 434  
387   - error = rpcb_create_local();
388   - if (error)
389   - return error;
390   -
391 435 dprintk("RPC: %sregistering (%u, %u, %d, %u) with local "
392 436 "rpcbind\n", (port ? "" : "un"),
393 437 prog, vers, prot, port);
394 438  
... ... @@ -522,11 +566,7 @@
522 566 struct rpc_message msg = {
523 567 .rpc_argp = &map,
524 568 };
525   - int error;
526 569  
527   - error = rpcb_create_local();
528   - if (error)
529   - return error;
530 570 if (rpcb_local_clnt4 == NULL)
531 571 return -EPROTONOSUPPORT;
532 572  
... ... @@ -1060,16 +1100,4 @@
1060 1100 .version = rpcb_version,
1061 1101 .stats = &rpcb_stats,
1062 1102 };
1063   -
1064   -/**
1065   - * cleanup_rpcb_clnt - remove xprtsock's sysctls, unregister
1066   - *
1067   - */
1068   -void cleanup_rpcb_clnt(void)
1069   -{
1070   - if (rpcb_local_clnt4)
1071   - rpc_shutdown_client(rpcb_local_clnt4);
1072   - if (rpcb_local_clnt)
1073   - rpc_shutdown_client(rpcb_local_clnt);
1074   -}
net/sunrpc/sunrpc_syms.c
... ... @@ -61,8 +61,6 @@
61 61  
62 62 extern struct cache_detail unix_gid_cache;
63 63  
64   -extern void cleanup_rpcb_clnt(void);
65   -
66 64 static int __init
67 65 init_sunrpc(void)
68 66 {
... ... @@ -102,7 +100,6 @@
102 100 static void __exit
103 101 cleanup_sunrpc(void)
104 102 {
105   - cleanup_rpcb_clnt();
106 103 rpcauth_remove_module();
107 104 cleanup_socket_xprt();
108 105 svc_cleanup_xprt_sock();
... ... @@ -366,7 +366,43 @@
366 366 return &serv->sv_pools[pidx % serv->sv_nrpools];
367 367 }
368 368  
/*
 * Take a reference on the local rpcbind clients and flush any stale
 * portmap registrations for this service.  Returns 0 or a negative errno.
 */
static int svc_rpcb_setup(struct svc_serv *serv)
{
	int err = rpcb_create_local();

	if (err != 0)
		return err;

	/* Remove any stale portmap registrations */
	svc_unregister(serv);
	return 0;
}
  381 +
/*
 * Undo svc_rpcb_setup(): drop this service's rpcbind registrations,
 * then release the reference on the local rpcbind clients.
 */
void svc_rpcb_cleanup(struct svc_serv *serv)
{
	svc_unregister(serv);
	rpcb_put_local();
}
EXPORT_SYMBOL_GPL(svc_rpcb_cleanup);
  388 +
  389 +static int svc_uses_rpcbind(struct svc_serv *serv)
  390 +{
  391 + struct svc_program *progp;
  392 + unsigned int i;
  393 +
  394 + for (progp = serv->sv_program; progp; progp = progp->pg_next) {
  395 + for (i = 0; i < progp->pg_nvers; i++) {
  396 + if (progp->pg_vers[i] == NULL)
  397 + continue;
  398 + if (progp->pg_vers[i]->vs_hidden == 0)
  399 + return 1;
  400 + }
  401 + }
  402 +
  403 + return 0;
  404 +}
  405 +
370 406 /*
371 407 * Create an RPC service
372 408 */
... ... @@ -431,8 +467,15 @@
431 467 spin_lock_init(&pool->sp_lock);
432 468 }
433 469  
434   - /* Remove any stale portmap registrations */
435   - svc_unregister(serv);
  470 + if (svc_uses_rpcbind(serv)) {
  471 + if (svc_rpcb_setup(serv) < 0) {
  472 + kfree(serv->sv_pools);
  473 + kfree(serv);
  474 + return NULL;
  475 + }
  476 + if (!serv->sv_shutdown)
  477 + serv->sv_shutdown = svc_rpcb_cleanup;
  478 + }
436 479  
437 480 return serv;
438 481 }
... ... @@ -500,7 +543,6 @@
500 543 if (svc_serv_is_pooled(serv))
501 544 svc_pool_map_put();
502 545  
503   - svc_unregister(serv);
504 546 kfree(serv->sv_pools);
505 547 kfree(serv);
506 548 }