Commit 06886a5a3dc5a5abe0a4d257c26317bde7047be8

Authored by Boaz Harrosh
1 parent 8ce9bdd1fb

exofs: Move all operations to an io_engine

In anticipation for multi-device operations, we separate osd operations
into an abstract I/O API. Currently only one device is used but later
when adding more devices, we will drive all devices in parallel according
to a "data_map" that describes how data is arranged on multiple devices.
The file system level operates, like before, as if there is one object
(inode-number) and an i_size. The io engine will split this to the same
object-number but on multiple devices.

At first we introduce Mirror (raid 1) layout. But at the final outcome
we intend to fully implement the pNFS-Objects data-map, including
raid 0,4,5,6 over mirrored devices, over multiple device-groups. And
more. See: http://tools.ietf.org/html/draft-ietf-nfsv4-pnfs-obj-12

* Define an io_state based API for accessing osd storage devices
  in an abstract way.
  Usage:
	First a caller allocates an io state with:
		exofs_get_io_state(struct exofs_sb_info *sbi,
				   struct exofs_io_state** ios);

	Then calls one of:
		exofs_sbi_create(struct exofs_io_state *ios);
		exofs_sbi_remove(struct exofs_io_state *ios);
		exofs_sbi_write(struct exofs_io_state *ios);
		exofs_sbi_read(struct exofs_io_state *ios);
		exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);

	And when done
		exofs_put_io_state(struct exofs_io_state *ios);

* Convert all source files to use this new API
* Convert from bio_alloc to bio_kmalloc
* In io engine we make use of the now fixed osd_req_decode_sense

There are no functional changes or on disk additions after this patch.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>

Showing 5 changed files with 644 additions and 350 deletions Side-by-side Diff

... ... @@ -155,23 +155,5 @@
155 155 (((name_len) + offsetof(struct exofs_dir_entry, name) + \
156 156 EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND)
157 157  
158   -/*************************
159   - * function declarations *
160   - *************************/
161   -/* osd.c */
162   -void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
163   - const struct osd_obj_id *obj);
164   -
165   -int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid);
166   -static inline int exofs_check_ok(struct osd_request *or)
167   -{
168   - return exofs_check_ok_resid(or, NULL, NULL);
169   -}
170   -int exofs_sync_op(struct osd_request *or, int timeout, u8 *cred);
171   -int exofs_async_op(struct osd_request *or,
172   - osd_req_done_fn *async_done, void *caller_context, u8 *cred);
173   -
174   -int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr);
175   -
176 158 #endif /*ifndef __EXOFS_COM_H__*/
... ... @@ -30,14 +30,13 @@
30 30 * along with exofs; if not, write to the Free Software
31 31 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
32 32 */
  33 +#ifndef __EXOFS_H__
  34 +#define __EXOFS_H__
33 35  
34 36 #include <linux/fs.h>
35 37 #include <linux/time.h>
36 38 #include "common.h"
37 39  
38   -#ifndef __EXOFS_H__
39   -#define __EXOFS_H__
40   -
41 40 #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
42 41  
43 42 #ifdef CONFIG_EXOFS_DEBUG
... ... @@ -56,6 +55,7 @@
56 55 */
57 56 struct exofs_sb_info {
58 57 struct osd_dev *s_dev; /* returned by get_osd_dev */
  58 + struct exofs_fscb s_fscb; /* Written often, pre-allocate*/
59 59 osd_id s_pid; /* partition ID of file system*/
60 60 int s_timeout; /* timeout for OSD operations */
61 61 uint64_t s_nextid; /* highest object ID used */
... ... @@ -79,6 +79,50 @@
79 79 struct inode vfs_inode; /* normal in-memory inode */
80 80 };
81 81  
  82 +static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
  83 +{
  84 + return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF;
  85 +}
  86 +
  87 +struct exofs_io_state;
  88 +typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private);
  89 +
  90 +struct exofs_io_state {
  91 + struct kref kref;
  92 +
  93 + void *private;
  94 + exofs_io_done_fn done;
  95 +
  96 + struct exofs_sb_info *sbi;
  97 + struct osd_obj_id obj;
  98 + u8 *cred;
  99 +
  100 + /* Global read/write IO*/
  101 + loff_t offset;
  102 + unsigned long length;
  103 + void *kern_buff;
  104 + struct bio *bio;
  105 +
  106 + /* Attributes */
  107 + unsigned in_attr_len;
  108 + struct osd_attr *in_attr;
  109 + unsigned out_attr_len;
  110 + struct osd_attr *out_attr;
  111 +
  112 + /* Variable array of size numdevs */
  113 + unsigned numdevs;
  114 + struct exofs_per_dev_state {
  115 + struct osd_request *or;
  116 + struct bio *bio;
  117 + } per_dev[];
  118 +};
  119 +
  120 +static inline unsigned exofs_io_state_size(unsigned numdevs)
  121 +{
  122 + return sizeof(struct exofs_io_state) +
  123 + sizeof(struct exofs_per_dev_state) * numdevs;
  124 +}
  125 +
82 126 /*
83 127 * our inode flags
84 128 */
... ... @@ -130,6 +174,42 @@
130 174 /*************************
131 175 * function declarations *
132 176 *************************/
  177 +
  178 +/* ios.c */
  179 +void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
  180 + const struct osd_obj_id *obj);
  181 +int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
  182 + u64 offset, void *p, unsigned length);
  183 +
  184 +int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** ios);
  185 +void exofs_put_io_state(struct exofs_io_state *ios);
  186 +
  187 +int exofs_check_io(struct exofs_io_state *ios, u64 *resid);
  188 +
  189 +int exofs_sbi_create(struct exofs_io_state *ios);
  190 +int exofs_sbi_remove(struct exofs_io_state *ios);
  191 +int exofs_sbi_write(struct exofs_io_state *ios);
  192 +int exofs_sbi_read(struct exofs_io_state *ios);
  193 +
  194 +int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr);
  195 +
  196 +int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);
  197 +static inline int exofs_oi_write(struct exofs_i_info *oi,
  198 + struct exofs_io_state *ios)
  199 +{
  200 + ios->obj.id = exofs_oi_objno(oi);
  201 + ios->cred = oi->i_cred;
  202 + return exofs_sbi_write(ios);
  203 +}
  204 +
  205 +static inline int exofs_oi_read(struct exofs_i_info *oi,
  206 + struct exofs_io_state *ios)
  207 +{
  208 + ios->obj.id = exofs_oi_objno(oi);
  209 + ios->cred = oi->i_cred;
  210 + return exofs_sbi_read(ios);
  211 +}
  212 +
133 213 /* inode.c */
134 214 void exofs_truncate(struct inode *inode);
135 215 int exofs_setattr(struct dentry *, struct iattr *);
... ... @@ -169,6 +249,7 @@
169 249  
170 250 /* inode.c */
171 251 extern const struct address_space_operations exofs_aops;
  252 +extern const struct osd_attr g_attr_logical_length;
172 253  
173 254 /* namei.c */
174 255 extern const struct inode_operations exofs_dir_inode_operations;
... ... @@ -37,17 +37,18 @@
37 37  
38 38 #include "exofs.h"
39 39  
40   -#ifdef CONFIG_EXOFS_DEBUG
41   -# define EXOFS_DEBUG_OBJ_ISIZE 1
42   -#endif
43   -
44 40 #define EXOFS_DBGMSG2(M...) do {} while (0)
45 41  
  42 +enum { BIO_MAX_PAGES_KMALLOC =
  43 + (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
  44 +};
  45 +
46 46 struct page_collect {
47 47 struct exofs_sb_info *sbi;
48 48 struct request_queue *req_q;
49 49 struct inode *inode;
50 50 unsigned expected_pages;
  51 + struct exofs_io_state *ios;
51 52  
52 53 struct bio *bio;
53 54 unsigned nr_pages;
... ... @@ -56,7 +57,7 @@
56 57 };
57 58  
58 59 static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
59   - struct inode *inode)
  60 + struct inode *inode)
60 61 {
61 62 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
62 63  
63 64  
... ... @@ -65,13 +66,11 @@
65 66 pcol->inode = inode;
66 67 pcol->expected_pages = expected_pages;
67 68  
  69 + pcol->ios = NULL;
68 70 pcol->bio = NULL;
69 71 pcol->nr_pages = 0;
70 72 pcol->length = 0;
71 73 pcol->pg_first = -1;
72   -
73   - EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino,
74   - expected_pages);
75 74 }
76 75  
77 76 static void _pcol_reset(struct page_collect *pcol)
78 77  
79 78  
80 79  
81 80  
82 81  
83 82  
... ... @@ -82,35 +81,49 @@
82 81 pcol->nr_pages = 0;
83 82 pcol->length = 0;
84 83 pcol->pg_first = -1;
85   - EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n",
86   - pcol->inode->i_ino, pcol->expected_pages);
  84 + pcol->ios = NULL;
87 85  
88 86 /* this is probably the end of the loop but in writes
89 87 * it might not end here. don't be left with nothing
90 88 */
91 89 if (!pcol->expected_pages)
92   - pcol->expected_pages = 128;
  90 + pcol->expected_pages = BIO_MAX_PAGES_KMALLOC;
93 91 }
94 92  
95 93 static int pcol_try_alloc(struct page_collect *pcol)
96 94 {
97   - int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES);
  95 + int pages = min_t(unsigned, pcol->expected_pages,
  96 + BIO_MAX_PAGES_KMALLOC);
98 97  
  98 + if (!pcol->ios) { /* First time allocate io_state */
  99 + int ret = exofs_get_io_state(pcol->sbi, &pcol->ios);
  100 +
  101 + if (ret)
  102 + return ret;
  103 + }
  104 +
99 105 for (; pages; pages >>= 1) {
100   - pcol->bio = bio_alloc(GFP_KERNEL, pages);
  106 + pcol->bio = bio_kmalloc(GFP_KERNEL, pages);
101 107 if (likely(pcol->bio))
102 108 return 0;
103 109 }
104 110  
105   - EXOFS_ERR("Failed to kcalloc expected_pages=%u\n",
  111 + EXOFS_ERR("Failed to bio_kmalloc expected_pages=%u\n",
106 112 pcol->expected_pages);
107 113 return -ENOMEM;
108 114 }
109 115  
110 116 static void pcol_free(struct page_collect *pcol)
111 117 {
112   - bio_put(pcol->bio);
113   - pcol->bio = NULL;
  118 + if (pcol->bio) {
  119 + bio_put(pcol->bio);
  120 + pcol->bio = NULL;
  121 + }
  122 +
  123 + if (pcol->ios) {
  124 + exofs_put_io_state(pcol->ios);
  125 + pcol->ios = NULL;
  126 + }
114 127 }
115 128  
116 129 static int pcol_add_page(struct page_collect *pcol, struct page *page,
117 130  
118 131  
119 132  
... ... @@ -163,22 +176,17 @@
163 176 /* Called at the end of reads, to optionally unlock pages and update their
164 177 * status.
165 178 */
166   -static int __readpages_done(struct osd_request *or, struct page_collect *pcol,
167   - bool do_unlock)
  179 +static int __readpages_done(struct page_collect *pcol, bool do_unlock)
168 180 {
169 181 struct bio_vec *bvec;
170 182 int i;
171 183 u64 resid;
172 184 u64 good_bytes;
173 185 u64 length = 0;
174   - int ret = exofs_check_ok_resid(or, &resid, NULL);
  186 + int ret = exofs_check_io(pcol->ios, &resid);
175 187  
176   - osd_end_request(or);
177   -
178 188 if (likely(!ret))
179 189 good_bytes = pcol->length;
180   - else if (!resid)
181   - good_bytes = 0;
182 190 else
183 191 good_bytes = pcol->length - resid;
184 192  
185 193  
186 194  
... ... @@ -216,13 +224,13 @@
216 224 }
217 225  
218 226 /* callback of async reads */
219   -static void readpages_done(struct osd_request *or, void *p)
  227 +static void readpages_done(struct exofs_io_state *ios, void *p)
220 228 {
221 229 struct page_collect *pcol = p;
222 230  
223   - __readpages_done(or, pcol, true);
  231 + __readpages_done(pcol, true);
224 232 atomic_dec(&pcol->sbi->s_curr_pending);
225   - kfree(p);
  233 + kfree(pcol);
226 234 }
227 235  
228 236 static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
229 237  
230 238  
... ... @@ -240,17 +248,13 @@
240 248  
241 249 unlock_page(page);
242 250 }
243   - pcol_free(pcol);
244 251 }
245 252  
246 253 static int read_exec(struct page_collect *pcol, bool is_sync)
247 254 {
248 255 struct exofs_i_info *oi = exofs_i(pcol->inode);
249   - struct osd_obj_id obj = {pcol->sbi->s_pid,
250   - pcol->inode->i_ino + EXOFS_OBJ_OFF};
251   - struct osd_request *or = NULL;
  256 + struct exofs_io_state *ios = pcol->ios;
252 257 struct page_collect *pcol_copy = NULL;
253   - loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
254 258 int ret;
255 259  
256 260 if (!pcol->bio)
257 261  
258 262  
... ... @@ -259,17 +263,13 @@
259 263 /* see comment in _readpage() about sync reads */
260 264 WARN_ON(is_sync && (pcol->nr_pages != 1));
261 265  
262   - or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL);
263   - if (unlikely(!or)) {
264   - ret = -ENOMEM;
265   - goto err;
266   - }
  266 + ios->bio = pcol->bio;
  267 + ios->length = pcol->length;
  268 + ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
267 269  
268   - osd_req_read(or, &obj, i_start, pcol->bio, pcol->length);
269   -
270 270 if (is_sync) {
271   - exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred);
272   - return __readpages_done(or, pcol, false);
  271 + exofs_oi_read(oi, pcol->ios);
  272 + return __readpages_done(pcol, false);
273 273 }
274 274  
275 275 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
276 276  
... ... @@ -279,14 +279,16 @@
279 279 }
280 280  
281 281 *pcol_copy = *pcol;
282   - ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred);
  282 + ios->done = readpages_done;
  283 + ios->private = pcol_copy;
  284 + ret = exofs_oi_read(oi, ios);
283 285 if (unlikely(ret))
284 286 goto err;
285 287  
286 288 atomic_inc(&pcol->sbi->s_curr_pending);
287 289  
288 290 EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
289   - obj.id, _LLU(i_start), pcol->length);
  291 + ios->obj.id, _LLU(ios->offset), pcol->length);
290 292  
291 293 /* pages ownership was passed to pcol_copy */
292 294 _pcol_reset(pcol);
293 295  
294 296  
... ... @@ -295,12 +297,10 @@
295 297 err:
296 298 if (!is_sync)
297 299 _unlock_pcol_pages(pcol, ret, READ);
298   - else /* Pages unlocked by caller in sync mode only free bio */
299   - pcol_free(pcol);
300 300  
  301 + pcol_free(pcol);
  302 +
301 303 kfree(pcol_copy);
302   - if (or)
303   - osd_end_request(or);
304 304 return ret;
305 305 }
306 306  
... ... @@ -421,9 +421,8 @@
421 421  
422 422 _pcol_init(&pcol, 1, page->mapping->host);
423 423  
424   - /* readpage_strip might call read_exec(,async) inside at several places
425   - * but this is safe for is_async=0 since read_exec will not do anything
426   - * when we have a single page.
  424 + /* readpage_strip might call read_exec(,is_sync==false) at several
  425 + * places but not if we have a single page.
427 426 */
428 427 ret = readpage_strip(&pcol, page);
429 428 if (ret) {
... ... @@ -442,8 +441,8 @@
442 441 return _readpage(page, false);
443 442 }
444 443  
445   -/* Callback for osd_write. All writes are asynchronouse */
446   -static void writepages_done(struct osd_request *or, void *p)
  444 +/* Callback for osd_write. All writes are asynchronous */
  445 +static void writepages_done(struct exofs_io_state *ios, void *p)
447 446 {
448 447 struct page_collect *pcol = p;
449 448 struct bio_vec *bvec;
450 449  
451 450  
... ... @@ -451,16 +450,12 @@
451 450 u64 resid;
452 451 u64 good_bytes;
453 452 u64 length = 0;
  453 + int ret = exofs_check_io(ios, &resid);
454 454  
455   - int ret = exofs_check_ok_resid(or, NULL, &resid);
456   -
457   - osd_end_request(or);
458 455 atomic_dec(&pcol->sbi->s_curr_pending);
459 456  
460 457 if (likely(!ret))
461 458 good_bytes = pcol->length;
462   - else if (!resid)
463   - good_bytes = 0;
464 459 else
465 460 good_bytes = pcol->length - resid;
466 461  
467 462  
468 463  
... ... @@ -498,23 +493,13 @@
498 493 static int write_exec(struct page_collect *pcol)
499 494 {
500 495 struct exofs_i_info *oi = exofs_i(pcol->inode);
501   - struct osd_obj_id obj = {pcol->sbi->s_pid,
502   - pcol->inode->i_ino + EXOFS_OBJ_OFF};
503   - struct osd_request *or = NULL;
  496 + struct exofs_io_state *ios = pcol->ios;
504 497 struct page_collect *pcol_copy = NULL;
505   - loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
506 498 int ret;
507 499  
508 500 if (!pcol->bio)
509 501 return 0;
510 502  
511   - or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL);
512   - if (unlikely(!or)) {
513   - EXOFS_ERR("write_exec: Faild to osd_start_request()\n");
514   - ret = -ENOMEM;
515   - goto err;
516   - }
517   -
518 503 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
519 504 if (!pcol_copy) {
520 505 EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n");
521 506  
522 507  
... ... @@ -525,16 +510,22 @@
525 510 *pcol_copy = *pcol;
526 511  
527 512 pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
528   - osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length);
529   - ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred);
  513 +
  514 + ios->bio = pcol_copy->bio;
  515 + ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
  516 + ios->length = pcol_copy->length;
  517 + ios->done = writepages_done;
  518 + ios->private = pcol_copy;
  519 +
  520 + ret = exofs_oi_write(oi, ios);
530 521 if (unlikely(ret)) {
531   - EXOFS_ERR("write_exec: exofs_async_op() Faild\n");
  522 + EXOFS_ERR("write_exec: exofs_oi_write() Faild\n");
532 523 goto err;
533 524 }
534 525  
535 526 atomic_inc(&pcol->sbi->s_curr_pending);
536 527 EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n",
537   - pcol->inode->i_ino, pcol->pg_first, _LLU(i_start),
  528 + pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset),
538 529 pcol->length);
539 530 /* pages ownership was passed to pcol_copy */
540 531 _pcol_reset(pcol);
541 532  
... ... @@ -542,9 +533,9 @@
542 533  
543 534 err:
544 535 _unlock_pcol_pages(pcol, ret, WRITE);
  536 + pcol_free(pcol);
545 537 kfree(pcol_copy);
546   - if (or)
547   - osd_end_request(or);
  538 +
548 539 return ret;
549 540 }
550 541  
... ... @@ -588,6 +579,9 @@
588 579 if (PageError(page))
589 580 ClearPageError(page);
590 581 unlock_page(page);
  582 + EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) "
  583 + "outside the limits\n",
  584 + inode->i_ino, page->index);
591 585 return 0;
592 586 }
593 587 }
... ... @@ -602,6 +596,9 @@
602 596 ret = write_exec(pcol);
603 597 if (unlikely(ret))
604 598 goto fail;
  599 +
  600 + EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) Discontinuity\n",
  601 + inode->i_ino, page->index);
605 602 goto try_again;
606 603 }
607 604  
... ... @@ -636,6 +633,8 @@
636 633 return 0;
637 634  
638 635 fail:
  636 + EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n",
  637 + inode->i_ino, page->index, ret);
639 638 set_bit(AS_EIO, &page->mapping->flags);
640 639 unlock_page(page);
641 640 return ret;
642 641  
643 642  
... ... @@ -654,14 +653,17 @@
654 653 wbc->range_end >> PAGE_CACHE_SHIFT;
655 654  
656 655 if (start || end)
657   - expected_pages = min(end - start + 1, 32L);
  656 + expected_pages = end - start + 1;
658 657 else
659 658 expected_pages = mapping->nrpages;
660 659  
661   - EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx"
662   - " m->nrpages=%lu start=0x%lx end=0x%lx\n",
  660 + if (expected_pages < 32L)
  661 + expected_pages = 32L;
  662 +
  663 + EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx "
  664 + "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n",
663 665 mapping->host->i_ino, wbc->range_start, wbc->range_end,
664   - mapping->nrpages, start, end);
  666 + mapping->nrpages, start, end, expected_pages);
665 667  
666 668 _pcol_init(&pcol, expected_pages, mapping->host);
667 669  
668 670  
669 671  
... ... @@ -773,19 +775,28 @@
773 775 const struct osd_attr g_attr_logical_length = ATTR_DEF(
774 776 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
775 777  
  778 +static int _do_truncate(struct inode *inode)
  779 +{
  780 + struct exofs_i_info *oi = exofs_i(inode);
  781 + loff_t isize = i_size_read(inode);
  782 + int ret;
  783 +
  784 + inode->i_mtime = inode->i_ctime = CURRENT_TIME;
  785 +
  786 + nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
  787 +
  788 + ret = exofs_oi_truncate(oi, (u64)isize);
  789 + EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize);
  790 + return ret;
  791 +}
  792 +
776 793 /*
777 794 * Truncate a file to the specified size - all we have to do is set the size
778 795 * attribute. We make sure the object exists first.
779 796 */
780 797 void exofs_truncate(struct inode *inode)
781 798 {
782   - struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
783 799 struct exofs_i_info *oi = exofs_i(inode);
784   - struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
785   - struct osd_request *or;
786   - struct osd_attr attr;
787   - loff_t isize = i_size_read(inode);
788   - __be64 newsize;
789 800 int ret;
790 801  
791 802 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
792 803  
793 804  
... ... @@ -795,31 +806,14 @@
795 806 return;
796 807 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
797 808 return;
798   - inode->i_mtime = inode->i_ctime = CURRENT_TIME;
799 809  
800   - nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
801   -
802   - or = osd_start_request(sbi->s_dev, GFP_KERNEL);
803   - if (unlikely(!or)) {
804   - EXOFS_ERR("ERROR: exofs_truncate: osd_start_request failed\n");
805   - goto fail;
806   - }
807   -
808   - osd_req_set_attributes(or, &obj);
809   -
810   - newsize = cpu_to_be64((u64)isize);
811   - attr = g_attr_logical_length;
812   - attr.val_ptr = &newsize;
813   - osd_req_add_set_attr_list(or, &attr, 1);
814   -
815 810 /* if we are about to truncate an object, and it hasn't been
816 811 * created yet, wait
817 812 */
818 813 if (unlikely(wait_obj_created(oi)))
819 814 goto fail;
820 815  
821   - ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
822   - osd_end_request(or);
  816 + ret = _do_truncate(inode);
823 817 if (ret)
824 818 goto fail;
825 819  
826 820  
827 821  
828 822  
829 823  
830 824  
831 825  
832 826  
833 827  
834 828  
835 829  
836 830  
837 831  
838 832  
839 833  
840 834  
... ... @@ -849,66 +843,57 @@
849 843  
850 844 /*
851 845 * Read an inode from the OSD, and return it as is. We also return the size
852   - * attribute in the 'sanity' argument if we got compiled with debugging turned
853   - * on.
  846 + * attribute in the 'obj_size' argument.
854 847 */
855 848 static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
856   - struct exofs_fcb *inode, uint64_t *sanity)
  849 + struct exofs_fcb *inode, uint64_t *obj_size)
857 850 {
858 851 struct exofs_sb_info *sbi = sb->s_fs_info;
859   - struct osd_request *or;
860   - struct osd_attr attr;
861   - struct osd_obj_id obj = {sbi->s_pid,
862   - oi->vfs_inode.i_ino + EXOFS_OBJ_OFF};
  852 + struct osd_attr attrs[2];
  853 + struct exofs_io_state *ios;
863 854 int ret;
864 855  
865   - exofs_make_credential(oi->i_cred, &obj);
866   -
867   - or = osd_start_request(sbi->s_dev, GFP_KERNEL);
868   - if (unlikely(!or)) {
869   - EXOFS_ERR("exofs_get_inode: osd_start_request failed.\n");
870   - return -ENOMEM;
  856 + *obj_size = ~0;
  857 + ret = exofs_get_io_state(sbi, &ios);
  858 + if (unlikely(ret)) {
  859 + EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
  860 + return ret;
871 861 }
872   - osd_req_get_attributes(or, &obj);
873 862  
874   - /* we need the inode attribute */
875   - osd_req_add_get_attr_list(or, &g_attr_inode_data, 1);
  863 + ios->obj.id = exofs_oi_objno(oi);
  864 + exofs_make_credential(oi->i_cred, &ios->obj);
  865 + ios->cred = oi->i_cred;
876 866  
877   -#ifdef EXOFS_DEBUG_OBJ_ISIZE
878   - /* we get the size attributes to do a sanity check */
879   - osd_req_add_get_attr_list(or, &g_attr_logical_length, 1);
880   -#endif
  867 + attrs[0] = g_attr_inode_data;
  868 + attrs[1] = g_attr_logical_length;
  869 + ios->in_attr = attrs;
  870 + ios->in_attr_len = ARRAY_SIZE(attrs);
881 871  
882   - ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
  872 + ret = exofs_sbi_read(ios);
883 873 if (ret)
884 874 goto out;
885 875  
886   - attr = g_attr_inode_data;
887   - ret = extract_attr_from_req(or, &attr);
  876 + ret = extract_attr_from_ios(ios, &attrs[0]);
888 877 if (ret) {
889   - EXOFS_ERR("exofs_get_inode: extract_attr_from_req failed\n");
  878 + EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__);
890 879 goto out;
891 880 }
  881 + WARN_ON(attrs[0].len != EXOFS_INO_ATTR_SIZE);
  882 + memcpy(inode, attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE);
892 883  
893   - WARN_ON(attr.len != EXOFS_INO_ATTR_SIZE);
894   - memcpy(inode, attr.val_ptr, EXOFS_INO_ATTR_SIZE);
895   -
896   -#ifdef EXOFS_DEBUG_OBJ_ISIZE
897   - attr = g_attr_logical_length;
898   - ret = extract_attr_from_req(or, &attr);
  884 + ret = extract_attr_from_ios(ios, &attrs[1]);
899 885 if (ret) {
900   - EXOFS_ERR("ERROR: extract attr from or failed\n");
  886 + EXOFS_ERR("%s: extract_attr of logical_length failed\n",
  887 + __func__);
901 888 goto out;
902 889 }
903   - *sanity = get_unaligned_be64(attr.val_ptr);
904   -#endif
  890 + *obj_size = get_unaligned_be64(attrs[1].val_ptr);
905 891  
906 892 out:
907   - osd_end_request(or);
  893 + exofs_put_io_state(ios);
908 894 return ret;
909 895 }
910 896  
911   -
912 897 static void __oi_init(struct exofs_i_info *oi)
913 898 {
914 899 init_waitqueue_head(&oi->i_wq);
... ... @@ -922,7 +907,7 @@
922 907 struct exofs_i_info *oi;
923 908 struct exofs_fcb fcb;
924 909 struct inode *inode;
925   - uint64_t uninitialized_var(sanity);
  910 + uint64_t obj_size;
926 911 int ret;
927 912  
928 913 inode = iget_locked(sb, ino);
... ... @@ -934,7 +919,7 @@
934 919 __oi_init(oi);
935 920  
936 921 /* read the inode from the osd */
937   - ret = exofs_get_inode(sb, oi, &fcb, &sanity);
  922 + ret = exofs_get_inode(sb, oi, &fcb, &obj_size);
938 923 if (ret)
939 924 goto bad_inode;
940 925  
941 926  
942 927  
... ... @@ -955,13 +940,12 @@
955 940 inode->i_blkbits = EXOFS_BLKSHIFT;
956 941 inode->i_generation = le32_to_cpu(fcb.i_generation);
957 942  
958   -#ifdef EXOFS_DEBUG_OBJ_ISIZE
959   - if ((inode->i_size != sanity) &&
  943 + if ((inode->i_size != obj_size) &&
960 944 (!exofs_inode_is_fast_symlink(inode))) {
961 945 EXOFS_ERR("WARNING: Size of inode=%llu != object=%llu\n",
962   - inode->i_size, _LLU(sanity));
  946 + inode->i_size, _LLU(obj_size));
  947 + /* FIXME: call exofs_inode_recovery() */
963 948 }
964   -#endif
965 949  
966 950 oi->i_dir_start_lookup = 0;
967 951  
968 952  
969 953  
970 954  
... ... @@ -1027,24 +1011,31 @@
1027 1011 * set the obj_created flag so that other methods know that the object exists on
1028 1012 * the OSD.
1029 1013 */
1030   -static void create_done(struct osd_request *or, void *p)
  1014 +static void create_done(struct exofs_io_state *ios, void *p)
1031 1015 {
1032 1016 struct inode *inode = p;
1033 1017 struct exofs_i_info *oi = exofs_i(inode);
1034 1018 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
1035 1019 int ret;
1036 1020  
1037   - ret = exofs_check_ok(or);
1038   - osd_end_request(or);
  1021 + ret = exofs_check_io(ios, NULL);
  1022 + exofs_put_io_state(ios);
  1023 +
1039 1024 atomic_dec(&sbi->s_curr_pending);
1040 1025  
1041 1026 if (unlikely(ret)) {
1042 1027 EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx",
1043   - _LLU(sbi->s_pid), _LLU(inode->i_ino + EXOFS_OBJ_OFF));
1044   - make_bad_inode(inode);
1045   - } else
1046   - set_obj_created(oi);
  1028 + _LLU(exofs_oi_objno(oi)), _LLU(sbi->s_pid));
  1029 + /*TODO: When FS is corrupted creation can fail, object already
  1030 + * exist. Get rid of this asynchronous creation, if exist
  1031 + * increment the obj counter and try the next object. Until we
  1032 + * succeed. All these dangling objects will be made into lost
  1033 + * files by chkfs.exofs
  1034 + */
  1035 + }
1047 1036  
  1037 + set_obj_created(oi);
  1038 +
1048 1039 atomic_dec(&inode->i_count);
1049 1040 wake_up(&oi->i_wq);
1050 1041 }
... ... @@ -1058,8 +1049,7 @@
1058 1049 struct inode *inode;
1059 1050 struct exofs_i_info *oi;
1060 1051 struct exofs_sb_info *sbi;
1061   - struct osd_request *or;
1062   - struct osd_obj_id obj;
  1052 + struct exofs_io_state *ios;
1063 1053 int ret;
1064 1054  
1065 1055 sb = dir->i_sb;
1066 1056  
1067 1057  
1068 1058  
... ... @@ -1096,28 +1086,28 @@
1096 1086  
1097 1087 mark_inode_dirty(inode);
1098 1088  
1099   - obj.partition = sbi->s_pid;
1100   - obj.id = inode->i_ino + EXOFS_OBJ_OFF;
1101   - exofs_make_credential(oi->i_cred, &obj);
1102   -
1103   - or = osd_start_request(sbi->s_dev, GFP_KERNEL);
1104   - if (unlikely(!or)) {
1105   - EXOFS_ERR("exofs_new_inode: osd_start_request failed\n");
1106   - return ERR_PTR(-ENOMEM);
  1089 + ret = exofs_get_io_state(sbi, &ios);
  1090 + if (unlikely(ret)) {
  1091 + EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n");
  1092 + return ERR_PTR(ret);
1107 1093 }
1108 1094  
1109   - osd_req_create_object(or, &obj);
  1095 + ios->obj.id = exofs_oi_objno(oi);
  1096 + exofs_make_credential(oi->i_cred, &ios->obj);
1110 1097  
1111 1098 /* increment the refcount so that the inode will still be around when we
1112 1099 * reach the callback
1113 1100 */
1114 1101 atomic_inc(&inode->i_count);
1115 1102  
1116   - ret = exofs_async_op(or, create_done, inode, oi->i_cred);
  1103 + ios->done = create_done;
  1104 + ios->private = inode;
  1105 + ios->cred = oi->i_cred;
  1106 + ret = exofs_sbi_create(ios);
1117 1107 if (ret) {
1118 1108 atomic_dec(&inode->i_count);
1119   - osd_end_request(or);
1120   - return ERR_PTR(-EIO);
  1109 + exofs_put_io_state(ios);
  1110 + return ERR_PTR(ret);
1121 1111 }
1122 1112 atomic_inc(&sbi->s_curr_pending);
1123 1113  
1124 1114  
... ... @@ -1135,11 +1125,11 @@
1135 1125 /*
1136 1126 * Callback function from exofs_update_inode().
1137 1127 */
1138   -static void updatei_done(struct osd_request *or, void *p)
  1128 +static void updatei_done(struct exofs_io_state *ios, void *p)
1139 1129 {
1140 1130 struct updatei_args *args = p;
1141 1131  
1142   - osd_end_request(or);
  1132 + exofs_put_io_state(ios);
1143 1133  
1144 1134 atomic_dec(&args->sbi->s_curr_pending);
1145 1135  
... ... @@ -1155,8 +1145,7 @@
1155 1145 struct exofs_i_info *oi = exofs_i(inode);
1156 1146 struct super_block *sb = inode->i_sb;
1157 1147 struct exofs_sb_info *sbi = sb->s_fs_info;
1158   - struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
1159   - struct osd_request *or;
  1148 + struct exofs_io_state *ios;
1160 1149 struct osd_attr attr;
1161 1150 struct exofs_fcb *fcb;
1162 1151 struct updatei_args *args;
1163 1152  
1164 1153  
... ... @@ -1193,18 +1182,16 @@
1193 1182 } else
1194 1183 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
1195 1184  
1196   - or = osd_start_request(sbi->s_dev, GFP_KERNEL);
1197   - if (unlikely(!or)) {
1198   - EXOFS_ERR("exofs_update_inode: osd_start_request failed.\n");
1199   - ret = -ENOMEM;
  1185 + ret = exofs_get_io_state(sbi, &ios);
  1186 + if (unlikely(ret)) {
  1187 + EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
1200 1188 goto free_args;
1201 1189 }
1202 1190  
1203   - osd_req_set_attributes(or, &obj);
1204   -
1205 1191 attr = g_attr_inode_data;
1206 1192 attr.val_ptr = fcb;
1207   - osd_req_add_set_attr_list(or, &attr, 1);
  1193 + ios->out_attr_len = 1;
  1194 + ios->out_attr = &attr;
1208 1195  
1209 1196 if (!obj_created(oi)) {
1210 1197 EXOFS_DBGMSG("!obj_created\n");
1211 1198  
1212 1199  
1213 1200  
... ... @@ -1213,22 +1200,19 @@
1213 1200 EXOFS_DBGMSG("wait_event done\n");
1214 1201 }
1215 1202  
1216   - if (do_sync) {
1217   - ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
1218   - osd_end_request(or);
1219   - goto free_args;
1220   - } else {
  1203 + if (!do_sync) {
1221 1204 args->sbi = sbi;
  1205 + ios->done = updatei_done;
  1206 + ios->private = args;
  1207 + }
1222 1208  
1223   - ret = exofs_async_op(or, updatei_done, args, oi->i_cred);
1224   - if (ret) {
1225   - osd_end_request(or);
1226   - goto free_args;
1227   - }
  1209 + ret = exofs_oi_write(oi, ios);
  1210 + if (!do_sync && !ret) {
1228 1211 atomic_inc(&sbi->s_curr_pending);
1229 1212 goto out; /* deallocation in updatei_done */
1230 1213 }
1231 1214  
  1215 + exofs_put_io_state(ios);
1232 1216 free_args:
1233 1217 kfree(args);
1234 1218 out:
1235 1219  
... ... @@ -1245,11 +1229,12 @@
1245 1229 * Callback function from exofs_delete_inode() - don't have much cleaning up to
1246 1230 * do.
1247 1231 */
1248   -static void delete_done(struct osd_request *or, void *p)
  1232 +static void delete_done(struct exofs_io_state *ios, void *p)
1249 1233 {
1250   - struct exofs_sb_info *sbi;
1251   - osd_end_request(or);
1252   - sbi = p;
  1234 + struct exofs_sb_info *sbi = p;
  1235 +
  1236 + exofs_put_io_state(ios);
  1237 +
1253 1238 atomic_dec(&sbi->s_curr_pending);
1254 1239 }
1255 1240  
... ... @@ -1263,8 +1248,7 @@
1263 1248 struct exofs_i_info *oi = exofs_i(inode);
1264 1249 struct super_block *sb = inode->i_sb;
1265 1250 struct exofs_sb_info *sbi = sb->s_fs_info;
1266   - struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
1267   - struct osd_request *or;
  1251 + struct exofs_io_state *ios;
1268 1252 int ret;
1269 1253  
1270 1254 truncate_inode_pages(&inode->i_data, 0);
1271 1255  
1272 1256  
1273 1257  
... ... @@ -1281,25 +1265,26 @@
1281 1265  
1282 1266 clear_inode(inode);
1283 1267  
1284   - or = osd_start_request(sbi->s_dev, GFP_KERNEL);
1285   - if (unlikely(!or)) {
1286   - EXOFS_ERR("exofs_delete_inode: osd_start_request failed\n");
  1268 + ret = exofs_get_io_state(sbi, &ios);
  1269 + if (unlikely(ret)) {
  1270 + EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
1287 1271 return;
1288 1272 }
1289 1273  
1290   - osd_req_remove_object(or, &obj);
1291   -
1292 1274 /* if we are deleting an obj that hasn't been created yet, wait */
1293 1275 if (!obj_created(oi)) {
1294 1276 BUG_ON(!obj_2bcreated(oi));
1295 1277 wait_event(oi->i_wq, obj_created(oi));
1296 1278 }
1297 1279  
1298   - ret = exofs_async_op(or, delete_done, sbi, oi->i_cred);
  1280 + ios->obj.id = exofs_oi_objno(oi);
  1281 + ios->done = delete_done;
  1282 + ios->private = sbi;
  1283 + ios->cred = oi->i_cred;
  1284 + ret = exofs_sbi_remove(ios);
1299 1285 if (ret) {
1300   - EXOFS_ERR(
1301   - "ERROR: @exofs_delete_inode exofs_async_op failed\n");
1302   - osd_end_request(or);
  1286 + EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__);
  1287 + exofs_put_io_state(ios);
1303 1288 return;
1304 1289 }
1305 1290 atomic_inc(&sbi->s_curr_pending);
... ... @@ -23,96 +23,336 @@
23 23 */
24 24  
25 25 #include <scsi/scsi_device.h>
26   -#include <scsi/osd_sense.h>
27 26  
28 27 #include "exofs.h"
29 28  
/* Initialize @cred_a with a no-security, all-capabilities credential for
 * @obj (per the osd_sec_init_nosec_doall_caps helper's naming; the two
 * boolean flags' exact meaning is defined by that helper).
 */
void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
{
	osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
}
34 33  
35   - if (ret) { /* translate to Linux codes */
36   - if (osi.additional_code == scsi_invalid_field_in_cdb) {
37   - if (osi.cdb_field_offset == OSD_CFO_STARTING_BYTE)
38   - ret = -EFAULT;
39   - if (osi.cdb_field_offset == OSD_CFO_OBJECT_ID)
40   - ret = -ENOENT;
41   - else
42   - ret = -EINVAL;
43   - } else if (osi.additional_code == osd_quota_error)
44   - ret = -ENOSPC;
45   - else
46   - ret = -EIO;
  34 +int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
  35 + u64 offset, void *p, unsigned length)
  36 +{
  37 + struct osd_request *or = osd_start_request(od, GFP_KERNEL);
  38 +/* struct osd_sense_info osi = {.key = 0};*/
  39 + int ret;
  40 +
  41 + if (unlikely(!or)) {
  42 + EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__);
  43 + return -ENOMEM;
47 44 }
  45 + ret = osd_req_read_kern(or, obj, offset, p, length);
  46 + if (unlikely(ret)) {
  47 + EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
  48 + goto out;
  49 + }
48 50  
49   - /* FIXME: should be include in osd_sense_info */
50   - if (in_resid)
51   - *in_resid = or->in.req ? or->in.req->resid_len : 0;
  51 + ret = osd_finalize_request(or, 0, cred, NULL);
  52 + if (unlikely(ret)) {
  53 + EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret);
  54 + goto out;
  55 + }
52 56  
53   - if (out_resid)
54   - *out_resid = or->out.req ? or->out.req->resid_len : 0;
  57 + ret = osd_execute_request(or);
  58 + if (unlikely(ret))
  59 + EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
  60 + /* osd_req_decode_sense(or, ret); */
55 61  
  62 +out:
  63 + osd_end_request(or);
56 64 return ret;
57 65 }
58 66  
59   -void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
  67 +int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** pios)
60 68 {
61   - osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
  69 + struct exofs_io_state *ios;
  70 +
  71 + /*TODO: Maybe use kmem_cach per sbi of size
  72 + * exofs_io_state_size(sbi->s_numdevs)
  73 + */
  74 + ios = kzalloc(exofs_io_state_size(1), GFP_KERNEL);
  75 + if (unlikely(!ios)) {
  76 + *pios = NULL;
  77 + return -ENOMEM;
  78 + }
  79 +
  80 + ios->sbi = sbi;
  81 + ios->obj.partition = sbi->s_pid;
  82 + *pios = ios;
  83 + return 0;
62 84 }
63 85  
64   -/*
65   - * Perform a synchronous OSD operation.
66   - */
67   -int exofs_sync_op(struct osd_request *or, int timeout, uint8_t *credential)
  86 +void exofs_put_io_state(struct exofs_io_state *ios)
68 87 {
69   - int ret;
  88 + if (ios) {
  89 + unsigned i;
70 90  
71   - or->timeout = timeout;
72   - ret = osd_finalize_request(or, 0, credential, NULL);
73   - if (ret) {
74   - EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret);
75   - return ret;
  91 + for (i = 0; i < ios->numdevs; i++) {
  92 + struct exofs_per_dev_state *per_dev = &ios->per_dev[i];
  93 +
  94 + if (per_dev->or)
  95 + osd_end_request(per_dev->or);
  96 + if (per_dev->bio)
  97 + bio_put(per_dev->bio);
  98 + }
  99 +
  100 + kfree(ios);
76 101 }
  102 +}
77 103  
78   - ret = osd_execute_request(or);
/* Completion callback installed for synchronous execution: @p is the
 * on-stack completion the submitter is waiting on. */
static void _sync_done(struct exofs_io_state *ios, void *p)
{
	complete((struct completion *)p);
}
  110 +
/* kref release function: runs when the last in-flight per-device request
 * has completed; fire the io_state's completion callback. */
static void _last_io(struct kref *kref)
{
	struct exofs_io_state *ios = container_of(
		kref, struct exofs_io_state, kref);

	ios->done(ios, ios->private);
}
  118 +
/* Per-request async completion: drop one reference on the owning
 * io_state; the final put triggers _last_io(). */
static void _done_io(struct osd_request *or, void *p)
{
	struct exofs_io_state *ios = p;

	kref_put(&ios->kref, _last_io);
}
  125 +
/*
 * Finalize and submit all per-device requests of @ios.
 *
 * If ios->done is NULL the call is synchronous: a local completion is
 * installed, we wait for all requests, and return the accumulated error
 * from exofs_check_io().  Otherwise 0 is returned once the requests are
 * in flight and ios->done(ios, ios->private) runs on completion.
 *
 * On a finalize failure the error is returned immediately; the caller is
 * expected to release everything via exofs_put_io_state().
 */
static int exofs_io_execute(struct exofs_io_state *ios)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	bool sync = (ios->done == NULL);
	int i, ret;

	if (sync) {
		ios->done = _sync_done;
		ios->private = &wait;
	}

	/* First pass: finalize every request before any is executed, so a
	 * failure here can bail out with nothing submitted. */
	for (i = 0; i < ios->numdevs; i++) {
		struct osd_request *or = ios->per_dev[i].or;
		if (unlikely(!or))
			continue;

		ret = osd_finalize_request(or, 0, ios->cred, NULL);
		if (unlikely(ret)) {
			EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n",
				     ret);
			return ret;
		}
	}

	/* One reference per submitted request plus the initial reference
	 * from kref_init(); ios->done fires only when all are dropped. */
	kref_init(&ios->kref);

	for (i = 0; i < ios->numdevs; i++) {
		struct osd_request *or = ios->per_dev[i].or;
		if (unlikely(!or))
			continue;

		kref_get(&ios->kref);
		osd_execute_request_async(or, _done_io, ios);
	}

	/* Drop the submission reference taken by kref_init(). */
	kref_put(&ios->kref, _last_io);
	ret = 0;

	if (sync) {
		wait_for_completion(&wait);
		ret = exofs_check_io(ios, NULL);
	}
	return ret;
}
85 170  
86   -/*
87   - * Perform an asynchronous OSD operation.
88   - */
89   -int exofs_async_op(struct osd_request *or, osd_req_done_fn *async_done,
90   - void *caller_context, u8 *cred)
  171 +int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
91 172 {
92   - int ret;
  173 + enum osd_err_priority acumulated_osd_err = 0;
  174 + int acumulated_lin_err = 0;
  175 + int i;
93 176  
94   - ret = osd_finalize_request(or, 0, cred, NULL);
95   - if (ret) {
96   - EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret);
97   - return ret;
  177 + for (i = 0; i < ios->numdevs; i++) {
  178 + struct osd_sense_info osi;
  179 + int ret = osd_req_decode_sense(ios->per_dev[i].or, &osi);
  180 +
  181 + if (likely(!ret))
  182 + continue;
  183 +
  184 + if (unlikely(ret == -EFAULT)) {
  185 + EXOFS_DBGMSG("%s: EFAULT Need page clear\n", __func__);
  186 + /*FIXME: All the pages in this device range should:
  187 + * clear_highpage(page);
  188 + */
  189 + }
  190 +
  191 + if (osi.osd_err_pri >= acumulated_osd_err) {
  192 + acumulated_osd_err = osi.osd_err_pri;
  193 + acumulated_lin_err = ret;
  194 + }
98 195 }
99 196  
100   - ret = osd_execute_request_async(or, async_done, caller_context);
  197 + /* TODO: raid specific residual calculations */
  198 + if (resid) {
  199 + if (likely(!acumulated_lin_err))
  200 + *resid = 0;
  201 + else
  202 + *resid = ios->length;
  203 + }
101 204  
102   - if (ret)
103   - EXOFS_DBGMSG("osd_execute_request_async() => %d\n", ret);
  205 + return acumulated_lin_err;
  206 +}
  207 +
  208 +int exofs_sbi_create(struct exofs_io_state *ios)
  209 +{
  210 + int i, ret;
  211 +
  212 + for (i = 0; i < 1; i++) {
  213 + struct osd_request *or;
  214 +
  215 + or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL);
  216 + if (unlikely(!or)) {
  217 + EXOFS_ERR("%s: osd_start_request failed\n", __func__);
  218 + ret = -ENOMEM;
  219 + goto out;
  220 + }
  221 + ios->per_dev[i].or = or;
  222 + ios->numdevs++;
  223 +
  224 + osd_req_create_object(or, &ios->obj);
  225 + }
  226 + ret = exofs_io_execute(ios);
  227 +
  228 +out:
104 229 return ret;
105 230 }
106 231  
107   -int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
  232 +int exofs_sbi_remove(struct exofs_io_state *ios)
108 233 {
  234 + int i, ret;
  235 +
  236 + for (i = 0; i < 1; i++) {
  237 + struct osd_request *or;
  238 +
  239 + or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL);
  240 + if (unlikely(!or)) {
  241 + EXOFS_ERR("%s: osd_start_request failed\n", __func__);
  242 + ret = -ENOMEM;
  243 + goto out;
  244 + }
  245 + ios->per_dev[i].or = or;
  246 + ios->numdevs++;
  247 +
  248 + osd_req_remove_object(or, &ios->obj);
  249 + }
  250 + ret = exofs_io_execute(ios);
  251 +
  252 +out:
  253 + return ret;
  254 +}
  255 +
  256 +int exofs_sbi_write(struct exofs_io_state *ios)
  257 +{
  258 + int i, ret;
  259 +
  260 + for (i = 0; i < 1; i++) {
  261 + struct osd_request *or;
  262 +
  263 + or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL);
  264 + if (unlikely(!or)) {
  265 + EXOFS_ERR("%s: osd_start_request failed\n", __func__);
  266 + ret = -ENOMEM;
  267 + goto out;
  268 + }
  269 + ios->per_dev[i].or = or;
  270 + ios->numdevs++;
  271 +
  272 + if (ios->bio) {
  273 + struct bio *bio;
  274 +
  275 + bio = ios->bio;
  276 +
  277 + osd_req_write(or, &ios->obj, ios->offset, bio,
  278 + ios->length);
  279 +/* EXOFS_DBGMSG("write sync=%d\n", sync);*/
  280 + } else if (ios->kern_buff) {
  281 + osd_req_write_kern(or, &ios->obj, ios->offset,
  282 + ios->kern_buff, ios->length);
  283 +/* EXOFS_DBGMSG("write_kern sync=%d\n", sync);*/
  284 + } else {
  285 + osd_req_set_attributes(or, &ios->obj);
  286 +/* EXOFS_DBGMSG("set_attributes sync=%d\n", sync);*/
  287 + }
  288 +
  289 + if (ios->out_attr)
  290 + osd_req_add_set_attr_list(or, ios->out_attr,
  291 + ios->out_attr_len);
  292 +
  293 + if (ios->in_attr)
  294 + osd_req_add_get_attr_list(or, ios->in_attr,
  295 + ios->in_attr_len);
  296 + }
  297 + ret = exofs_io_execute(ios);
  298 +
  299 +out:
  300 + return ret;
  301 +}
  302 +
  303 +int exofs_sbi_read(struct exofs_io_state *ios)
  304 +{
  305 + int i, ret;
  306 +
  307 + for (i = 0; i < 1; i++) {
  308 + struct osd_request *or;
  309 +
  310 + or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL);
  311 + if (unlikely(!or)) {
  312 + EXOFS_ERR("%s: osd_start_request failed\n", __func__);
  313 + ret = -ENOMEM;
  314 + goto out;
  315 + }
  316 + ios->per_dev[i].or = or;
  317 + ios->numdevs++;
  318 +
  319 + if (ios->bio) {
  320 + osd_req_read(or, &ios->obj, ios->offset, ios->bio,
  321 + ios->length);
  322 +/* EXOFS_DBGMSG("read sync=%d\n", sync);*/
  323 + } else if (ios->kern_buff) {
  324 + osd_req_read_kern(or, &ios->obj, ios->offset,
  325 + ios->kern_buff, ios->length);
  326 +/* EXOFS_DBGMSG("read_kern sync=%d\n", sync);*/
  327 + } else {
  328 + osd_req_get_attributes(or, &ios->obj);
  329 +/* EXOFS_DBGMSG("get_attributes sync=%d\n", sync);*/
  330 + }
  331 +
  332 + if (ios->out_attr)
  333 + osd_req_add_set_attr_list(or, ios->out_attr,
  334 + ios->out_attr_len);
  335 +
  336 + if (ios->in_attr)
  337 + osd_req_add_get_attr_list(or, ios->in_attr,
  338 + ios->in_attr_len);
  339 + }
  340 + ret = exofs_io_execute(ios);
  341 +
  342 +out:
  343 + return ret;
  344 +}
  345 +
  346 +int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr)
  347 +{
109 348 struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
110 349 void *iter = NULL;
111 350 int nelem;
112 351  
113 352 do {
114 353 nelem = 1;
115   - osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter);
  354 + osd_req_decode_get_attr_list(ios->per_dev[0].or,
  355 + &cur_attr, &nelem, &iter);
116 356 if ((cur_attr.attr_page == attr->attr_page) &&
117 357 (cur_attr.attr_id == attr->attr_id)) {
118 358 attr->len = cur_attr.len;
... ... @@ -122,5 +362,45 @@
122 362 } while (iter);
123 363  
124 364 return -EIO;
  365 +}
  366 +
  367 +int exofs_oi_truncate(struct exofs_i_info *oi, u64 size)
  368 +{
  369 + struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info;
  370 + struct exofs_io_state *ios;
  371 + struct osd_attr attr;
  372 + __be64 newsize;
  373 + int i, ret;
  374 +
  375 + if (exofs_get_io_state(sbi, &ios))
  376 + return -ENOMEM;
  377 +
  378 + ios->obj.id = exofs_oi_objno(oi);
  379 + ios->cred = oi->i_cred;
  380 +
  381 + newsize = cpu_to_be64(size);
  382 + attr = g_attr_logical_length;
  383 + attr.val_ptr = &newsize;
  384 +
  385 + for (i = 0; i < 1; i++) {
  386 + struct osd_request *or;
  387 +
  388 + or = osd_start_request(sbi->s_dev, GFP_KERNEL);
  389 + if (unlikely(!or)) {
  390 + EXOFS_ERR("%s: osd_start_request failed\n", __func__);
  391 + ret = -ENOMEM;
  392 + goto out;
  393 + }
  394 + ios->per_dev[i].or = or;
  395 + ios->numdevs++;
  396 +
  397 + osd_req_set_attributes(or, &ios->obj);
  398 + osd_req_add_set_attr_list(or, &attr, 1);
  399 + }
  400 + ret = exofs_io_execute(ios);
  401 +
  402 +out:
  403 + exofs_put_io_state(ios);
  404 + return ret;
125 405 }
... ... @@ -203,49 +203,40 @@
203 203 {
204 204 struct exofs_sb_info *sbi;
205 205 struct exofs_fscb *fscb;
206   - struct osd_request *or;
207   - struct osd_obj_id obj;
  206 + struct exofs_io_state *ios;
208 207 int ret = -ENOMEM;
209 208  
210   - fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL);
211   - if (!fscb) {
212   - EXOFS_ERR("exofs_write_super: memory allocation failed.\n");
213   - return -ENOMEM;
214   - }
215   -
216 209 lock_super(sb);
217 210 sbi = sb->s_fs_info;
  211 + fscb = &sbi->s_fscb;
  212 +
  213 + ret = exofs_get_io_state(sbi, &ios);
  214 + if (ret)
  215 + goto out;
  216 +
  217 + ios->length = sizeof(*fscb);
  218 + memset(fscb, 0, ios->length);
218 219 fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
219 220 fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
220 221 fscb->s_magic = cpu_to_le16(sb->s_magic);
221 222 fscb->s_newfs = 0;
222 223  
223   - or = osd_start_request(sbi->s_dev, GFP_KERNEL);
224   - if (unlikely(!or)) {
225   - EXOFS_ERR("exofs_write_super: osd_start_request failed.\n");
226   - goto out;
227   - }
  224 + ios->obj.id = EXOFS_SUPER_ID;
  225 + ios->offset = 0;
  226 + ios->kern_buff = fscb;
  227 + ios->cred = sbi->s_cred;
228 228  
229   - obj.partition = sbi->s_pid;
230   - obj.id = EXOFS_SUPER_ID;
231   - ret = osd_req_write_kern(or, &obj, 0, fscb, sizeof(*fscb));
  229 + ret = exofs_sbi_write(ios);
232 230 if (unlikely(ret)) {
233   - EXOFS_ERR("exofs_write_super: osd_req_write_kern failed.\n");
  231 + EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
234 232 goto out;
235 233 }
236   -
237   - ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred);
238   - if (unlikely(ret)) {
239   - EXOFS_ERR("exofs_write_super: exofs_sync_op failed.\n");
240   - goto out;
241   - }
242 234 sb->s_dirt = 0;
243 235  
244 236 out:
245   - if (or)
246   - osd_end_request(or);
  237 + EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
  238 + exofs_put_io_state(ios);
247 239 unlock_super(sb);
248   - kfree(fscb);
249 240 return ret;
250 241 }
251 242  
252 243  
253 244  
254 245  
255 246  
... ... @@ -302,24 +293,23 @@
302 293 struct inode *root;
303 294 struct exofs_mountopt *opts = data;
304 295 struct exofs_sb_info *sbi; /*extended info */
  296 + struct osd_dev *od; /* Master device */
305 297 struct exofs_fscb fscb; /*on-disk superblock info */
306   - struct osd_request *or = NULL;
307 298 struct osd_obj_id obj;
308 299 int ret;
309 300  
310 301 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
311 302 if (!sbi)
312 303 return -ENOMEM;
313   - sb->s_fs_info = sbi;
314 304  
315 305 /* use mount options to fill superblock */
316   - sbi->s_dev = osduld_path_lookup(opts->dev_name);
317   - if (IS_ERR(sbi->s_dev)) {
318   - ret = PTR_ERR(sbi->s_dev);
319   - sbi->s_dev = NULL;
  306 + od = osduld_path_lookup(opts->dev_name);
  307 + if (IS_ERR(od)) {
  308 + ret = PTR_ERR(od);
320 309 goto free_sbi;
321 310 }
322 311  
  312 + sbi->s_dev = od;
323 313 sbi->s_pid = opts->pid;
324 314 sbi->s_timeout = opts->timeout;
325 315  
326 316  
327 317  
328 318  
... ... @@ -333,36 +323,14 @@
333 323 sb->s_bdev = NULL;
334 324 sb->s_dev = 0;
335 325  
336   - /* read data from on-disk superblock object */
337 326 obj.partition = sbi->s_pid;
338 327 obj.id = EXOFS_SUPER_ID;
339 328 exofs_make_credential(sbi->s_cred, &obj);
340 329  
341   - or = osd_start_request(sbi->s_dev, GFP_KERNEL);
342   - if (unlikely(!or)) {
343   - if (!silent)
344   - EXOFS_ERR(
345   - "exofs_fill_super: osd_start_request failed.\n");
346   - ret = -ENOMEM;
  330 + ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb));
  331 + if (unlikely(ret))
347 332 goto free_sbi;
348   - }
349   - ret = osd_req_read_kern(or, &obj, 0, &fscb, sizeof(fscb));
350   - if (unlikely(ret)) {
351   - if (!silent)
352   - EXOFS_ERR(
353   - "exofs_fill_super: osd_req_read_kern failed.\n");
354   - ret = -ENOMEM;
355   - goto free_sbi;
356   - }
357 333  
358   - ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred);
359   - if (unlikely(ret)) {
360   - if (!silent)
361   - EXOFS_ERR("exofs_fill_super: exofs_sync_op failed.\n");
362   - ret = -EIO;
363   - goto free_sbi;
364   - }
365   -
366 334 sb->s_magic = le16_to_cpu(fscb.s_magic);
367 335 sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
368 336 sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles);
... ... @@ -380,6 +348,7 @@
380 348 spin_lock_init(&sbi->s_next_gen_lock);
381 349  
382 350 /* set up operation vectors */
  351 + sb->s_fs_info = sbi;
383 352 sb->s_op = &exofs_sops;
384 353 sb->s_export_op = &exofs_export_ops;
385 354 root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF);
386 355  
387 356  
... ... @@ -406,16 +375,14 @@
406 375 }
407 376  
408 377 _exofs_print_device("Mounting", opts->dev_name, sbi->s_dev, sbi->s_pid);
409   - ret = 0;
410   -out:
411   - if (or)
412   - osd_end_request(or);
413   - return ret;
  378 + return 0;
414 379  
415 380 free_sbi:
  381 + EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
  382 + opts->dev_name, sbi->s_pid, ret);
416 383 osduld_put_device(sbi->s_dev); /* NULL safe */
417 384 kfree(sbi);
418   - goto out;
  385 + return ret;
419 386 }
420 387  
421 388 /*
... ... @@ -444,7 +411,7 @@
444 411 {
445 412 struct super_block *sb = dentry->d_sb;
446 413 struct exofs_sb_info *sbi = sb->s_fs_info;
447   - struct osd_obj_id obj = {sbi->s_pid, 0};
  414 + struct exofs_io_state *ios;
448 415 struct osd_attr attrs[] = {
449 416 ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS,
450 417 OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)),
451 418  
452 419  
453 420  
... ... @@ -453,26 +420,25 @@
453 420 };
454 421 uint64_t capacity = ULLONG_MAX;
455 422 uint64_t used = ULLONG_MAX;
456   - struct osd_request *or;
457 423 uint8_t cred_a[OSD_CAP_LEN];
458 424 int ret;
459 425  
460   - /* get used/capacity attributes */
461   - exofs_make_credential(cred_a, &obj);
462   -
463   - or = osd_start_request(sbi->s_dev, GFP_KERNEL);
464   - if (unlikely(!or)) {
465   - EXOFS_DBGMSG("exofs_statfs: osd_start_request failed.\n");
466   - return -ENOMEM;
  426 + ret = exofs_get_io_state(sbi, &ios);
  427 + if (ret) {
  428 + EXOFS_DBGMSG("exofs_get_io_state failed.\n");
  429 + return ret;
467 430 }
468 431  
469   - osd_req_get_attributes(or, &obj);
470   - osd_req_add_get_attr_list(or, attrs, ARRAY_SIZE(attrs));
471   - ret = exofs_sync_op(or, sbi->s_timeout, cred_a);
  432 + exofs_make_credential(cred_a, &ios->obj);
  433 + ios->cred = sbi->s_cred;
  434 + ios->in_attr = attrs;
  435 + ios->in_attr_len = ARRAY_SIZE(attrs);
  436 +
  437 + ret = exofs_sbi_read(ios);
472 438 if (unlikely(ret))
473 439 goto out;
474 440  
475   - ret = extract_attr_from_req(or, &attrs[0]);
  441 + ret = extract_attr_from_ios(ios, &attrs[0]);
476 442 if (likely(!ret)) {
477 443 capacity = get_unaligned_be64(attrs[0].val_ptr);
478 444 if (unlikely(!capacity))
... ... @@ -480,7 +446,7 @@
480 446 } else
481 447 EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n");
482 448  
483   - ret = extract_attr_from_req(or, &attrs[1]);
  449 + ret = extract_attr_from_ios(ios, &attrs[1]);
484 450 if (likely(!ret))
485 451 used = get_unaligned_be64(attrs[1].val_ptr);
486 452 else
... ... @@ -497,7 +463,7 @@
497 463 buf->f_namelen = EXOFS_NAME_LEN;
498 464  
499 465 out:
500   - osd_end_request(or);
  466 + exofs_put_io_state(ios);
501 467 return ret;
502 468 }
503 469