Commit a5eba3f66f812cbc076a1170b3f888ad63f850b2
Exists in
master
and in
7 other branches
Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd
* 'for-linus' of git://git.open-osd.org/linux-open-osd:
  exofs: Multi-device mirror support
  exofs: Move all operations to an io_engine
  exofs: move osd.c to ios.c
  exofs: statfs blocks is sectors not FS blocks
  exofs: Prints on mount and unmout
  exofs: refactor exofs_i_info initialization into common helper
  exofs: dbg-print less
  exofs: More sane debug print
  trivial: some small fixes in exofs documentation
Showing 10 changed files Side-by-side Diff
Documentation/filesystems/00-INDEX
... | ... | @@ -36,6 +36,8 @@ |
36 | 36 | - info about directory notification in Linux. |
37 | 37 | ecryptfs.txt |
38 | 38 | - docs on eCryptfs: stacked cryptographic filesystem for Linux. |
39 | +exofs.txt | |
40 | + - info, usage, mount options, design about EXOFS. | |
39 | 41 | ext2.txt |
40 | 42 | - info, mount options and specifications for the Ext2 filesystem. |
41 | 43 | ext3.txt |
Documentation/filesystems/exofs.txt
... | ... | @@ -60,13 +60,13 @@ |
60 | 60 | |
61 | 61 | mkfs.exofs --pid=65536 --format /dev/osd0 |
62 | 62 | |
63 | - The --format is optional if not specified no OSD_FORMAT will be | |
64 | - preformed and a clean file system will be created in the specified pid, | |
63 | + The --format is optional. If not specified, no OSD_FORMAT will be | |
64 | + performed and a clean file system will be created in the specified pid, | |
65 | 65 | in the available space of the target. (Use --format=size_in_meg to limit |
66 | 66 | the total LUN space available) |
67 | 67 | |
68 | - If pid already exist it will be deleted and a new one will be created in it's | |
69 | - place. Be careful. | |
68 | + If pid already exists, it will be deleted and a new one will be created in | |
69 | + its place. Be careful. | |
70 | 70 | |
71 | 71 | An exofs lives inside a single OSD partition. You can create multiple exofs |
72 | 72 | filesystems on the same device using multiple pids. |
... | ... | @@ -81,7 +81,7 @@ |
81 | 81 | |
82 | 82 | 7. For reference (See do-exofs example script): |
83 | 83 | do-exofs start - an example of how to perform the above steps. |
84 | - do-exofs stop - an example of how to unmount the file system. | |
84 | + do-exofs stop - an example of how to unmount the file system. | |
85 | 85 | do-exofs format - an example of how to format and mkfs a new exofs. |
86 | 86 | |
87 | 87 | 8. Extra compilation flags (uncomment in fs/exofs/Kbuild): |
... | ... | @@ -104,8 +104,8 @@ |
104 | 104 | exofs specific options: Options are separated by commas (,) |
105 | 105 | pid=<integer> - The partition number to mount/create as |
106 | 106 | container of the filesystem. |
107 | - This option is mandatory | |
108 | - to=<integer> - Timeout in ticks for a single command | |
107 | + This option is mandatory. | |
108 | + to=<integer> - Timeout in ticks for a single command. | |
109 | 109 | default is (60 * HZ) [for debugging only] |
110 | 110 | |
111 | 111 | =============================================================================== |
... | ... | @@ -116,7 +116,7 @@ |
116 | 116 | with a special ID (defined in common.h). |
117 | 117 | Information included in the file system control block is used to fill the |
118 | 118 | in-memory superblock structure at mount time. This object is created before |
119 | - the file system is used by mkexofs.c It contains information such as: | |
119 | + the file system is used by mkexofs.c. It contains information such as: | |
120 | 120 | - The file system's magic number |
121 | 121 | - The next inode number to be allocated |
122 | 122 | |
... | ... | @@ -134,8 +134,8 @@ |
134 | 134 | attributes. This applies to both regular files and other types (directories, |
135 | 135 | device files, symlinks, etc.). |
136 | 136 | |
137 | -* Credentials are generated per object (inode and superblock) when they is | |
138 | - created in memory (read off disk or created). The credential works for all | |
137 | +* Credentials are generated per object (inode and superblock) when they are | |
138 | + created in memory (read from disk or created). The credential works for all | |
139 | 139 | operations and is used as long as the object remains in memory. |
140 | 140 | |
141 | 141 | * Async OSD operations are used whenever possible, but the target may execute |
... | ... | @@ -145,7 +145,8 @@ |
145 | 145 | from executing in reverse order: |
146 | 146 | - The following are handled with the OBJ_CREATED and OBJ_2BCREATED |
147 | 147 | flags. OBJ_CREATED is set when we know the object exists on the OSD - |
148 | - in create's callback function, and when we successfully do a read_inode. | |
148 | + in create's callback function, and when we successfully do a | |
149 | + read_inode. | |
149 | 150 | OBJ_2BCREATED is set in the beginning of the create function, so we |
150 | 151 | know that we should wait. |
151 | 152 | - create/delete: delete should wait until the object is created |
fs/exofs/Kbuild
fs/exofs/common.h
... | ... | @@ -49,6 +49,7 @@ |
49 | 49 | #define EXOFS_MIN_PID 0x10000 /* Smallest partition ID */ |
50 | 50 | #define EXOFS_OBJ_OFF 0x10000 /* offset for objects */ |
51 | 51 | #define EXOFS_SUPER_ID 0x10000 /* object ID for on-disk superblock */ |
52 | +#define EXOFS_DEVTABLE_ID 0x10001 /* object ID for on-disk device table */ | |
52 | 53 | #define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */ |
53 | 54 | |
54 | 55 | /* exofs Application specific page/attribute */ |
55 | 56 | |
56 | 57 | |
57 | 58 | |
58 | 59 | |
... | ... | @@ -78,18 +79,68 @@ |
78 | 79 | #define EXOFS_SUPER_MAGIC 0x5DF5 |
79 | 80 | |
80 | 81 | /* |
81 | - * The file system control block - stored in an object's data (mainly, the one | |
82 | - * with ID EXOFS_SUPER_ID). This is where the in-memory superblock is stored | |
83 | - * on disk. Right now it just has a magic value, which is basically a sanity | |
84 | - * check on our ability to communicate with the object store. | |
82 | + * The file system control block - stored in object EXOFS_SUPER_ID's data. | |
83 | + * This is where the in-memory superblock is stored on disk. | |
85 | 84 | */ |
85 | +enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1}; | |
86 | 86 | struct exofs_fscb { |
87 | 87 | __le64 s_nextid; /* Highest object ID used */ |
88 | - __le32 s_numfiles; /* Number of files on fs */ | |
88 | + __le64 s_numfiles; /* Number of files on fs */ | |
89 | + __le32 s_version; /* == EXOFS_FSCB_VER */ | |
89 | 90 | __le16 s_magic; /* Magic signature */ |
90 | 91 | __le16 s_newfs; /* Non-zero if this is a new fs */ |
91 | -}; | |
92 | 92 | |
93 | + /* From here on it's a static part, only written by mkexofs */ | |
94 | + __le64 s_dev_table_oid; /* Reserved, not used */ | |
95 | + __le64 s_dev_table_count; /* == 0 means no dev_table */ | |
96 | +} __packed; | |
97 | + | |
98 | +/* | |
99 | + * Describes the raid used in the FS. It is part of the device table. | |
100 | + * This here is taken from the pNFS-objects definition. In exofs we | |
101 | + * use one raid policy throughout the filesystem. (NOTE: the funny | |
102 | + * alignment at the beginning. We take care of it at exofs_device_table.) | |
103 | + */ | |
104 | +struct exofs_dt_data_map { | |
105 | + __le32 cb_num_comps; | |
106 | + __le64 cb_stripe_unit; | |
107 | + __le32 cb_group_width; | |
108 | + __le32 cb_group_depth; | |
109 | + __le32 cb_mirror_cnt; | |
110 | + __le32 cb_raid_algorithm; | |
111 | +} __packed; | |
112 | + | |
113 | +/* | |
114 | + * This is an osd device information descriptor. It is a single entry in | |
115 | + * the exofs device table. It describes an osd target lun which | |
116 | + * contains data belonging to this FS. (Same partition_id on all devices) | |
117 | + */ | |
118 | +struct exofs_dt_device_info { | |
119 | + __le32 systemid_len; | |
120 | + u8 systemid[OSD_SYSTEMID_LEN]; | |
121 | + __le64 long_name_offset; /* If !0 then offset-in-file */ | |
122 | + __le32 osdname_len; /* */ | |
123 | + u8 osdname[44]; /* Embedded, usually an ASCII uuid */ | |
124 | +} __packed; | |
125 | + | |
126 | +/* | |
127 | + * The EXOFS device table - stored in object EXOFS_DEVTABLE_ID's data. | |
128 | + * It contains the raid used for this multi-device FS and an array of | |
129 | + * participating devices. | |
130 | + */ | |
131 | +struct exofs_device_table { | |
132 | + __le32 dt_version; /* == EXOFS_DT_VER */ | |
133 | + struct exofs_dt_data_map dt_data_map; /* Raid policy to use */ | |
134 | + | |
135 | + /* Reserved space for future use. Total including this: | |
136 | + * (8 * sizeof(le64)) | |
137 | + */ | |
138 | + __le64 __Resurved[4]; | |
139 | + | |
140 | + __le64 dt_num_devices; /* Array size */ | |
141 | + struct exofs_dt_device_info dt_dev_table[]; /* Array of devices */ | |
142 | +} __packed; | |
143 | + | |
93 | 144 | /**************************************************************************** |
94 | 145 | * inode-related things |
95 | 146 | ****************************************************************************/ |
... | ... | @@ -154,24 +205,6 @@ |
154 | 205 | #define EXOFS_DIR_REC_LEN(name_len) \ |
155 | 206 | (((name_len) + offsetof(struct exofs_dir_entry, name) + \ |
156 | 207 | EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) |
157 | - | |
158 | -/************************* | |
159 | - * function declarations * | |
160 | - *************************/ | |
161 | -/* osd.c */ | |
162 | -void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], | |
163 | - const struct osd_obj_id *obj); | |
164 | - | |
165 | -int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid); | |
166 | -static inline int exofs_check_ok(struct osd_request *or) | |
167 | -{ | |
168 | - return exofs_check_ok_resid(or, NULL, NULL); | |
169 | -} | |
170 | -int exofs_sync_op(struct osd_request *or, int timeout, u8 *cred); | |
171 | -int exofs_async_op(struct osd_request *or, | |
172 | - osd_req_done_fn *async_done, void *caller_context, u8 *cred); | |
173 | - | |
174 | -int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr); | |
175 | 208 | |
176 | 209 | #endif /*ifndef __EXOFS_COM_H__*/ |
fs/exofs/exofs.h
... | ... | @@ -30,13 +30,17 @@ |
30 | 30 | * along with exofs; if not, write to the Free Software |
31 | 31 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
32 | 32 | */ |
33 | +#ifndef __EXOFS_H__ | |
34 | +#define __EXOFS_H__ | |
33 | 35 | |
34 | 36 | #include <linux/fs.h> |
35 | 37 | #include <linux/time.h> |
36 | 38 | #include "common.h" |
37 | 39 | |
38 | -#ifndef __EXOFS_H__ | |
39 | -#define __EXOFS_H__ | |
40 | +/* FIXME: Remove once pnfs hits mainline | |
41 | + * #include <linux/exportfs/pnfs_osd_xdr.h> | |
42 | + */ | |
43 | +#include "pnfs.h" | |
40 | 44 | |
41 | 45 | #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) |
42 | 46 | |
... | ... | @@ -55,7 +59,7 @@ |
55 | 59 | * our extension to the in-memory superblock |
56 | 60 | */ |
57 | 61 | struct exofs_sb_info { |
58 | - struct osd_dev *s_dev; /* returned by get_osd_dev */ | |
62 | + struct exofs_fscb s_fscb; /* Written often, pre-allocate*/ | |
59 | 63 | osd_id s_pid; /* partition ID of file system*/ |
60 | 64 | int s_timeout; /* timeout for OSD operations */ |
61 | 65 | uint64_t s_nextid; /* highest object ID used */ |
... | ... | @@ -63,7 +67,11 @@ |
63 | 67 | spinlock_t s_next_gen_lock; /* spinlock for gen # update */ |
64 | 68 | u32 s_next_generation; /* next gen # to use */ |
65 | 69 | atomic_t s_curr_pending; /* number of pending commands */ |
66 | - uint8_t s_cred[OSD_CAP_LEN]; /* all-powerful credential */ | |
70 | + uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */ | |
71 | + | |
72 | + struct pnfs_osd_data_map data_map; /* Default raid to use */ | |
73 | + unsigned s_numdevs; /* Num of devices in array */ | |
74 | + struct osd_dev *s_ods[1]; /* Variable length, minimum 1 */ | |
67 | 75 | }; |
68 | 76 | |
69 | 77 | /* |
... | ... | @@ -79,6 +87,50 @@ |
79 | 87 | struct inode vfs_inode; /* normal in-memory inode */ |
80 | 88 | }; |
81 | 89 | |
90 | +static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) | |
91 | +{ | |
92 | + return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF; | |
93 | +} | |
94 | + | |
95 | +struct exofs_io_state; | |
96 | +typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private); | |
97 | + | |
98 | +struct exofs_io_state { | |
99 | + struct kref kref; | |
100 | + | |
101 | + void *private; | |
102 | + exofs_io_done_fn done; | |
103 | + | |
104 | + struct exofs_sb_info *sbi; | |
105 | + struct osd_obj_id obj; | |
106 | + u8 *cred; | |
107 | + | |
108 | + /* Global read/write IO*/ | |
109 | + loff_t offset; | |
110 | + unsigned long length; | |
111 | + void *kern_buff; | |
112 | + struct bio *bio; | |
113 | + | |
114 | + /* Attributes */ | |
115 | + unsigned in_attr_len; | |
116 | + struct osd_attr *in_attr; | |
117 | + unsigned out_attr_len; | |
118 | + struct osd_attr *out_attr; | |
119 | + | |
120 | + /* Variable array of size numdevs */ | |
121 | + unsigned numdevs; | |
122 | + struct exofs_per_dev_state { | |
123 | + struct osd_request *or; | |
124 | + struct bio *bio; | |
125 | + } per_dev[]; | |
126 | +}; | |
127 | + | |
128 | +static inline unsigned exofs_io_state_size(unsigned numdevs) | |
129 | +{ | |
130 | + return sizeof(struct exofs_io_state) + | |
131 | + sizeof(struct exofs_per_dev_state) * numdevs; | |
132 | +} | |
133 | + | |
82 | 134 | /* |
83 | 135 | * our inode flags |
84 | 136 | */ |
... | ... | @@ -130,6 +182,42 @@ |
130 | 182 | /************************* |
131 | 183 | * function declarations * |
132 | 184 | *************************/ |
185 | + | |
186 | +/* ios.c */ | |
187 | +void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], | |
188 | + const struct osd_obj_id *obj); | |
189 | +int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, | |
190 | + u64 offset, void *p, unsigned length); | |
191 | + | |
192 | +int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** ios); | |
193 | +void exofs_put_io_state(struct exofs_io_state *ios); | |
194 | + | |
195 | +int exofs_check_io(struct exofs_io_state *ios, u64 *resid); | |
196 | + | |
197 | +int exofs_sbi_create(struct exofs_io_state *ios); | |
198 | +int exofs_sbi_remove(struct exofs_io_state *ios); | |
199 | +int exofs_sbi_write(struct exofs_io_state *ios); | |
200 | +int exofs_sbi_read(struct exofs_io_state *ios); | |
201 | + | |
202 | +int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr); | |
203 | + | |
204 | +int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len); | |
205 | +static inline int exofs_oi_write(struct exofs_i_info *oi, | |
206 | + struct exofs_io_state *ios) | |
207 | +{ | |
208 | + ios->obj.id = exofs_oi_objno(oi); | |
209 | + ios->cred = oi->i_cred; | |
210 | + return exofs_sbi_write(ios); | |
211 | +} | |
212 | + | |
213 | +static inline int exofs_oi_read(struct exofs_i_info *oi, | |
214 | + struct exofs_io_state *ios) | |
215 | +{ | |
216 | + ios->obj.id = exofs_oi_objno(oi); | |
217 | + ios->cred = oi->i_cred; | |
218 | + return exofs_sbi_read(ios); | |
219 | +} | |
220 | + | |
133 | 221 | /* inode.c */ |
134 | 222 | void exofs_truncate(struct inode *inode); |
135 | 223 | int exofs_setattr(struct dentry *, struct iattr *); |
... | ... | @@ -169,6 +257,7 @@ |
169 | 257 | |
170 | 258 | /* inode.c */ |
171 | 259 | extern const struct address_space_operations exofs_aops; |
260 | +extern const struct osd_attr g_attr_logical_length; | |
172 | 261 | |
173 | 262 | /* namei.c */ |
174 | 263 | extern const struct inode_operations exofs_dir_inode_operations; |
fs/exofs/inode.c
... | ... | @@ -37,15 +37,18 @@ |
37 | 37 | |
38 | 38 | #include "exofs.h" |
39 | 39 | |
40 | -#ifdef CONFIG_EXOFS_DEBUG | |
41 | -# define EXOFS_DEBUG_OBJ_ISIZE 1 | |
42 | -#endif | |
40 | +#define EXOFS_DBGMSG2(M...) do {} while (0) | |
43 | 41 | |
42 | +enum { BIO_MAX_PAGES_KMALLOC = | |
43 | + (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), | |
44 | +}; | |
45 | + | |
44 | 46 | struct page_collect { |
45 | 47 | struct exofs_sb_info *sbi; |
46 | 48 | struct request_queue *req_q; |
47 | 49 | struct inode *inode; |
48 | 50 | unsigned expected_pages; |
51 | + struct exofs_io_state *ios; | |
49 | 52 | |
50 | 53 | struct bio *bio; |
51 | 54 | unsigned nr_pages; |
52 | 55 | |
53 | 56 | |
54 | 57 | |
... | ... | @@ -54,22 +57,23 @@ |
54 | 57 | }; |
55 | 58 | |
56 | 59 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, |
57 | - struct inode *inode) | |
60 | + struct inode *inode) | |
58 | 61 | { |
59 | 62 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; |
60 | 63 | |
61 | 64 | pcol->sbi = sbi; |
62 | - pcol->req_q = osd_request_queue(sbi->s_dev); | |
65 | + /* Create master bios on first Q, later on cloning, each clone will be | |
66 | + * allocated on its destination Q | |
67 | + */ | |
68 | + pcol->req_q = osd_request_queue(sbi->s_ods[0]); | |
63 | 69 | pcol->inode = inode; |
64 | 70 | pcol->expected_pages = expected_pages; |
65 | 71 | |
72 | + pcol->ios = NULL; | |
66 | 73 | pcol->bio = NULL; |
67 | 74 | pcol->nr_pages = 0; |
68 | 75 | pcol->length = 0; |
69 | 76 | pcol->pg_first = -1; |
70 | - | |
71 | - EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino, | |
72 | - expected_pages); | |
73 | 77 | } |
74 | 78 | |
75 | 79 | static void _pcol_reset(struct page_collect *pcol) |
76 | 80 | |
77 | 81 | |
78 | 82 | |
79 | 83 | |
80 | 84 | |
81 | 85 | |
... | ... | @@ -80,35 +84,49 @@ |
80 | 84 | pcol->nr_pages = 0; |
81 | 85 | pcol->length = 0; |
82 | 86 | pcol->pg_first = -1; |
83 | - EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n", | |
84 | - pcol->inode->i_ino, pcol->expected_pages); | |
87 | + pcol->ios = NULL; | |
85 | 88 | |
86 | 89 | /* this is probably the end of the loop but in writes |
87 | 90 | * it might not end here. don't be left with nothing |
88 | 91 | */ |
89 | 92 | if (!pcol->expected_pages) |
90 | - pcol->expected_pages = 128; | |
93 | + pcol->expected_pages = BIO_MAX_PAGES_KMALLOC; | |
91 | 94 | } |
92 | 95 | |
93 | 96 | static int pcol_try_alloc(struct page_collect *pcol) |
94 | 97 | { |
95 | - int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES); | |
98 | + int pages = min_t(unsigned, pcol->expected_pages, | |
99 | + BIO_MAX_PAGES_KMALLOC); | |
96 | 100 | |
101 | + if (!pcol->ios) { /* First time allocate io_state */ | |
102 | + int ret = exofs_get_io_state(pcol->sbi, &pcol->ios); | |
103 | + | |
104 | + if (ret) | |
105 | + return ret; | |
106 | + } | |
107 | + | |
97 | 108 | for (; pages; pages >>= 1) { |
98 | - pcol->bio = bio_alloc(GFP_KERNEL, pages); | |
109 | + pcol->bio = bio_kmalloc(GFP_KERNEL, pages); | |
99 | 110 | if (likely(pcol->bio)) |
100 | 111 | return 0; |
101 | 112 | } |
102 | 113 | |
103 | - EXOFS_ERR("Failed to kcalloc expected_pages=%u\n", | |
114 | + EXOFS_ERR("Failed to bio_kmalloc expected_pages=%u\n", | |
104 | 115 | pcol->expected_pages); |
105 | 116 | return -ENOMEM; |
106 | 117 | } |
107 | 118 | |
108 | 119 | static void pcol_free(struct page_collect *pcol) |
109 | 120 | { |
110 | - bio_put(pcol->bio); | |
111 | - pcol->bio = NULL; | |
121 | + if (pcol->bio) { | |
122 | + bio_put(pcol->bio); | |
123 | + pcol->bio = NULL; | |
124 | + } | |
125 | + | |
126 | + if (pcol->ios) { | |
127 | + exofs_put_io_state(pcol->ios); | |
128 | + pcol->ios = NULL; | |
129 | + } | |
112 | 130 | } |
113 | 131 | |
114 | 132 | static int pcol_add_page(struct page_collect *pcol, struct page *page, |
115 | 133 | |
116 | 134 | |
117 | 135 | |
... | ... | @@ -161,22 +179,17 @@ |
161 | 179 | /* Called at the end of reads, to optionally unlock pages and update their |
162 | 180 | * status. |
163 | 181 | */ |
164 | -static int __readpages_done(struct osd_request *or, struct page_collect *pcol, | |
165 | - bool do_unlock) | |
182 | +static int __readpages_done(struct page_collect *pcol, bool do_unlock) | |
166 | 183 | { |
167 | 184 | struct bio_vec *bvec; |
168 | 185 | int i; |
169 | 186 | u64 resid; |
170 | 187 | u64 good_bytes; |
171 | 188 | u64 length = 0; |
172 | - int ret = exofs_check_ok_resid(or, &resid, NULL); | |
189 | + int ret = exofs_check_io(pcol->ios, &resid); | |
173 | 190 | |
174 | - osd_end_request(or); | |
175 | - | |
176 | 191 | if (likely(!ret)) |
177 | 192 | good_bytes = pcol->length; |
178 | - else if (!resid) | |
179 | - good_bytes = 0; | |
180 | 193 | else |
181 | 194 | good_bytes = pcol->length - resid; |
182 | 195 | |
... | ... | @@ -198,7 +211,7 @@ |
198 | 211 | else |
199 | 212 | page_stat = ret; |
200 | 213 | |
201 | - EXOFS_DBGMSG(" readpages_done(0x%lx, 0x%lx) %s\n", | |
214 | + EXOFS_DBGMSG2(" readpages_done(0x%lx, 0x%lx) %s\n", | |
202 | 215 | inode->i_ino, page->index, |
203 | 216 | page_stat ? "bad_bytes" : "good_bytes"); |
204 | 217 | |
205 | 218 | |
206 | 219 | |
... | ... | @@ -214,13 +227,13 @@ |
214 | 227 | } |
215 | 228 | |
216 | 229 | /* callback of async reads */ |
217 | -static void readpages_done(struct osd_request *or, void *p) | |
230 | +static void readpages_done(struct exofs_io_state *ios, void *p) | |
218 | 231 | { |
219 | 232 | struct page_collect *pcol = p; |
220 | 233 | |
221 | - __readpages_done(or, pcol, true); | |
234 | + __readpages_done(pcol, true); | |
222 | 235 | atomic_dec(&pcol->sbi->s_curr_pending); |
223 | - kfree(p); | |
236 | + kfree(pcol); | |
224 | 237 | } |
225 | 238 | |
226 | 239 | static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) |
227 | 240 | |
228 | 241 | |
... | ... | @@ -238,17 +251,13 @@ |
238 | 251 | |
239 | 252 | unlock_page(page); |
240 | 253 | } |
241 | - pcol_free(pcol); | |
242 | 254 | } |
243 | 255 | |
244 | 256 | static int read_exec(struct page_collect *pcol, bool is_sync) |
245 | 257 | { |
246 | 258 | struct exofs_i_info *oi = exofs_i(pcol->inode); |
247 | - struct osd_obj_id obj = {pcol->sbi->s_pid, | |
248 | - pcol->inode->i_ino + EXOFS_OBJ_OFF}; | |
249 | - struct osd_request *or = NULL; | |
259 | + struct exofs_io_state *ios = pcol->ios; | |
250 | 260 | struct page_collect *pcol_copy = NULL; |
251 | - loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; | |
252 | 261 | int ret; |
253 | 262 | |
254 | 263 | if (!pcol->bio) |
255 | 264 | |
256 | 265 | |
... | ... | @@ -257,17 +266,13 @@ |
257 | 266 | /* see comment in _readpage() about sync reads */ |
258 | 267 | WARN_ON(is_sync && (pcol->nr_pages != 1)); |
259 | 268 | |
260 | - or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); | |
261 | - if (unlikely(!or)) { | |
262 | - ret = -ENOMEM; | |
263 | - goto err; | |
264 | - } | |
269 | + ios->bio = pcol->bio; | |
270 | + ios->length = pcol->length; | |
271 | + ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT; | |
265 | 272 | |
266 | - osd_req_read(or, &obj, i_start, pcol->bio, pcol->length); | |
267 | - | |
268 | 273 | if (is_sync) { |
269 | - exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred); | |
270 | - return __readpages_done(or, pcol, false); | |
274 | + exofs_oi_read(oi, pcol->ios); | |
275 | + return __readpages_done(pcol, false); | |
271 | 276 | } |
272 | 277 | |
273 | 278 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); |
274 | 279 | |
... | ... | @@ -277,14 +282,16 @@ |
277 | 282 | } |
278 | 283 | |
279 | 284 | *pcol_copy = *pcol; |
280 | - ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred); | |
285 | + ios->done = readpages_done; | |
286 | + ios->private = pcol_copy; | |
287 | + ret = exofs_oi_read(oi, ios); | |
281 | 288 | if (unlikely(ret)) |
282 | 289 | goto err; |
283 | 290 | |
284 | 291 | atomic_inc(&pcol->sbi->s_curr_pending); |
285 | 292 | |
286 | 293 | EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", |
287 | - obj.id, _LLU(i_start), pcol->length); | |
294 | + ios->obj.id, _LLU(ios->offset), pcol->length); | |
288 | 295 | |
289 | 296 | /* pages ownership was passed to pcol_copy */ |
290 | 297 | _pcol_reset(pcol); |
291 | 298 | |
292 | 299 | |
... | ... | @@ -293,12 +300,10 @@ |
293 | 300 | err: |
294 | 301 | if (!is_sync) |
295 | 302 | _unlock_pcol_pages(pcol, ret, READ); |
296 | - else /* Pages unlocked by caller in sync mode only free bio */ | |
297 | - pcol_free(pcol); | |
298 | 303 | |
304 | + pcol_free(pcol); | |
305 | + | |
299 | 306 | kfree(pcol_copy); |
300 | - if (or) | |
301 | - osd_end_request(or); | |
302 | 307 | return ret; |
303 | 308 | } |
304 | 309 | |
305 | 310 | |
... | ... | @@ -370,12 +375,12 @@ |
370 | 375 | if (len != PAGE_CACHE_SIZE) |
371 | 376 | zero_user(page, len, PAGE_CACHE_SIZE - len); |
372 | 377 | |
373 | - EXOFS_DBGMSG(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", | |
378 | + EXOFS_DBGMSG2(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", | |
374 | 379 | inode->i_ino, page->index, len); |
375 | 380 | |
376 | 381 | ret = pcol_add_page(pcol, page, len); |
377 | 382 | if (ret) { |
378 | - EXOFS_DBGMSG("Failed pcol_add_page pages[i]=%p " | |
383 | + EXOFS_DBGMSG2("Failed pcol_add_page pages[i]=%p " | |
379 | 384 | "this_len=0x%zx nr_pages=%u length=0x%lx\n", |
380 | 385 | page, len, pcol->nr_pages, pcol->length); |
381 | 386 | |
... | ... | @@ -419,9 +424,8 @@ |
419 | 424 | |
420 | 425 | _pcol_init(&pcol, 1, page->mapping->host); |
421 | 426 | |
422 | - /* readpage_strip might call read_exec(,async) inside at several places | |
423 | - * but this is safe for is_async=0 since read_exec will not do anything | |
424 | - * when we have a single page. | |
427 | + /* readpage_strip might call read_exec(,is_sync==false) at several | |
428 | + * places but not if we have a single page. | |
425 | 429 | */ |
426 | 430 | ret = readpage_strip(&pcol, page); |
427 | 431 | if (ret) { |
... | ... | @@ -440,8 +444,8 @@ |
440 | 444 | return _readpage(page, false); |
441 | 445 | } |
442 | 446 | |
443 | -/* Callback for osd_write. All writes are asynchronouse */ | |
444 | -static void writepages_done(struct osd_request *or, void *p) | |
447 | +/* Callback for osd_write. All writes are asynchronous */ | |
448 | +static void writepages_done(struct exofs_io_state *ios, void *p) | |
445 | 449 | { |
446 | 450 | struct page_collect *pcol = p; |
447 | 451 | struct bio_vec *bvec; |
448 | 452 | |
449 | 453 | |
... | ... | @@ -449,16 +453,12 @@ |
449 | 453 | u64 resid; |
450 | 454 | u64 good_bytes; |
451 | 455 | u64 length = 0; |
456 | + int ret = exofs_check_io(ios, &resid); | |
452 | 457 | |
453 | - int ret = exofs_check_ok_resid(or, NULL, &resid); | |
454 | - | |
455 | - osd_end_request(or); | |
456 | 458 | atomic_dec(&pcol->sbi->s_curr_pending); |
457 | 459 | |
458 | 460 | if (likely(!ret)) |
459 | 461 | good_bytes = pcol->length; |
460 | - else if (!resid) | |
461 | - good_bytes = 0; | |
462 | 462 | else |
463 | 463 | good_bytes = pcol->length - resid; |
464 | 464 | |
... | ... | @@ -482,7 +482,7 @@ |
482 | 482 | |
483 | 483 | update_write_page(page, page_stat); |
484 | 484 | unlock_page(page); |
485 | - EXOFS_DBGMSG(" writepages_done(0x%lx, 0x%lx) status=%d\n", | |
485 | + EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n", | |
486 | 486 | inode->i_ino, page->index, page_stat); |
487 | 487 | |
488 | 488 | length += bvec->bv_len; |
489 | 489 | |
490 | 490 | |
... | ... | @@ -496,23 +496,13 @@ |
496 | 496 | static int write_exec(struct page_collect *pcol) |
497 | 497 | { |
498 | 498 | struct exofs_i_info *oi = exofs_i(pcol->inode); |
499 | - struct osd_obj_id obj = {pcol->sbi->s_pid, | |
500 | - pcol->inode->i_ino + EXOFS_OBJ_OFF}; | |
501 | - struct osd_request *or = NULL; | |
499 | + struct exofs_io_state *ios = pcol->ios; | |
502 | 500 | struct page_collect *pcol_copy = NULL; |
503 | - loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; | |
504 | 501 | int ret; |
505 | 502 | |
506 | 503 | if (!pcol->bio) |
507 | 504 | return 0; |
508 | 505 | |
509 | - or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); | |
510 | - if (unlikely(!or)) { | |
511 | - EXOFS_ERR("write_exec: Faild to osd_start_request()\n"); | |
512 | - ret = -ENOMEM; | |
513 | - goto err; | |
514 | - } | |
515 | - | |
516 | 506 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); |
517 | 507 | if (!pcol_copy) { |
518 | 508 | EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); |
519 | 509 | |
520 | 510 | |
... | ... | @@ -523,16 +513,22 @@ |
523 | 513 | *pcol_copy = *pcol; |
524 | 514 | |
525 | 515 | pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ |
526 | - osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length); | |
527 | - ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred); | |
516 | + | |
517 | + ios->bio = pcol_copy->bio; | |
518 | + ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT; | |
519 | + ios->length = pcol_copy->length; | |
520 | + ios->done = writepages_done; | |
521 | + ios->private = pcol_copy; | |
522 | + | |
523 | + ret = exofs_oi_write(oi, ios); | |
528 | 524 | if (unlikely(ret)) { |
529 | - EXOFS_ERR("write_exec: exofs_async_op() Faild\n"); | |
525 | + EXOFS_ERR("write_exec: exofs_oi_write() Faild\n"); | |
530 | 526 | goto err; |
531 | 527 | } |
532 | 528 | |
533 | 529 | atomic_inc(&pcol->sbi->s_curr_pending); |
534 | 530 | EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", |
535 | - pcol->inode->i_ino, pcol->pg_first, _LLU(i_start), | |
531 | + pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset), | |
536 | 532 | pcol->length); |
537 | 533 | /* pages ownership was passed to pcol_copy */ |
538 | 534 | _pcol_reset(pcol); |
539 | 535 | |
... | ... | @@ -540,9 +536,9 @@ |
540 | 536 | |
541 | 537 | err: |
542 | 538 | _unlock_pcol_pages(pcol, ret, WRITE); |
539 | + pcol_free(pcol); | |
543 | 540 | kfree(pcol_copy); |
544 | - if (or) | |
545 | - osd_end_request(or); | |
541 | + | |
546 | 542 | return ret; |
547 | 543 | } |
548 | 544 | |
... | ... | @@ -586,6 +582,9 @@ |
586 | 582 | if (PageError(page)) |
587 | 583 | ClearPageError(page); |
588 | 584 | unlock_page(page); |
585 | + EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) " | |
586 | + "outside the limits\n", | |
587 | + inode->i_ino, page->index); | |
589 | 588 | return 0; |
590 | 589 | } |
591 | 590 | } |
... | ... | @@ -600,6 +599,9 @@ |
600 | 599 | ret = write_exec(pcol); |
601 | 600 | if (unlikely(ret)) |
602 | 601 | goto fail; |
602 | + | |
603 | + EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) Discontinuity\n", | |
604 | + inode->i_ino, page->index); | |
603 | 605 | goto try_again; |
604 | 606 | } |
605 | 607 | |
... | ... | @@ -609,7 +611,7 @@ |
609 | 611 | goto fail; |
610 | 612 | } |
611 | 613 | |
612 | - EXOFS_DBGMSG(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", | |
614 | + EXOFS_DBGMSG2(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", | |
613 | 615 | inode->i_ino, page->index, len); |
614 | 616 | |
615 | 617 | ret = pcol_add_page(pcol, page, len); |
... | ... | @@ -634,6 +636,8 @@ |
634 | 636 | return 0; |
635 | 637 | |
636 | 638 | fail: |
639 | + EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n", | |
640 | + inode->i_ino, page->index, ret); | |
637 | 641 | set_bit(AS_EIO, &page->mapping->flags); |
638 | 642 | unlock_page(page); |
639 | 643 | return ret; |
640 | 644 | |
641 | 645 | |
... | ... | @@ -652,14 +656,17 @@ |
652 | 656 | wbc->range_end >> PAGE_CACHE_SHIFT; |
653 | 657 | |
654 | 658 | if (start || end) |
655 | - expected_pages = min(end - start + 1, 32L); | |
659 | + expected_pages = end - start + 1; | |
656 | 660 | else |
657 | 661 | expected_pages = mapping->nrpages; |
658 | 662 | |
659 | - EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx" | |
660 | - " m->nrpages=%lu start=0x%lx end=0x%lx\n", | |
663 | + if (expected_pages < 32L) | |
664 | + expected_pages = 32L; | |
665 | + | |
666 | + EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx " | |
667 | + "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n", | |
661 | 668 | mapping->host->i_ino, wbc->range_start, wbc->range_end, |
662 | - mapping->nrpages, start, end); | |
669 | + mapping->nrpages, start, end, expected_pages); | |
663 | 670 | |
664 | 671 | _pcol_init(&pcol, expected_pages, mapping->host); |
665 | 672 | |
666 | 673 | |
667 | 674 | |
... | ... | @@ -771,19 +778,28 @@ |
771 | 778 | const struct osd_attr g_attr_logical_length = ATTR_DEF( |
772 | 779 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); |
773 | 780 | |
781 | +static int _do_truncate(struct inode *inode) | |
782 | +{ | |
783 | + struct exofs_i_info *oi = exofs_i(inode); | |
784 | + loff_t isize = i_size_read(inode); | |
785 | + int ret; | |
786 | + | |
787 | + inode->i_mtime = inode->i_ctime = CURRENT_TIME; | |
788 | + | |
789 | + nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); | |
790 | + | |
791 | + ret = exofs_oi_truncate(oi, (u64)isize); | |
792 | + EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize); | |
793 | + return ret; | |
794 | +} | |
795 | + | |
774 | 796 | /* |
775 | 797 | * Truncate a file to the specified size - all we have to do is set the size |
776 | 798 | * attribute. We make sure the object exists first. |
777 | 799 | */ |
778 | 800 | void exofs_truncate(struct inode *inode) |
779 | 801 | { |
780 | - struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | |
781 | 802 | struct exofs_i_info *oi = exofs_i(inode); |
782 | - struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | |
783 | - struct osd_request *or; | |
784 | - struct osd_attr attr; | |
785 | - loff_t isize = i_size_read(inode); | |
786 | - __be64 newsize; | |
787 | 803 | int ret; |
788 | 804 | |
789 | 805 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) |
790 | 806 | |
791 | 807 | |
... | ... | @@ -793,31 +809,14 @@ |
793 | 809 | return; |
794 | 810 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) |
795 | 811 | return; |
796 | - inode->i_mtime = inode->i_ctime = CURRENT_TIME; | |
797 | 812 | |
798 | - nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); | |
799 | - | |
800 | - or = osd_start_request(sbi->s_dev, GFP_KERNEL); | |
801 | - if (unlikely(!or)) { | |
802 | - EXOFS_ERR("ERROR: exofs_truncate: osd_start_request failed\n"); | |
803 | - goto fail; | |
804 | - } | |
805 | - | |
806 | - osd_req_set_attributes(or, &obj); | |
807 | - | |
808 | - newsize = cpu_to_be64((u64)isize); | |
809 | - attr = g_attr_logical_length; | |
810 | - attr.val_ptr = &newsize; | |
811 | - osd_req_add_set_attr_list(or, &attr, 1); | |
812 | - | |
813 | 813 | /* if we are about to truncate an object, and it hasn't been |
814 | 814 | * created yet, wait |
815 | 815 | */ |
816 | 816 | if (unlikely(wait_obj_created(oi))) |
817 | 817 | goto fail; |
818 | 818 | |
819 | - ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | |
820 | - osd_end_request(or); | |
819 | + ret = _do_truncate(inode); | |
821 | 820 | if (ret) |
822 | 821 | goto fail; |
823 | 822 | |
824 | 823 | |
825 | 824 | |
826 | 825 | |
827 | 826 | |
828 | 827 | |
829 | 828 | |
830 | 829 | |
831 | 830 | |
832 | 831 | |
833 | 832 | |
834 | 833 | |
835 | 834 | |
836 | 835 | |
837 | 836 | |
838 | 837 | |
... | ... | @@ -847,65 +846,62 @@ |
847 | 846 | |
848 | 847 | /* |
849 | 848 | * Read an inode from the OSD, and return it as is. We also return the size |
850 | - * attribute in the 'sanity' argument if we got compiled with debugging turned | |
851 | - * on. | |
849 | + * attribute in the 'obj_size' argument. | |
852 | 850 | */ |
853 | 851 | static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, |
854 | - struct exofs_fcb *inode, uint64_t *sanity) | |
852 | + struct exofs_fcb *inode, uint64_t *obj_size) | |
855 | 853 | { |
856 | 854 | struct exofs_sb_info *sbi = sb->s_fs_info; |
857 | - struct osd_request *or; | |
858 | - struct osd_attr attr; | |
859 | - struct osd_obj_id obj = {sbi->s_pid, | |
860 | - oi->vfs_inode.i_ino + EXOFS_OBJ_OFF}; | |
855 | + struct osd_attr attrs[2]; | |
856 | + struct exofs_io_state *ios; | |
861 | 857 | int ret; |
862 | 858 | |
863 | - exofs_make_credential(oi->i_cred, &obj); | |
864 | - | |
865 | - or = osd_start_request(sbi->s_dev, GFP_KERNEL); | |
866 | - if (unlikely(!or)) { | |
867 | - EXOFS_ERR("exofs_get_inode: osd_start_request failed.\n"); | |
868 | - return -ENOMEM; | |
859 | + *obj_size = ~0; | |
860 | + ret = exofs_get_io_state(sbi, &ios); | |
861 | + if (unlikely(ret)) { | |
862 | + EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); | |
863 | + return ret; | |
869 | 864 | } |
870 | - osd_req_get_attributes(or, &obj); | |
871 | 865 | |
872 | - /* we need the inode attribute */ | |
873 | - osd_req_add_get_attr_list(or, &g_attr_inode_data, 1); | |
866 | + ios->obj.id = exofs_oi_objno(oi); | |
867 | + exofs_make_credential(oi->i_cred, &ios->obj); | |
868 | + ios->cred = oi->i_cred; | |
874 | 869 | |
875 | -#ifdef EXOFS_DEBUG_OBJ_ISIZE | |
876 | - /* we get the size attributes to do a sanity check */ | |
877 | - osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); | |
878 | -#endif | |
870 | + attrs[0] = g_attr_inode_data; | |
871 | + attrs[1] = g_attr_logical_length; | |
872 | + ios->in_attr = attrs; | |
873 | + ios->in_attr_len = ARRAY_SIZE(attrs); | |
879 | 874 | |
880 | - ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | |
875 | + ret = exofs_sbi_read(ios); | |
881 | 876 | if (ret) |
882 | 877 | goto out; |
883 | 878 | |
884 | - attr = g_attr_inode_data; | |
885 | - ret = extract_attr_from_req(or, &attr); | |
879 | + ret = extract_attr_from_ios(ios, &attrs[0]); | |
886 | 880 | if (ret) { |
887 | - EXOFS_ERR("exofs_get_inode: extract_attr_from_req failed\n"); | |
881 | + EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__); | |
888 | 882 | goto out; |
889 | 883 | } |
884 | + WARN_ON(attrs[0].len != EXOFS_INO_ATTR_SIZE); | |
885 | + memcpy(inode, attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE); | |
890 | 886 | |
891 | - WARN_ON(attr.len != EXOFS_INO_ATTR_SIZE); | |
892 | - memcpy(inode, attr.val_ptr, EXOFS_INO_ATTR_SIZE); | |
893 | - | |
894 | -#ifdef EXOFS_DEBUG_OBJ_ISIZE | |
895 | - attr = g_attr_logical_length; | |
896 | - ret = extract_attr_from_req(or, &attr); | |
887 | + ret = extract_attr_from_ios(ios, &attrs[1]); | |
897 | 888 | if (ret) { |
898 | - EXOFS_ERR("ERROR: extract attr from or failed\n"); | |
889 | + EXOFS_ERR("%s: extract_attr of logical_length failed\n", | |
890 | + __func__); | |
899 | 891 | goto out; |
900 | 892 | } |
901 | - *sanity = get_unaligned_be64(attr.val_ptr); | |
902 | -#endif | |
893 | + *obj_size = get_unaligned_be64(attrs[1].val_ptr); | |
903 | 894 | |
904 | 895 | out: |
905 | - osd_end_request(or); | |
896 | + exofs_put_io_state(ios); | |
906 | 897 | return ret; |
907 | 898 | } |
908 | 899 | |
900 | +static void __oi_init(struct exofs_i_info *oi) | |
901 | +{ | |
902 | + init_waitqueue_head(&oi->i_wq); | |
903 | + oi->i_flags = 0; | |
904 | +} | |
909 | 905 | /* |
910 | 906 | * Fill in an inode read from the OSD and set it up for use |
911 | 907 | */ |
... | ... | @@ -914,7 +910,7 @@ |
914 | 910 | struct exofs_i_info *oi; |
915 | 911 | struct exofs_fcb fcb; |
916 | 912 | struct inode *inode; |
917 | - uint64_t uninitialized_var(sanity); | |
913 | + uint64_t obj_size; | |
918 | 914 | int ret; |
919 | 915 | |
920 | 916 | inode = iget_locked(sb, ino); |
921 | 917 | |
922 | 918 | |
... | ... | @@ -923,13 +919,13 @@ |
923 | 919 | if (!(inode->i_state & I_NEW)) |
924 | 920 | return inode; |
925 | 921 | oi = exofs_i(inode); |
922 | + __oi_init(oi); | |
926 | 923 | |
927 | 924 | /* read the inode from the osd */ |
928 | - ret = exofs_get_inode(sb, oi, &fcb, &sanity); | |
925 | + ret = exofs_get_inode(sb, oi, &fcb, &obj_size); | |
929 | 926 | if (ret) |
930 | 927 | goto bad_inode; |
931 | 928 | |
932 | - init_waitqueue_head(&oi->i_wq); | |
933 | 929 | set_obj_created(oi); |
934 | 930 | |
935 | 931 | /* copy stuff from on-disk struct to in-memory struct */ |
936 | 932 | |
937 | 933 | |
... | ... | @@ -947,14 +943,12 @@ |
947 | 943 | inode->i_blkbits = EXOFS_BLKSHIFT; |
948 | 944 | inode->i_generation = le32_to_cpu(fcb.i_generation); |
949 | 945 | |
950 | -#ifdef EXOFS_DEBUG_OBJ_ISIZE | |
951 | - if ((inode->i_size != sanity) && | |
946 | + if ((inode->i_size != obj_size) && | |
952 | 947 | (!exofs_inode_is_fast_symlink(inode))) { |
953 | - EXOFS_ERR("WARNING: Size of object from inode and " | |
954 | - "attributes differ (%lld != %llu)\n", | |
955 | - inode->i_size, _LLU(sanity)); | |
948 | + EXOFS_ERR("WARNING: Size of inode=%llu != object=%llu\n", | |
949 | + inode->i_size, _LLU(obj_size)); | |
950 | + /* FIXME: call exofs_inode_recovery() */ | |
956 | 951 | } |
957 | -#endif | |
958 | 952 | |
959 | 953 | oi->i_dir_start_lookup = 0; |
960 | 954 | |
961 | 955 | |
962 | 956 | |
963 | 957 | |
... | ... | @@ -1020,24 +1014,31 @@ |
1020 | 1014 | * set the obj_created flag so that other methods know that the object exists on |
1021 | 1015 | * the OSD. |
1022 | 1016 | */ |
1023 | -static void create_done(struct osd_request *or, void *p) | |
1017 | +static void create_done(struct exofs_io_state *ios, void *p) | |
1024 | 1018 | { |
1025 | 1019 | struct inode *inode = p; |
1026 | 1020 | struct exofs_i_info *oi = exofs_i(inode); |
1027 | 1021 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; |
1028 | 1022 | int ret; |
1029 | 1023 | |
1030 | - ret = exofs_check_ok(or); | |
1031 | - osd_end_request(or); | |
1024 | + ret = exofs_check_io(ios, NULL); | |
1025 | + exofs_put_io_state(ios); | |
1026 | + | |
1032 | 1027 | atomic_dec(&sbi->s_curr_pending); |
1033 | 1028 | |
1034 | 1029 | if (unlikely(ret)) { |
1035 | 1030 | EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", |
1036 | - _LLU(sbi->s_pid), _LLU(inode->i_ino + EXOFS_OBJ_OFF)); | |
1037 | - make_bad_inode(inode); | |
1038 | - } else | |
1039 | - set_obj_created(oi); | |
1031 | + _LLU(exofs_oi_objno(oi)), _LLU(sbi->s_pid)); | |
1032 | + /*TODO: When FS is corrupted creation can fail, object already | |
1033 | + * exist. Get rid of this asynchronous creation, if exist | |
1034 | + * increment the obj counter and try the next object. Until we | |
1035 | + * succeed. All these dangling objects will be made into lost | |
1036 | + * files by chkfs.exofs | |
1037 | + */ | |
1038 | + } | |
1040 | 1039 | |
1040 | + set_obj_created(oi); | |
1041 | + | |
1041 | 1042 | atomic_dec(&inode->i_count); |
1042 | 1043 | wake_up(&oi->i_wq); |
1043 | 1044 | } |
... | ... | @@ -1051,8 +1052,7 @@ |
1051 | 1052 | struct inode *inode; |
1052 | 1053 | struct exofs_i_info *oi; |
1053 | 1054 | struct exofs_sb_info *sbi; |
1054 | - struct osd_request *or; | |
1055 | - struct osd_obj_id obj; | |
1055 | + struct exofs_io_state *ios; | |
1056 | 1056 | int ret; |
1057 | 1057 | |
1058 | 1058 | sb = dir->i_sb; |
1059 | 1059 | |
... | ... | @@ -1061,8 +1061,8 @@ |
1061 | 1061 | return ERR_PTR(-ENOMEM); |
1062 | 1062 | |
1063 | 1063 | oi = exofs_i(inode); |
1064 | + __oi_init(oi); | |
1064 | 1065 | |
1065 | - init_waitqueue_head(&oi->i_wq); | |
1066 | 1066 | set_obj_2bcreated(oi); |
1067 | 1067 | |
1068 | 1068 | sbi = sb->s_fs_info; |
1069 | 1069 | |
1070 | 1070 | |
1071 | 1071 | |
... | ... | @@ -1089,28 +1089,28 @@ |
1089 | 1089 | |
1090 | 1090 | mark_inode_dirty(inode); |
1091 | 1091 | |
1092 | - obj.partition = sbi->s_pid; | |
1093 | - obj.id = inode->i_ino + EXOFS_OBJ_OFF; | |
1094 | - exofs_make_credential(oi->i_cred, &obj); | |
1095 | - | |
1096 | - or = osd_start_request(sbi->s_dev, GFP_KERNEL); | |
1097 | - if (unlikely(!or)) { | |
1098 | - EXOFS_ERR("exofs_new_inode: osd_start_request failed\n"); | |
1099 | - return ERR_PTR(-ENOMEM); | |
1092 | + ret = exofs_get_io_state(sbi, &ios); | |
1093 | + if (unlikely(ret)) { | |
1094 | + EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n"); | |
1095 | + return ERR_PTR(ret); | |
1100 | 1096 | } |
1101 | 1097 | |
1102 | - osd_req_create_object(or, &obj); | |
1098 | + ios->obj.id = exofs_oi_objno(oi); | |
1099 | + exofs_make_credential(oi->i_cred, &ios->obj); | |
1103 | 1100 | |
1104 | 1101 | /* increment the refcount so that the inode will still be around when we |
1105 | 1102 | * reach the callback |
1106 | 1103 | */ |
1107 | 1104 | atomic_inc(&inode->i_count); |
1108 | 1105 | |
1109 | - ret = exofs_async_op(or, create_done, inode, oi->i_cred); | |
1106 | + ios->done = create_done; | |
1107 | + ios->private = inode; | |
1108 | + ios->cred = oi->i_cred; | |
1109 | + ret = exofs_sbi_create(ios); | |
1110 | 1110 | if (ret) { |
1111 | 1111 | atomic_dec(&inode->i_count); |
1112 | - osd_end_request(or); | |
1113 | - return ERR_PTR(-EIO); | |
1112 | + exofs_put_io_state(ios); | |
1113 | + return ERR_PTR(ret); | |
1114 | 1114 | } |
1115 | 1115 | atomic_inc(&sbi->s_curr_pending); |
1116 | 1116 | |
1117 | 1117 | |
... | ... | @@ -1128,11 +1128,11 @@ |
1128 | 1128 | /* |
1129 | 1129 | * Callback function from exofs_update_inode(). |
1130 | 1130 | */ |
1131 | -static void updatei_done(struct osd_request *or, void *p) | |
1131 | +static void updatei_done(struct exofs_io_state *ios, void *p) | |
1132 | 1132 | { |
1133 | 1133 | struct updatei_args *args = p; |
1134 | 1134 | |
1135 | - osd_end_request(or); | |
1135 | + exofs_put_io_state(ios); | |
1136 | 1136 | |
1137 | 1137 | atomic_dec(&args->sbi->s_curr_pending); |
1138 | 1138 | |
... | ... | @@ -1148,8 +1148,7 @@ |
1148 | 1148 | struct exofs_i_info *oi = exofs_i(inode); |
1149 | 1149 | struct super_block *sb = inode->i_sb; |
1150 | 1150 | struct exofs_sb_info *sbi = sb->s_fs_info; |
1151 | - struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | |
1152 | - struct osd_request *or; | |
1151 | + struct exofs_io_state *ios; | |
1153 | 1152 | struct osd_attr attr; |
1154 | 1153 | struct exofs_fcb *fcb; |
1155 | 1154 | struct updatei_args *args; |
1156 | 1155 | |
1157 | 1156 | |
... | ... | @@ -1186,18 +1185,16 @@ |
1186 | 1185 | } else |
1187 | 1186 | memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); |
1188 | 1187 | |
1189 | - or = osd_start_request(sbi->s_dev, GFP_KERNEL); | |
1190 | - if (unlikely(!or)) { | |
1191 | - EXOFS_ERR("exofs_update_inode: osd_start_request failed.\n"); | |
1192 | - ret = -ENOMEM; | |
1188 | + ret = exofs_get_io_state(sbi, &ios); | |
1189 | + if (unlikely(ret)) { | |
1190 | + EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); | |
1193 | 1191 | goto free_args; |
1194 | 1192 | } |
1195 | 1193 | |
1196 | - osd_req_set_attributes(or, &obj); | |
1197 | - | |
1198 | 1194 | attr = g_attr_inode_data; |
1199 | 1195 | attr.val_ptr = fcb; |
1200 | - osd_req_add_set_attr_list(or, &attr, 1); | |
1196 | + ios->out_attr_len = 1; | |
1197 | + ios->out_attr = &attr; | |
1201 | 1198 | |
1202 | 1199 | if (!obj_created(oi)) { |
1203 | 1200 | EXOFS_DBGMSG("!obj_created\n"); |
1204 | 1201 | |
1205 | 1202 | |
1206 | 1203 | |
... | ... | @@ -1206,22 +1203,19 @@ |
1206 | 1203 | EXOFS_DBGMSG("wait_event done\n"); |
1207 | 1204 | } |
1208 | 1205 | |
1209 | - if (do_sync) { | |
1210 | - ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | |
1211 | - osd_end_request(or); | |
1212 | - goto free_args; | |
1213 | - } else { | |
1206 | + if (!do_sync) { | |
1214 | 1207 | args->sbi = sbi; |
1208 | + ios->done = updatei_done; | |
1209 | + ios->private = args; | |
1210 | + } | |
1215 | 1211 | |
1216 | - ret = exofs_async_op(or, updatei_done, args, oi->i_cred); | |
1217 | - if (ret) { | |
1218 | - osd_end_request(or); | |
1219 | - goto free_args; | |
1220 | - } | |
1212 | + ret = exofs_oi_write(oi, ios); | |
1213 | + if (!do_sync && !ret) { | |
1221 | 1214 | atomic_inc(&sbi->s_curr_pending); |
1222 | 1215 | goto out; /* deallocation in updatei_done */ |
1223 | 1216 | } |
1224 | 1217 | |
1218 | + exofs_put_io_state(ios); | |
1225 | 1219 | free_args: |
1226 | 1220 | kfree(args); |
1227 | 1221 | out: |
1228 | 1222 | |
... | ... | @@ -1238,11 +1232,12 @@ |
1238 | 1232 | * Callback function from exofs_delete_inode() - don't have much cleaning up to |
1239 | 1233 | * do. |
1240 | 1234 | */ |
1241 | -static void delete_done(struct osd_request *or, void *p) | |
1235 | +static void delete_done(struct exofs_io_state *ios, void *p) | |
1242 | 1236 | { |
1243 | - struct exofs_sb_info *sbi; | |
1244 | - osd_end_request(or); | |
1245 | - sbi = p; | |
1237 | + struct exofs_sb_info *sbi = p; | |
1238 | + | |
1239 | + exofs_put_io_state(ios); | |
1240 | + | |
1246 | 1241 | atomic_dec(&sbi->s_curr_pending); |
1247 | 1242 | } |
1248 | 1243 | |
... | ... | @@ -1256,8 +1251,7 @@ |
1256 | 1251 | struct exofs_i_info *oi = exofs_i(inode); |
1257 | 1252 | struct super_block *sb = inode->i_sb; |
1258 | 1253 | struct exofs_sb_info *sbi = sb->s_fs_info; |
1259 | - struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | |
1260 | - struct osd_request *or; | |
1254 | + struct exofs_io_state *ios; | |
1261 | 1255 | int ret; |
1262 | 1256 | |
1263 | 1257 | truncate_inode_pages(&inode->i_data, 0); |
1264 | 1258 | |
1265 | 1259 | |
1266 | 1260 | |
... | ... | @@ -1274,25 +1268,26 @@ |
1274 | 1268 | |
1275 | 1269 | clear_inode(inode); |
1276 | 1270 | |
1277 | - or = osd_start_request(sbi->s_dev, GFP_KERNEL); | |
1278 | - if (unlikely(!or)) { | |
1279 | - EXOFS_ERR("exofs_delete_inode: osd_start_request failed\n"); | |
1271 | + ret = exofs_get_io_state(sbi, &ios); | |
1272 | + if (unlikely(ret)) { | |
1273 | + EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); | |
1280 | 1274 | return; |
1281 | 1275 | } |
1282 | 1276 | |
1283 | - osd_req_remove_object(or, &obj); | |
1284 | - | |
1285 | 1277 | /* if we are deleting an obj that hasn't been created yet, wait */ |
1286 | 1278 | if (!obj_created(oi)) { |
1287 | 1279 | BUG_ON(!obj_2bcreated(oi)); |
1288 | 1280 | wait_event(oi->i_wq, obj_created(oi)); |
1289 | 1281 | } |
1290 | 1282 | |
1291 | - ret = exofs_async_op(or, delete_done, sbi, oi->i_cred); | |
1283 | + ios->obj.id = exofs_oi_objno(oi); | |
1284 | + ios->done = delete_done; | |
1285 | + ios->private = sbi; | |
1286 | + ios->cred = oi->i_cred; | |
1287 | + ret = exofs_sbi_remove(ios); | |
1292 | 1288 | if (ret) { |
1293 | - EXOFS_ERR( | |
1294 | - "ERROR: @exofs_delete_inode exofs_async_op failed\n"); | |
1295 | - osd_end_request(or); | |
1289 | + EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__); | |
1290 | + exofs_put_io_state(ios); | |
1296 | 1291 | return; |
1297 | 1292 | } |
1298 | 1293 | atomic_inc(&sbi->s_curr_pending); |
fs/exofs/ios.c
1 | +/* | |
2 | + * Copyright (C) 2005, 2006 | |
3 | + * Avishay Traeger (avishay@gmail.com) | |
4 | + * Copyright (C) 2008, 2009 | |
5 | + * Boaz Harrosh <bharrosh@panasas.com> | |
6 | + * | |
7 | + * This file is part of exofs. | |
8 | + * | |
9 | + * exofs is free software; you can redistribute it and/or modify | |
10 | + * it under the terms of the GNU General Public License as published by | |
11 | + * the Free Software Foundation. Since it is based on ext2, and the only | |
12 | + * valid version of GPL for the Linux kernel is version 2, the only valid | |
13 | + * version of GPL for exofs is version 2. | |
14 | + * | |
15 | + * exofs is distributed in the hope that it will be useful, | |
16 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | + * GNU General Public License for more details. | |
19 | + * | |
20 | + * You should have received a copy of the GNU General Public License | |
21 | + * along with exofs; if not, write to the Free Software | |
22 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
23 | + */ | |
24 | + | |
25 | +#include <scsi/scsi_device.h> | |
26 | + | |
27 | +#include "exofs.h" | |
28 | + | |
29 | +void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) | |
30 | +{ | |
31 | + osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); | |
32 | +} | |
33 | + | |
34 | +int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, | |
35 | + u64 offset, void *p, unsigned length) | |
36 | +{ | |
37 | + struct osd_request *or = osd_start_request(od, GFP_KERNEL); | |
38 | +/* struct osd_sense_info osi = {.key = 0};*/ | |
39 | + int ret; | |
40 | + | |
41 | + if (unlikely(!or)) { | |
42 | + EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__); | |
43 | + return -ENOMEM; | |
44 | + } | |
45 | + ret = osd_req_read_kern(or, obj, offset, p, length); | |
46 | + if (unlikely(ret)) { | |
47 | + EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__); | |
48 | + goto out; | |
49 | + } | |
50 | + | |
51 | + ret = osd_finalize_request(or, 0, cred, NULL); | |
52 | + if (unlikely(ret)) { | |
53 | + EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); | |
54 | + goto out; | |
55 | + } | |
56 | + | |
57 | + ret = osd_execute_request(or); | |
58 | + if (unlikely(ret)) | |
59 | + EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); | |
60 | + /* osd_req_decode_sense(or, ret); */ | |
61 | + | |
62 | +out: | |
63 | + osd_end_request(or); | |
64 | + return ret; | |
65 | +} | |
66 | + | |
67 | +int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** pios) | |
68 | +{ | |
69 | + struct exofs_io_state *ios; | |
70 | + | |
71 | + /*TODO: Maybe use kmem_cach per sbi of size | |
72 | + * exofs_io_state_size(sbi->s_numdevs) | |
73 | + */ | |
74 | + ios = kzalloc(exofs_io_state_size(sbi->s_numdevs), GFP_KERNEL); | |
75 | + if (unlikely(!ios)) { | |
76 | + *pios = NULL; | |
77 | + return -ENOMEM; | |
78 | + } | |
79 | + | |
80 | + ios->sbi = sbi; | |
81 | + ios->obj.partition = sbi->s_pid; | |
82 | + *pios = ios; | |
83 | + return 0; | |
84 | +} | |
85 | + | |
86 | +void exofs_put_io_state(struct exofs_io_state *ios) | |
87 | +{ | |
88 | + if (ios) { | |
89 | + unsigned i; | |
90 | + | |
91 | + for (i = 0; i < ios->numdevs; i++) { | |
92 | + struct exofs_per_dev_state *per_dev = &ios->per_dev[i]; | |
93 | + | |
94 | + if (per_dev->or) | |
95 | + osd_end_request(per_dev->or); | |
96 | + if (per_dev->bio) | |
97 | + bio_put(per_dev->bio); | |
98 | + } | |
99 | + | |
100 | + kfree(ios); | |
101 | + } | |
102 | +} | |
103 | + | |
/*
 * Done-callback installed by exofs_io_execute() for synchronous operation:
 * @p is the on-stack completion the issuer is waiting on.
 */
static void _sync_done(struct exofs_io_state *ios, void *p)
{
	struct completion *issuer_wait = p;

	complete(issuer_wait);
}
110 | + | |
111 | +static void _last_io(struct kref *kref) | |
112 | +{ | |
113 | + struct exofs_io_state *ios = container_of( | |
114 | + kref, struct exofs_io_state, kref); | |
115 | + | |
116 | + ios->done(ios, ios->private); | |
117 | +} | |
118 | + | |
119 | +static void _done_io(struct osd_request *or, void *p) | |
120 | +{ | |
121 | + struct exofs_io_state *ios = p; | |
122 | + | |
123 | + kref_put(&ios->kref, _last_io); | |
124 | +} | |
125 | + | |
126 | +static int exofs_io_execute(struct exofs_io_state *ios) | |
127 | +{ | |
128 | + DECLARE_COMPLETION_ONSTACK(wait); | |
129 | + bool sync = (ios->done == NULL); | |
130 | + int i, ret; | |
131 | + | |
132 | + if (sync) { | |
133 | + ios->done = _sync_done; | |
134 | + ios->private = &wait; | |
135 | + } | |
136 | + | |
137 | + for (i = 0; i < ios->numdevs; i++) { | |
138 | + struct osd_request *or = ios->per_dev[i].or; | |
139 | + if (unlikely(!or)) | |
140 | + continue; | |
141 | + | |
142 | + ret = osd_finalize_request(or, 0, ios->cred, NULL); | |
143 | + if (unlikely(ret)) { | |
144 | + EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", | |
145 | + ret); | |
146 | + return ret; | |
147 | + } | |
148 | + } | |
149 | + | |
150 | + kref_init(&ios->kref); | |
151 | + | |
152 | + for (i = 0; i < ios->numdevs; i++) { | |
153 | + struct osd_request *or = ios->per_dev[i].or; | |
154 | + if (unlikely(!or)) | |
155 | + continue; | |
156 | + | |
157 | + kref_get(&ios->kref); | |
158 | + osd_execute_request_async(or, _done_io, ios); | |
159 | + } | |
160 | + | |
161 | + kref_put(&ios->kref, _last_io); | |
162 | + ret = 0; | |
163 | + | |
164 | + if (sync) { | |
165 | + wait_for_completion(&wait); | |
166 | + ret = exofs_check_io(ios, NULL); | |
167 | + } | |
168 | + return ret; | |
169 | +} | |
170 | + | |
171 | +int exofs_check_io(struct exofs_io_state *ios, u64 *resid) | |
172 | +{ | |
173 | + enum osd_err_priority acumulated_osd_err = 0; | |
174 | + int acumulated_lin_err = 0; | |
175 | + int i; | |
176 | + | |
177 | + for (i = 0; i < ios->numdevs; i++) { | |
178 | + struct osd_sense_info osi; | |
179 | + int ret = osd_req_decode_sense(ios->per_dev[i].or, &osi); | |
180 | + | |
181 | + if (likely(!ret)) | |
182 | + continue; | |
183 | + | |
184 | + if (unlikely(ret == -EFAULT)) { | |
185 | + EXOFS_DBGMSG("%s: EFAULT Need page clear\n", __func__); | |
186 | + /*FIXME: All the pages in this device range should: | |
187 | + * clear_highpage(page); | |
188 | + */ | |
189 | + } | |
190 | + | |
191 | + if (osi.osd_err_pri >= acumulated_osd_err) { | |
192 | + acumulated_osd_err = osi.osd_err_pri; | |
193 | + acumulated_lin_err = ret; | |
194 | + } | |
195 | + } | |
196 | + | |
197 | + /* TODO: raid specific residual calculations */ | |
198 | + if (resid) { | |
199 | + if (likely(!acumulated_lin_err)) | |
200 | + *resid = 0; | |
201 | + else | |
202 | + *resid = ios->length; | |
203 | + } | |
204 | + | |
205 | + return acumulated_lin_err; | |
206 | +} | |
207 | + | |
208 | +int exofs_sbi_create(struct exofs_io_state *ios) | |
209 | +{ | |
210 | + int i, ret; | |
211 | + | |
212 | + for (i = 0; i < ios->sbi->s_numdevs; i++) { | |
213 | + struct osd_request *or; | |
214 | + | |
215 | + or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL); | |
216 | + if (unlikely(!or)) { | |
217 | + EXOFS_ERR("%s: osd_start_request failed\n", __func__); | |
218 | + ret = -ENOMEM; | |
219 | + goto out; | |
220 | + } | |
221 | + ios->per_dev[i].or = or; | |
222 | + ios->numdevs++; | |
223 | + | |
224 | + osd_req_create_object(or, &ios->obj); | |
225 | + } | |
226 | + ret = exofs_io_execute(ios); | |
227 | + | |
228 | +out: | |
229 | + return ret; | |
230 | +} | |
231 | + | |
232 | +int exofs_sbi_remove(struct exofs_io_state *ios) | |
233 | +{ | |
234 | + int i, ret; | |
235 | + | |
236 | + for (i = 0; i < ios->sbi->s_numdevs; i++) { | |
237 | + struct osd_request *or; | |
238 | + | |
239 | + or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL); | |
240 | + if (unlikely(!or)) { | |
241 | + EXOFS_ERR("%s: osd_start_request failed\n", __func__); | |
242 | + ret = -ENOMEM; | |
243 | + goto out; | |
244 | + } | |
245 | + ios->per_dev[i].or = or; | |
246 | + ios->numdevs++; | |
247 | + | |
248 | + osd_req_remove_object(or, &ios->obj); | |
249 | + } | |
250 | + ret = exofs_io_execute(ios); | |
251 | + | |
252 | +out: | |
253 | + return ret; | |
254 | +} | |
255 | + | |
256 | +int exofs_sbi_write(struct exofs_io_state *ios) | |
257 | +{ | |
258 | + int i, ret; | |
259 | + | |
260 | + for (i = 0; i < ios->sbi->s_numdevs; i++) { | |
261 | + struct osd_request *or; | |
262 | + | |
263 | + or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL); | |
264 | + if (unlikely(!or)) { | |
265 | + EXOFS_ERR("%s: osd_start_request failed\n", __func__); | |
266 | + ret = -ENOMEM; | |
267 | + goto out; | |
268 | + } | |
269 | + ios->per_dev[i].or = or; | |
270 | + ios->numdevs++; | |
271 | + | |
272 | + if (ios->bio) { | |
273 | + struct bio *bio; | |
274 | + | |
275 | + if (i != 0) { | |
276 | + bio = bio_kmalloc(GFP_KERNEL, | |
277 | + ios->bio->bi_max_vecs); | |
278 | + if (unlikely(!bio)) { | |
279 | + ret = -ENOMEM; | |
280 | + goto out; | |
281 | + } | |
282 | + | |
283 | + __bio_clone(bio, ios->bio); | |
284 | + bio->bi_bdev = NULL; | |
285 | + bio->bi_next = NULL; | |
286 | + ios->per_dev[i].bio = bio; | |
287 | + } else { | |
288 | + bio = ios->bio; | |
289 | + } | |
290 | + | |
291 | + osd_req_write(or, &ios->obj, ios->offset, bio, | |
292 | + ios->length); | |
293 | +/* EXOFS_DBGMSG("write sync=%d\n", sync);*/ | |
294 | + } else if (ios->kern_buff) { | |
295 | + osd_req_write_kern(or, &ios->obj, ios->offset, | |
296 | + ios->kern_buff, ios->length); | |
297 | +/* EXOFS_DBGMSG("write_kern sync=%d\n", sync);*/ | |
298 | + } else { | |
299 | + osd_req_set_attributes(or, &ios->obj); | |
300 | +/* EXOFS_DBGMSG("set_attributes sync=%d\n", sync);*/ | |
301 | + } | |
302 | + | |
303 | + if (ios->out_attr) | |
304 | + osd_req_add_set_attr_list(or, ios->out_attr, | |
305 | + ios->out_attr_len); | |
306 | + | |
307 | + if (ios->in_attr) | |
308 | + osd_req_add_get_attr_list(or, ios->in_attr, | |
309 | + ios->in_attr_len); | |
310 | + } | |
311 | + ret = exofs_io_execute(ios); | |
312 | + | |
313 | +out: | |
314 | + return ret; | |
315 | +} | |
316 | + | |
317 | +int exofs_sbi_read(struct exofs_io_state *ios) | |
318 | +{ | |
319 | + int i, ret; | |
320 | + | |
321 | + for (i = 0; i < 1; i++) { | |
322 | + struct osd_request *or; | |
323 | + unsigned first_dev = (unsigned)ios->obj.id; | |
324 | + | |
325 | + first_dev %= ios->sbi->s_numdevs; | |
326 | + or = osd_start_request(ios->sbi->s_ods[first_dev], GFP_KERNEL); | |
327 | + if (unlikely(!or)) { | |
328 | + EXOFS_ERR("%s: osd_start_request failed\n", __func__); | |
329 | + ret = -ENOMEM; | |
330 | + goto out; | |
331 | + } | |
332 | + ios->per_dev[i].or = or; | |
333 | + ios->numdevs++; | |
334 | + | |
335 | + if (ios->bio) { | |
336 | + osd_req_read(or, &ios->obj, ios->offset, ios->bio, | |
337 | + ios->length); | |
338 | +/* EXOFS_DBGMSG("read sync=%d\n", sync);*/ | |
339 | + } else if (ios->kern_buff) { | |
340 | + osd_req_read_kern(or, &ios->obj, ios->offset, | |
341 | + ios->kern_buff, ios->length); | |
342 | +/* EXOFS_DBGMSG("read_kern sync=%d\n", sync);*/ | |
343 | + } else { | |
344 | + osd_req_get_attributes(or, &ios->obj); | |
345 | +/* EXOFS_DBGMSG("get_attributes sync=%d\n", sync);*/ | |
346 | + } | |
347 | + | |
348 | + if (ios->out_attr) | |
349 | + osd_req_add_set_attr_list(or, ios->out_attr, | |
350 | + ios->out_attr_len); | |
351 | + | |
352 | + if (ios->in_attr) | |
353 | + osd_req_add_get_attr_list(or, ios->in_attr, | |
354 | + ios->in_attr_len); | |
355 | + } | |
356 | + ret = exofs_io_execute(ios); | |
357 | + | |
358 | +out: | |
359 | + return ret; | |
360 | +} | |
361 | + | |
362 | +int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr) | |
363 | +{ | |
364 | + struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ | |
365 | + void *iter = NULL; | |
366 | + int nelem; | |
367 | + | |
368 | + do { | |
369 | + nelem = 1; | |
370 | + osd_req_decode_get_attr_list(ios->per_dev[0].or, | |
371 | + &cur_attr, &nelem, &iter); | |
372 | + if ((cur_attr.attr_page == attr->attr_page) && | |
373 | + (cur_attr.attr_id == attr->attr_id)) { | |
374 | + attr->len = cur_attr.len; | |
375 | + attr->val_ptr = cur_attr.val_ptr; | |
376 | + return 0; | |
377 | + } | |
378 | + } while (iter); | |
379 | + | |
380 | + return -EIO; | |
381 | +} | |
382 | + | |
383 | +int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) | |
384 | +{ | |
385 | + struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info; | |
386 | + struct exofs_io_state *ios; | |
387 | + struct osd_attr attr; | |
388 | + __be64 newsize; | |
389 | + int i, ret; | |
390 | + | |
391 | + if (exofs_get_io_state(sbi, &ios)) | |
392 | + return -ENOMEM; | |
393 | + | |
394 | + ios->obj.id = exofs_oi_objno(oi); | |
395 | + ios->cred = oi->i_cred; | |
396 | + | |
397 | + newsize = cpu_to_be64(size); | |
398 | + attr = g_attr_logical_length; | |
399 | + attr.val_ptr = &newsize; | |
400 | + | |
401 | + for (i = 0; i < sbi->s_numdevs; i++) { | |
402 | + struct osd_request *or; | |
403 | + | |
404 | + or = osd_start_request(sbi->s_ods[i], GFP_KERNEL); | |
405 | + if (unlikely(!or)) { | |
406 | + EXOFS_ERR("%s: osd_start_request failed\n", __func__); | |
407 | + ret = -ENOMEM; | |
408 | + goto out; | |
409 | + } | |
410 | + ios->per_dev[i].or = or; | |
411 | + ios->numdevs++; | |
412 | + | |
413 | + osd_req_set_attributes(or, &ios->obj); | |
414 | + osd_req_add_set_attr_list(or, &attr, 1); | |
415 | + } | |
416 | + ret = exofs_io_execute(ios); | |
417 | + | |
418 | +out: | |
419 | + exofs_put_io_state(ios); | |
420 | + return ret; | |
421 | +} |
fs/exofs/osd.c
1 | -/* | |
2 | - * Copyright (C) 2005, 2006 | |
3 | - * Avishay Traeger (avishay@gmail.com) | |
4 | - * Copyright (C) 2008, 2009 | |
5 | - * Boaz Harrosh <bharrosh@panasas.com> | |
6 | - * | |
7 | - * This file is part of exofs. | |
8 | - * | |
9 | - * exofs is free software; you can redistribute it and/or modify | |
10 | - * it under the terms of the GNU General Public License as published by | |
11 | - * the Free Software Foundation. Since it is based on ext2, and the only | |
12 | - * valid version of GPL for the Linux kernel is version 2, the only valid | |
13 | - * version of GPL for exofs is version 2. | |
14 | - * | |
15 | - * exofs is distributed in the hope that it will be useful, | |
16 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | - * GNU General Public License for more details. | |
19 | - * | |
20 | - * You should have received a copy of the GNU General Public License | |
21 | - * along with exofs; if not, write to the Free Software | |
22 | - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
23 | - */ | |
24 | - | |
25 | -#include <scsi/scsi_device.h> | |
26 | -#include <scsi/osd_sense.h> | |
27 | - | |
28 | -#include "exofs.h" | |
29 | - | |
30 | -int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid) | |
31 | -{ | |
32 | - struct osd_sense_info osi; | |
33 | - int ret = osd_req_decode_sense(or, &osi); | |
34 | - | |
35 | - if (ret) { /* translate to Linux codes */ | |
36 | - if (osi.additional_code == scsi_invalid_field_in_cdb) { | |
37 | - if (osi.cdb_field_offset == OSD_CFO_STARTING_BYTE) | |
38 | - ret = -EFAULT; | |
39 | - if (osi.cdb_field_offset == OSD_CFO_OBJECT_ID) | |
40 | - ret = -ENOENT; | |
41 | - else | |
42 | - ret = -EINVAL; | |
43 | - } else if (osi.additional_code == osd_quota_error) | |
44 | - ret = -ENOSPC; | |
45 | - else | |
46 | - ret = -EIO; | |
47 | - } | |
48 | - | |
49 | - /* FIXME: should be include in osd_sense_info */ | |
50 | - if (in_resid) | |
51 | - *in_resid = or->in.req ? or->in.req->resid_len : 0; | |
52 | - | |
53 | - if (out_resid) | |
54 | - *out_resid = or->out.req ? or->out.req->resid_len : 0; | |
55 | - | |
56 | - return ret; | |
57 | -} | |
58 | - | |
59 | -void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) | |
60 | -{ | |
61 | - osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); | |
62 | -} | |
63 | - | |
64 | -/* | |
65 | - * Perform a synchronous OSD operation. | |
66 | - */ | |
67 | -int exofs_sync_op(struct osd_request *or, int timeout, uint8_t *credential) | |
68 | -{ | |
69 | - int ret; | |
70 | - | |
71 | - or->timeout = timeout; | |
72 | - ret = osd_finalize_request(or, 0, credential, NULL); | |
73 | - if (ret) { | |
74 | - EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); | |
75 | - return ret; | |
76 | - } | |
77 | - | |
78 | - ret = osd_execute_request(or); | |
79 | - | |
80 | - if (ret) | |
81 | - EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); | |
82 | - /* osd_req_decode_sense(or, ret); */ | |
83 | - return ret; | |
84 | -} | |
85 | - | |
86 | -/* | |
87 | - * Perform an asynchronous OSD operation. | |
88 | - */ | |
89 | -int exofs_async_op(struct osd_request *or, osd_req_done_fn *async_done, | |
90 | - void *caller_context, u8 *cred) | |
91 | -{ | |
92 | - int ret; | |
93 | - | |
94 | - ret = osd_finalize_request(or, 0, cred, NULL); | |
95 | - if (ret) { | |
96 | - EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); | |
97 | - return ret; | |
98 | - } | |
99 | - | |
100 | - ret = osd_execute_request_async(or, async_done, caller_context); | |
101 | - | |
102 | - if (ret) | |
103 | - EXOFS_DBGMSG("osd_execute_request_async() => %d\n", ret); | |
104 | - return ret; | |
105 | -} | |
106 | - | |
107 | -int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) | |
108 | -{ | |
109 | - struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ | |
110 | - void *iter = NULL; | |
111 | - int nelem; | |
112 | - | |
113 | - do { | |
114 | - nelem = 1; | |
115 | - osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter); | |
116 | - if ((cur_attr.attr_page == attr->attr_page) && | |
117 | - (cur_attr.attr_id == attr->attr_id)) { | |
118 | - attr->len = cur_attr.len; | |
119 | - attr->val_ptr = cur_attr.val_ptr; | |
120 | - return 0; | |
121 | - } | |
122 | - } while (iter); | |
123 | - | |
124 | - return -EIO; | |
125 | -} |
fs/exofs/pnfs.h
1 | +/* | |
2 | + * Copyright (C) 2008, 2009 | |
3 | + * Boaz Harrosh <bharrosh@panasas.com> | |
4 | + * | |
5 | + * This file is part of exofs. | |
6 | + * | |
7 | + * exofs is free software; you can redistribute it and/or modify it under the | |
8 | + * terms of the GNU General Public License version 2 as published by the Free | |
9 | + * Software Foundation. | |
10 | + * | |
11 | + */ | |
12 | + | |
13 | +/* FIXME: Remove this file once pnfs hits mainline */ | |
14 | + | |
15 | +#ifndef __EXOFS_PNFS_H__ | |
16 | +#define __EXOFS_PNFS_H__ | |
17 | + | |
18 | +#if defined(CONFIG_PNFS) | |
19 | + | |
20 | + | |
21 | +/* FIXME: move this file to: linux/exportfs/pnfs_osd_xdr.h */ | |
22 | +#include "../nfs/objlayout/pnfs_osd_xdr.h" | |
23 | + | |
24 | +#else /* defined(CONFIG_PNFS) */ | |
25 | + | |
26 | +enum pnfs_iomode { | |
27 | + IOMODE_READ = 1, | |
28 | + IOMODE_RW = 2, | |
29 | + IOMODE_ANY = 3, | |
30 | +}; | |
31 | + | |
32 | +/* Layout Structure */ | |
33 | +enum pnfs_osd_raid_algorithm4 { | |
34 | + PNFS_OSD_RAID_0 = 1, | |
35 | + PNFS_OSD_RAID_4 = 2, | |
36 | + PNFS_OSD_RAID_5 = 3, | |
37 | + PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */ | |
38 | +}; | |
39 | + | |
40 | +struct pnfs_osd_data_map { | |
41 | + u32 odm_num_comps; | |
42 | + u64 odm_stripe_unit; | |
43 | + u32 odm_group_width; | |
44 | + u32 odm_group_depth; | |
45 | + u32 odm_mirror_cnt; | |
46 | + u32 odm_raid_algorithm; | |
47 | +}; | |
48 | + | |
49 | +#endif /* else defined(CONFIG_PNFS) */ | |
50 | + | |
51 | +#endif /* __EXOFS_PNFS_H__ */ |
fs/exofs/super.c
... | ... | @@ -203,49 +203,45 @@ |
203 | 203 | { |
204 | 204 | struct exofs_sb_info *sbi; |
205 | 205 | struct exofs_fscb *fscb; |
206 | - struct osd_request *or; | |
207 | - struct osd_obj_id obj; | |
206 | + struct exofs_io_state *ios; | |
208 | 207 | int ret = -ENOMEM; |
209 | 208 | |
210 | - fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL); | |
211 | - if (!fscb) { | |
212 | - EXOFS_ERR("exofs_write_super: memory allocation failed.\n"); | |
213 | - return -ENOMEM; | |
214 | - } | |
215 | - | |
216 | 209 | lock_super(sb); |
217 | 210 | sbi = sb->s_fs_info; |
211 | + fscb = &sbi->s_fscb; | |
212 | + | |
213 | + ret = exofs_get_io_state(sbi, &ios); | |
214 | + if (ret) | |
215 | + goto out; | |
216 | + | |
217 | + /* Note: We only write the changing part of the fscb, i.e. up to the | |
218 | + * fscb->s_dev_table_oid member. There is no read-modify-write | |
219 | + * here. | |
220 | + */ | |
221 | + ios->length = offsetof(struct exofs_fscb, s_dev_table_oid); | |
222 | + memset(fscb, 0, ios->length); | |
218 | 223 | fscb->s_nextid = cpu_to_le64(sbi->s_nextid); |
219 | 224 | fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); |
220 | 225 | fscb->s_magic = cpu_to_le16(sb->s_magic); |
221 | 226 | fscb->s_newfs = 0; |
227 | + fscb->s_version = EXOFS_FSCB_VER; | |
222 | 228 | |
223 | - or = osd_start_request(sbi->s_dev, GFP_KERNEL); | |
224 | - if (unlikely(!or)) { | |
225 | - EXOFS_ERR("exofs_write_super: osd_start_request failed.\n"); | |
226 | - goto out; | |
227 | - } | |
229 | + ios->obj.id = EXOFS_SUPER_ID; | |
230 | + ios->offset = 0; | |
231 | + ios->kern_buff = fscb; | |
232 | + ios->cred = sbi->s_cred; | |
228 | 233 | |
229 | - obj.partition = sbi->s_pid; | |
230 | - obj.id = EXOFS_SUPER_ID; | |
231 | - ret = osd_req_write_kern(or, &obj, 0, fscb, sizeof(*fscb)); | |
234 | + ret = exofs_sbi_write(ios); | |
232 | 235 | if (unlikely(ret)) { |
233 | - EXOFS_ERR("exofs_write_super: osd_req_write_kern failed.\n"); | |
236 | + EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); | |
234 | 237 | goto out; |
235 | 238 | } |
236 | - | |
237 | - ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred); | |
238 | - if (unlikely(ret)) { | |
239 | - EXOFS_ERR("exofs_write_super: exofs_sync_op failed.\n"); | |
240 | - goto out; | |
241 | - } | |
242 | 239 | sb->s_dirt = 0; |
243 | 240 | |
244 | 241 | out: |
245 | - if (or) | |
246 | - osd_end_request(or); | |
242 | + EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); | |
243 | + exofs_put_io_state(ios); | |
247 | 244 | unlock_super(sb); |
248 | - kfree(fscb); | |
249 | 245 | return ret; |
250 | 246 | } |
251 | 247 | |
... | ... | @@ -257,6 +253,29 @@ |
257 | 253 | sb->s_dirt = 0; |
258 | 254 | } |
259 | 255 | |
256 | +static void _exofs_print_device(const char *msg, const char *dev_path, | |
257 | + struct osd_dev *od, u64 pid) | |
258 | +{ | |
259 | + const struct osd_dev_info *odi = osduld_device_info(od); | |
260 | + | |
261 | + printk(KERN_NOTICE "exofs: %s %s osd_name-%s pid-0x%llx\n", | |
262 | + msg, dev_path ?: "", odi->osdname, _LLU(pid)); | |
263 | +} | |
264 | + | |
265 | +void exofs_free_sbi(struct exofs_sb_info *sbi) | |
266 | +{ | |
267 | + while (sbi->s_numdevs) { | |
268 | + int i = --sbi->s_numdevs; | |
269 | + struct osd_dev *od = sbi->s_ods[i]; | |
270 | + | |
271 | + if (od) { | |
272 | + sbi->s_ods[i] = NULL; | |
273 | + osduld_put_device(od); | |
274 | + } | |
275 | + } | |
276 | + kfree(sbi); | |
277 | +} | |
278 | + | |
260 | 279 | /* |
261 | 280 | * This function is called when the vfs is freeing the superblock. We just |
262 | 281 | * need to free our own part. |
263 | 282 | |
... | ... | @@ -279,11 +298,182 @@ |
279 | 298 | msecs_to_jiffies(100)); |
280 | 299 | } |
281 | 300 | |
282 | - osduld_put_device(sbi->s_dev); | |
283 | - kfree(sb->s_fs_info); | |
301 | + _exofs_print_device("Unmounting", NULL, sbi->s_ods[0], sbi->s_pid); | |
302 | + | |
303 | + exofs_free_sbi(sbi); | |
284 | 304 | sb->s_fs_info = NULL; |
285 | 305 | } |
286 | 306 | |
307 | +static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs, | |
308 | + struct exofs_device_table *dt) | |
309 | +{ | |
310 | + sbi->data_map.odm_num_comps = | |
311 | + le32_to_cpu(dt->dt_data_map.cb_num_comps); | |
312 | + sbi->data_map.odm_stripe_unit = | |
313 | + le64_to_cpu(dt->dt_data_map.cb_stripe_unit); | |
314 | + sbi->data_map.odm_group_width = | |
315 | + le32_to_cpu(dt->dt_data_map.cb_group_width); | |
316 | + sbi->data_map.odm_group_depth = | |
317 | + le32_to_cpu(dt->dt_data_map.cb_group_depth); | |
318 | + sbi->data_map.odm_mirror_cnt = | |
319 | + le32_to_cpu(dt->dt_data_map.cb_mirror_cnt); | |
320 | + sbi->data_map.odm_raid_algorithm = | |
321 | + le32_to_cpu(dt->dt_data_map.cb_raid_algorithm); | |
322 | + | |
323 | +/* FIXME: Hard-coded mirror only for now. If not so, do not mount */ | |
324 | + if ((sbi->data_map.odm_num_comps != numdevs) || | |
325 | + (sbi->data_map.odm_stripe_unit != EXOFS_BLKSIZE) || | |
326 | + (sbi->data_map.odm_raid_algorithm != PNFS_OSD_RAID_0) || | |
327 | + (sbi->data_map.odm_mirror_cnt != (numdevs - 1))) | |
328 | + return -EINVAL; | |
329 | + else | |
330 | + return 0; | |
331 | +} | |
332 | + | |
333 | +/* @odi is valid only as long as @dt_dev is valid */ | |
334 | +static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, | |
335 | + struct osd_dev_info *odi) | |
336 | +{ | |
337 | + odi->systemid_len = le32_to_cpu(dt_dev->systemid_len); | |
338 | + memcpy(odi->systemid, dt_dev->systemid, odi->systemid_len); | |
339 | + | |
340 | + odi->osdname_len = le32_to_cpu(dt_dev->osdname_len); | |
341 | + odi->osdname = dt_dev->osdname; | |
342 | + | |
343 | + /* FIXME support long names. Will need a _put function */ | |
344 | + if (dt_dev->long_name_offset) | |
345 | + return -EINVAL; | |
346 | + | |
347 | + /* Make sure osdname is printable! | |
348 | + * mkexofs should give us space for a null-terminator else the | |
349 | + * device-table is invalid. | |
350 | + */ | |
351 | + if (unlikely(odi->osdname_len >= sizeof(dt_dev->osdname))) | |
352 | + odi->osdname_len = sizeof(dt_dev->osdname) - 1; | |
353 | + dt_dev->osdname[odi->osdname_len] = 0; | |
354 | + | |
355 | + /* If it's all zeros, something is bad: we read past end-of-obj */ | |
356 | + return !(odi->systemid_len || odi->osdname_len); | |
357 | +} | |
358 | + | |
359 | +static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, | |
360 | + unsigned table_count) | |
361 | +{ | |
362 | + struct exofs_sb_info *sbi = *psbi; | |
363 | + struct osd_dev *fscb_od; | |
364 | + struct osd_obj_id obj = {.partition = sbi->s_pid, | |
365 | + .id = EXOFS_DEVTABLE_ID}; | |
366 | + struct exofs_device_table *dt; | |
367 | + unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + | |
368 | + sizeof(*dt); | |
369 | + unsigned numdevs, i; | |
370 | + int ret; | |
371 | + | |
372 | + dt = kmalloc(table_bytes, GFP_KERNEL); | |
373 | + if (unlikely(!dt)) { | |
374 | + EXOFS_ERR("ERROR: allocating %x bytes for device table\n", | |
375 | + table_bytes); | |
376 | + return -ENOMEM; | |
377 | + } | |
378 | + | |
379 | + fscb_od = sbi->s_ods[0]; | |
380 | + sbi->s_ods[0] = NULL; | |
381 | + sbi->s_numdevs = 0; | |
382 | + ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes); | |
383 | + if (unlikely(ret)) { | |
384 | + EXOFS_ERR("ERROR: reading device table\n"); | |
385 | + goto out; | |
386 | + } | |
387 | + | |
388 | + numdevs = le64_to_cpu(dt->dt_num_devices); | |
389 | + if (unlikely(!numdevs)) { | |
390 | + ret = -EINVAL; | |
391 | + goto out; | |
392 | + } | |
393 | + WARN_ON(table_count != numdevs); | |
394 | + | |
395 | + ret = _read_and_match_data_map(sbi, numdevs, dt); | |
396 | + if (unlikely(ret)) | |
397 | + goto out; | |
398 | + | |
399 | + if (likely(numdevs > 1)) { | |
400 | + unsigned size = numdevs * sizeof(sbi->s_ods[0]); | |
401 | + | |
402 | + sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL); | |
403 | + if (unlikely(!sbi)) { | |
404 | + ret = -ENOMEM; | |
405 | + goto out; | |
406 | + } | |
407 | + memset(&sbi->s_ods[1], 0, size - sizeof(sbi->s_ods[0])); | |
408 | + *psbi = sbi; | |
409 | + } | |
410 | + | |
411 | + for (i = 0; i < numdevs; i++) { | |
412 | + struct exofs_fscb fscb; | |
413 | + struct osd_dev_info odi; | |
414 | + struct osd_dev *od; | |
415 | + | |
416 | + if (exofs_devs_2_odi(&dt->dt_dev_table[i], &odi)) { | |
417 | + EXOFS_ERR("ERROR: Read all-zeros device entry\n"); | |
418 | + ret = -EINVAL; | |
419 | + goto out; | |
420 | + } | |
421 | + | |
422 | + printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n", | |
423 | + i, odi.osdname); | |
424 | + | |
425 | + /* On all devices the device table is identical. The user can | |
426 | + * specify any one of the participating devices on the command | |
427 | + * line. We always keep them in device-table order. | |
428 | + */ | |
429 | + if (fscb_od && osduld_device_same(fscb_od, &odi)) { | |
430 | + sbi->s_ods[i] = fscb_od; | |
431 | + ++sbi->s_numdevs; | |
432 | + fscb_od = NULL; | |
433 | + continue; | |
434 | + } | |
435 | + | |
436 | + od = osduld_info_lookup(&odi); | |
437 | + if (unlikely(IS_ERR(od))) { | |
438 | + ret = PTR_ERR(od); | |
439 | + EXOFS_ERR("ERROR: device requested is not found " | |
440 | + "osd_name-%s =>%d\n", odi.osdname, ret); | |
441 | + goto out; | |
442 | + } | |
443 | + | |
444 | + sbi->s_ods[i] = od; | |
445 | + ++sbi->s_numdevs; | |
446 | + | |
447 | + /* Read the fscb of the other devices to make sure the FS | |
448 | + * partition is there. | |
449 | + */ | |
450 | + ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, | |
451 | + sizeof(fscb)); | |
452 | + if (unlikely(ret)) { | |
453 | + EXOFS_ERR("ERROR: Malformed participating device " | |
454 | + "error reading fscb osd_name-%s\n", | |
455 | + odi.osdname); | |
456 | + goto out; | |
457 | + } | |
458 | + | |
459 | + /* TODO: verify other information is correct and FS-uuid | |
460 | + * matches. Benny what did you say about device table | |
461 | + * generation and old devices? | |
462 | + */ | |
463 | + } | |
464 | + | |
465 | +out: | |
466 | + kfree(dt); | |
467 | + if (unlikely(!ret && fscb_od)) { | |
468 | + EXOFS_ERR( | |
469 | + "ERROR: Bad device-table container device not present\n"); | |
470 | + osduld_put_device(fscb_od); | |
471 | + ret = -EINVAL; | |
472 | + } | |
473 | + | |
474 | + return ret; | |
475 | +} | |
476 | + | |
287 | 477 | /* |
288 | 478 | * Read the superblock from the OSD and fill in the fields |
289 | 479 | */ |
290 | 480 | |
291 | 481 | |
292 | 482 | |
293 | 483 | |
294 | 484 | |
... | ... | @@ -292,24 +482,25 @@ |
292 | 482 | struct inode *root; |
293 | 483 | struct exofs_mountopt *opts = data; |
294 | 484 | struct exofs_sb_info *sbi; /*extended info */ |
485 | + struct osd_dev *od; /* Master device */ | |
295 | 486 | struct exofs_fscb fscb; /*on-disk superblock info */ |
296 | - struct osd_request *or = NULL; | |
297 | 487 | struct osd_obj_id obj; |
488 | + unsigned table_count; | |
298 | 489 | int ret; |
299 | 490 | |
300 | 491 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
301 | 492 | if (!sbi) |
302 | 493 | return -ENOMEM; |
303 | - sb->s_fs_info = sbi; | |
304 | 494 | |
305 | 495 | /* use mount options to fill superblock */ |
306 | - sbi->s_dev = osduld_path_lookup(opts->dev_name); | |
307 | - if (IS_ERR(sbi->s_dev)) { | |
308 | - ret = PTR_ERR(sbi->s_dev); | |
309 | - sbi->s_dev = NULL; | |
496 | + od = osduld_path_lookup(opts->dev_name); | |
497 | + if (IS_ERR(od)) { | |
498 | + ret = PTR_ERR(od); | |
310 | 499 | goto free_sbi; |
311 | 500 | } |
312 | 501 | |
502 | + sbi->s_ods[0] = od; | |
503 | + sbi->s_numdevs = 1; | |
313 | 504 | sbi->s_pid = opts->pid; |
314 | 505 | sbi->s_timeout = opts->timeout; |
315 | 506 | |
316 | 507 | |
317 | 508 | |
318 | 509 | |
... | ... | @@ -323,36 +514,14 @@ |
323 | 514 | sb->s_bdev = NULL; |
324 | 515 | sb->s_dev = 0; |
325 | 516 | |
326 | - /* read data from on-disk superblock object */ | |
327 | 517 | obj.partition = sbi->s_pid; |
328 | 518 | obj.id = EXOFS_SUPER_ID; |
329 | 519 | exofs_make_credential(sbi->s_cred, &obj); |
330 | 520 | |
331 | - or = osd_start_request(sbi->s_dev, GFP_KERNEL); | |
332 | - if (unlikely(!or)) { | |
333 | - if (!silent) | |
334 | - EXOFS_ERR( | |
335 | - "exofs_fill_super: osd_start_request failed.\n"); | |
336 | - ret = -ENOMEM; | |
521 | + ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb)); | |
522 | + if (unlikely(ret)) | |
337 | 523 | goto free_sbi; |
338 | - } | |
339 | - ret = osd_req_read_kern(or, &obj, 0, &fscb, sizeof(fscb)); | |
340 | - if (unlikely(ret)) { | |
341 | - if (!silent) | |
342 | - EXOFS_ERR( | |
343 | - "exofs_fill_super: osd_req_read_kern failed.\n"); | |
344 | - ret = -ENOMEM; | |
345 | - goto free_sbi; | |
346 | - } | |
347 | 524 | |
348 | - ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred); | |
349 | - if (unlikely(ret)) { | |
350 | - if (!silent) | |
351 | - EXOFS_ERR("exofs_fill_super: exofs_sync_op failed.\n"); | |
352 | - ret = -EIO; | |
353 | - goto free_sbi; | |
354 | - } | |
355 | - | |
356 | 525 | sb->s_magic = le16_to_cpu(fscb.s_magic); |
357 | 526 | sbi->s_nextid = le64_to_cpu(fscb.s_nextid); |
358 | 527 | sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles); |
359 | 528 | |
360 | 529 | |
... | ... | @@ -364,12 +533,26 @@ |
364 | 533 | ret = -EINVAL; |
365 | 534 | goto free_sbi; |
366 | 535 | } |
536 | + if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) { | |
537 | + EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n", | |
538 | + EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version)); | |
539 | + ret = -EINVAL; | |
540 | + goto free_sbi; | |
541 | + } | |
367 | 542 | |
368 | 543 | /* start generation numbers from a random point */ |
369 | 544 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); |
370 | 545 | spin_lock_init(&sbi->s_next_gen_lock); |
371 | 546 | |
547 | + table_count = le64_to_cpu(fscb.s_dev_table_count); | |
548 | + if (table_count) { | |
549 | + ret = exofs_read_lookup_dev_table(&sbi, table_count); | |
550 | + if (unlikely(ret)) | |
551 | + goto free_sbi; | |
552 | + } | |
553 | + | |
372 | 554 | /* set up operation vectors */ |
555 | + sb->s_fs_info = sbi; | |
373 | 556 | sb->s_op = &exofs_sops; |
374 | 557 | sb->s_export_op = &exofs_export_ops; |
375 | 558 | root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF); |
376 | 559 | |
... | ... | @@ -395,16 +578,15 @@ |
395 | 578 | goto free_sbi; |
396 | 579 | } |
397 | 580 | |
398 | - ret = 0; | |
399 | -out: | |
400 | - if (or) | |
401 | - osd_end_request(or); | |
402 | - return ret; | |
581 | + _exofs_print_device("Mounting", opts->dev_name, sbi->s_ods[0], | |
582 | + sbi->s_pid); | |
583 | + return 0; | |
403 | 584 | |
404 | 585 | free_sbi: |
405 | - osduld_put_device(sbi->s_dev); /* NULL safe */ | |
406 | - kfree(sbi); | |
407 | - goto out; | |
586 | + EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", | |
587 | + opts->dev_name, sbi->s_pid, ret); | |
588 | + exofs_free_sbi(sbi); | |
589 | + return ret; | |
408 | 590 | } |
409 | 591 | |
410 | 592 | /* |
... | ... | @@ -433,7 +615,7 @@ |
433 | 615 | { |
434 | 616 | struct super_block *sb = dentry->d_sb; |
435 | 617 | struct exofs_sb_info *sbi = sb->s_fs_info; |
436 | - struct osd_obj_id obj = {sbi->s_pid, 0}; | |
618 | + struct exofs_io_state *ios; | |
437 | 619 | struct osd_attr attrs[] = { |
438 | 620 | ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, |
439 | 621 | OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), |
440 | 622 | |
441 | 623 | |
442 | 624 | |
443 | 625 | |
444 | 626 | |
... | ... | @@ -442,32 +624,33 @@ |
442 | 624 | }; |
443 | 625 | uint64_t capacity = ULLONG_MAX; |
444 | 626 | uint64_t used = ULLONG_MAX; |
445 | - struct osd_request *or; | |
446 | 627 | uint8_t cred_a[OSD_CAP_LEN]; |
447 | 628 | int ret; |
448 | 629 | |
449 | - /* get used/capacity attributes */ | |
450 | - exofs_make_credential(cred_a, &obj); | |
451 | - | |
452 | - or = osd_start_request(sbi->s_dev, GFP_KERNEL); | |
453 | - if (unlikely(!or)) { | |
454 | - EXOFS_DBGMSG("exofs_statfs: osd_start_request failed.\n"); | |
455 | - return -ENOMEM; | |
630 | + ret = exofs_get_io_state(sbi, &ios); | |
631 | + if (ret) { | |
632 | + EXOFS_DBGMSG("exofs_get_io_state failed.\n"); | |
633 | + return ret; | |
456 | 634 | } |
457 | 635 | |
458 | - osd_req_get_attributes(or, &obj); | |
459 | - osd_req_add_get_attr_list(or, attrs, ARRAY_SIZE(attrs)); | |
460 | - ret = exofs_sync_op(or, sbi->s_timeout, cred_a); | |
636 | + exofs_make_credential(cred_a, &ios->obj); | |
637 | + ios->cred = sbi->s_cred; | |
638 | + ios->in_attr = attrs; | |
639 | + ios->in_attr_len = ARRAY_SIZE(attrs); | |
640 | + | |
641 | + ret = exofs_sbi_read(ios); | |
461 | 642 | if (unlikely(ret)) |
462 | 643 | goto out; |
463 | 644 | |
464 | - ret = extract_attr_from_req(or, &attrs[0]); | |
465 | - if (likely(!ret)) | |
645 | + ret = extract_attr_from_ios(ios, &attrs[0]); | |
646 | + if (likely(!ret)) { | |
466 | 647 | capacity = get_unaligned_be64(attrs[0].val_ptr); |
467 | - else | |
648 | + if (unlikely(!capacity)) | |
649 | + capacity = ULLONG_MAX; | |
650 | + } else | |
468 | 651 | EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n"); |
469 | 652 | |
470 | - ret = extract_attr_from_req(or, &attrs[1]); | |
653 | + ret = extract_attr_from_ios(ios, &attrs[1]); | |
471 | 654 | if (likely(!ret)) |
472 | 655 | used = get_unaligned_be64(attrs[1].val_ptr); |
473 | 656 | else |
474 | 657 | |
... | ... | @@ -476,15 +659,15 @@ |
476 | 659 | /* fill in the stats buffer */ |
477 | 660 | buf->f_type = EXOFS_SUPER_MAGIC; |
478 | 661 | buf->f_bsize = EXOFS_BLKSIZE; |
479 | - buf->f_blocks = (capacity >> EXOFS_BLKSHIFT); | |
480 | - buf->f_bfree = ((capacity - used) >> EXOFS_BLKSHIFT); | |
662 | + buf->f_blocks = capacity >> 9; | |
663 | + buf->f_bfree = (capacity - used) >> 9; | |
481 | 664 | buf->f_bavail = buf->f_bfree; |
482 | 665 | buf->f_files = sbi->s_numfiles; |
483 | 666 | buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles; |
484 | 667 | buf->f_namelen = EXOFS_NAME_LEN; |
485 | 668 | |
486 | 669 | out: |
487 | - osd_end_request(or); | |
670 | + exofs_put_io_state(ios); | |
488 | 671 | return ret; |
489 | 672 | } |
490 | 673 |