Blame view
fs/ceph/file.c
44.6 KB
3d14c5d2b ceph: factor out ... |
1 |
#include <linux/ceph/ceph_debug.h> |
124e68e74 ceph: file operat... |
2 |
|
3d14c5d2b ceph: factor out ... |
3 |
#include <linux/module.h> |
124e68e74 ceph: file operat... |
4 |
#include <linux/sched.h> |
5a0e3ad6a include cleanup: ... |
5 |
#include <linux/slab.h> |
124e68e74 ceph: file operat... |
6 |
#include <linux/file.h> |
5ef50c3be ceph: simplify+fi... |
7 |
#include <linux/mount.h> |
124e68e74 ceph: file operat... |
8 9 |
#include <linux/namei.h> #include <linux/writeback.h> |
ad7a60de8 ceph: punch hole ... |
10 |
#include <linux/falloc.h> |
124e68e74 ceph: file operat... |
11 12 13 |
#include "super.h" #include "mds_client.h" |
99ccbd229 ceph: use fscache... |
14 |
#include "cache.h" |
124e68e74 ceph: file operat... |
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
/* * Ceph file operations * * Implement basic open/close functionality, and implement * read/write. * * We implement three modes of file I/O: * - buffered uses the generic_file_aio_{read,write} helpers * * - synchronous is used when there is multi-client read/write * sharing, avoids the page cache, and synchronously waits for an * ack from the OSD. * * - direct io takes the variant of the sync path that references * user pages directly. * * fsync() flushes and waits on dirty pages, but just queues metadata * for writeback: since the MDS can recover size and mtime there is no * need to wait for MDS acknowledgement. */ |
b5b98989d ceph: combine as ... |
/*
 * Calculate the length sum of direct io vectors that can
 * be combined into one page vector.
 *
 * Starts from the iterator's current position (iov_offset) and keeps
 * accumulating while each segment's tail and the next segment's base
 * are both page aligned, so the segments can be mapped as one
 * contiguous page run.
 */
static size_t dio_get_pagev_size(const struct iov_iter *it)
{
	const struct iovec *iov = it->iov;
	const struct iovec *iovend = iov + it->nr_segs;
	size_t size;

	size = iov->iov_len - it->iov_offset;
	/*
	 * An iov can be page vectored when both the current tail
	 * and the next base are page aligned.
	 */
	while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) &&
	       (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) {
		size += iov->iov_len;
	}
	dout("dio_get_pagevlen len = %zu ", size);
	return size;
}

/*
 * Allocate a page vector based on (@it, @nbytes).
 * The return value is the tuple describing a page vector,
 * that is (@pages, @page_align, @num_pages).
 *
 * Pins the user pages backing @it (a copy of the iterator is advanced,
 * the caller's iterator is untouched) and returns the page array, or an
 * ERR_PTR on failure.
 *
 * NOTE(review): on kmalloc failure the array falls back to vmalloc(),
 * but the fail path frees it via ceph_put_page_vector() — confirm that
 * helper releases the array with kvfree() (not plain kfree()), otherwise
 * the vmalloc fallback is freed with the wrong allocator.
 */
static struct page **
dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes,
		    size_t *page_align, int *num_pages)
{
	struct iov_iter tmp_it = *it;	/* local copy; caller's it preserved */
	size_t align;
	struct page **pages;
	int ret = 0, idx, npages;

	/* offset of the first byte within its page */
	align = (unsigned long)(it->iov->iov_base + it->iov_offset) &
		(PAGE_SIZE - 1);
	npages = calc_pages_for(align, nbytes);
	pages = kmalloc(sizeof(*pages) * npages, GFP_KERNEL);
	if (!pages) {
		pages = vmalloc(sizeof(*pages) * npages);
		if (!pages)
			return ERR_PTR(-ENOMEM);
	}

	/* pin pages in batches until the whole range is covered */
	for (idx = 0; idx < npages; ) {
		size_t start;
		ret = iov_iter_get_pages(&tmp_it, pages + idx, nbytes,
					 npages - idx, &start);
		if (ret < 0)
			goto fail;
		iov_iter_advance(&tmp_it, ret);
		nbytes -= ret;
		/* ret bytes starting at offset start occupy this many pages */
		idx += (ret + start + PAGE_SIZE - 1) / PAGE_SIZE;
	}

	BUG_ON(nbytes != 0);
	*num_pages = npages;
	*page_align = align;
	dout("dio_get_pages_alloc: got %d pages align %zu ", npages, align);
	return pages;
fail:
	/* drop the idx pages pinned so far and free the array */
	ceph_put_page_vector(pages, idx, false);
	return ERR_PTR(ret);
}
124e68e74 ceph: file operat... |
106 107 108 109 110 111 112 113 |
/* * Prepare an open request. Preallocate ceph_cap to avoid an * inopportune ENOMEM later. */ static struct ceph_mds_request * prepare_open_request(struct super_block *sb, int flags, int create_mode) { |
3d14c5d2b ceph: factor out ... |
114 115 |
struct ceph_fs_client *fsc = ceph_sb_to_client(sb); struct ceph_mds_client *mdsc = fsc->mdsc; |
124e68e74 ceph: file operat... |
116 117 118 119 120 121 122 123 124 125 126 127 128 |
struct ceph_mds_request *req; int want_auth = USE_ANY_MDS; int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN; if (flags & (O_WRONLY|O_RDWR|O_CREAT|O_TRUNC)) want_auth = USE_AUTH_MDS; req = ceph_mdsc_create_request(mdsc, op, want_auth); if (IS_ERR(req)) goto out; req->r_fmode = ceph_flags_to_mode(flags); req->r_args.open.flags = cpu_to_le32(flags); req->r_args.open.mode = cpu_to_le32(create_mode); |
124e68e74 ceph: file operat... |
129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
out: return req; } /* * initialize private struct file data. * if we fail, clean up by dropping fmode reference on the ceph_inode */ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) { struct ceph_file_info *cf; int ret = 0; switch (inode->i_mode & S_IFMT) { case S_IFREG: |
46b59b2be ceph: disable fsc... |
144 145 |
ceph_fscache_register_inode_cookie(inode); ceph_fscache_file_set_cookie(inode, file); |
124e68e74 ceph: file operat... |
146 147 148 149 |
case S_IFDIR: dout("init_file %p %p 0%o (regular) ", inode, file, inode->i_mode); |
99ec26977 ceph: use kmem_ca... |
150 |
cf = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL); |
124e68e74 ceph: file operat... |
151 152 153 154 155 156 |
if (cf == NULL) { ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */ return -ENOMEM; } cf->fmode = fmode; cf->next_offset = 2; |
fdd4e1583 ceph: rework dcac... |
157 |
cf->readdir_cache_idx = -1; |
124e68e74 ceph: file operat... |
158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
file->private_data = cf; BUG_ON(inode->i_fop->release != ceph_release); break; case S_IFLNK: dout("init_file %p %p 0%o (symlink) ", inode, file, inode->i_mode); ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */ break; default: dout("init_file %p %p 0%o (special) ", inode, file, inode->i_mode); /* * we need to drop the open ref now, since we don't * have .release set to ceph_release. */ ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */ BUG_ON(inode->i_fop->release == ceph_release); /* call the proper open fop */ ret = inode->i_fop->open(inode, file); } return ret; } /* |
77310320c ceph: renew caps ... |
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 |
* try renew caps after session gets killed. */ int ceph_renew_caps(struct inode *inode) { struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req; int err, flags, wanted; spin_lock(&ci->i_ceph_lock); wanted = __ceph_caps_file_wanted(ci); if (__ceph_is_any_real_caps(ci) && (!(wanted & CEPH_CAP_ANY_WR) == 0 || ci->i_auth_cap)) { int issued = __ceph_caps_issued(ci, NULL); spin_unlock(&ci->i_ceph_lock); dout("renew caps %p want %s issued %s updating mds_wanted ", inode, ceph_cap_string(wanted), ceph_cap_string(issued)); ceph_check_caps(ci, 0, NULL); return 0; } spin_unlock(&ci->i_ceph_lock); flags = 0; if ((wanted & CEPH_CAP_FILE_RD) && (wanted & CEPH_CAP_FILE_WR)) flags = O_RDWR; else if (wanted & CEPH_CAP_FILE_RD) flags = O_RDONLY; else if (wanted & CEPH_CAP_FILE_WR) flags = O_WRONLY; #ifdef O_LAZY if (wanted & CEPH_CAP_FILE_LAZYIO) flags |= O_LAZY; #endif req = prepare_open_request(inode->i_sb, flags, 0); if (IS_ERR(req)) { err = PTR_ERR(req); goto out; } req->r_inode = inode; ihold(inode); req->r_num_caps = 1; req->r_fmode = -1; err = ceph_mdsc_do_request(mdsc, NULL, req); ceph_mdsc_put_request(req); out: dout("renew caps %p open result=%d ", inode, err); return err < 0 ? err : 0; } /* |
 * If we already have the requisite capabilities, we can satisfy
 * the open request locally (no need to request new caps from the
 * MDS).  We do, however, need to inform the MDS (asynchronously)
 * if our wanted caps set expands.
 */
int ceph_open(struct inode *inode, struct file *file)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct ceph_mds_request *req;
	struct ceph_file_info *cf = file->private_data;
	int err;
	int flags, fmode, wanted;

	/* already initialized by a previous ceph_init_file()? */
	if (cf) {
		dout("open file %p is already opened ", file);
		return 0;
	}

	/* filter out O_CREAT|O_EXCL; vfs did that already.  yuck. */
	flags = file->f_flags & ~(O_CREAT|O_EXCL);
	if (S_ISDIR(inode->i_mode))
		flags = O_DIRECTORY;  /* mds likes to know */

	dout("open inode %p ino %llx.%llx file %p flags %d (%d) ", inode,
	     ceph_vinop(inode), file, flags, file->f_flags);
	fmode = ceph_flags_to_mode(flags);
	wanted = ceph_caps_for_mode(fmode);

	/* snapped files are read-only */
	if (ceph_snap(inode) != CEPH_NOSNAP && (file->f_mode & FMODE_WRITE))
		return -EROFS;

	/* trivially open snapdir */
	if (ceph_snap(inode) == CEPH_SNAPDIR) {
		spin_lock(&ci->i_ceph_lock);
		__ceph_get_fmode(ci, fmode);
		spin_unlock(&ci->i_ceph_lock);
		return ceph_init_file(inode, file, fmode);
	}

	/*
	 * No need to block if we have caps on the auth MDS (for
	 * write) or any MDS (for read).  Update wanted set
	 * asynchronously.
	 */
	spin_lock(&ci->i_ceph_lock);
	if (__ceph_is_any_real_caps(ci) &&
	    (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
		int mds_wanted = __ceph_caps_mds_wanted(ci);
		int issued = __ceph_caps_issued(ci, NULL);

		dout("open %p fmode %d want %s issued %s using existing ",
		     inode, fmode, ceph_cap_string(wanted),
		     ceph_cap_string(issued));
		/* take the fmode ref before dropping the lock */
		__ceph_get_fmode(ci, fmode);
		spin_unlock(&ci->i_ceph_lock);

		/* adjust wanted? */
		if ((issued & wanted) != wanted &&
		    (mds_wanted & wanted) != wanted &&
		    ceph_snap(inode) != CEPH_SNAPDIR)
			ceph_check_caps(ci, 0, NULL);

		return ceph_init_file(inode, file, fmode);
	} else if (ceph_snap(inode) != CEPH_NOSNAP &&
		   (ci->i_snap_caps & wanted) == wanted) {
		/* snapshot inode whose snap caps already cover the open */
		__ceph_get_fmode(ci, fmode);
		spin_unlock(&ci->i_ceph_lock);
		return ceph_init_file(inode, file, fmode);
	}

	spin_unlock(&ci->i_ceph_lock);

	/* slow path: ask the MDS to open the inode for us */
	dout("open fmode %d wants %s ", fmode, ceph_cap_string(wanted));
	req = prepare_open_request(inode->i_sb, flags, 0);
	if (IS_ERR(req)) {
		err = PTR_ERR(req);
		goto out;
	}
	req->r_inode = inode;
	ihold(inode);

	req->r_num_caps = 1;
	err = ceph_mdsc_do_request(mdsc, NULL, req);
	if (!err)
		err = ceph_init_file(inode, file, req->r_fmode);
	ceph_mdsc_put_request(req);
	dout("open result=%d on %llx.%llx ", err, ceph_vinop(inode));
out:
	return err;
}

/*
 * Do a lookup + open with a single request.  If we get a non-existent
 * file or symlink, return 1 so the VFS can retry.
 */
int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
		     struct file *file, unsigned flags, umode_t mode,
		     int *opened)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct ceph_mds_request *req;
	struct dentry *dn;
	struct ceph_acls_info acls = {};
	int mask;
	int err;

	dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o ",
	     dir, dentry, dentry,
	     d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode);

	if (dentry->d_name.len > NAME_MAX)
		return -ENAMETOOLONG;

	err = ceph_init_dentry(dentry);
	if (err < 0)
		return err;

	/* precompute ACLs for the create case; may adjust mode */
	if (flags & O_CREAT) {
		err = ceph_pre_init_acls(dir, &mode, &acls);
		if (err < 0)
			return err;
	}

	/* do the open */
	req = prepare_open_request(dir->i_sb, flags, mode);
	if (IS_ERR(req)) {
		err = PTR_ERR(req);
		goto out_acl;
	}
	req->r_dentry = dget(dentry);
	req->r_num_caps = 2;
	if (flags & O_CREAT) {
		req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
		req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
		if (acls.pagelist) {
			/* ownership of the pagelist moves to the request */
			req->r_pagelist = acls.pagelist;
			acls.pagelist = NULL;
		}
	}

	/* request security xattrs with the reply when LSM wants them */
	mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
	if (ceph_security_xattr_wanted(dir))
		mask |= CEPH_CAP_XATTR_SHARED;
	req->r_args.open.mask = cpu_to_le32(mask);

	req->r_locked_dir = dir;           /* caller holds dir->i_mutex */
	err = ceph_mdsc_do_request(mdsc,
				   (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
				   req);
	err = ceph_handle_snapdir(req, dentry, err);
	if (err)
		goto out_req;

	if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
		err = ceph_handle_notrace_create(dir, dentry);

	if (d_in_lookup(dentry)) {
		dn = ceph_finish_lookup(req, dentry, err);
		if (IS_ERR(dn))
			err = PTR_ERR(dn);
	} else {
		/* we were given a hashed negative dentry */
		dn = NULL;
	}
	if (err)
		goto out_req;
	if (dn || d_really_is_negative(dentry) || d_is_symlink(dentry)) {
		/* make vfs retry on splice, ENOENT, or symlink */
		dout("atomic_open finish_no_open on dn %p ", dn);
		err = finish_no_open(file, dn);
	} else {
		dout("atomic_open finish_open on dn %p ", dn);
		if (req->r_op == CEPH_MDS_OP_CREATE &&
		    req->r_reply_info.has_create_ino) {
			ceph_init_inode_acls(d_inode(dentry), &acls);
			*opened |= FILE_CREATED;
		}
		err = finish_open(file, dentry, ceph_open, opened);
	}
out_req:
	/* drop the fmode ref taken on the target inode by the MDS reply */
	if (!req->r_err && req->r_target_inode)
		ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode);
	ceph_mdsc_put_request(req);
out_acl:
	ceph_release_acls_info(&acls);
	dout("atomic_open result=%d ", err);
	return err;
}

/*
 * Release a file handle: drop the fmode reference and free the
 * per-file state allocated in ceph_init_file().
 */
int ceph_release(struct inode *inode, struct file *file)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_file_info *cf = file->private_data;

	dout("release inode %p file %p ", inode, file);
	ceph_put_fmode(ci, cf->fmode);
	if (cf->last_readdir)
		ceph_mdsc_put_request(cf->last_readdir);
	kfree(cf->last_name);
	kfree(cf->dir_info);
	kmem_cache_free(ceph_file_cachep, cf);

	/* wake up anyone waiting for caps on this inode */
	wake_up_all(&ci->i_cap_wq);
	return 0;
}
/*
 * Status codes used by the synchronous read paths: striped_read()
 * sets *checkeof to CHECK_EOF when a read may have bounced off EOF.
 * HAVE_RETRIED and READ_INLINE are presumably used by the retry /
 * inline-data read logic elsewhere in this file — not visible here.
 */
enum {
	HAVE_RETRIED = 1,
	CHECK_EOF = 2,
	READ_INLINE = 3,
};
/*
 * Read a range of bytes striped over one or more objects.  Iterate over
 * objects we stripe over.  (That's not atomic, but good enough for now.)
 *
 * If we get a short result from the OSD, check against i_size; we need to
 * only return a short read to the caller if we hit EOF.
 *
 * Returns the number of bytes read (including any zero-filled gap up to
 * i_size), or a negative error.  Sets *checkeof to CHECK_EOF when the
 * read may have ended at EOF.
 */
static int striped_read(struct inode *inode,
			u64 off, u64 len,
			struct page **pages, int num_pages,
			int *checkeof)
{
	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
	struct ceph_inode_info *ci = ceph_inode(inode);
	u64 pos, this_len, left;
	loff_t i_size;
	int page_align, pages_left;
	int read, ret;
	struct page **page_pos;
	bool hit_stripe, was_short;

	/*
	 * we may need to do multiple reads.  not atomic, unfortunately.
	 */
	pos = off;
	left = len;
	page_pos = pages;
	pages_left = num_pages;
	read = 0;

more:
	page_align = pos & ~PAGE_MASK;
	this_len = left;
	/* ceph_osdc_readpages() trims this_len to the object boundary */
	ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
				  &ci->i_layout, pos, &this_len,
				  ci->i_truncate_seq,
				  ci->i_truncate_size,
				  page_pos, pages_left, page_align);
	if (ret == -ENOENT)
		ret = 0;	/* hole: object doesn't exist */
	hit_stripe = this_len < left;
	was_short = ret >= 0 && ret < this_len;
	dout("striped_read %llu~%llu (read %u) got %d%s%s ", pos, left, read,
	     ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");

	i_size = i_size_read(inode);
	if (ret >= 0) {
		int didpages;
		if (was_short && (pos + ret < i_size)) {
			/*
			 * Short read inside i_size: zero-fill the gap so
			 * the caller sees file data, not stale pages.
			 * NOTE(review): min() mixes u64 (this_len - ret)
			 * with loff_t (i_size - pos - ret) — confirm this
			 * type-checks with the kernel's min() macro.
			 */
			int zlen = min(this_len - ret,
				       i_size - pos - ret);
			int zoff = (off & ~PAGE_MASK) + read + ret;
			dout(" zero gap %llu to %llu ",
			     pos + ret, pos + ret + zlen);
			ceph_zero_page_vector_range(zoff, zlen, pages);
			ret += zlen;
		}

		didpages = (page_align + ret) >> PAGE_SHIFT;
		pos += ret;
		read = pos - off;
		left -= ret;
		page_pos += didpages;
		pages_left -= didpages;

		/* hit stripe and need continue*/
		if (left && hit_stripe && pos < i_size)
			goto more;
	}

	if (read > 0) {
		/* report total progress, even after a trailing error */
		ret = read;
		/* did we bounce off eof? */
		if (pos + left > i_size)
			*checkeof = CHECK_EOF;
	}

	dout("striped_read returns %d ", ret);
	return ret;
}

/*
 * Completely synchronous read and write methods.  Direct from __user
 * buffer to osd, or directly to user pages (if O_DIRECT).
 *
 * If the read spans object boundary, just do multiple reads.
 */
/*
 * Synchronous read: allocate a temporary page vector, fill it via
 * striped_read(), then copy into the caller's iov_iter.  Advances
 * iocb->ki_pos by the number of bytes actually delivered.
 */
static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
			      int *checkeof)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct page **pages;
	u64 off = iocb->ki_pos;
	int num_pages, ret;
	size_t len = iov_iter_count(i);

	dout("sync_read on file %p %llu~%u %s ", file, off,
	     (unsigned)len,
	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");

	if (!len)
		return 0;
	/*
	 * flush any page cache pages in this range.  this
	 * will make concurrent normal and sync io slow,
	 * but it will at least behave sensibly when they are
	 * in sequence.
	 */
	ret = filemap_write_and_wait_range(inode->i_mapping, off,
						off + len);
	if (ret < 0)
		return ret;

	num_pages = calc_pages_for(off, len);
	pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
	if (IS_ERR(pages))
		return PTR_ERR(pages);
	ret = striped_read(inode, off, len, pages, num_pages, checkeof);
	if (ret > 0) {
		int l, k = 0;
		size_t left = ret;

		/* copy page-by-page into the user iterator */
		while (left) {
			size_t page_off = off & ~PAGE_MASK;
			size_t copy = min_t(size_t, left,
					    PAGE_SIZE - page_off);
			l = copy_page_to_iter(pages[k++], page_off, copy, i);
			off += l;
			left -= l;
			if (l < copy)	/* short copy: iter exhausted/fault */
				break;
		}
	}
	ceph_release_page_vector(pages, num_pages);

	/* report and commit only the bytes actually copied out */
	if (off > iocb->ki_pos) {
		ret = off - iocb->ki_pos;
		iocb->ki_pos = off;
	}

	dout("sync_read result %d ", ret);
	return ret;
}
/*
 * Tracks one application AIO, which may fan out into several OSD
 * requests; freed in ceph_aio_complete() when the last one finishes.
 */
struct ceph_aio_request {
	struct kiocb *iocb;		/* the originating aio control block */
	size_t total_len;		/* bytes reported to ki_complete() */
	int write;			/* nonzero for the write path */
	int error;			/* first failure (set via cmpxchg) */
	struct list_head osd_reqs;	/* presumably the member OSD requests;
					 * list usage not visible here */
	unsigned num_reqs;		/* how many OSD requests were issued */
	atomic_t pending_reqs;		/* outstanding OSD request count */
	struct timespec mtime;		/* mtime to stamp on retried writes */
	struct ceph_cap_flush *prealloc_cf;	/* for __ceph_mark_dirty_caps */
};

/* deferred context for re-sending a write that raced a snapshot */
struct ceph_aio_work {
	struct work_struct work;
	struct ceph_osd_request *req;
};

static void ceph_aio_retry_work(struct work_struct *work);
/*
 * Called once per finished OSD request; when the last one completes,
 * finalize the AIO: update size/dirty caps for writes, drop cap refs,
 * signal ki_complete(), and free the request tracker.
 */
static void ceph_aio_complete(struct inode *inode,
			      struct ceph_aio_request *aio_req)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	int ret;

	/* wait for the last in-flight OSD request */
	if (!atomic_dec_and_test(&aio_req->pending_reqs))
		return;

	ret = aio_req->error;
	if (!ret)
		ret = aio_req->total_len;

	dout("ceph_aio_complete %p rc %d ", inode, ret);

	if (ret >= 0 && aio_req->write) {
		int dirty;

		/* extend i_size if the write went past it */
		loff_t endoff = aio_req->iocb->ki_pos + aio_req->total_len;
		if (endoff > i_size_read(inode)) {
			if (ceph_inode_set_size(inode, endoff))
				ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
		}

		spin_lock(&ci->i_ceph_lock);
		ci->i_inline_version = CEPH_INLINE_NONE;
		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
					       &aio_req->prealloc_cf);
		spin_unlock(&ci->i_ceph_lock);
		if (dirty)
			__mark_inode_dirty(inode, dirty);

	}

	/* release the cap ref taken when the AIO was submitted */
	ceph_put_cap_refs(ci, (aio_req->write ? CEPH_CAP_FILE_WR :
						CEPH_CAP_FILE_RD));

	aio_req->iocb->ki_complete(aio_req->iocb, ret, 0);

	ceph_free_cap_flush(aio_req->prealloc_cf);
	kfree(aio_req);
}
/*
 * Per-OSD-request completion callback for AIO.  Handles -EOLDSNAPC by
 * deferring a retry to a workqueue, zero-fills short reads, records the
 * first error, then folds this request into the parent AIO.
 */
static void ceph_aio_complete_req(struct ceph_osd_request *req)
{
	int rc = req->r_result;
	struct inode *inode = req->r_inode;
	struct ceph_aio_request *aio_req = req->r_priv;
	struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
	int num_pages = calc_pages_for((u64)osd_data->alignment,
				       osd_data->length);

	dout("ceph_aio_complete_req %p rc %d bytes %llu ",
	     inode, rc, osd_data->length);

	if (rc == -EOLDSNAPC) {
		/* write raced a new snapshot: re-send with a fresh snapc */
		struct ceph_aio_work *aio_work;
		BUG_ON(!aio_req->write);

		aio_work = kmalloc(sizeof(*aio_work), GFP_NOFS);
		if (aio_work) {
			INIT_WORK(&aio_work->work, ceph_aio_retry_work);
			aio_work->req = req;
			queue_work(ceph_inode_to_client(inode)->wb_wq,
				   &aio_work->work);
			/* ownership of req passes to the work item */
			return;
		}
		rc = -ENOMEM;
	} else if (!aio_req->write) {
		if (rc == -ENOENT)
			rc = 0;		/* hole reads as zeroes */
		if (rc >= 0 && osd_data->length > rc) {
			int zoff = osd_data->alignment + rc;
			int zlen = osd_data->length - rc;
			/*
			 * If read is satisfied by single OSD request,
			 * it can pass EOF. Otherwise read is within
			 * i_size.
			 */
			if (aio_req->num_reqs == 1) {
				loff_t i_size = i_size_read(inode);
				loff_t endoff = aio_req->iocb->ki_pos + rc;
				if (endoff < i_size)
					zlen = min_t(size_t, zlen,
						     i_size - endoff);
				aio_req->total_len = rc + zlen;
			}

			if (zlen > 0)
				ceph_zero_page_vector_range(zoff, zlen,
							    osd_data->pages);
		}
	}

	/* unpin user pages; mark them dirty if we read into them */
	ceph_put_page_vector(osd_data->pages, num_pages, !aio_req->write);
	ceph_osdc_put_request(req);

	/* record only the first error across all member requests */
	if (rc < 0)
		cmpxchg(&aio_req->error, 0, rc);

	ceph_aio_complete(inode, aio_req);
	return;
}
/*
 * Workqueue handler: rebuild and re-send an AIO write that failed with
 * -EOLDSNAPC, using the snap context current at retry time.  On any
 * failure the original (or new) request is completed with the error.
 */
static void ceph_aio_retry_work(struct work_struct *work)
{
	struct ceph_aio_work *aio_work =
		container_of(work, struct ceph_aio_work, work);
	struct ceph_osd_request *orig_req = aio_work->req;
	struct ceph_aio_request *aio_req = orig_req->r_priv;
	struct inode *inode = orig_req->r_inode;
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_snap_context *snapc;
	struct ceph_osd_request *req;
	int ret;

	/* pick the most recent snap context under the inode lock */
	spin_lock(&ci->i_ceph_lock);
	if (__ceph_have_pending_cap_snap(ci)) {
		struct ceph_cap_snap *capsnap =
			list_last_entry(&ci->i_cap_snaps,
					struct ceph_cap_snap,
					ci_item);
		snapc = ceph_get_snap_context(capsnap->context);
	} else {
		BUG_ON(!ci->i_head_snapc);
		snapc = ceph_get_snap_context(ci->i_head_snapc);
	}
	spin_unlock(&ci->i_ceph_lock);

	req = ceph_osdc_alloc_request(orig_req->r_osdc, snapc, 2,
			false, GFP_NOFS);
	if (!req) {
		ret = -ENOMEM;
		/* fail the original request below */
		req = orig_req;
		goto out;
	}

	req->r_flags =	CEPH_OSD_FLAG_ORDERSNAP |
			CEPH_OSD_FLAG_ONDISK |
			CEPH_OSD_FLAG_WRITE;
	ceph_oloc_copy(&req->r_base_oloc, &orig_req->r_base_oloc);
	ceph_oid_copy(&req->r_base_oid, &orig_req->r_base_oid);

	ret = ceph_osdc_alloc_messages(req, GFP_NOFS);
	if (ret) {
		ceph_osdc_put_request(req);
		req = orig_req;
		goto out;
	}

	/* carry the original extent op over to the new request */
	req->r_ops[0] = orig_req->r_ops[0];
	osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);

	req->r_mtime = aio_req->mtime;
	req->r_data_offset = req->r_ops[0].extent.offset;

	ceph_osdc_put_request(orig_req);

	req->r_callback = ceph_aio_complete_req;
	req->r_inode = inode;
	req->r_priv = aio_req;

	ret = ceph_osdc_start_request(req->r_osdc, req, false);
out:
	if (ret < 0) {
		/* deliver the failure through the normal completion path */
		req->r_result = ret;
		ceph_aio_complete_req(req);
	}

	ceph_put_snap_context(snapc);
	kfree(aio_work);
}
/*
 * Write commit request unsafe callback, called to tell us when a
 * request is unsafe (that is, in flight--has been handed to the
 * messenger to send to its target osd).  It is called again when
 * we've received a response message indicating the request is
 * "safe" (its CEPH_OSD_FLAG_ONDISK flag is set), or when a request
 * is completed early (and unsuccessfully) due to a timeout or
 * interrupt.
 *
 * This is used if we requested both an ACK and ONDISK commit reply
 * from the OSD.
 */
static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
{
	struct ceph_inode_info *ci = ceph_inode(req->r_inode);

	dout("%s %p tid %llu %ssafe ", __func__, req, req->r_tid,
	     unsafe ? "un" : "");
	if (unsafe) {
		/* hold WR cap ref and track the write until it's on disk */
		ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
		spin_lock(&ci->i_unsafe_lock);
		list_add_tail(&req->r_unsafe_item,
			      &ci->i_unsafe_writes);
		spin_unlock(&ci->i_unsafe_lock);

		complete_all(&req->r_completion);
	} else {
		/* now committed: untrack and drop the cap ref */
		spin_lock(&ci->i_unsafe_lock);
		list_del_init(&req->r_unsafe_item);
		spin_unlock(&ci->i_unsafe_lock);
		ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR);
	}
}
/*
 * Wait on any unsafe replies for the given inode.  First wait on the
 * newest request, and make that the upper bound.  Then, if there are
 * more requests, keep waiting on the oldest as long as it is still older
 * than the original request.
 */
void ceph_sync_write_wait(struct inode *inode)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct list_head *head = &ci->i_unsafe_writes;
	struct ceph_osd_request *req;
	u64 last_tid;

	/* only regular files track unsafe writes */
	if (!S_ISREG(inode->i_mode))
		return;

	spin_lock(&ci->i_unsafe_lock);
	if (list_empty(head))
		goto out;

	/* set upper bound as _last_ entry in chain */

	req = list_last_entry(head, struct ceph_osd_request,
			      r_unsafe_item);
	last_tid = req->r_tid;

	do {
		/* hold a ref so req survives while the lock is dropped */
		ceph_osdc_get_request(req);
		spin_unlock(&ci->i_unsafe_lock);

		dout("sync_write_wait on tid %llu (until %llu) ",
		     req->r_tid, last_tid);
		wait_for_completion(&req->r_safe_completion);
		ceph_osdc_put_request(req);

		spin_lock(&ci->i_unsafe_lock);
		/*
		 * from here on look at first entry in chain, since we
		 * only want to wait for anything older than last_tid
		 */
		if (list_empty(head))
			break;
		req = list_first_entry(head, struct ceph_osd_request,
				       r_unsafe_item);
	} while (req->r_tid < last_tid);
out:
	spin_unlock(&ci->i_unsafe_lock);
}
e8344e668 ceph: Implement w... |
878 |
|
e8344e668 ceph: Implement w... |
879 |
static ssize_t |
c8fe9b17d ceph: Asynchronou... |
880 881 882 |
ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, struct ceph_snap_context *snapc, struct ceph_cap_flush **pcf) |
124e68e74 ceph: file operat... |
883 |
{ |
e8344e668 ceph: Implement w... |
884 |
struct file *file = iocb->ki_filp; |
496ad9aa8 new helper: file_... |
885 |
struct inode *inode = file_inode(file); |
124e68e74 ceph: file operat... |
886 |
struct ceph_inode_info *ci = ceph_inode(inode); |
3d14c5d2b ceph: factor out ... |
887 |
struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
acead002b libceph: don't bu... |
888 |
struct ceph_vino vino; |
124e68e74 ceph: file operat... |
889 890 |
struct ceph_osd_request *req; struct page **pages; |
c8fe9b17d ceph: Asynchronou... |
891 892 |
struct ceph_aio_request *aio_req = NULL; int num_pages = 0; |
124e68e74 ceph: file operat... |
893 |
int flags; |
124e68e74 ceph: file operat... |
894 |
int ret; |
c2050a454 fs: Replace curre... |
895 |
struct timespec mtime = current_time(inode); |
c8fe9b17d ceph: Asynchronou... |
896 897 898 |
size_t count = iov_iter_count(iter); loff_t pos = iocb->ki_pos; bool write = iov_iter_rw(iter) == WRITE; |
124e68e74 ceph: file operat... |
899 |
|
c8fe9b17d ceph: Asynchronou... |
900 |
if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP) |
124e68e74 ceph: file operat... |
901 |
return -EROFS; |
c8fe9b17d ceph: Asynchronou... |
902 903 904 |
dout("sync_direct_read_write (%s) on file %p %lld~%u ", (write ? "write" : "read"), file, pos, (unsigned)count); |
124e68e74 ceph: file operat... |
905 |
|
e8344e668 ceph: Implement w... |
906 |
ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count); |
29065a513 ceph: sync read/w... |
907 908 |
if (ret < 0) return ret; |
c8fe9b17d ceph: Asynchronou... |
909 |
if (write) { |
5d7eb1a32 ceph: ignore erro... |
910 |
int ret2 = invalidate_inode_pages2_range(inode->i_mapping, |
09cbfeaf1 mm, fs: get rid o... |
911 912 |
pos >> PAGE_SHIFT, (pos + count) >> PAGE_SHIFT); |
5d7eb1a32 ceph: ignore erro... |
913 |
if (ret2 < 0) |
c8fe9b17d ceph: Asynchronou... |
914 915 |
dout("invalidate_inode_pages2_range returned %d ", ret); |
29065a513 ceph: sync read/w... |
916 |
|
c8fe9b17d ceph: Asynchronou... |
917 918 919 920 921 922 |
flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE; } else { flags = CEPH_OSD_FLAG_READ; } |
124e68e74 ceph: file operat... |
923 |
|
c8fe9b17d ceph: Asynchronou... |
924 925 926 927 |
while (iov_iter_count(iter) > 0) { u64 size = dio_get_pagev_size(iter); size_t start = 0; ssize_t len; |
e8344e668 ceph: Implement w... |
928 |
|
e8344e668 ceph: Implement w... |
929 930 |
vino = ceph_vino(inode); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, |
c8fe9b17d ceph: Asynchronou... |
931 932 933 934 935 936 |
vino, pos, &size, 0, /*include a 'startsync' command*/ write ? 2 : 1, write ? CEPH_OSD_OP_WRITE : CEPH_OSD_OP_READ, flags, snapc, |
e8344e668 ceph: Implement w... |
937 938 939 940 941 |
ci->i_truncate_seq, ci->i_truncate_size, false); if (IS_ERR(req)) { ret = PTR_ERR(req); |
eab87235c ceph_sync_{,direc... |
942 |
break; |
e8344e668 ceph: Implement w... |
943 |
} |
124e68e74 ceph: file operat... |
944 |
|
c8fe9b17d ceph: Asynchronou... |
945 946 |
len = size; pages = dio_get_pages_alloc(iter, len, &start, &num_pages); |
b5b98989d ceph: combine as ... |
947 |
if (IS_ERR(pages)) { |
64c313116 ceph_sync_direct_... |
948 |
ceph_osdc_put_request(req); |
b5b98989d ceph: combine as ... |
949 |
ret = PTR_ERR(pages); |
64c313116 ceph_sync_direct_... |
950 |
break; |
124e68e74 ceph: file operat... |
951 952 953 |
} /* |
c8fe9b17d ceph: Asynchronou... |
954 955 |
* To simplify error handling, allow AIO when IO within i_size * or IO can be satisfied by single OSD request. |
124e68e74 ceph: file operat... |
956 |
*/ |
c8fe9b17d ceph: Asynchronou... |
957 958 959 960 961 962 963 964 |
if (pos == iocb->ki_pos && !is_sync_kiocb(iocb) && (len == count || pos + count <= i_size_read(inode))) { aio_req = kzalloc(sizeof(*aio_req), GFP_KERNEL); if (aio_req) { aio_req->iocb = iocb; aio_req->write = write; INIT_LIST_HEAD(&aio_req->osd_reqs); if (write) { |
5be0389da ceph: re-send AIO... |
965 |
aio_req->mtime = mtime; |
c8fe9b17d ceph: Asynchronou... |
966 967 968 969 970 971 972 973 974 975 976 977 |
swap(aio_req->prealloc_cf, *pcf); } } /* ignore error */ } if (write) { /* * throw out any page cache pages in this range. this * may block. */ truncate_inode_pages_range(inode->i_mapping, pos, |
09cbfeaf1 mm, fs: get rid o... |
978 |
(pos+len) | (PAGE_SIZE - 1)); |
c8fe9b17d ceph: Asynchronou... |
979 980 |
osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0); |
bb873b539 libceph: switch t... |
981 |
req->r_mtime = mtime; |
c8fe9b17d ceph: Asynchronou... |
982 |
} |
c8fe9b17d ceph: Asynchronou... |
983 984 |
osd_req_op_extent_osd_data_pages(req, 0, pages, len, start, false, false); |
e8344e668 ceph: Implement w... |
985 |
|
c8fe9b17d ceph: Asynchronou... |
986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 |
if (aio_req) { aio_req->total_len += len; aio_req->num_reqs++; atomic_inc(&aio_req->pending_reqs); req->r_callback = ceph_aio_complete_req; req->r_inode = inode; req->r_priv = aio_req; list_add_tail(&req->r_unsafe_item, &aio_req->osd_reqs); pos += len; iov_iter_advance(iter, len); continue; } ret = ceph_osdc_start_request(req->r_osdc, req, false); |
e8344e668 ceph: Implement w... |
1002 1003 |
if (!ret) ret = ceph_osdc_wait_request(&fsc->client->osdc, req); |
c8fe9b17d ceph: Asynchronou... |
1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 |
size = i_size_read(inode); if (!write) { if (ret == -ENOENT) ret = 0; if (ret >= 0 && ret < len && pos + ret < size) { int zlen = min_t(size_t, len - ret, size - pos - ret); ceph_zero_page_vector_range(start + ret, zlen, pages); ret += zlen; } if (ret >= 0) len = ret; } |
a22bd5ffa ceph: set user pa... |
1018 |
ceph_put_page_vector(pages, num_pages, !write); |
e8344e668 ceph: Implement w... |
1019 |
|
e8344e668 ceph: Implement w... |
1020 |
ceph_osdc_put_request(req); |
c8fe9b17d ceph: Asynchronou... |
1021 |
if (ret < 0) |
e8344e668 ceph: Implement w... |
1022 |
break; |
64c313116 ceph_sync_direct_... |
1023 |
|
c8fe9b17d ceph: Asynchronou... |
1024 1025 1026 1027 |
pos += len; iov_iter_advance(iter, len); if (!write && pos >= size) |
e8344e668 ceph: Implement w... |
1028 |
break; |
64c313116 ceph_sync_direct_... |
1029 |
|
c8fe9b17d ceph: Asynchronou... |
1030 1031 |
if (write && pos > size) { if (ceph_inode_set_size(inode, pos)) |
64c313116 ceph_sync_direct_... |
1032 1033 1034 1035 |
ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL); } |
e8344e668 ceph: Implement w... |
1036 |
} |
c8fe9b17d ceph: Asynchronou... |
1037 |
if (aio_req) { |
fc8c3892f ceph: fix use-aft... |
1038 |
LIST_HEAD(osd_reqs); |
c8fe9b17d ceph: Asynchronou... |
1039 1040 1041 1042 1043 1044 1045 |
if (aio_req->num_reqs == 0) { kfree(aio_req); return ret; } ceph_get_cap_refs(ci, write ? CEPH_CAP_FILE_WR : CEPH_CAP_FILE_RD); |
fc8c3892f ceph: fix use-aft... |
1046 1047 1048 |
list_splice(&aio_req->osd_reqs, &osd_reqs); while (!list_empty(&osd_reqs)) { req = list_first_entry(&osd_reqs, |
c8fe9b17d ceph: Asynchronou... |
1049 1050 1051 1052 1053 1054 1055 1056 |
struct ceph_osd_request, r_unsafe_item); list_del_init(&req->r_unsafe_item); if (ret >= 0) ret = ceph_osdc_start_request(req->r_osdc, req, false); if (ret < 0) { req->r_result = ret; |
85e084feb libceph: drop msg... |
1057 |
ceph_aio_complete_req(req); |
c8fe9b17d ceph: Asynchronou... |
1058 1059 1060 1061 1062 1063 1064 |
} } return -EIOCBQUEUED; } if (ret != -EOLDSNAPC && pos > iocb->ki_pos) { ret = pos - iocb->ki_pos; |
e8344e668 ceph: Implement w... |
1065 |
iocb->ki_pos = pos; |
e8344e668 ceph: Implement w... |
1066 1067 1068 |
} return ret; } |
e8344e668 ceph: Implement w... |
1069 1070 1071 1072 1073 1074 1075 |
/*
 * Synchronous write, straight from __user pointer or user pages.
 *
 * If write spans object boundary, just do multiple writes.  (For a
 * correct atomic write, we should e.g. take write locks on all
 * objects, rollback on failure, etc.)
 *
 * Data is copied into freshly-allocated page vectors (one OSD request
 * per object extent) and each request is submitted and waited for in
 * turn.  Returns bytes written, or a negative error from the first
 * failing extent.
 */
static ssize_t
ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
		struct ceph_snap_context *snapc)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
	struct ceph_vino vino;
	struct ceph_osd_request *req;
	struct page **pages;
	u64 len;
	int num_pages;
	int written = 0;
	int flags;
	int check_caps = 0;
	int ret;
	struct timespec mtime = current_time(inode);
	size_t count = iov_iter_count(from);

	if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
		return -EROFS;

	dout("sync_write on file %p %lld~%u ", file, pos, (unsigned)count);

	/* flush and drop cached pages overlapping the write range */
	ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count);
	if (ret < 0)
		return ret;

	ret = invalidate_inode_pages2_range(inode->i_mapping,
					    pos >> PAGE_SHIFT,
					    (pos + count) >> PAGE_SHIFT);
	if (ret < 0)
		dout("invalidate_inode_pages2_range returned %d ", ret);

	flags = CEPH_OSD_FLAG_ORDERSNAP |
		CEPH_OSD_FLAG_ONDISK |
		CEPH_OSD_FLAG_WRITE |
		CEPH_OSD_FLAG_ACK;

	while ((len = iov_iter_count(from)) > 0) {
		size_t left;
		int n;

		vino = ceph_vino(inode);
		/* len is clamped by new_request to the object boundary */
		req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
					    vino, pos, &len, 0, 1,
					    CEPH_OSD_OP_WRITE, flags, snapc,
					    ci->i_truncate_seq,
					    ci->i_truncate_size,
					    false);
		if (IS_ERR(req)) {
			ret = PTR_ERR(req);
			break;
		}

		/*
		 * write from beginning of first page,
		 * regardless of io alignment
		 */
		num_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;

		pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
		if (IS_ERR(pages)) {
			ret = PTR_ERR(pages);
			goto out;
		}

		/* copy user data into the page vector, page by page */
		left = len;
		for (n = 0; n < num_pages; n++) {
			size_t plen = min_t(size_t, left, PAGE_SIZE);
			ret = copy_page_from_iter(pages[n], 0, plen, from);
			if (ret != plen) {
				ret = -EFAULT;
				break;
			}
			left -= ret;
		}

		if (ret < 0) {
			ceph_release_page_vector(pages, num_pages);
			goto out;
		}

		/* get a second commit callback */
		req->r_unsafe_callback = ceph_sync_write_unsafe;
		req->r_inode = inode;

		/* last_page dirtied; pages released by request teardown */
		osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
						 false, true);

		req->r_mtime = mtime;
		ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
		if (!ret)
			ret = ceph_osdc_wait_request(&fsc->client->osdc, req);

out:
		ceph_osdc_put_request(req);
		if (ret == 0) {
			pos += len;
			written += len;

			if (pos > i_size_read(inode)) {
				check_caps = ceph_inode_set_size(inode, pos);
				if (check_caps)
					ceph_check_caps(ceph_inode(inode),
							CHECK_CAPS_AUTHONLY,
							NULL);
			}
		} else
			break;
	}

	/* on EOLDSNAPC the caller retries the whole write */
	if (ret != -EOLDSNAPC && written > 0) {
		ret = written;
		iocb->ki_pos = pos;
	}
	return ret;
}

/*
 * Wrap generic_file_aio_read with checks for cap bits on the inode.
 * Atomically grab references, so that those bits are not released
 * back to the MDS mid-read.
 *
 * Hmm, the sync read case isn't actually async... should it be?
 */
3644424dc ceph: switch to -... |
1201 |
static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *filp = iocb->ki_filp;
	struct ceph_file_info *fi = filp->private_data;
	size_t len = iov_iter_count(to);
	struct inode *inode = file_inode(filp);
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct page *pinned_page = NULL;
	ssize_t ret;
	int want, got = 0;
	/*
	 * retry_op drives the post-read retry logic below: CHECK_EOF
	 * (short direct read), READ_INLINE (inline data must be fetched
	 * from the MDS), HAVE_RETRIED (one retry already done).
	 */
	int retry_op = 0, read = 0;

again:
	dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p ",
	     inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode);

	if (fi->fmode & CEPH_FILE_MODE_LAZY)
		want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
	else
		want = CEPH_CAP_FILE_CACHE;
	ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
	if (ret < 0)
		return ret;

	/* no cache cap, O_DIRECT, or O_SYNC-ish open: bypass page cache */
	if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
	    (iocb->ki_flags & IOCB_DIRECT) ||
	    (fi->flags & CEPH_F_SYNC)) {

		dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s ",
		     inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
		     ceph_cap_string(got));

		if (ci->i_inline_version == CEPH_INLINE_NONE) {
			if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
				ret = ceph_direct_read_write(iocb, to,
							     NULL, NULL);
				/* short direct read: recheck EOF below */
				if (ret >= 0 && ret < len)
					retry_op = CHECK_EOF;
			} else {
				ret = ceph_sync_read(iocb, to, &retry_op);
			}
		} else {
			/* inline data must come from the MDS, see below */
			retry_op = READ_INLINE;
		}
	} else {
		dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s ",
		     inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
		     ceph_cap_string(got));

		ret = generic_file_read_iter(iocb, to);
	}
	dout("aio_read %p %llx.%llx dropping cap refs on %s = %d ",
	     inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
	if (pinned_page) {
		put_page(pinned_page);
		pinned_page = NULL;
	}
	ceph_put_cap_refs(ci, got);
	/* caps dropped; handle inline-data fetch or EOF recheck */
	if (retry_op > HAVE_RETRIED && ret >= 0) {
		int statret;
		struct page *page = NULL;
		loff_t i_size;
		if (retry_op == READ_INLINE) {
			page = __page_cache_alloc(GFP_KERNEL);
			if (!page)
				return -ENOMEM;
		}

		/* page != NULL asks the MDS to return the inline data */
		statret = __ceph_do_getattr(inode, page,
					    CEPH_STAT_CAP_INLINE_DATA, !!page);
		if (statret < 0) {
			if (page)
				__free_page(page);
			if (statret == -ENODATA) {
				/* inline data was migrated out; redo read */
				BUG_ON(retry_op != READ_INLINE);
				goto again;
			}
			return statret;
		}

		i_size = i_size_read(inode);
		if (retry_op == READ_INLINE) {
			BUG_ON(ret > 0 || read > 0);
			if (iocb->ki_pos < i_size &&
			    iocb->ki_pos < PAGE_SIZE) {
				/* copy from the first (inline) page */
				loff_t end = min_t(loff_t, i_size,
						   iocb->ki_pos + len);
				end = min_t(loff_t, end, PAGE_SIZE);
				if (statret < end)
					zero_user_segment(page, statret, end);
				ret = copy_page_to_iter(page,
						iocb->ki_pos & ~PAGE_MASK,
						end - iocb->ki_pos, to);
				iocb->ki_pos += ret;
				read += ret;
			}
			if (iocb->ki_pos < i_size && read < len) {
				/* rest of range within i_size reads as zeros */
				size_t zlen = min_t(size_t, len - read,
						    i_size - iocb->ki_pos);
				ret = iov_iter_zero(zlen, to);
				iocb->ki_pos += ret;
				read += ret;
			}
			__free_pages(page, 0);
			return read;
		}

		/* hit EOF or hole? */
		if (retry_op == CHECK_EOF && iocb->ki_pos < i_size &&
		    ret < len) {
			dout("sync_read hit hole, ppos %lld < size %lld"
			     ", reading more ", iocb->ki_pos, i_size);

			read += ret;
			len -= ret;
			retry_op = HAVE_RETRIED;
			goto again;
		}
	}

	/* fold any bytes accumulated across retries into the result */
	if (ret >= 0)
		ret += read;

	return ret;
}

/*
 * Take cap references to avoid releasing caps to MDS mid-write.
 *
 * If we are synchronous, and write with an old snap context, the OSD
 * may return EOLDSNAPC.  In that case, retry the write.. _after_
 * dropping our cap refs and allowing the pending snap to logically
 * complete _before_ this write occurs.
 *
 * If we are near ENOSPC, write synchronously.
 */
4908b822b ceph: switch to -... |
1339 |
static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct ceph_file_info *fi = file->private_data;
	struct inode *inode = file_inode(file);
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_osd_client *osdc =
		&ceph_sb_to_client(inode->i_sb)->client->osdc;
	struct ceph_cap_flush *prealloc_cf;
	ssize_t count, written = 0;
	int err, want, got;
	loff_t pos;

	if (ceph_snap(inode) != CEPH_NOSNAP)
		return -EROFS;

	/* allocate up front: marking caps dirty later must not fail */
	prealloc_cf = ceph_alloc_cap_flush();
	if (!prealloc_cf)
		return -ENOMEM;

	inode_lock(inode);

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = inode_to_bdi(inode);

	/* O_APPEND needs an up-to-date i_size before write_checks */
	if (iocb->ki_flags & IOCB_APPEND) {
		err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
		if (err < 0)
			goto out;
	}

	err = generic_write_checks(iocb, from);
	if (err <= 0)
		goto out;

	pos = iocb->ki_pos;
	count = iov_iter_count(from);
	err = file_remove_privs(file);
	if (err)
		goto out;

	err = file_update_time(file);
	if (err)
		goto out;

	/* inline data cannot be written in place; uninline first */
	if (ci->i_inline_version != CEPH_INLINE_NONE) {
		err = ceph_uninline_data(file, NULL);
		if (err < 0)
			goto out;
	}

retry_snap:
	if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) {
		err = -ENOSPC;
		goto out;
	}

	dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu ",
	     inode, ceph_vinop(inode), pos, count, i_size_read(inode));
	if (fi->fmode & CEPH_FILE_MODE_LAZY)
		want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
	else
		want = CEPH_CAP_FILE_BUFFER;
	got = 0;
	err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, pos + count,
			    &got, NULL);
	if (err < 0)
		goto out;

	dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s ",
	     inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));

	/* no buffer cap, O_DIRECT, or O_SYNC-ish open: write synchronously */
	if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
	    (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
		struct ceph_snap_context *snapc;
		struct iov_iter data;
		inode_unlock(inode);

		/* pick the snap context this write belongs to */
		spin_lock(&ci->i_ceph_lock);
		if (__ceph_have_pending_cap_snap(ci)) {
			struct ceph_cap_snap *capsnap =
					list_last_entry(&ci->i_cap_snaps,
							struct ceph_cap_snap,
							ci_item);
			snapc = ceph_get_snap_context(capsnap->context);
		} else {
			BUG_ON(!ci->i_head_snapc);
			snapc = ceph_get_snap_context(ci->i_head_snapc);
		}
		spin_unlock(&ci->i_ceph_lock);

		/* we might need to revert back to that point */
		data = *from;
		if (iocb->ki_flags & IOCB_DIRECT)
			written = ceph_direct_read_write(iocb, &data, snapc,
							 &prealloc_cf);
		else
			written = ceph_sync_write(iocb, &data, pos, snapc);
		if (written == -EOLDSNAPC) {
			/* snap context went stale; retake lock and retry */
			dout("aio_write %p %llx.%llx %llu~%u"
				"got EOLDSNAPC, retrying ",
				inode, ceph_vinop(inode),
				pos, (unsigned)count);
			inode_lock(inode);
			goto retry_snap;
		}
		if (written > 0)
			iov_iter_advance(from, written);
		ceph_put_snap_context(snapc);
	} else {
		/*
		 * No need to acquire the i_truncate_mutex. Because
		 * the MDS revokes Fwb caps before sending truncate
		 * message to us. We can't get Fwb cap while there
		 * are pending vmtruncate. So write and vmtruncate
		 * can not run at the same time
		 */
		written = generic_perform_write(file, from, pos);
		if (likely(written >= 0))
			iocb->ki_pos = pos + written;
		inode_unlock(inode);
	}

	if (written >= 0) {
		int dirty;
		spin_lock(&ci->i_ceph_lock);
		ci->i_inline_version = CEPH_INLINE_NONE;
		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
					       &prealloc_cf);
		spin_unlock(&ci->i_ceph_lock);
		if (dirty)
			__mark_inode_dirty(inode, dirty);
	}

	dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s ",
	     inode, ceph_vinop(inode), pos, (unsigned)count,
	     ceph_cap_string(got));
	ceph_put_cap_refs(ci, got);

	if (written >= 0) {
		/* near-full OSD map: force the data to disk now */
		if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_NEARFULL))
			iocb->ki_flags |= IOCB_DSYNC;

		written = generic_write_sync(iocb, written);
	}

	goto out_unlocked;
out:
	inode_unlock(inode);
out_unlocked:
	ceph_free_cap_flush(prealloc_cf);
	current->backing_dev_info = NULL;
	return written ? written : err;
}

/*
 * llseek.  be sure to verify file size on SEEK_END.
 */
965c8e59c lseek: the "whenc... |
1494 |
/*
 * Seek within a ceph file.  SEEK_END, SEEK_DATA and SEEK_HOLE first
 * fetch an authoritative size from the MDS before computing the new
 * position; SEEK_CUR with offset 0 is answered without moving f_pos.
 */
static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t size;
	loff_t ret;

	inode_lock(inode);

	switch (whence) {
	case SEEK_END:
	case SEEK_DATA:
	case SEEK_HOLE:
		/* these need an up-to-date i_size */
		ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
		if (ret < 0)
			goto out;
		break;
	}

	size = i_size_read(inode);

	switch (whence) {
	case SEEK_END:
		offset += size;
		break;
	case SEEK_CUR:
		/*
		 * Special-case lseek(fd, 0, SEEK_CUR): just report the
		 * current position instead of rewriting the "same"
		 * f_pos, which could race with a concurrent read(),
		 * write() or lseek().
		 */
		if (offset == 0) {
			ret = file->f_pos;
			goto out;
		}
		offset += file->f_pos;
		break;
	case SEEK_DATA:
		if (offset >= size) {
			ret = -ENXIO;
			goto out;
		}
		/* no holes are reported: data extends to EOF */
		break;
	case SEEK_HOLE:
		if (offset >= size) {
			ret = -ENXIO;
			goto out;
		}
		/* the only "hole" is at EOF */
		offset = size;
		break;
	}

	ret = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
	inode_unlock(inode);
	return ret;
}
ad7a60de8 ceph: punch hole ... |
1545 1546 1547 1548 |
/*
 * Zero @size bytes of the page-cache page covering @offset, if it is
 * present, waiting for any writeback on it first.
 */
static inline void ceph_zero_partial_page(
	struct inode *inode, loff_t offset, unsigned size)
{
	struct page *page;
	pgoff_t index = offset >> PAGE_SHIFT;

	page = find_lock_page(inode->i_mapping, index);
	if (page) {
		wait_on_page_writeback(page);
		zero_user(page, offset & (PAGE_SIZE - 1), size);
		unlock_page(page);
		put_page(page);
	}
}

/*
 * Zero the page-cache contents for [offset, offset+length): partial
 * head and tail pages are zeroed in place, whole pages in between are
 * simply dropped from the cache.
 */
static void ceph_zero_pagecache_range(struct inode *inode, loff_t offset,
				      loff_t length)
{
	/* partial leading page, if offset is not page aligned */
	loff_t nearly = round_up(offset, PAGE_SIZE);
	if (offset < nearly) {
		loff_t size = nearly - offset;
		if (length < size)
			size = length;
		ceph_zero_partial_page(inode, offset, size);
		offset += size;
		length -= size;
	}
	/* fully covered pages: just toss them */
	if (length >= PAGE_SIZE) {
		loff_t size = round_down(length, PAGE_SIZE);
		truncate_pagecache_range(inode, offset, offset + size - 1);
		offset += size;
		length -= size;
	}
	/* partial trailing page */
	if (length)
		ceph_zero_partial_page(inode, offset, length);
}

/*
 * Issue a synchronous OSD op zeroing part of one object.  With
 * length == NULL the object is truncated (offset != 0) or deleted
 * (offset == 0) instead; otherwise *length may be clamped by
 * ceph_osdc_new_request to the object boundary.  -ENOENT (object
 * does not exist, i.e. already a hole) is treated as success.
 */
static int ceph_zero_partial_object(struct inode *inode,
				    loff_t offset, loff_t *length)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
	struct ceph_osd_request *req;
	int ret = 0;
	loff_t zero = 0;
	int op;

	if (!length) {
		op = offset ? CEPH_OSD_OP_DELETE : CEPH_OSD_OP_TRUNCATE;
		length = &zero;
	} else {
		op = CEPH_OSD_OP_ZERO;
	}

	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
					ceph_vino(inode),
					offset, length,
					0, 1, op,
					CEPH_OSD_FLAG_WRITE |
					CEPH_OSD_FLAG_ONDISK,
					NULL, 0, 0, false);
	if (IS_ERR(req)) {
		ret = PTR_ERR(req);
		goto out;
	}

	req->r_mtime = inode->i_mtime;
	ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
	if (!ret) {
		ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
		if (ret == -ENOENT)
			ret = 0;
	}
	ceph_osdc_put_request(req);

out:
	return ret;
}

/*
 * Zero (punch a hole in) [offset, offset+length) across objects:
 * partial objects before the first full stripe period and after the
 * last one are zeroed piecewise; each whole period in between is
 * handled by deleting/truncating every object in the stripe set.
 */
static int ceph_zero_objects(struct inode *inode, loff_t offset, loff_t length)
{
	int ret = 0;
	struct ceph_inode_info *ci = ceph_inode(inode);
	s32 stripe_unit = ci->i_layout.stripe_unit;
	s32 stripe_count = ci->i_layout.stripe_count;
	s32 object_size = ci->i_layout.object_size;
	u64 object_set_size = object_size * stripe_count;
	u64 nearly, t;

	/* round offset up to next period boundary */
	nearly = offset + object_set_size - 1;
	t = nearly;
	nearly -= do_div(t, object_set_size);

	/* partial objects up to the period boundary */
	while (length && offset < nearly) {
		loff_t size = length;
		ret = ceph_zero_partial_object(inode, offset, &size);
		if (ret < 0)
			return ret;
		offset += size;
		length -= size;
	}
	/* whole periods: delete/truncate every object in the stripe set */
	while (length >= object_set_size) {
		int i;
		loff_t pos = offset;
		for (i = 0; i < stripe_count; ++i) {
			ret = ceph_zero_partial_object(inode, pos, NULL);
			if (ret < 0)
				return ret;
			pos += stripe_unit;
		}
		offset += object_set_size;
		length -= object_set_size;
	}
	/* remaining partial objects */
	while (length) {
		loff_t size = length;
		ret = ceph_zero_partial_object(inode, offset, &size);
		if (ret < 0)
			return ret;
		offset += size;
		length -= size;
	}
	return ret;
}

static long ceph_fallocate(struct file *file, int mode,
				loff_t offset, loff_t length)
{
	struct ceph_file_info *fi = file->private_data;
	struct inode *inode = file_inode(file);
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_osd_client *osdc =
		&ceph_inode_to_client(inode)->client->osdc;
	struct ceph_cap_flush *prealloc_cf;
	int want, got = 0;
	int dirty;
	int ret = 0;
	loff_t endoff = 0;
	loff_t size;

	/* only size-preserving preallocation and hole punching supported */
	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return -EOPNOTSUPP;

	if (!S_ISREG(inode->i_mode))
		return -EOPNOTSUPP;

	/*
	 * Allocate the cap-flush record up front, before taking any
	 * locks, so that marking caps dirty later cannot fail on ENOMEM.
	 */
	prealloc_cf = ceph_alloc_cap_flush();
	if (!prealloc_cf)
		return -ENOMEM;

	inode_lock(inode);

	/* snapshots are read-only */
	if (ceph_snap(inode) != CEPH_NOSNAP) {
		ret = -EROFS;
		goto unlock;
	}

	/*
	 * Refuse to allocate space while the cluster is full; punching a
	 * hole frees space, so it is still allowed.
	 */
	if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) &&
	    !(mode & FALLOC_FL_PUNCH_HOLE)) {
		ret = -ENOSPC;
		goto unlock;
	}

	/* inline data must be converted to regular objects first */
	if (ci->i_inline_version != CEPH_INLINE_NONE) {
		ret = ceph_uninline_data(file, NULL);
		if (ret < 0)
			goto unlock;
	}

	size = i_size_read(inode);
	/* without KEEP_SIZE the operation may extend i_size to endoff */
	if (!(mode & FALLOC_FL_KEEP_SIZE))
		endoff = offset + length;

	if (fi->fmode & CEPH_FILE_MODE_LAZY)
		want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
	else
		want = CEPH_CAP_FILE_BUFFER;

	/* take WR caps (covering a possible extension to endoff) first */
	ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, endoff, &got, NULL);
	if (ret < 0)
		goto unlock;

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		/* drop cached pages in the hole, then zero the objects */
		if (offset < size)
			ceph_zero_pagecache_range(inode, offset, length);
		ret = ceph_zero_objects(inode, offset, length);
	} else if (endoff > size) {
		/* extending: invalidate pages past old EOF, bump i_size */
		truncate_pagecache_range(inode, size, -1);
		if (ceph_inode_set_size(inode, endoff))
			ceph_check_caps(ceph_inode(inode),
				CHECK_CAPS_AUTHONLY, NULL);
	}

	if (!ret) {
		/* mark the caps dirty under i_ceph_lock using prealloc_cf */
		spin_lock(&ci->i_ceph_lock);
		ci->i_inline_version = CEPH_INLINE_NONE;
		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
					       &prealloc_cf);
		spin_unlock(&ci->i_ceph_lock);
		if (dirty)
			__mark_inode_dirty(inode, dirty);
	}

	ceph_put_cap_refs(ci, got);
unlock:
	inode_unlock(inode);
	/* no-op if __ceph_mark_dirty_caps consumed prealloc_cf */
	ceph_free_cap_flush(prealloc_cf);
	return ret;
}
124e68e74 ceph: file operat... |
1745 1746 1747 1748 |
const struct file_operations ceph_file_fops = { .open = ceph_open, .release = ceph_release, .llseek = ceph_llseek, |
3644424dc ceph: switch to -... |
1749 |
.read_iter = ceph_read_iter, |
4908b822b ceph: switch to -... |
1750 |
.write_iter = ceph_write_iter, |
124e68e74 ceph: file operat... |
1751 1752 |
.mmap = ceph_mmap, .fsync = ceph_fsync, |
40819f6fb ceph: add flock/f... |
1753 1754 |
.lock = ceph_lock, .flock = ceph_flock, |
3551dd79a ceph: switch to i... |
1755 |
.splice_write = iter_file_splice_write, |
124e68e74 ceph: file operat... |
1756 1757 |
.unlocked_ioctl = ceph_ioctl, .compat_ioctl = ceph_ioctl, |
ad7a60de8 ceph: punch hole ... |
1758 |
.fallocate = ceph_fallocate, |
124e68e74 ceph: file operat... |
1759 |
}; |