Commit e84661aa84e2e003738563f65155d4f12dc474e7

Authored by Christoph Hellwig
Committed by Alex Elder
1 parent bf59170a66

xfs: add online discard support

Now that we have reliable tracking of deleted extents in a
transaction we can easily implement "online" discard support
which calls blkdev_issue_discard once a transaction commits.

The actual discard is a two stage operation as we first have
to mark the busy extent as not available for reuse before we
can start the actual discard.  Note that we don't bother
supporting discard for the non-delaylog mode.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>

Showing 10 changed files with 96 additions and 8 deletions.

Documentation/filesystems/xfs.txt
... ... @@ -39,6 +39,12 @@
39 39 drive level write caching to be enabled, for devices that
40 40 support write barriers.
41 41  
  42 + discard
  43 + Issue command to let the block device reclaim space freed by the
  44 + filesystem. This is useful for SSD devices, thinly provisioned
  45 + LUNs and virtual machine images, but may have a performance
  46 + impact. This option is incompatible with the nodelaylog option.
  47 +
42 48 dmapi
43 49 Enable the DMAPI (Data Management API) event callouts.
44 50 Use with the "mtpt" option.
fs/xfs/linux-2.6/xfs_discard.c
... ... @@ -191,4 +191,33 @@
191 191 return -XFS_ERROR(EFAULT);
192 192 return 0;
193 193 }
  194 +
  195 +int
  196 +xfs_discard_extents(
  197 + struct xfs_mount *mp,
  198 + struct list_head *list)
  199 +{
  200 + struct xfs_busy_extent *busyp;
  201 + int error = 0;
  202 +
  203 + list_for_each_entry(busyp, list, list) {
  204 + trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
  205 + busyp->length);
  206 +
  207 + error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
  208 + XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
  209 + XFS_FSB_TO_BB(mp, busyp->length),
  210 + GFP_NOFS, 0);
  211 + if (error && error != EOPNOTSUPP) {
  212 + xfs_info(mp,
  213 + "discard failed for extent [0x%llu,%u], error %d",
  214 + (unsigned long long)busyp->bno,
  215 + busyp->length,
  216 + error);
  217 + return error;
  218 + }
  219 + }
  220 +
  221 + return 0;
  222 +}
fs/xfs/linux-2.6/xfs_discard.h
... ... @@ -2,8 +2,10 @@
2 2 #define XFS_DISCARD_H 1
3 3  
4 4 struct fstrim_range;
  5 +struct list_head;
5 6  
6 7 extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
  8 +extern int xfs_discard_extents(struct xfs_mount *, struct list_head *);
7 9  
8 10 #endif /* XFS_DISCARD_H */
fs/xfs/linux-2.6/xfs_super.c
... ... @@ -110,8 +110,10 @@
110 110 #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
111 111 #define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
112 112 #define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */
113   -#define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */
114   -#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */
  113 +#define MNTOPT_DELAYLOG "delaylog" /* Delayed logging enabled */
  114 +#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */
  115 +#define MNTOPT_DISCARD "discard" /* Discard unused blocks */
  116 +#define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */
115 117  
116 118 /*
117 119 * Table driven mount option parser.
... ... @@ -355,6 +357,10 @@
355 357 mp->m_flags |= XFS_MOUNT_DELAYLOG;
356 358 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
357 359 mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
  360 + } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
  361 + mp->m_flags |= XFS_MOUNT_DISCARD;
  362 + } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
  363 + mp->m_flags &= ~XFS_MOUNT_DISCARD;
358 364 } else if (!strcmp(this_char, "ihashsize")) {
359 365 xfs_warn(mp,
360 366 "ihashsize no longer used, option is deprecated.");
... ... @@ -388,6 +394,13 @@
388 394 return EINVAL;
389 395 }
390 396  
  397 + if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
  398 + !(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
  399 + xfs_warn(mp,
  400 + "the discard option is incompatible with the nodelaylog option");
  401 + return EINVAL;
  402 + }
  403 +
391 404 #ifndef CONFIG_XFS_QUOTA
392 405 if (XFS_IS_QUOTA_RUNNING(mp)) {
393 406 xfs_warn(mp, "quota support not available in this kernel.");
... ... @@ -488,6 +501,7 @@
488 501 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
489 502 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
490 503 { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG },
  504 + { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD },
491 505 { 0, NULL }
492 506 };
493 507 static struct proc_xfs_info xfs_info_unset[] = {
fs/xfs/xfs_ag.h
... ... @@ -187,6 +187,8 @@
187 187 xfs_agnumber_t agno;
188 188 xfs_agblock_t bno;
189 189 xfs_extlen_t length;
  190 + unsigned int flags;
  191 +#define XFS_ALLOC_BUSY_DISCARDED 0x01 /* undergoing a discard op. */
190 192 };
191 193  
192 194 /*
fs/xfs/xfs_alloc.c
... ... @@ -2609,6 +2609,18 @@
2609 2609 xfs_agblock_t bend = bbno + busyp->length;
2610 2610  
2611 2611 /*
  2612 + * This extent is currently being discarded. Give the thread
  2613 + * performing the discard a chance to mark the extent unbusy
  2614 + * and retry.
  2615 + */
  2616 + if (busyp->flags & XFS_ALLOC_BUSY_DISCARDED) {
  2617 + spin_unlock(&pag->pagb_lock);
  2618 + delay(1);
  2619 + spin_lock(&pag->pagb_lock);
  2620 + return false;
  2621 + }
  2622 +
  2623 + /*
2612 2624 * If there is a busy extent overlapping a user allocation, we have
2613 2625 * no choice but to force the log and retry the search.
2614 2626 *
... ... @@ -2813,7 +2825,8 @@
2813 2825 * If this is a metadata allocation, try to reuse the busy
2814 2826 * extent instead of trimming the allocation.
2815 2827 */
2816   - if (!args->userdata) {
  2828 + if (!args->userdata &&
  2829 + !(busyp->flags & XFS_ALLOC_BUSY_DISCARDED)) {
2817 2830 if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
2818 2831 busyp, fbno, flen,
2819 2832 false))
2820 2833  
... ... @@ -2979,10 +2992,16 @@
2979 2992 kmem_free(busyp);
2980 2993 }
2981 2994  
  2995 +/*
  2996 + * Remove all extents on the passed in list from the busy extents tree.
  2997 + * If do_discard is set skip extents that need to be discarded, and mark
  2998 + * these as undergoing a discard operation instead.
  2999 + */
2982 3000 void
2983 3001 xfs_alloc_busy_clear(
2984 3002 struct xfs_mount *mp,
2985   - struct list_head *list)
  3003 + struct list_head *list,
  3004 + bool do_discard)
2986 3005 {
2987 3006 struct xfs_busy_extent *busyp, *n;
2988 3007 struct xfs_perag *pag = NULL;
... ... @@ -2999,7 +3018,10 @@
2999 3018 agno = busyp->agno;
3000 3019 }
3001 3020  
3002   - xfs_alloc_busy_clear_one(mp, pag, busyp);
  3021 + if (do_discard && busyp->length)
  3022 + busyp->flags = XFS_ALLOC_BUSY_DISCARDED;
  3023 + else
  3024 + xfs_alloc_busy_clear_one(mp, pag, busyp);
3003 3025 }
3004 3026  
3005 3027 if (pag) {
fs/xfs/xfs_alloc.h
... ... @@ -140,7 +140,8 @@
140 140 xfs_agblock_t bno, xfs_extlen_t len);
141 141  
142 142 void
143   -xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list);
  143 +xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list,
  144 + bool do_discard);
144 145  
145 146 int
146 147 xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
fs/xfs/xfs_log_cil.c
... ... @@ -29,6 +29,7 @@
29 29 #include "xfs_mount.h"
30 30 #include "xfs_error.h"
31 31 #include "xfs_alloc.h"
  32 +#include "xfs_discard.h"
32 33  
33 34 /*
34 35 * Perform initial CIL structure initialisation. If the CIL is not
35 36  
36 37  
... ... @@ -361,18 +362,28 @@
361 362 int abort)
362 363 {
363 364 struct xfs_cil_ctx *ctx = args;
  365 + struct xfs_mount *mp = ctx->cil->xc_log->l_mp;
364 366  
365 367 xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
366 368 ctx->start_lsn, abort);
367 369  
368 370 xfs_alloc_busy_sort(&ctx->busy_extents);
369   - xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, &ctx->busy_extents);
  371 + xfs_alloc_busy_clear(mp, &ctx->busy_extents,
  372 + (mp->m_flags & XFS_MOUNT_DISCARD) && !abort);
370 373  
371 374 spin_lock(&ctx->cil->xc_cil_lock);
372 375 list_del(&ctx->committing);
373 376 spin_unlock(&ctx->cil->xc_cil_lock);
374 377  
375 378 xlog_cil_free_logvec(ctx->lv_chain);
  379 +
  380 + if (!list_empty(&ctx->busy_extents)) {
  381 + ASSERT(mp->m_flags & XFS_MOUNT_DISCARD);
  382 +
  383 + xfs_discard_extents(mp, &ctx->busy_extents);
  384 + xfs_alloc_busy_clear(mp, &ctx->busy_extents, false);
  385 + }
  386 +
376 387 kmem_free(ctx);
377 388 }
378 389  
fs/xfs/xfs_mount.h
... ... @@ -224,6 +224,7 @@
224 224 #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem
225 225 operations, typically for
226 226 disk errors in metadata */
  227 +#define XFS_MOUNT_DISCARD (1ULL << 5) /* discard unused blocks */
227 228 #define XFS_MOUNT_RETERR (1ULL << 6) /* return alignment errors to
228 229 user */
229 230 #define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment
fs/xfs/xfs_trans.c
... ... @@ -609,7 +609,7 @@
609 609 struct xfs_trans *tp)
610 610 {
611 611 xfs_alloc_busy_sort(&tp->t_busy);
612   - xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy);
  612 + xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy, false);
613 613  
614 614 atomic_dec(&tp->t_mountp->m_active_trans);
615 615 xfs_trans_free_dqinfo(tp);