Commit 4c2d542f2e786537db33b613d5199dc6d69a96da

Authored by Dave Chinner
Committed by Ben Myers
1 parent 04913fdd91

xfs: Do background CIL flushes via a workqueue

Doing background CIL flushes adds significant latency to whatever
async transaction triggers it. To avoid blocking async transactions
on things like waiting for log buffer IO to complete, move the CIL
push off into a workqueue. By moving the push work into a workqueue,
we remove all the latency that the commit adds from the foreground
transaction commit path. This also means that single threaded
workloads won't do the CIL push processing themselves, leaving them
more CPU to run more async transactions.
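
For reference, the hand-off uses the stock kernel workqueue API. A
minimal sketch of the pattern (illustrative names, not the patch
itself):

    #include <linux/workqueue.h>

    struct push_state {
            struct work_struct      work;
            /* ... state the worker needs ... */
    };

    static void
    push_worker(
            struct work_struct      *work)
    {
            struct push_state       *ps = container_of(work,
                                            struct push_state, work);
            /* the expensive push runs here, off the commit path */
    }

    /* once, at init time */
    INIT_WORK(&ps->work, push_worker);

    /* fast path: an O(1) hand-off; the committer never waits */
    queue_work(push_wq, &ps->work);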

To do this, we need to keep track of the sequence number we have
pushed work for. This avoids having many transaction commits
attempting to schedule work for the same sequence, and ensures that
we only ever have one push (background or forced) in progress at a
time. It also means that we don't need to take the CIL lock in write
mode to check for potential background push races, which reduces
lock contention.
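
Condensed from the diff below: the check is a compare-and-update of
xc_push_seq under the CIL spinlock, so only the committer that
advances the sequence queues the work item:

    spin_lock(&cil->xc_cil_lock);
    if (cil->xc_push_seq < cil->xc_current_sequence) {
            cil->xc_push_seq = cil->xc_current_sequence;
            queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
    }
    spin_unlock(&cil->xc_cil_lock);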

To avoid potential issues with "smart" IO schedulers, don't use the
workqueue for log force triggered flushes. Instead, do them directly
so that the log IO is issued by the process doing the log force and
can't be incorrectly delayed by IO elevator queue idling, as it
could be if it were issued from the workqueue.
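
Condensed from xlog_cil_push_foreground() in the diff below: a log
force first waits for any background push already in flight, then
issues the push itself:

    /* start on any pending background push to minimise wait time on it */
    flush_work(&cil->xc_push_work);

    /* record the target sequence (under xc_cil_lock in the full code) */
    cil->xc_push_seq = push_seq;

    /* ... and do the push directly from the forcing process */
    xlog_cil_push(log);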

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

Showing 4 changed files with 160 additions and 94 deletions

fs/xfs/xfs_log_cil.c
... ... @@ -32,58 +32,6 @@
32 32 #include "xfs_discard.h"
33 33  
34 34 /*
35   - * Perform initial CIL structure initialisation.
36   - */
37   -int
38   -xlog_cil_init(
39   - struct log *log)
40   -{
41   - struct xfs_cil *cil;
42   - struct xfs_cil_ctx *ctx;
43   -
44   - cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
45   - if (!cil)
46   - return ENOMEM;
47   -
48   - ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL);
49   - if (!ctx) {
50   - kmem_free(cil);
51   - return ENOMEM;
52   - }
53   -
54   - INIT_LIST_HEAD(&cil->xc_cil);
55   - INIT_LIST_HEAD(&cil->xc_committing);
56   - spin_lock_init(&cil->xc_cil_lock);
57   - init_rwsem(&cil->xc_ctx_lock);
58   - init_waitqueue_head(&cil->xc_commit_wait);
59   -
60   - INIT_LIST_HEAD(&ctx->committing);
61   - INIT_LIST_HEAD(&ctx->busy_extents);
62   - ctx->sequence = 1;
63   - ctx->cil = cil;
64   - cil->xc_ctx = ctx;
65   - cil->xc_current_sequence = ctx->sequence;
66   -
67   - cil->xc_log = log;
68   - log->l_cilp = cil;
69   - return 0;
70   -}
71   -
72   -void
73   -xlog_cil_destroy(
74   - struct log *log)
75   -{
76   - if (log->l_cilp->xc_ctx) {
77   - if (log->l_cilp->xc_ctx->ticket)
78   - xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
79   - kmem_free(log->l_cilp->xc_ctx);
80   - }
81   -
82   - ASSERT(list_empty(&log->l_cilp->xc_cil));
83   - kmem_free(log->l_cilp);
84   -}
85   -
86   -/*
87 35 * Allocate a new ticket. Failing to get a new ticket makes it really hard to
88 36 * recover, so we don't allow failure here. Also, we allocate in a context that
89 37 * we don't want to be issuing transactions from, so we need to tell the
... ... @@ -426,8 +374,7 @@
426 374 */
427 375 STATIC int
428 376 xlog_cil_push(
429   - struct log *log,
430   - xfs_lsn_t push_seq)
  377 + struct log *log)
431 378 {
432 379 struct xfs_cil *cil = log->l_cilp;
433 380 struct xfs_log_vec *lv;
... ... @@ -443,39 +390,36 @@
443 390 struct xfs_log_iovec lhdr;
444 391 struct xfs_log_vec lvhdr = { NULL };
445 392 xfs_lsn_t commit_lsn;
  393 + xfs_lsn_t push_seq;
446 394  
447 395 if (!cil)
448 396 return 0;
449 397  
450   - ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence);
451   -
452 398 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
453 399 new_ctx->ticket = xlog_cil_ticket_alloc(log);
454 400  
455   - /*
456   - * Lock out transaction commit, but don't block for background pushes
457   - * unless we are well over the CIL space limit. See the definition of
458   - * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic
459   - * used here.
460   - */
461   - if (!down_write_trylock(&cil->xc_ctx_lock)) {
462   - if (!push_seq &&
463   - cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log))
464   - goto out_free_ticket;
465   - down_write(&cil->xc_ctx_lock);
466   - }
  401 + down_write(&cil->xc_ctx_lock);
467 402 ctx = cil->xc_ctx;
468 403  
469   - /* check if we've anything to push */
470   - if (list_empty(&cil->xc_cil))
471   - goto out_skip;
  404 + spin_lock(&cil->xc_cil_lock);
  405 + push_seq = cil->xc_push_seq;
  406 + ASSERT(push_seq <= ctx->sequence);
472 407  
473   - /* check for spurious background flush */
474   - if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
  408 + /*
  409 + * Check if we've anything to push. If there is nothing, then we don't
  410 + * move on to a new sequence number and so we have to be able to push
  411 + * this sequence again later.
  412 + */
  413 + if (list_empty(&cil->xc_cil)) {
  414 + cil->xc_push_seq = 0;
  415 + spin_unlock(&cil->xc_cil_lock);
475 416 goto out_skip;
  417 + }
  418 + spin_unlock(&cil->xc_cil_lock);
476 419  
  420 +
477 421 /* check for a previously pushed sequence */
478   - if (push_seq && push_seq < cil->xc_ctx->sequence)
  422 + if (push_seq < cil->xc_ctx->sequence)
479 423 goto out_skip;
480 424  
481 425 /*
... ... @@ -629,7 +573,6 @@
629 573  
630 574 out_skip:
631 575 up_write(&cil->xc_ctx_lock);
632   -out_free_ticket:
633 576 xfs_log_ticket_put(new_ctx->ticket);
634 577 kmem_free(new_ctx);
635 578 return 0;
636 579  
... ... @@ -641,7 +584,83 @@
641 584 return XFS_ERROR(EIO);
642 585 }
643 586  
  587 +static void
  588 +xlog_cil_push_work(
  589 + struct work_struct *work)
  590 +{
  591 + struct xfs_cil *cil = container_of(work, struct xfs_cil,
  592 + xc_push_work);
  593 + xlog_cil_push(cil->xc_log);
  594 +}
  595 +
644 596 /*
  597 + * We need to push CIL every so often so we don't cache more than we can fit in
  598 + * the log. The limit really is that a checkpoint can't be more than half the
  599 + * log (the current checkpoint is not allowed to overwrite the previous
  600 + * checkpoint), but commit latency and memory usage limit this to a smaller
  601 + * size.
  602 + */
  603 +static void
  604 +xlog_cil_push_background(
  605 + struct log *log)
  606 +{
  607 + struct xfs_cil *cil = log->l_cilp;
  608 +
  609 + /*
  610 + * The cil won't be empty because we are called while holding the
  611 + * context lock so whatever we added to the CIL will still be there
  612 + */
  613 + ASSERT(!list_empty(&cil->xc_cil));
  614 +
  615 + /*
  616 + * don't do a background push if we haven't used up all the
  617 + * space available yet.
  618 + */
  619 + if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
  620 + return;
  621 +
  622 + spin_lock(&cil->xc_cil_lock);
  623 + if (cil->xc_push_seq < cil->xc_current_sequence) {
  624 + cil->xc_push_seq = cil->xc_current_sequence;
  625 + queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
  626 + }
  627 + spin_unlock(&cil->xc_cil_lock);
  628 +
  629 +}
  630 +
  631 +static void
  632 +xlog_cil_push_foreground(
  633 + struct log *log,
  634 + xfs_lsn_t push_seq)
  635 +{
  636 + struct xfs_cil *cil = log->l_cilp;
  637 +
  638 + if (!cil)
  639 + return;
  640 +
  641 + ASSERT(push_seq && push_seq <= cil->xc_current_sequence);
  642 +
  643 + /* start on any pending background push to minimise wait time on it */
  644 + flush_work(&cil->xc_push_work);
  645 +
  646 + /*
  647 + * If the CIL is empty or we've already pushed the sequence then
  648 + * there's no work we need to do.
  649 + */
  650 + spin_lock(&cil->xc_cil_lock);
  651 + if (list_empty(&cil->xc_cil) || push_seq <= cil->xc_push_seq) {
  652 + spin_unlock(&cil->xc_cil_lock);
  653 + return;
  654 + }
  655 +
  656 + cil->xc_push_seq = push_seq;
  657 + spin_unlock(&cil->xc_cil_lock);
  658 +
  659 + /* do the push now */
  660 + xlog_cil_push(log);
  661 +}
  662 +
  663 +/*
645 664 * Commit a transaction with the given vector to the Committed Item List.
646 665 *
647 666 * To do this, we need to format the item, pin it in memory if required and
... ... @@ -667,7 +686,6 @@
667 686 {
668 687 struct log *log = mp->m_log;
669 688 int log_flags = 0;
670   - int push = 0;
671 689 struct xfs_log_vec *log_vector;
672 690  
673 691 if (flags & XFS_TRANS_RELEASE_LOG_RES)
674 692  
... ... @@ -719,21 +737,9 @@
719 737 */
720 738 xfs_trans_free_items(tp, *commit_lsn, 0);
721 739  
722   - /* check for background commit before unlock */
723   - if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
724   - push = 1;
  740 + xlog_cil_push_background(log);
725 741  
726 742 up_read(&log->l_cilp->xc_ctx_lock);
727   -
728   - /*
729   - * We need to push CIL every so often so we don't cache more than we
730   - * can fit in the log. The limit really is that a checkpoint can't be
731   - * more than half the log (the current checkpoint is not allowed to
732   - * overwrite the previous checkpoint), but commit latency and memory
733   - * usage limit this to a smaller size in most cases.
734   - */
735   - if (push)
736   - xlog_cil_push(log, 0);
737 743 return 0;
738 744 }
739 745  
... ... @@ -746,9 +752,6 @@
746 752 *
747 753 * We return the current commit lsn to allow the callers to determine if an
748 754 * iclog flush is necessary following this call.
749   - *
750   - * XXX: Initially, just push the CIL unconditionally and return whatever
751   - * commit lsn is there. It'll be empty, so this is broken for now.
752 755 */
753 756 xfs_lsn_t
754 757 xlog_cil_force_lsn(
... ... @@ -766,8 +769,7 @@
766 769 * xlog_cil_push() handles racing pushes for the same sequence,
767 770 * so no need to deal with it here.
768 771 */
769   - if (sequence == cil->xc_current_sequence)
770   - xlog_cil_push(log, sequence);
  772 + xlog_cil_push_foreground(log, sequence);
771 773  
772 774 /*
773 775 * See if we can find a previous sequence still committing.
... ... @@ -825,5 +827,58 @@
825 827 if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0)
826 828 return false;
827 829 return true;
  830 +}
  831 +
  832 +/*
  833 + * Perform initial CIL structure initialisation.
  834 + */
  835 +int
  836 +xlog_cil_init(
  837 + struct log *log)
  838 +{
  839 + struct xfs_cil *cil;
  840 + struct xfs_cil_ctx *ctx;
  841 +
  842 + cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
  843 + if (!cil)
  844 + return ENOMEM;
  845 +
  846 + ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL);
  847 + if (!ctx) {
  848 + kmem_free(cil);
  849 + return ENOMEM;
  850 + }
  851 +
  852 + INIT_WORK(&cil->xc_push_work, xlog_cil_push_work);
  853 + INIT_LIST_HEAD(&cil->xc_cil);
  854 + INIT_LIST_HEAD(&cil->xc_committing);
  855 + spin_lock_init(&cil->xc_cil_lock);
  856 + init_rwsem(&cil->xc_ctx_lock);
  857 + init_waitqueue_head(&cil->xc_commit_wait);
  858 +
  859 + INIT_LIST_HEAD(&ctx->committing);
  860 + INIT_LIST_HEAD(&ctx->busy_extents);
  861 + ctx->sequence = 1;
  862 + ctx->cil = cil;
  863 + cil->xc_ctx = ctx;
  864 + cil->xc_current_sequence = ctx->sequence;
  865 +
  866 + cil->xc_log = log;
  867 + log->l_cilp = cil;
  868 + return 0;
  869 +}
  870 +
  871 +void
  872 +xlog_cil_destroy(
  873 + struct log *log)
  874 +{
  875 + if (log->l_cilp->xc_ctx) {
  876 + if (log->l_cilp->xc_ctx->ticket)
  877 + xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
  878 + kmem_free(log->l_cilp->xc_ctx);
  879 + }
  880 +
  881 + ASSERT(list_empty(&log->l_cilp->xc_cil));
  882 + kmem_free(log->l_cilp);
828 883 }
fs/xfs/xfs_log_priv.h
... ... @@ -417,6 +417,8 @@
417 417 struct list_head xc_committing;
418 418 wait_queue_head_t xc_commit_wait;
419 419 xfs_lsn_t xc_current_sequence;
  420 + struct work_struct xc_push_work;
  421 + xfs_lsn_t xc_push_seq;
420 422 };
421 423  
422 424 /*
fs/xfs/xfs_mount.h
... ... @@ -214,6 +214,7 @@
214 214  
215 215 struct workqueue_struct *m_data_workqueue;
216 216 struct workqueue_struct *m_unwritten_workqueue;
  217 + struct workqueue_struct *m_cil_workqueue;
217 218 } xfs_mount_t;
218 219  
219 220 /*
fs/xfs/xfs_super.c
... ... @@ -773,8 +773,14 @@
773 773 if (!mp->m_unwritten_workqueue)
774 774 goto out_destroy_data_iodone_queue;
775 775  
  776 + mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
  777 + WQ_MEM_RECLAIM, 0, mp->m_fsname);
  778 + if (!mp->m_cil_workqueue)
  779 + goto out_destroy_unwritten;
776 780 return 0;
777 781  
  782 +out_destroy_unwritten:
  783 + destroy_workqueue(mp->m_unwritten_workqueue);
778 784 out_destroy_data_iodone_queue:
779 785 destroy_workqueue(mp->m_data_workqueue);
780 786 out:
... ... @@ -785,6 +791,7 @@
785 791 xfs_destroy_mount_workqueues(
786 792 struct xfs_mount *mp)
787 793 {
  794 + destroy_workqueue(mp->m_cil_workqueue);
788 795 destroy_workqueue(mp->m_data_workqueue);
789 796 destroy_workqueue(mp->m_unwritten_workqueue);
790 797 }