Commit bbb68dfaba73e8338fe0f1dc711cc1d261daec87

Authored by Tejun Heo
1 parent 36e227d242

workqueue: mark a work item being canceled as such

There can be two reasons try_to_grab_pending() can fail with -EAGAIN.
One is when someone else is queueing or dequeueing the work item.  With
the previous patches, it is guaranteed that PENDING and queued state
will soon agree, making it safe to busy-retry in this case.

The other is if multiple __cancel_work_timer() invocations are racing
one another.  __cancel_work_timer() grabs PENDING and then waits for
running instances of the target work item on all CPUs while holding
PENDING and !queued.  try_to_grab_pending() invoked from another task
will keep returning -EAGAIN while the current owner is waiting.

Not distinguishing the two cases is okay because __cancel_work_timer()
is the only user of try_to_grab_pending() and it invokes
wait_on_work() whenever grabbing fails.  For the first case,
busy-looping would suffice, but calling wait_on_work() instead doesn't
cause any critical problem.  For the latter case, the new contender
usually waits for the same condition as the current owner, so no
unnecessarily extended busy-looping happens.  Combined, these make
__cancel_work_timer() technically correct even without irq protection
while grabbing PENDING or distinguishing the two different cases.

While the current code is technically correct, not distinguishing the
two cases makes it difficult to use try_to_grab_pending() for purposes
other than canceling because it's impossible to tell whether it's safe
to busy-retry grabbing.

This patch adds a mechanism to mark a work item being canceled.
try_to_grab_pending() now disables irq on success and returns -EAGAIN
to indicate that grabbing failed but the PENDING and queued states are
going to agree soon, so it's safe to busy-loop.  It returns -ENOENT if
the work item is being canceled and may stay PENDING && !queued for an
arbitrary amount of time.
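For other users this means the return value can drive the retry policy
directly: spin only on -EAGAIN, give up (or wait) on -ENOENT.  A minimal
sketch of such a caller (hypothetical; as of this patch only
__cancel_work_timer() uses the function):

  /*
   * Hypothetical non-cancel user of try_to_grab_pending().  The
   * -EAGAIN busy-retry is bounded; -ENOENT may persist, so bail out.
   */
  static bool try_to_steal_work(struct work_struct *work)
  {
          unsigned long flags;
          int ret;

          do {
                  ret = try_to_grab_pending(work, false, &flags);
          } while (unlikely(ret == -EAGAIN));

          if (ret < 0)            /* -ENOENT: @work is being canceled */
                  return false;

          /* PENDING is owned and irq is disabled; requeue or release
           * PENDING here before restoring irq */
          local_irq_restore(flags);
          return true;
  }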

__cancel_work_timer() is modified to mark the work item as canceling
with WORK_OFFQ_CANCELING after grabbing PENDING, thus making
try_to_grab_pending() fail with -ENOENT instead of -EAGAIN.  Also, it
now invokes wait_on_work() iff grabbing failed with -ENOENT.  This
isn't necessary for correctness but keeps it consistent with future
users of try_to_grab_pending().

v2: try_to_grab_pending() was testing preempt_count() to ensure that
    the caller has disabled preemption.  This triggers spuriously if
    !CONFIG_PREEMPT_COUNT.  Use preemptible() instead.  Reported by
    Fengguang Wu.

v3: Updated so that try_to_grab_pending() disables irq on success
    rather than requiring preemption disabled by the caller.  This
    makes busy-looping easier and will allow try_to_grab_pending() to
    be used from bh/irq contexts.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Fengguang Wu <fengguang.wu@intel.com>

Showing 2 changed files with 76 additions and 19 deletions

include/linux/workqueue.h
@@ -70,7 +70,10 @@
 
         /* data contains off-queue information when !WORK_STRUCT_CWQ */
         WORK_OFFQ_FLAG_BASE     = WORK_STRUCT_FLAG_BITS,
-        WORK_OFFQ_FLAG_BITS     = 0,
+
+        WORK_OFFQ_CANCELING     = (1 << WORK_OFFQ_FLAG_BASE),
+
+        WORK_OFFQ_FLAG_BITS     = 1,
         WORK_OFFQ_CPU_SHIFT     = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
 
         /* convenience constants */
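
With WORK_OFFQ_FLAG_BITS bumped to 1, the CPU number moves up one bit and
the new flag sits directly above the WORK_STRUCT_* bits.  Roughly, for an
off-queue word (an informal sketch; set_work_data() below ORs its @flags
argument into @data):

  /*
   * Off-queue work->data, low bits first:
   *
   *   [WORK_STRUCT_* flags][WORK_OFFQ_CANCELING][CPU number ...]
   *    ^ bit 0              ^ WORK_OFFQ_FLAG_BASE ^ WORK_OFFQ_CPU_SHIFT
   *
   * e.g. the word mark_work_canceling() stores for a work item last
   * seen on @cpu:
   */
  unsigned long data = (cpu << WORK_OFFQ_CPU_SHIFT) |
                       WORK_OFFQ_CANCELING | WORK_STRUCT_PENDING;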
kernel/workqueue.c

@@ -537,15 +537,20 @@
  * contain the pointer to the queued cwq.  Once execution starts, the flag
  * is cleared and the high bits contain OFFQ flags and CPU number.
  *
- * set_work_cwq(), set_work_cpu_and_clear_pending() and clear_work_data()
- * can be used to set the cwq, cpu or clear work->data.  These functions
- * should only be called while the work is owned - ie. while the PENDING
- * bit is set.
+ * set_work_cwq(), set_work_cpu_and_clear_pending(), mark_work_canceling()
+ * and clear_work_data() can be used to set the cwq, cpu or clear
+ * work->data.  These functions should only be called while the work is
+ * owned - ie. while the PENDING bit is set.
  *
- * get_work_[g]cwq() can be used to obtain the gcwq or cwq
- * corresponding to a work.  gcwq is available once the work has been
- * queued anywhere after initialization.  cwq is available only from
- * queueing until execution starts.
+ * get_work_[g]cwq() can be used to obtain the gcwq or cwq corresponding to
+ * a work.  gcwq is available once the work has been queued anywhere after
+ * initialization until it is sync canceled.  cwq is available only while
+ * the work item is queued.
+ *
+ * %WORK_OFFQ_CANCELING is used to mark a work item which is being
+ * canceled.  While being canceled, a work item may have its PENDING set
+ * but stay off timer and worklist for arbitrarily long and nobody should
+ * try to steal the PENDING bit.
  */
 static inline void set_work_data(struct work_struct *work, unsigned long data,
                                  unsigned long flags)
@@ -600,6 +605,22 @@
         return get_gcwq(cpu);
 }
 
+static void mark_work_canceling(struct work_struct *work)
+{
+        struct global_cwq *gcwq = get_work_gcwq(work);
+        unsigned long cpu = gcwq ? gcwq->cpu : WORK_CPU_NONE;
+
+        set_work_data(work, (cpu << WORK_OFFQ_CPU_SHIFT) | WORK_OFFQ_CANCELING,
+                      WORK_STRUCT_PENDING);
+}
+
+static bool work_is_canceling(struct work_struct *work)
+{
+        unsigned long data = atomic_long_read(&work->data);
+
+        return !(data & WORK_STRUCT_CWQ) && (data & WORK_OFFQ_CANCELING);
+}
+
 /*
  * Policy functions.  These define the policies on how the global worker
  * pools are managed.  Unless noted otherwise, these functions assume that
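
The !WORK_STRUCT_CWQ test in work_is_canceling() matters: while a work
item is queued, the high bits of work->data hold the cwq pointer, so an
OFFQ flag bit read there would just be a pointer bit.  Informally (the
layouts below paraphrase the comment above the helpers, they are not code
from this patch):

  /*
   * queued:    work->data = cwq pointer | WORK_STRUCT_CWQ | ...
   *            high bits are a pointer; OFFQ flags must not be read
   * off-queue: work->data = (cpu << WORK_OFFQ_CPU_SHIFT) | OFFQ flags
   *            WORK_OFFQ_CANCELING is meaningful only here
   */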
@@ -1005,9 +1026,10 @@
 }
 
 /**
- * try_to_grab_pending - steal work item from worklist
+ * try_to_grab_pending - steal work item from worklist and disable irq
  * @work: work item to steal
  * @is_dwork: @work is a delayed_work
+ * @flags: place to store irq state
  *
  * Try to grab PENDING bit of @work.  This function can handle @work in any
  * stable state - idle, on timer or on worklist.  Return values are
@@ -1015,13 +1037,30 @@
  *  1 if @work was pending and we successfully stole PENDING
  *  0 if @work was idle and we claimed PENDING
  * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry
+ * -ENOENT if someone else is canceling @work, this state may persist
+ *          for arbitrarily long
  *
- * On >= 0 return, the caller owns @work's PENDING bit.
+ * On >= 0 return, the caller owns @work's PENDING bit.  To avoid getting
+ * preempted while holding PENDING and @work off queue, preemption must be
+ * disabled on entry.  This ensures that we don't return -EAGAIN while
+ * another task is preempted in this function.
+ *
+ * On successful return, >= 0, irq is disabled and the caller is
+ * responsible for releasing it using local_irq_restore(*@flags).
+ *
+ * This function is safe to call from any context other than IRQ handler.
+ * An IRQ handler may run on top of delayed_work_timer_fn() which can make
+ * this function return -EAGAIN perpetually.
  */
-static int try_to_grab_pending(struct work_struct *work, bool is_dwork)
+static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
+                               unsigned long *flags)
 {
         struct global_cwq *gcwq;
 
+        WARN_ON_ONCE(in_irq());
+
+        local_irq_save(*flags);
+
         /* try to steal the timer if it exists */
         if (is_dwork) {
                 struct delayed_work *dwork = to_delayed_work(work);
@@ -1040,9 +1079,9 @@
          */
         gcwq = get_work_gcwq(work);
         if (!gcwq)
-                return -EAGAIN;
+                goto fail;
 
-        spin_lock_irq(&gcwq->lock);
+        spin_lock(&gcwq->lock);
         if (!list_empty(&work->entry)) {
                 /*
                  * This work is queued, but perhaps we locked the wrong gcwq.
@@ -1057,12 +1096,16 @@
                                        get_work_color(work),
                                        *work_data_bits(work) & WORK_STRUCT_DELAYED);
 
-                        spin_unlock_irq(&gcwq->lock);
+                        spin_unlock(&gcwq->lock);
                         return 1;
                 }
         }
-        spin_unlock_irq(&gcwq->lock);
-
+        spin_unlock(&gcwq->lock);
+fail:
+        local_irq_restore(*flags);
+        if (work_is_canceling(work))
+                return -ENOENT;
+        cpu_relax();
         return -EAGAIN;
 }
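
Note the resulting irq contract, which the spin_lock_irq() to spin_lock()
conversions above rely on: local_irq_save() at entry covers the whole
function, irq stays disabled on success for the caller to release, and the
fail path restores it before returning.  A condensed caller-side view (a
sketch; do_something_with() is a placeholder, not a real helper):

  unsigned long flags;
  int ret;

  ret = try_to_grab_pending(work, is_dwork, &flags);
  if (ret >= 0) {
          /* PENDING owned, irq still disabled */
          do_something_with(work);        /* placeholder */
          local_irq_restore(flags);       /* caller releases irq */
  } else {
          /* irq already restored internally; safe to sleep or retry */
  }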
@@ -2839,13 +2882,24 @@
 
 static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 {
+        unsigned long flags;
         int ret;
 
         do {
-                ret = try_to_grab_pending(work, is_dwork);
-                wait_on_work(work);
+                ret = try_to_grab_pending(work, is_dwork, &flags);
+                /*
+                 * If someone else is canceling, wait for the same event it
+                 * would be waiting for before retrying.
+                 */
+                if (unlikely(ret == -ENOENT))
+                        wait_on_work(work);
         } while (unlikely(ret < 0));
 
+        /* tell other tasks trying to grab @work to back off */
+        mark_work_canceling(work);
+        local_irq_restore(flags);
+
+        wait_on_work(work);
         clear_work_data(work);
         return ret;
 }
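
Externally the interface is unchanged: cancel_work_sync() and
cancel_delayed_work_sync() still wrap __cancel_work_timer().  A minimal
usage example (hypothetical driver code, not part of this patch):

  /* hypothetical teardown path in a driver */
  static void my_dev_remove(struct my_dev *dev)
  {
          /*
           * Returns %true if @poll_work was pending.  On return the
           * callback is guaranteed not to be running on any CPU.
           */
          cancel_delayed_work_sync(&dev->poll_work);
  }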