Commit d55262c4d164759a8debe772da6c9b16059dec47
1 parent
4c16bd327c
Exists in
master
and in
20 other branches
workqueue: update sysfs interface to reflect NUMA awareness and a kernel param t…
…o disable NUMA affinity Unbound workqueues are now NUMA aware. Let's add some control knobs and update sysfs interface accordingly. * Add kernel param workqueue.disable_numa which disables NUMA affinity globally. * Replace sysfs file "pool_id" with "pool_ids" which contains node:pool_id pairs. This change is userland-visible but "pool_id" hasn't seen a release yet, so this is okay. * Add a new sysfs file "numa" which can toggle NUMA affinity on individual workqueues. This is implemented as attrs->no_numa which is special in that it isn't part of a pool's attributes. It only affects how apply_workqueue_attrs() picks which pools to use. After "pool_ids" change, first_pwq() doesn't have any user left. Removed. Signed-off-by: Tejun Heo <tj@kernel.org> Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Showing 3 changed files with 73 additions and 23 deletions Side-by-side Diff
Documentation/kernel-parameters.txt
... | ... | @@ -3222,6 +3222,15 @@ |
3222 | 3222 | or other driver-specific files in the |
3223 | 3223 | Documentation/watchdog/ directory. |
3224 | 3224 | |
3225 | + workqueue.disable_numa | |
3226 | + By default, all work items queued to unbound | |
3227 | + workqueues are affine to the NUMA nodes they're | |
3228 | + issued on, which results in better behavior in | |
3229 | + general. If NUMA affinity needs to be disabled for | |
3230 | + whatever reason, this option can be used. Note | |
3231 | + that this also can be controlled per-workqueue for | |
3232 | + workqueues visible under /sys/bus/workqueue/. | |
3233 | + | |
3225 | 3234 | x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of |
3226 | 3235 | default x2apic cluster mode on platforms |
3227 | 3236 | supporting x2apic. |
include/linux/workqueue.h
... | ... | @@ -119,10 +119,15 @@ |
119 | 119 | /* |
120 | 120 | * A struct for workqueue attributes. This can be used to change |
121 | 121 | * attributes of an unbound workqueue. |
122 | + * | |
123 | + * Unlike other fields, ->no_numa isn't a property of a worker_pool. It | |
124 | + * only modifies how apply_workqueue_attrs() select pools and thus doesn't | |
125 | + * participate in pool hash calculations or equality comparisons. | |
122 | 126 | */ |
123 | 127 | struct workqueue_attrs { |
124 | 128 | int nice; /* nice level */ |
125 | 129 | cpumask_var_t cpumask; /* allowed CPUs */ |
130 | + bool no_numa; /* disable NUMA affinity */ | |
126 | 131 | }; |
127 | 132 | |
128 | 133 | static inline struct delayed_work *to_delayed_work(struct work_struct *work) |
kernel/workqueue.c
... | ... | @@ -268,6 +268,9 @@ |
268 | 268 | static cpumask_var_t *wq_numa_possible_cpumask; |
269 | 269 | /* possible CPUs of each node */ |
270 | 270 | |
271 | +static bool wq_disable_numa; | |
272 | +module_param_named(disable_numa, wq_disable_numa, bool, 0444); | |
273 | + | |
271 | 274 | static bool wq_numa_enabled; /* unbound NUMA affinity enabled */ |
272 | 275 | |
273 | 276 | /* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */ |
... | ... | @@ -517,21 +520,6 @@ |
517 | 520 | } |
518 | 521 | |
519 | 522 | /** |
520 | - * first_pwq - return the first pool_workqueue of the specified workqueue | |
521 | - * @wq: the target workqueue | |
522 | - * | |
523 | - * This must be called either with wq->mutex held or sched RCU read locked. | |
524 | - * If the pwq needs to be used beyond the locking in effect, the caller is | |
525 | - * responsible for guaranteeing that the pwq stays online. | |
526 | - */ | |
527 | -static struct pool_workqueue *first_pwq(struct workqueue_struct *wq) | |
528 | -{ | |
529 | - assert_rcu_or_wq_mutex(wq); | |
530 | - return list_first_or_null_rcu(&wq->pwqs, struct pool_workqueue, | |
531 | - pwqs_node); | |
532 | -} | |
533 | - | |
534 | -/** | |
535 | 523 | * unbound_pwq_by_node - return the unbound pool_workqueue for the given node |
536 | 524 | * @wq: the target workqueue |
537 | 525 | * @node: the node ID |
538 | 526 | |
539 | 527 | |
... | ... | @@ -3114,16 +3102,21 @@ |
3114 | 3102 | __ATTR_NULL, |
3115 | 3103 | }; |
3116 | 3104 | |
3117 | -static ssize_t wq_pool_id_show(struct device *dev, | |
3118 | - struct device_attribute *attr, char *buf) | |
3105 | +static ssize_t wq_pool_ids_show(struct device *dev, | |
3106 | + struct device_attribute *attr, char *buf) | |
3119 | 3107 | { |
3120 | 3108 | struct workqueue_struct *wq = dev_to_wq(dev); |
3121 | - struct worker_pool *pool; | |
3122 | - int written; | |
3109 | + const char *delim = ""; | |
3110 | + int node, written = 0; | |
3123 | 3111 | |
3124 | 3112 | rcu_read_lock_sched(); |
3125 | - pool = first_pwq(wq)->pool; | |
3126 | - written = scnprintf(buf, PAGE_SIZE, "%d\n", pool->id); | |
3113 | + for_each_node(node) { | |
3114 | + written += scnprintf(buf + written, PAGE_SIZE - written, | |
3115 | + "%s%d:%d", delim, node, | |
3116 | + unbound_pwq_by_node(wq, node)->pool->id); | |
3117 | + delim = " "; | |
3118 | + } | |
3119 | + written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); | |
3127 | 3120 | rcu_read_unlock_sched(); |
3128 | 3121 | |
3129 | 3122 | return written; |
3130 | 3123 | |
3131 | 3124 | |
... | ... | @@ -3212,10 +3205,46 @@ |
3212 | 3205 | return ret ?: count; |
3213 | 3206 | } |
3214 | 3207 | |
3208 | +static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr, | |
3209 | + char *buf) | |
3210 | +{ | |
3211 | + struct workqueue_struct *wq = dev_to_wq(dev); | |
3212 | + int written; | |
3213 | + | |
3214 | + mutex_lock(&wq->mutex); | |
3215 | + written = scnprintf(buf, PAGE_SIZE, "%d\n", | |
3216 | + !wq->unbound_attrs->no_numa); | |
3217 | + mutex_unlock(&wq->mutex); | |
3218 | + | |
3219 | + return written; | |
3220 | +} | |
3221 | + | |
3222 | +static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr, | |
3223 | + const char *buf, size_t count) | |
3224 | +{ | |
3225 | + struct workqueue_struct *wq = dev_to_wq(dev); | |
3226 | + struct workqueue_attrs *attrs; | |
3227 | + int v, ret; | |
3228 | + | |
3229 | + attrs = wq_sysfs_prep_attrs(wq); | |
3230 | + if (!attrs) | |
3231 | + return -ENOMEM; | |
3232 | + | |
3233 | + ret = -EINVAL; | |
3234 | + if (sscanf(buf, "%d", &v) == 1) { | |
3235 | + attrs->no_numa = !v; | |
3236 | + ret = apply_workqueue_attrs(wq, attrs); | |
3237 | + } | |
3238 | + | |
3239 | + free_workqueue_attrs(attrs); | |
3240 | + return ret ?: count; | |
3241 | +} | |
3242 | + | |
3215 | 3243 | static struct device_attribute wq_sysfs_unbound_attrs[] = { |
3216 | - __ATTR(pool_id, 0444, wq_pool_id_show, NULL), | |
3244 | + __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL), | |
3217 | 3245 | __ATTR(nice, 0644, wq_nice_show, wq_nice_store), |
3218 | 3246 | __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store), |
3247 | + __ATTR(numa, 0644, wq_numa_show, wq_numa_store), | |
3219 | 3248 | __ATTR_NULL, |
3220 | 3249 | }; |
3221 | 3250 | |
... | ... | @@ -3750,7 +3779,7 @@ |
3750 | 3779 | static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node, |
3751 | 3780 | int cpu_going_down, cpumask_t *cpumask) |
3752 | 3781 | { |
3753 | - if (!wq_numa_enabled) | |
3782 | + if (!wq_numa_enabled || attrs->no_numa) | |
3754 | 3783 | goto use_dfl; |
3755 | 3784 | |
3756 | 3785 | /* does @node have any online CPUs @attrs wants? */ |
... | ... | @@ -3951,6 +3980,8 @@ |
3951 | 3980 | cpumask = target_attrs->cpumask; |
3952 | 3981 | |
3953 | 3982 | mutex_lock(&wq->mutex); |
3983 | + if (wq->unbound_attrs->no_numa) | |
3984 | + goto out_unlock; | |
3954 | 3985 | |
3955 | 3986 | copy_workqueue_attrs(target_attrs, wq->unbound_attrs); |
3956 | 3987 | pwq = unbound_pwq_by_node(wq, node); |
... | ... | @@ -4762,6 +4793,11 @@ |
4762 | 4793 | |
4763 | 4794 | if (num_possible_nodes() <= 1) |
4764 | 4795 | return; |
4796 | + | |
4797 | + if (wq_disable_numa) { | |
4798 | + pr_info("workqueue: NUMA affinity support disabled\n"); | |
4799 | + return; | |
4800 | + } | |
4765 | 4801 | |
4766 | 4802 | wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL); |
4767 | 4803 | BUG_ON(!wq_update_unbound_numa_attrs_buf); |