Commit 42a7fc4a6598221f1a547a76cdd45a8ab4d90e93

Authored by Greg Banks
Committed by Linus Torvalds
1 parent cda1fd4abd

[PATCH] knfsd: provide sunrpc pool_mode module option

Provide a module param "pool_mode" for sunrpc.ko which allows a sysadmin to
choose the mode for mapping NFS thread service pools to CPUs.  Values are:

auto	    choose a mapping mode heuristically
global	    (default, same as the pre-2.6.19 code) a single global pool
percpu	    one pool per CPU
pernode	    one pool per NUMA node

Note that since 2.6.19 the hardcoded behaviour has been "auto"; this patch
makes the default "global".

The pool mode can be changed after boot/modprobe using /sys, if the NFS and
lockd services have been shut down.  A useful side effect of this change is to
fix a small memory leak when unloading the module.
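
For example (assuming the standard module parameter sysfs layout, in which
module_param_call() with perm 0644 creates a root-writable node), the mode
can be chosen at load time or changed later:

	modprobe sunrpc pool_mode=pernode
	# or at boot: sunrpc.pool_mode=pernode on the kernel command line

	# at runtime, after shutting down nfsd and lockd:
	echo percpu > /sys/module/sunrpc/parameters/pool_mode

A write while nfsd or lockd still holds a reference to the pool map fails
with EBUSY.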

Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

2 changed files with 130 additions and 19 deletions

Documentation/kernel-parameters.txt
@@ -1685,6 +1685,22 @@
 	stifb=		[HW]
 			Format: bpp:<bpp1>[:<bpp2>[:<bpp3>...]]
 
+	sunrpc.pool_mode=
+			[NFS]
+			Control how the NFS server code allocates CPUs to
+			service thread pools.  Depending on how many NICs
+			you have and where their interrupts are bound, this
+			option will affect which CPUs will do NFS serving.
+			Note: this parameter cannot be changed while the
+			NFS server is running.
+
+			auto	    the server chooses an appropriate mode
+				    automatically using heuristics
+			global	    a single global pool contains all CPUs
+			percpu	    one pool for each CPU
+			pernode	    one pool for each NUMA node (equivalent
+				    to global on non-NUMA machines)
+
 	swiotlb=	[IA-64] Number of I/O TLB slabs
 
 	switches=	[HW,M68k]
net/sunrpc/svc.c
@@ -27,22 +27,26 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCDSP
 
+#define svc_serv_is_pooled(serv)    ((serv)->sv_function)
+
 /*
  * Mode for mapping cpus to pools.
  */
 enum {
-	SVC_POOL_NONE = -1,	/* uninitialised, choose one of the others */
+	SVC_POOL_AUTO = -1,	/* choose one of the others */
 	SVC_POOL_GLOBAL,	/* no mapping, just a single global pool
 				 * (legacy & UP mode) */
 	SVC_POOL_PERCPU,	/* one pool per cpu */
 	SVC_POOL_PERNODE	/* one pool per numa node */
 };
+#define SVC_POOL_DEFAULT	SVC_POOL_GLOBAL
 
 /*
  * Structure for mapping cpus to pools and vice versa.
  * Setup once during sunrpc initialisation.
  */
 static struct svc_pool_map {
+	int count;			/* How many svc_servs use us */
 	int mode;			/* Note: int not enum to avoid
 					 * warnings about "enumeration value
 					 * not handled in switch" */
@@ -50,10 +54,64 @@
 	unsigned int *pool_to;		/* maps pool id to cpu or node */
 	unsigned int *to_pool;		/* maps cpu or node to pool id */
 } svc_pool_map = {
-	.mode = SVC_POOL_NONE
+	.count = 0,
+	.mode = SVC_POOL_DEFAULT
 };
+static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
 
+static int
+param_set_pool_mode(const char *val, struct kernel_param *kp)
+{
+	int *ip = (int *)kp->arg;
+	struct svc_pool_map *m = &svc_pool_map;
+	int err;
 
+	mutex_lock(&svc_pool_map_mutex);
+
+	err = -EBUSY;
+	if (m->count)
+		goto out;
+
+	err = 0;
+	if (!strncmp(val, "auto", 4))
+		*ip = SVC_POOL_AUTO;
+	else if (!strncmp(val, "global", 6))
+		*ip = SVC_POOL_GLOBAL;
+	else if (!strncmp(val, "percpu", 6))
+		*ip = SVC_POOL_PERCPU;
+	else if (!strncmp(val, "pernode", 7))
+		*ip = SVC_POOL_PERNODE;
+	else
+		err = -EINVAL;
+
+out:
+	mutex_unlock(&svc_pool_map_mutex);
+	return err;
+}
+
+static int
+param_get_pool_mode(char *buf, struct kernel_param *kp)
+{
+	int *ip = (int *)kp->arg;
+
+	switch (*ip)
+	{
+	case SVC_POOL_AUTO:
+		return strlcpy(buf, "auto", 20);
+	case SVC_POOL_GLOBAL:
+		return strlcpy(buf, "global", 20);
+	case SVC_POOL_PERCPU:
+		return strlcpy(buf, "percpu", 20);
+	case SVC_POOL_PERNODE:
+		return strlcpy(buf, "pernode", 20);
+	default:
+		return sprintf(buf, "%d", *ip);
+	}
+}
+
+module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
+		 &svc_pool_map.mode, 0644);
+
 /*
  * Detect best pool mapping mode heuristically,
  * according to the machine's topology.
@@ -166,18 +224,25 @@
 
 
 /*
- * Build the global map of cpus to pools and vice versa.
+ * Add a reference to the global map of cpus to pools (and
+ * vice versa).  Initialise the map if we're the first user.
+ * Returns the number of pools.
  */
 static unsigned int
-svc_pool_map_init(void)
+svc_pool_map_get(void)
 {
 	struct svc_pool_map *m = &svc_pool_map;
 	int npools = -1;
 
-	if (m->mode != SVC_POOL_NONE)
+	mutex_lock(&svc_pool_map_mutex);
+
+	if (m->count++) {
+		mutex_unlock(&svc_pool_map_mutex);
 		return m->npools;
+	}
 
-	m->mode = svc_pool_map_choose_mode();
+	if (m->mode == SVC_POOL_AUTO)
+		m->mode = svc_pool_map_choose_mode();
 
 	switch (m->mode) {
 	case SVC_POOL_PERCPU:
@@ -195,10 +260,37 @@
 	}
 	m->npools = npools;
 
+	mutex_unlock(&svc_pool_map_mutex);
 	return m->npools;
 }
 
+
 /*
+ * Drop a reference to the global map of cpus to pools.
+ * When the last reference is dropped, the map data is
+ * freed; this allows the sysadmin to change the pool
+ * mode using the pool_mode module option without
+ * rebooting or re-loading sunrpc.ko.
+ */
+static void
+svc_pool_map_put(void)
+{
+	struct svc_pool_map *m = &svc_pool_map;
+
+	mutex_lock(&svc_pool_map_mutex);
+
+	if (!--m->count) {
+		m->mode = SVC_POOL_DEFAULT;
+		kfree(m->to_pool);
+		kfree(m->pool_to);
+		m->npools = 0;
+	}
+
+	mutex_unlock(&svc_pool_map_mutex);
+}
+
+
+/*
  * Set the current thread's cpus_allowed mask so that it
  * will only run on cpus in the given pool.
  *
@@ -212,10 +304,9 @@
 
 	/*
 	 * The caller checks for sv_nrpools > 1, which
-	 * implies that we've been initialized and the
-	 * map mode is not NONE.
+	 * implies that we've been initialized.
 	 */
-	BUG_ON(m->mode == SVC_POOL_NONE);
+	BUG_ON(m->count == 0);
 
 	switch (m->mode)
 	{
@@ -246,18 +337,19 @@
 	unsigned int pidx = 0;
 
 	/*
-	 * SVC_POOL_NONE happens in a pure client when
+	 * An uninitialised map happens in a pure client when
 	 * lockd is brought up, so silently treat it the
 	 * same as SVC_POOL_GLOBAL.
 	 */
-
-	switch (m->mode) {
-	case SVC_POOL_PERCPU:
-		pidx = m->to_pool[cpu];
-		break;
-	case SVC_POOL_PERNODE:
-		pidx = m->to_pool[cpu_to_node(cpu)];
-		break;
+	if (svc_serv_is_pooled(serv)) {
+		switch (m->mode) {
+		case SVC_POOL_PERCPU:
+			pidx = m->to_pool[cpu];
+			break;
+		case SVC_POOL_PERNODE:
+			pidx = m->to_pool[cpu_to_node(cpu)];
+			break;
+		}
 	}
 	return &serv->sv_pools[pidx % serv->sv_nrpools];
 }
@@ -347,7 +439,7 @@
 		  svc_thread_fn func, int sig, struct module *mod)
 {
 	struct svc_serv *serv;
-	unsigned int npools = svc_pool_map_init();
+	unsigned int npools = svc_pool_map_get();
 
 	serv = __svc_create(prog, bufsize, npools, shutdown);
353 445  
... ... @@ -396,6 +488,9 @@
396 488 BUG_ON(!list_empty(&serv->sv_tempsocks));
397 489  
398 490 cache_clean_deferred(serv);
  491 +
  492 + if (svc_serv_is_pooled(serv))
  493 + svc_pool_map_put();
399 494  
400 495 /* Unregister service with the portmapper */
401 496 svc_register(serv, 0, 0);