Commit c6ea41fbbe08f270a8edef99dc369faf809d1bd6

Authored by Mikulas Patocka
Committed by Alasdair G Kergon
1 parent a705a34a56

dm kcopyd: preallocate sub jobs to avoid deadlock

There's a possible theoretical deadlock in dm-kcopyd because multiple
allocations from the same mempool are required to finish a request.
Avoid this by preallocating sub jobs.

There is a mempool of 512 entries. Each request requires up to 9
entries from the mempool (1 master job plus 8 sub jobs). If we have at
least 57 concurrent requests running, the mempool may be exhausted and
mempool allocations may start blocking until another entry is freed to
the mempool. Because the same thread is used to free entries to the
mempool and allocate entries from the mempool, this may result in a
deadlock.

This patch changes it so that one mempool entry contains all 9 "struct
kcopyd_job" required to fulfill the whole request. The allocation is
done only once in dm_kcopyd_copy and no further mempool allocations are
done during request processing.

If dm_kcopyd_copy is not run in the completion thread, this
implementation is deadlock-free.

MIN_JOBS needs reducing accordingly and we've chosen to reduce it
further to 8.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

Showing 1 changed file with 29 additions and 20 deletions Side-by-side Diff

drivers/md/dm-kcopyd.c
... ... @@ -27,6 +27,10 @@
27 27  
28 28 #include "dm.h"
29 29  
  30 +#define SUB_JOB_SIZE 128
  31 +#define SPLIT_COUNT 8
  32 +#define MIN_JOBS 8
  33 +
30 34 /*-----------------------------------------------------------------
31 35 * Each kcopyd client has its own little pool of preallocated
32 36 * pages for kcopyd io.
33 37  
34 38  
... ... @@ -216,16 +220,17 @@
216 220 struct mutex lock;
217 221 atomic_t sub_jobs;
218 222 sector_t progress;
  223 +
  224 + struct kcopyd_job *master_job;
219 225 };
220 226  
221   -/* FIXME: this should scale with the number of pages */
222   -#define MIN_JOBS 512
223   -
224 227 static struct kmem_cache *_job_cache;
225 228  
226 229 int __init dm_kcopyd_init(void)
227 230 {
228   - _job_cache = KMEM_CACHE(kcopyd_job, 0);
  231 + _job_cache = kmem_cache_create("kcopyd_job",
  232 + sizeof(struct kcopyd_job) * (SPLIT_COUNT + 1),
  233 + __alignof__(struct kcopyd_job), 0, NULL);
229 234 if (!_job_cache)
230 235 return -ENOMEM;
231 236  
... ... @@ -299,7 +304,12 @@
299 304  
300 305 if (job->pages)
301 306 kcopyd_put_pages(kc, job->pages);
302   - mempool_free(job, kc->job_pool);
  307 + /*
  308 + * If this is the master job, the sub jobs have already
  309 + * completed so we can free everything.
  310 + */
  311 + if (job->master_job == job)
  312 + mempool_free(job, kc->job_pool);
303 313 fn(read_err, write_err, context);
304 314  
305 315 if (atomic_dec_and_test(&kc->nr_jobs))
306 316  
... ... @@ -460,14 +470,14 @@
460 470 wake(kc);
461 471 }
462 472  
463   -#define SUB_JOB_SIZE 128
464 473 static void segment_complete(int read_err, unsigned long write_err,
465 474 void *context)
466 475 {
467 476 /* FIXME: tidy this function */
468 477 sector_t progress = 0;
469 478 sector_t count = 0;
470   - struct kcopyd_job *job = (struct kcopyd_job *) context;
  479 + struct kcopyd_job *sub_job = (struct kcopyd_job *) context;
  480 + struct kcopyd_job *job = sub_job->master_job;
471 481 struct dm_kcopyd_client *kc = job->kc;
472 482  
473 483 mutex_lock(&job->lock);
... ... @@ -498,8 +508,6 @@
498 508  
499 509 if (count) {
500 510 int i;
501   - struct kcopyd_job *sub_job = mempool_alloc(kc->job_pool,
502   - GFP_NOIO);
503 511  
504 512 *sub_job = *job;
505 513 sub_job->source.sector += progress;
... ... @@ -511,7 +519,7 @@
511 519 }
512 520  
513 521 sub_job->fn = segment_complete;
514   - sub_job->context = job;
  522 + sub_job->context = sub_job;
515 523 dispatch_job(sub_job);
516 524  
517 525 } else if (atomic_dec_and_test(&job->sub_jobs)) {
518 526  
519 527  
520 528  
... ... @@ -531,19 +539,19 @@
531 539 }
532 540  
533 541 /*
534   - * Create some little jobs that will do the move between
535   - * them.
  542 + * Create some sub jobs to share the work between them.
536 543 */
537   -#define SPLIT_COUNT 8
538   -static void split_job(struct kcopyd_job *job)
  544 +static void split_job(struct kcopyd_job *master_job)
539 545 {
540 546 int i;
541 547  
542   - atomic_inc(&job->kc->nr_jobs);
  548 + atomic_inc(&master_job->kc->nr_jobs);
543 549  
544   - atomic_set(&job->sub_jobs, SPLIT_COUNT);
545   - for (i = 0; i < SPLIT_COUNT; i++)
546   - segment_complete(0, 0u, job);
  550 + atomic_set(&master_job->sub_jobs, SPLIT_COUNT);
  551 + for (i = 0; i < SPLIT_COUNT; i++) {
  552 + master_job[i + 1].master_job = master_job;
  553 + segment_complete(0, 0u, &master_job[i + 1]);
  554 + }
547 555 }
548 556  
549 557 int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
... ... @@ -553,7 +561,8 @@
553 561 struct kcopyd_job *job;
554 562  
555 563 /*
556   - * Allocate a new job.
  564 + * Allocate an array of jobs consisting of one master job
  565 + * followed by SPLIT_COUNT sub jobs.
557 566 */
558 567 job = mempool_alloc(kc->job_pool, GFP_NOIO);
559 568  
560 569  
... ... @@ -577,10 +586,10 @@
577 586  
578 587 job->fn = fn;
579 588 job->context = context;
  589 + job->master_job = job;
580 590  
581 591 if (job->source.count <= SUB_JOB_SIZE)
582 592 dispatch_job(job);
583   -
584 593 else {
585 594 mutex_init(&job->lock);
586 595 job->progress = 0;