Commit 216284c352a0061f5b20acff2c4e50fb43fea183

Authored by Tejun Heo
Committed by Jens Axboe
1 parent dc86900e0a

block, cfq: fix race condition in cic creation path and tighten locking

cfq_get_io_context() would fail if multiple tasks raced to insert
cics for the same association.  This patch restructures
cfq_get_io_context() so that the slow-path insertion race is handled
properly.
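
The core of the fix, condensed from the new cfq_create_cic() in the
diff below: the radix tree insertion is performed while holding both
locks, and a task that loses the insertion race treats -EEXIST as
success and frees its now-redundant cic instead of failing.

    spin_lock_irq(q->queue_lock);
    spin_lock(&ioc->lock);

    ret = radix_tree_insert(&ioc->radix_root, q->id, cic);
    if (likely(!ret)) {
            /* won the race - publish cic on both ioc and cfqd lists */
            hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
            list_add(&cic->queue_list, &cfqd->cic_list);
            cic = NULL;                     /* ownership transferred */
    } else if (ret == -EEXIST) {
            /* somebody else already linked a cic - not an error */
            ret = 0;
    }

    spin_unlock(&ioc->lock);
    spin_unlock_irq(q->queue_lock);

    if (cic)
            cfq_cic_free(cic);              /* free the losing copy */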

Note that the restructuring also means cfq_get_io_context() is now
called under queue_lock and performs both the ioc and cfqd insertions
while holding both the ioc and queue locks.  This is part of an
on-going effort to tighten locking and will be used to simplify the
synchronization rules.
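
In other words, the new calling convention looks roughly like this
(condensed from the new cfq_get_io_context() in the diff below):
queue_lock is held on entry, dropped only around the allocating slow
path, and the lookup is simply retried after re-acquiring it.

    lockdep_assert_held(q->queue_lock);

    while (true) {
            /* fast path - both ioc and cic already exist */
            ioc = current->io_context;
            if (likely(ioc)) {
                    cic = cfq_cic_lookup(cfqd, ioc);
                    if (likely(cic))
                            break;
            }

            /* slow path - unlock, create missing ones and retry */
            spin_unlock_irq(q->queue_lock);
            err = cfq_create_cic(cfqd, gfp_mask);
            spin_lock_irq(q->queue_lock);
            if (err)
                    return NULL;
    }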

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Showing 1 changed file with 76 additions and 59 deletions

... ... @@ -2908,13 +2908,10 @@
2908 2908 {
2909 2909 struct cfq_data *cfqd = cic_to_cfqd(cic);
2910 2910 struct cfq_queue *cfqq;
2911   - unsigned long flags;
2912 2911  
2913 2912 if (unlikely(!cfqd))
2914 2913 return;
2915 2914  
2916   - spin_lock_irqsave(cfqd->queue->queue_lock, flags);
2917   -
2918 2915 cfqq = cic->cfqq[BLK_RW_ASYNC];
2919 2916 if (cfqq) {
2920 2917 struct cfq_queue *new_cfqq;
... ... @@ -2929,8 +2926,6 @@
2929 2926 cfqq = cic->cfqq[BLK_RW_SYNC];
2930 2927 if (cfqq)
2931 2928 cfq_mark_cfqq_prio_changed(cfqq);
2932   -
2933   - spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
2934 2929 }
2935 2930  
2936 2931 static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
... ... @@ -2958,7 +2953,6 @@
2958 2953 {
2959 2954 struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
2960 2955 struct cfq_data *cfqd = cic_to_cfqd(cic);
2961   - unsigned long flags;
2962 2956 struct request_queue *q;
2963 2957  
2964 2958 if (unlikely(!cfqd))
... ... @@ -2966,8 +2960,6 @@
2966 2960  
2967 2961 q = cfqd->queue;
2968 2962  
2969   - spin_lock_irqsave(q->queue_lock, flags);
2970   -
2971 2963 if (sync_cfqq) {
2972 2964 /*
2973 2965 * Drop reference to sync queue. A new sync queue will be
... ... @@ -2977,8 +2969,6 @@
2977 2969 cic_set_cfqq(cic, NULL, 1);
2978 2970 cfq_put_queue(sync_cfqq);
2979 2971 }
2980   -
2981   - spin_unlock_irqrestore(q->queue_lock, flags);
2982 2972 }
2983 2973 #endif /* CONFIG_CFQ_GROUP_IOSCHED */
2984 2974  
... ... @@ -3142,17 +3132,32 @@
3142 3132 return cic;
3143 3133 }
3144 3134  
3145   -/*
3146   - * Add cic into ioc, using cfqd as the search key. This enables us to lookup
3147   - * the process specific cfq io context when entered from the block layer.
3148   - * Also adds the cic to a per-cfqd list, used when this queue is removed.
  3135 +/**
  3136 + * cfq_create_cic - create and link a cfq_io_context
  3137 + * @cfqd: cfqd of interest
  3138 + * @gfp_mask: allocation mask
  3139 + *
  3140 + * Make sure cfq_io_context linking %current->io_context and @cfqd exists.
  3141 + * If ioc and/or cic doesn't exist, they will be created using @gfp_mask.
3149 3142 */
3150   -static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
3151   - struct cfq_io_context *cic, gfp_t gfp_mask)
  3143 +static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
3152 3144 {
3153   - unsigned long flags;
3154   - int ret;
  3145 + struct request_queue *q = cfqd->queue;
  3146 + struct cfq_io_context *cic = NULL;
  3147 + struct io_context *ioc;
  3148 + int ret = -ENOMEM;
3155 3149  
  3150 + might_sleep_if(gfp_mask & __GFP_WAIT);
  3151 +
  3152 + /* allocate stuff */
  3153 + ioc = current_io_context(gfp_mask, q->node);
  3154 + if (!ioc)
  3155 + goto out;
  3156 +
  3157 + cic = cfq_alloc_io_context(cfqd, gfp_mask);
  3158 + if (!cic)
  3159 + goto out;
  3160 +
3156 3161 ret = radix_tree_preload(gfp_mask);
3157 3162 if (ret)
3158 3163 goto out;
... ... @@ -3161,53 +3166,72 @@
3161 3166 cic->key = cfqd;
3162 3167 cic->q = cfqd->queue;
3163 3168  
3164   - spin_lock_irqsave(&ioc->lock, flags);
3165   - ret = radix_tree_insert(&ioc->radix_root, cfqd->queue->id, cic);
3166   - if (!ret)
3167   - hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
3168   - spin_unlock_irqrestore(&ioc->lock, flags);
  3169 + /* lock both q and ioc and try to link @cic */
  3170 + spin_lock_irq(q->queue_lock);
  3171 + spin_lock(&ioc->lock);
3169 3172  
3170   - radix_tree_preload_end();
3171   -
3172   - if (!ret) {
3173   - spin_lock_irqsave(cfqd->queue->queue_lock, flags);
  3173 + ret = radix_tree_insert(&ioc->radix_root, q->id, cic);
  3174 + if (likely(!ret)) {
  3175 + hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
3174 3176 list_add(&cic->queue_list, &cfqd->cic_list);
3175   - spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
  3177 + cic = NULL;
  3178 + } else if (ret == -EEXIST) {
  3179 + /* someone else already did it */
  3180 + ret = 0;
3176 3181 }
  3182 +
  3183 + spin_unlock(&ioc->lock);
  3184 + spin_unlock_irq(q->queue_lock);
  3185 +
  3186 + radix_tree_preload_end();
3177 3187 out:
3178 3188 if (ret)
3179 3189 printk(KERN_ERR "cfq: cic link failed!\n");
  3190 + if (cic)
  3191 + cfq_cic_free(cic);
3180 3192 return ret;
3181 3193 }
3182 3194  
3183   -/*
3184   - * Setup general io context and cfq io context. There can be several cfq
3185   - * io contexts per general io context, if this process is doing io to more
3186   - * than one device managed by cfq.
  3195 +/**
  3196 + * cfq_get_io_context - acquire cfq_io_context and bump refcnt on io_context
  3197 + * @cfqd: cfqd to setup cic for
  3198 + * @gfp_mask: allocation mask
  3199 + *
  3200 + * Return cfq_io_context associating @cfqd and %current->io_context and
  3201 + * bump refcnt on io_context. If ioc or cic doesn't exist, they're created
  3202 + * using @gfp_mask.
  3203 + *
  3204 + * Must be called under queue_lock which may be released and re-acquired.
  3205 + * This function also may sleep depending on @gfp_mask.
3187 3206 */
3188 3207 static struct cfq_io_context *
3189 3208 cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
3190 3209 {
3191   - struct io_context *ioc = NULL;
  3210 + struct request_queue *q = cfqd->queue;
3192 3211 struct cfq_io_context *cic = NULL;
  3212 + struct io_context *ioc;
  3213 + int err;
3193 3214  
3194   - might_sleep_if(gfp_mask & __GFP_WAIT);
  3215 + lockdep_assert_held(q->queue_lock);
3195 3216  
3196   - ioc = current_io_context(gfp_mask, cfqd->queue->node);
3197   - if (!ioc)
3198   - goto err;
  3217 + while (true) {
  3218 + /* fast path */
  3219 + ioc = current->io_context;
  3220 + if (likely(ioc)) {
  3221 + cic = cfq_cic_lookup(cfqd, ioc);
  3222 + if (likely(cic))
  3223 + break;
  3224 + }
3199 3225  
3200   - cic = cfq_cic_lookup(cfqd, ioc);
3201   - if (cic)
3202   - goto out;
  3226 + /* slow path - unlock, create missing ones and retry */
  3227 + spin_unlock_irq(q->queue_lock);
  3228 + err = cfq_create_cic(cfqd, gfp_mask);
  3229 + spin_lock_irq(q->queue_lock);
  3230 + if (err)
  3231 + return NULL;
  3232 + }
3203 3233  
3204   - cic = cfq_alloc_io_context(cfqd, gfp_mask);
3205   - if (cic == NULL)
3206   - goto err;
3207   -
3208   - if (cfq_cic_link(cfqd, ioc, cic, gfp_mask))
3209   - goto err;
3210   -out:
  3234 + /* bump @ioc's refcnt and handle changed notifications */
3211 3235 get_io_context(ioc);
3212 3236  
3213 3237 if (unlikely(cic->changed)) {
... ... @@ -3220,10 +3244,6 @@
3220 3244 }
3221 3245  
3222 3246 return cic;
3223   -err:
3224   - if (cic)
3225   - cfq_cic_free(cic);
3226   - return NULL;
3227 3247 }
3228 3248  
3229 3249 static void
... ... @@ -3759,14 +3779,11 @@
3759 3779 const int rw = rq_data_dir(rq);
3760 3780 const bool is_sync = rq_is_sync(rq);
3761 3781 struct cfq_queue *cfqq;
3762   - unsigned long flags;
3763 3782  
3764 3783 might_sleep_if(gfp_mask & __GFP_WAIT);
3765 3784  
  3785 + spin_lock_irq(q->queue_lock);
3766 3786 cic = cfq_get_io_context(cfqd, gfp_mask);
3767   -
3768   - spin_lock_irqsave(q->queue_lock, flags);
3769   -
3770 3787 if (!cic)
3771 3788 goto queue_fail;
3772 3789  
... ... @@ -3802,12 +3819,12 @@
3802 3819 rq->elevator_private[0] = cic;
3803 3820 rq->elevator_private[1] = cfqq;
3804 3821 rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg);
3805   - spin_unlock_irqrestore(q->queue_lock, flags);
  3822 + spin_unlock_irq(q->queue_lock);
3806 3823 return 0;
3807 3824  
3808 3825 queue_fail:
3809 3826 cfq_schedule_dispatch(cfqd);
3810   - spin_unlock_irqrestore(q->queue_lock, flags);
  3827 + spin_unlock_irq(q->queue_lock);
3811 3828 cfq_log(cfqd, "set_request fail");
3812 3829 return 1;
3813 3830 }