Commit 216284c352a0061f5b20acff2c4e50fb43fea183
Committed by: Jens Axboe
Parent: dc86900e0a
Exists in: master and 38 other branches
block, cfq: fix race condition in cic creation path and tighten locking
cfq_get_io_context() would fail if multiple tasks race to insert cic's for the same association. This patch restructures cfq_get_io_context() so that the slow-path insertion race is handled properly.

Note that the restructuring also makes cfq_get_io_context() be called under queue_lock and perform both the ioc and cfqd insertions while holding both the ioc and queue locks. This is part of ongoing locking tightening and will be used to simplify the synchronization rules.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
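[Editor's note: the pattern the reworked code uses is: look the cic up while holding queue_lock; on a miss, drop the lock, allocate, try to link under both locks, treat -EEXIST as success so the race winner's cic is kept and the loser's allocation is freed, then re-take queue_lock and retry the lookup. Below is a minimal userspace sketch of that unlock/create/relock/retry shape, using pthread mutexes in place of queue_lock and ioc->lock; every name in it is illustrative and it is not code from this patch.]

/* sketch.c - illustrative only; compile with: cc sketch.c -lpthread */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER; /* q->queue_lock stand-in */
static pthread_mutex_t ioc_lock   = PTHREAD_MUTEX_INITIALIZER; /* ioc->lock stand-in */
static int *linked_cic;                /* stands in for the cic slot in ioc->radix_root */

/* Slow path, shaped like cfq_create_cic(): entered with queue_lock released. */
static int create_cic(void)
{
	int *cic = malloc(sizeof(*cic));   /* cfq_alloc_io_context() stand-in */

	if (!cic)
		return -1;
	*cic = 42;

	/* take both locks, outer one first, and try to link the new cic */
	pthread_mutex_lock(&queue_lock);
	pthread_mutex_lock(&ioc_lock);
	if (!linked_cic) {
		linked_cic = cic;          /* we won the insertion race */
		cic = NULL;
	}                                  /* else: the -EEXIST case, not an error */
	pthread_mutex_unlock(&ioc_lock);
	pthread_mutex_unlock(&queue_lock);

	free(cic);                         /* frees only a losing allocation */
	return 0;
}

/* Fast path plus retry loop, shaped like the reworked cfq_get_io_context().
 * Called with queue_lock held; may release and re-acquire it. */
static int *get_cic(void)
{
	int *cic;
	int err;

	while (!(cic = linked_cic)) {
		/* slow path - unlock, create the missing cic and retry */
		pthread_mutex_unlock(&queue_lock);
		err = create_cic();
		pthread_mutex_lock(&queue_lock);
		if (err)
			return NULL;
	}
	return cic;
}

int main(void)
{
	int *cic;

	pthread_mutex_lock(&queue_lock);   /* caller holds the queue lock, as in cfq */
	cic = get_cic();
	pthread_mutex_unlock(&queue_lock);
	if (cic)
		printf("cic = %d\n", *cic);
	return 0;
}

The property mirrored here is that a concurrent inserter can no longer make the caller fail: losing the race to radix_tree_insert() is folded into success, which is exactly the race the old cfq_cic_link() path turned into a hard failure.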
Showing 1 changed file with 76 additions and 59 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2908,13 +2908,10 @@
 {
 	struct cfq_data *cfqd = cic_to_cfqd(cic);
 	struct cfq_queue *cfqq;
-	unsigned long flags;
 
 	if (unlikely(!cfqd))
 		return;
 
-	spin_lock_irqsave(cfqd->queue->queue_lock, flags);
-
 	cfqq = cic->cfqq[BLK_RW_ASYNC];
 	if (cfqq) {
 		struct cfq_queue *new_cfqq;
@@ -2929,8 +2926,6 @@
 	cfqq = cic->cfqq[BLK_RW_SYNC];
 	if (cfqq)
 		cfq_mark_cfqq_prio_changed(cfqq);
-
-	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
 }
 
 static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
@@ -2958,7 +2953,6 @@
 {
 	struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
 	struct cfq_data *cfqd = cic_to_cfqd(cic);
-	unsigned long flags;
 	struct request_queue *q;
 
 	if (unlikely(!cfqd))
@@ -2966,8 +2960,6 @@
 
 	q = cfqd->queue;
 
-	spin_lock_irqsave(q->queue_lock, flags);
-
 	if (sync_cfqq) {
 		/*
 		 * Drop reference to sync queue. A new sync queue will be
@@ -2977,8 +2969,6 @@
 		cic_set_cfqq(cic, NULL, 1);
 		cfq_put_queue(sync_cfqq);
 	}
-
-	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 #endif	/* CONFIG_CFQ_GROUP_IOSCHED */
 
@@ -3142,17 +3132,32 @@
 	return cic;
 }
 
-/*
- * Add cic into ioc, using cfqd as the search key. This enables us to lookup
- * the process specific cfq io context when entered from the block layer.
- * Also adds the cic to a per-cfqd list, used when this queue is removed.
+/**
+ * cfq_create_cic - create and link a cfq_io_context
+ * @cfqd: cfqd of interest
+ * @gfp_mask: allocation mask
+ *
+ * Make sure cfq_io_context linking %current->io_context and @cfqd exists.
+ * If ioc and/or cic doesn't exist, they will be created using @gfp_mask.
  */
-static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
-			struct cfq_io_context *cic, gfp_t gfp_mask)
+static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
-	unsigned long flags;
-	int ret;
+	struct request_queue *q = cfqd->queue;
+	struct cfq_io_context *cic = NULL;
+	struct io_context *ioc;
+	int ret = -ENOMEM;
 
+	might_sleep_if(gfp_mask & __GFP_WAIT);
+
+	/* allocate stuff */
+	ioc = current_io_context(gfp_mask, q->node);
+	if (!ioc)
+		goto out;
+
+	cic = cfq_alloc_io_context(cfqd, gfp_mask);
+	if (!cic)
+		goto out;
+
 	ret = radix_tree_preload(gfp_mask);
 	if (ret)
 		goto out;
@@ -3161,53 +3166,72 @@
 	cic->key = cfqd;
 	cic->q = cfqd->queue;
 
-	spin_lock_irqsave(&ioc->lock, flags);
-	ret = radix_tree_insert(&ioc->radix_root, cfqd->queue->id, cic);
-	if (!ret)
-		hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
-	spin_unlock_irqrestore(&ioc->lock, flags);
+	/* lock both q and ioc and try to link @cic */
+	spin_lock_irq(q->queue_lock);
+	spin_lock(&ioc->lock);
 
-	radix_tree_preload_end();
-
-	if (!ret) {
-		spin_lock_irqsave(cfqd->queue->queue_lock, flags);
+	ret = radix_tree_insert(&ioc->radix_root, q->id, cic);
+	if (likely(!ret)) {
+		hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
 		list_add(&cic->queue_list, &cfqd->cic_list);
-		spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
+		cic = NULL;
+	} else if (ret == -EEXIST) {
+		/* someone else already did it */
+		ret = 0;
 	}
+
+	spin_unlock(&ioc->lock);
+	spin_unlock_irq(q->queue_lock);
+
+	radix_tree_preload_end();
 out:
 	if (ret)
 		printk(KERN_ERR "cfq: cic link failed!\n");
+	if (cic)
+		cfq_cic_free(cic);
 	return ret;
 }
 
-/*
- * Setup general io context and cfq io context. There can be several cfq
- * io contexts per general io context, if this process is doing io to more
- * than one device managed by cfq.
+/**
+ * cfq_get_io_context - acquire cfq_io_context and bump refcnt on io_context
+ * @cfqd: cfqd to setup cic for
+ * @gfp_mask: allocation mask
+ *
+ * Return cfq_io_context associating @cfqd and %current->io_context and
+ * bump refcnt on io_context.  If ioc or cic doesn't exist, they're created
+ * using @gfp_mask.
+ *
+ * Must be called under queue_lock which may be released and re-acquired.
+ * This function also may sleep depending on @gfp_mask.
  */
 static struct cfq_io_context *
 cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
-	struct io_context *ioc = NULL;
+	struct request_queue *q = cfqd->queue;
 	struct cfq_io_context *cic = NULL;
+	struct io_context *ioc;
+	int err;
 
-	might_sleep_if(gfp_mask & __GFP_WAIT);
+	lockdep_assert_held(q->queue_lock);
 
-	ioc = current_io_context(gfp_mask, cfqd->queue->node);
-	if (!ioc)
-		goto err;
+	while (true) {
+		/* fast path */
+		ioc = current->io_context;
+		if (likely(ioc)) {
+			cic = cfq_cic_lookup(cfqd, ioc);
+			if (likely(cic))
+				break;
+		}
 
-	cic = cfq_cic_lookup(cfqd, ioc);
-	if (cic)
-		goto out;
+		/* slow path - unlock, create missing ones and retry */
+		spin_unlock_irq(q->queue_lock);
+		err = cfq_create_cic(cfqd, gfp_mask);
+		spin_lock_irq(q->queue_lock);
+		if (err)
+			return NULL;
+	}
 
-	cic = cfq_alloc_io_context(cfqd, gfp_mask);
-	if (cic == NULL)
-		goto err;
-
-	if (cfq_cic_link(cfqd, ioc, cic, gfp_mask))
-		goto err;
-out:
+	/* bump @ioc's refcnt and handle changed notifications */
 	get_io_context(ioc);
 
 	if (unlikely(cic->changed)) {
@@ -3220,10 +3244,6 @@
 	}
 
 	return cic;
-err:
-	if (cic)
-		cfq_cic_free(cic);
-	return NULL;
 }
 
 static void
@@ -3759,14 +3779,11 @@
 	const int rw = rq_data_dir(rq);
 	const bool is_sync = rq_is_sync(rq);
 	struct cfq_queue *cfqq;
-	unsigned long flags;
 
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
+	spin_lock_irq(q->queue_lock);
 	cic = cfq_get_io_context(cfqd, gfp_mask);
-
-	spin_lock_irqsave(q->queue_lock, flags);
-
 	if (!cic)
 		goto queue_fail;
 
@@ -3802,12 +3819,12 @@
 	rq->elevator_private[0] = cic;
 	rq->elevator_private[1] = cfqq;
 	rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg);
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	spin_unlock_irq(q->queue_lock);
 	return 0;
 
 queue_fail:
 	cfq_schedule_dispatch(cfqd);
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	spin_unlock_irq(q->queue_lock);
 	cfq_log(cfqd, "set_request fail");
 	return 1;
 }