block/blk-iolatency.c
  // SPDX-License-Identifier: GPL-2.0
  /*
   * Block rq-qos base io controller
   *
   * This works similar to wbt with a few exceptions
   *
   * - It's bio based, so the latency covers the whole block layer in addition to
   *   the actual io.
   * - We will throttle all IO that comes in here if we need to.
   * - We use the mean latency over the 100ms window.  This is because writes can
   *   be particularly fast, which could give us a false sense of the impact of
   *   other workloads on our protected workload.
   * - By default there's no throttling, we set the queue_depth to UINT_MAX so
   *   that we can have as many outstanding bio's as we're allowed to.  Only at
   *   throttle time do we pay attention to the actual queue depth.
   *
   * The hierarchy works like the cpu controller does, we track the latency at
 * every configured node, and each configured node has its own independent
   * queue depth.  This means that we only care about our latency targets at the
   * peer level.  Some group at the bottom of the hierarchy isn't going to affect
 * a group at the end of some other path if we're only configured at leaf level.
   *
   * Consider the following
   *
   *                   root blkg
   *             /                     \
   *        fast (target=5ms)     slow (target=10ms)
   *         /     \                  /        \
   *       a        b          normal(15ms)   unloved
   *
   * "a" and "b" have no target, but their combined io under "fast" cannot exceed
   * an average latency of 5ms.  If it does then we will throttle the "slow"
   * group.  In the case of "normal", if it exceeds its 15ms target, we will
   * throttle "unloved", but nobody else.
   *
   * In this example "fast", "slow", and "normal" will be the only groups actually
 * accounting their io latencies.  We have to walk up the hierarchy to the root
   * on every submit and complete so we can do the appropriate stat recording and
   * adjust the queue depth of ourselves if needed.
   *
   * There are 2 ways we throttle IO.
   *
   * 1) Queue depth throttling.  As we throttle down we will adjust the maximum
 * number of IO's we're allowed to have in flight.  This starts at UINT_MAX down
   * to 1.  If the group is only ever submitting IO for itself then this is the
   * only way we throttle.
   *
   * 2) Induced delay throttling.  This is for the case that a group is generating
   * IO that has to be issued by the root cg to avoid priority inversion. So think
   * REQ_META or REQ_SWAP.  If we are already at qd == 1 and we're getting a lot
   * of work done for us on behalf of the root cg and are being asked to scale
 * down more, then we induce a latency at userspace return.  We accumulate the
   * total amount of time we need to be punished by doing
   *
   * total_time += min_lat_nsec - actual_io_completion
   *
   * and then at throttle time will do
   *
   * throttle_time = min(total_time, NSEC_PER_SEC)
   *
   * This induced delay will throttle back the activity that is generating the
 * root cg issued io's, whether that's some metadata intensive operation or the
   * group is using so much memory that it is pushing us into swap.
   *
   * Copyright (C) 2018 Josef Bacik
   */
  #include <linux/kernel.h>
  #include <linux/blk_types.h>
  #include <linux/backing-dev.h>
  #include <linux/module.h>
  #include <linux/timer.h>
  #include <linux/memcontrol.h>
  #include <linux/sched/loadavg.h>
  #include <linux/sched/signal.h>
  #include <trace/events/block.h>
  #include <linux/blk-mq.h>
  #include "blk-rq-qos.h"
  #include "blk-stat.h"
  #include "blk.h"
  
  #define DEFAULT_SCALE_COOKIE 1000000U
  
  static struct blkcg_policy blkcg_policy_iolatency;
  struct iolatency_grp;
  
  struct blk_iolatency {
  	struct rq_qos rqos;
  	struct timer_list timer;
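	/* Number of iolatency_grps that currently have a latency target set. */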
  	atomic_t enabled;
  };
  
  static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
  {
  	return container_of(rqos, struct blk_iolatency, rqos);
  }
  
  static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat)
  {
  	return atomic_read(&blkiolat->enabled) > 0;
  }
  
  struct child_latency_info {
  	spinlock_t lock;
  
  	/* Last time we adjusted the scale of everybody. */
  	u64 last_scale_event;
  
  	/* The latency that we missed. */
  	u64 scale_lat;
  
  	/* Total io's from all of our children for the last summation. */
  	u64 nr_samples;
  
  	/* The guy who actually changed the latency numbers. */
  	struct iolatency_grp *scale_grp;
  
  	/* Cookie to tell if we need to scale up or down. */
  	atomic_t scale_cookie;
  };
  struct percentile_stats {
  	u64 total;
  	u64 missed;
  };
  
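/*
 * On SSDs we track how many IOs in a window missed the latency target
 * (percentile style); on rotational devices we track the mean latency
 * via blk_rq_stat.
 */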
  struct latency_stat {
  	union {
  		struct percentile_stats ps;
  		struct blk_rq_stat rqs;
  	};
  };
  struct iolatency_grp {
  	struct blkg_policy_data pd;
  	struct latency_stat __percpu *stats;
  	struct latency_stat cur_stat;
  	struct blk_iolatency *blkiolat;
  	struct rq_depth rq_depth;
  	struct rq_wait rq_wait;
  	atomic64_t window_start;
  	atomic_t scale_cookie;
  	u64 min_lat_nsec;
  	u64 cur_win_nsec;
  
  	/* total running average of our io latency. */
  	u64 lat_avg;
  
  	/* Our current number of IO's for the last summation. */
  	u64 nr_samples;
  	bool ssd;
  	struct child_latency_info child_lat;
  };
  #define BLKIOLATENCY_MIN_WIN_SIZE (100 * NSEC_PER_MSEC)
  #define BLKIOLATENCY_MAX_WIN_SIZE NSEC_PER_SEC
  /*
   * These are the constants used to fake the fixed-point moving average
   * calculation just like load average.  The call to calc_load() folds
   * (FIXED_1 (2048) - exp_factor) * new_sample into lat_avg.  The sampling
   * window size is bucketed to try to approximately calculate average
   * latency such that 1/exp (decay rate) is [1 min, 2.5 min) when windows
   * elapse immediately.  Note, windows only elapse with IO activity.  Idle
   * periods extend the most recent window.
   */
  #define BLKIOLATENCY_NR_EXP_FACTORS 5
  #define BLKIOLATENCY_EXP_BUCKET_SIZE (BLKIOLATENCY_MAX_WIN_SIZE / \
  				      (BLKIOLATENCY_NR_EXP_FACTORS - 1))
  static const u64 iolatency_exp_factors[BLKIOLATENCY_NR_EXP_FACTORS] = {
  	2045, // exp(1/600) - 600 samples
  	2039, // exp(1/240) - 240 samples
  	2031, // exp(1/120) - 120 samples
  	2023, // exp(1/80)  - 80 samples
  	2014, // exp(1/60)  - 60 samples
  };
  static inline struct iolatency_grp *pd_to_lat(struct blkg_policy_data *pd)
  {
  	return pd ? container_of(pd, struct iolatency_grp, pd) : NULL;
  }
  
  static inline struct iolatency_grp *blkg_to_lat(struct blkcg_gq *blkg)
  {
  	return pd_to_lat(blkg_to_pd(blkg, &blkcg_policy_iolatency));
  }
  
  static inline struct blkcg_gq *lat_to_blkg(struct iolatency_grp *iolat)
  {
  	return pd_to_blkg(&iolat->pd);
  }
  static inline void latency_stat_init(struct iolatency_grp *iolat,
  				     struct latency_stat *stat)
  {
  	if (iolat->ssd) {
  		stat->ps.total = 0;
  		stat->ps.missed = 0;
  	} else
  		blk_rq_stat_init(&stat->rqs);
  }
  
  static inline void latency_stat_sum(struct iolatency_grp *iolat,
  				    struct latency_stat *sum,
  				    struct latency_stat *stat)
  {
  	if (iolat->ssd) {
  		sum->ps.total += stat->ps.total;
  		sum->ps.missed += stat->ps.missed;
  	} else
  		blk_rq_stat_sum(&sum->rqs, &stat->rqs);
  }
  
  static inline void latency_stat_record_time(struct iolatency_grp *iolat,
  					    u64 req_time)
  {
  	struct latency_stat *stat = get_cpu_ptr(iolat->stats);
  	if (iolat->ssd) {
  		if (req_time >= iolat->min_lat_nsec)
  			stat->ps.missed++;
  		stat->ps.total++;
  	} else
  		blk_rq_stat_add(&stat->rqs, req_time);
  	put_cpu_ptr(stat);
  }
  
  static inline bool latency_sum_ok(struct iolatency_grp *iolat,
  				  struct latency_stat *stat)
  {
  	if (iolat->ssd) {
  		u64 thresh = div64_u64(stat->ps.total, 10);
  		thresh = max(thresh, 1ULL);
  		return stat->ps.missed < thresh;
  	}
  	return stat->rqs.mean <= iolat->min_lat_nsec;
  }
  
  static inline u64 latency_stat_samples(struct iolatency_grp *iolat,
  				       struct latency_stat *stat)
  {
  	if (iolat->ssd)
  		return stat->ps.total;
  	return stat->rqs.nr_samples;
  }
  
  static inline void iolat_update_total_lat_avg(struct iolatency_grp *iolat,
  					      struct latency_stat *stat)
  {
  	int exp_idx;
  
  	if (iolat->ssd)
  		return;
  
  	/*
  	 * calc_load() takes in a number stored in fixed point representation.
  	 * Because we are using this for IO time in ns, the values stored
  	 * are significantly larger than the FIXED_1 denominator (2048).
  	 * Therefore, rounding errors in the calculation are negligible and
  	 * can be ignored.
  	 */
  	exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1,
  			div64_u64(iolat->cur_win_nsec,
  				  BLKIOLATENCY_EXP_BUCKET_SIZE));
  	iolat->lat_avg = calc_load(iolat->lat_avg,
  				   iolatency_exp_factors[exp_idx],
  				   stat->rqs.mean);
  }
  static void iolat_cleanup_cb(struct rq_wait *rqw, void *private_data)
  {
  	atomic_dec(&rqw->inflight);
  	wake_up(&rqw->wait);
  }

  static bool iolat_acquire_inflight(struct rq_wait *rqw, void *private_data)
  {
  	struct iolatency_grp *iolat = private_data;
  	return rq_wait_inc_below(rqw, iolat->rq_depth.max_depth);
  }
  
  static void __blkcg_iolatency_throttle(struct rq_qos *rqos,
  				       struct iolatency_grp *iolat,
  				       bool issue_as_root,
  				       bool use_memdelay)
  {
  	struct rq_wait *rqw = &iolat->rq_wait;
  	unsigned use_delay = atomic_read(&lat_to_blkg(iolat)->use_delay);
  
  	if (use_delay)
  		blkcg_schedule_throttle(rqos->q, use_memdelay);
  
  	/*
  	 * To avoid priority inversions we want to just take a slot if we are
  	 * issuing as root.  If we're being killed off there's no point in
  	 * delaying things, we may have been killed by OOM so throttling may
  	 * make recovery take even longer, so just let the IO's through so the
  	 * task can go away.
  	 */
  	if (issue_as_root || fatal_signal_pending(current)) {
  		atomic_inc(&rqw->inflight);
  		return;
  	}
  	rq_qos_wait(rqw, iolat, iolat_acquire_inflight, iolat_cleanup_cb);
  }
  
  #define SCALE_DOWN_FACTOR 2
  #define SCALE_UP_FACTOR 4
  
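/*
 * Step size for scale adjustments: 1/4 of the queue depth when scaling down,
 * 1/16 when scaling up, and never less than 1.
 */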
  static inline unsigned long scale_amount(unsigned long qd, bool up)
  {
  	return max(up ? qd >> SCALE_UP_FACTOR : qd >> SCALE_DOWN_FACTOR, 1UL);
  }
  
  /*
   * We scale the qd down faster than we scale up, so we need to use this helper
   * to adjust the scale_cookie accordingly so we don't prematurely get
   * scale_cookie at DEFAULT_SCALE_COOKIE and unthrottle too much.
   *
   * Each group has their own local copy of the last scale cookie they saw, so if
   * the global scale cookie goes up or down they know which way they need to go
   * based on their last knowledge of it.
   */
  static void scale_cookie_change(struct blk_iolatency *blkiolat,
  				struct child_latency_info *lat_info,
  				bool up)
  {
  	unsigned long qd = blkiolat->rqos.q->nr_requests;
  	unsigned long scale = scale_amount(qd, up);
  	unsigned long old = atomic_read(&lat_info->scale_cookie);
  	unsigned long max_scale = qd << 1;
  	unsigned long diff = 0;
  
  	if (old < DEFAULT_SCALE_COOKIE)
  		diff = DEFAULT_SCALE_COOKIE - old;
  
  	if (up) {
  		if (scale + old > DEFAULT_SCALE_COOKIE)
  			atomic_set(&lat_info->scale_cookie,
  				   DEFAULT_SCALE_COOKIE);
  		else if (diff > qd)
  			atomic_inc(&lat_info->scale_cookie);
  		else
  			atomic_add(scale, &lat_info->scale_cookie);
  	} else {
  		/*
  		 * We don't want to dig a hole so deep that it takes us hours to
  		 * dig out of it.  Just enough that we don't throttle/unthrottle
  		 * with jagged workloads but can still unthrottle once pressure
  		 * has sufficiently dissipated.
  		 */
  		if (diff > qd) {
  			if (diff < max_scale)
  				atomic_dec(&lat_info->scale_cookie);
  		} else {
  			atomic_sub(scale, &lat_info->scale_cookie);
  		}
  	}
  }
  
  /*
 * Change the queue depth of the iolatency_grp.  We add 1/16th of the queue
 * depth at a time when scaling up and halve it when scaling down, so we don't
 * get wild swings and hopefully dial in to a fairer distribution of the
 * overall queue depth.
   */
  static void scale_change(struct iolatency_grp *iolat, bool up)
  {
  	unsigned long qd = iolat->blkiolat->rqos.q->nr_requests;
  	unsigned long scale = scale_amount(qd, up);
  	unsigned long old = iolat->rq_depth.max_depth;
  
  	if (old > qd)
  		old = qd;
  
  	if (up) {
  		if (old == 1 && blkcg_unuse_delay(lat_to_blkg(iolat)))
  			return;
  
  		if (old < qd) {
  			old += scale;
  			old = min(old, qd);
  			iolat->rq_depth.max_depth = old;
  			wake_up_all(&iolat->rq_wait.wait);
  		}
  	} else {
  		old >>= 1;
  		iolat->rq_depth.max_depth = max(old, 1UL);
  	}
  }
  
  /* Check our parent and see if the scale cookie has changed. */
  static void check_scale_change(struct iolatency_grp *iolat)
  {
  	struct iolatency_grp *parent;
  	struct child_latency_info *lat_info;
  	unsigned int cur_cookie;
  	unsigned int our_cookie = atomic_read(&iolat->scale_cookie);
  	u64 scale_lat;
  	unsigned int old;
  	int direction = 0;
  
  	if (lat_to_blkg(iolat)->parent == NULL)
  		return;
  
  	parent = blkg_to_lat(lat_to_blkg(iolat)->parent);
  	if (!parent)
  		return;
  
  	lat_info = &parent->child_lat;
  	cur_cookie = atomic_read(&lat_info->scale_cookie);
  	scale_lat = READ_ONCE(lat_info->scale_lat);
  
  	if (cur_cookie < our_cookie)
  		direction = -1;
  	else if (cur_cookie > our_cookie)
  		direction = 1;
  	else
  		return;
  
  	old = atomic_cmpxchg(&iolat->scale_cookie, our_cookie, cur_cookie);
  
  	/* Somebody beat us to the punch, just bail. */
  	if (old != our_cookie)
  		return;
  
  	if (direction < 0 && iolat->min_lat_nsec) {
  		u64 samples_thresh;
  
  		if (!scale_lat || iolat->min_lat_nsec <= scale_lat)
  			return;
  
  		/*
  		 * Sometimes high priority groups are their own worst enemy, so
  		 * instead of taking it out on some poor other group that did 5%
  		 * or less of the IO's for the last summation just skip this
  		 * scale down event.
  		 */
  		samples_thresh = lat_info->nr_samples * 5;
  		samples_thresh = max(1ULL, div64_u64(samples_thresh, 100));
  		if (iolat->nr_samples <= samples_thresh)
  			return;
  	}
  
  	/* We're as low as we can go. */
  	if (iolat->rq_depth.max_depth == 1 && direction < 0) {
  		blkcg_use_delay(lat_to_blkg(iolat));
  		return;
  	}
  
  	/* We're back to the default cookie, unthrottle all the things. */
  	if (cur_cookie == DEFAULT_SCALE_COOKIE) {
  		blkcg_clear_delay(lat_to_blkg(iolat));
  		iolat->rq_depth.max_depth = UINT_MAX;
  		wake_up_all(&iolat->rq_wait.wait);
  		return;
  	}
  
  	scale_change(iolat, direction > 0);
  }
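
/*
 * rq_qos throttle hook.  Walk from the bio's blkg up to the root, react to
 * any scale cookie change at each level and take (or wait for) a queue depth
 * slot before the bio is submitted.
 */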
  static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio)
  {
  	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
  	struct blkcg_gq *blkg = bio->bi_blkg;
  	bool issue_as_root = bio_issue_as_root_blkg(bio);
  
  	if (!blk_iolatency_enabled(blkiolat))
  		return;
  	while (blkg && blkg->parent) {
  		struct iolatency_grp *iolat = blkg_to_lat(blkg);
  		if (!iolat) {
  			blkg = blkg->parent;
  			continue;
  		}
  
  		check_scale_change(iolat);
  		__blkcg_iolatency_throttle(rqos, iolat, issue_as_root,
  				     (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
  		blkg = blkg->parent;
  	}
  	if (!timer_pending(&blkiolat->timer))
  		mod_timer(&blkiolat->timer, jiffies + HZ);
  }
  
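/*
 * Record one bio's completion latency in the group's per-cpu window stats.
 * Bios issued as root on a throttled group's behalf are not counted; if they
 * completed under the target, the remainder is charged to the group as delay.
 */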
  static void iolatency_record_time(struct iolatency_grp *iolat,
  				  struct bio_issue *issue, u64 now,
  				  bool issue_as_root)
  {
  	u64 start = bio_issue_time(issue);
  	u64 req_time;
  	/*
  	 * Have to do this so we are truncated to the correct time that our
  	 * issue is truncated to.
  	 */
  	now = __bio_issue_time(now);
  	if (now <= start)
  		return;
  
  	req_time = now - start;
  
  	/*
  	 * We don't want to count issue_as_root bio's in the cgroups latency
  	 * statistics as it could skew the numbers downwards.
  	 */
  	if (unlikely(issue_as_root && iolat->rq_depth.max_depth != UINT_MAX)) {
  		u64 sub = iolat->min_lat_nsec;
  		if (req_time < sub)
  			blkcg_add_delay(lat_to_blkg(iolat), now, sub - req_time);
  		return;
  	}
  	latency_stat_record_time(iolat, req_time);
  }
  
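/*
 * Rate limit scale events to one per 500ms, and require at least 5 samples
 * under the target before we consider scaling back up.
 */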
  #define BLKIOLATENCY_MIN_ADJUST_TIME (500 * NSEC_PER_MSEC)
  #define BLKIOLATENCY_MIN_GOOD_SAMPLES 5
  
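/*
 * Called when a sampling window elapses: fold this group's per-cpu stats into
 * the running totals, update the moving average, and decide whether the
 * parent's scale cookie should move up or down.
 */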
  static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now)
  {
  	struct blkcg_gq *blkg = lat_to_blkg(iolat);
  	struct iolatency_grp *parent;
  	struct child_latency_info *lat_info;
  	struct latency_stat stat;
  	unsigned long flags;
  	int cpu;

  	latency_stat_init(iolat, &stat);
  	preempt_disable();
  	for_each_online_cpu(cpu) {
  		struct latency_stat *s;
  		s = per_cpu_ptr(iolat->stats, cpu);
  		latency_stat_sum(iolat, &stat, s);
  		latency_stat_init(iolat, s);
  	}
  	preempt_enable();
  	parent = blkg_to_lat(blkg->parent);
  	if (!parent)
  		return;
  
  	lat_info = &parent->child_lat;
  	iolat_update_total_lat_avg(iolat, &stat);
  
  	/* Everything is ok and we don't need to adjust the scale. */
  	if (latency_sum_ok(iolat, &stat) &&
  	    atomic_read(&lat_info->scale_cookie) == DEFAULT_SCALE_COOKIE)
  		return;
  
  	/* Somebody beat us to the punch, just bail. */
  	spin_lock_irqsave(&lat_info->lock, flags);
  
  	latency_stat_sum(iolat, &iolat->cur_stat, &stat);
  	lat_info->nr_samples -= iolat->nr_samples;
  	lat_info->nr_samples += latency_stat_samples(iolat, &iolat->cur_stat);
  	iolat->nr_samples = latency_stat_samples(iolat, &iolat->cur_stat);
  
  	if ((lat_info->last_scale_event >= now ||
  	    now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME))
  		goto out;
  	if (latency_sum_ok(iolat, &iolat->cur_stat) &&
  	    latency_sum_ok(iolat, &stat)) {
  		if (latency_stat_samples(iolat, &iolat->cur_stat) <
  		    BLKIOLATENCY_MIN_GOOD_SAMPLES)
  			goto out;
  		if (lat_info->scale_grp == iolat) {
  			lat_info->last_scale_event = now;
  			scale_cookie_change(iolat->blkiolat, lat_info, true);
  		}
  	} else if (lat_info->scale_lat == 0 ||
  		   lat_info->scale_lat >= iolat->min_lat_nsec) {
  		lat_info->last_scale_event = now;
  		if (!lat_info->scale_grp ||
  		    lat_info->scale_lat > iolat->min_lat_nsec) {
  			WRITE_ONCE(lat_info->scale_lat, iolat->min_lat_nsec);
  			lat_info->scale_grp = iolat;
  		}
  		scale_cookie_change(iolat->blkiolat, lat_info, false);
  	}
  	latency_stat_init(iolat, &iolat->cur_stat);
  out:
  	spin_unlock_irqrestore(&lat_info->lock, flags);
  }
  
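/*
 * rq_qos completion hook.  Release the in-flight slot taken at every level on
 * submission, record the bio's latency and, once the current window has
 * elapsed, run the latency/scaling checks for this group.
 */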
  static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
  {
  	struct blkcg_gq *blkg;
  	struct rq_wait *rqw;
  	struct iolatency_grp *iolat;
  	u64 window_start;
  	u64 now = ktime_to_ns(ktime_get());
  	bool issue_as_root = bio_issue_as_root_blkg(bio);
  	bool enabled = false;
  	int inflight = 0;
  
  	blkg = bio->bi_blkg;
  	if (!blkg || !bio_flagged(bio, BIO_TRACKED))
  		return;
  
  	iolat = blkg_to_lat(bio->bi_blkg);
  	if (!iolat)
  		return;
  
  	enabled = blk_iolatency_enabled(iolat->blkiolat);
  	if (!enabled)
  		return;
  	while (blkg && blkg->parent) {
  		iolat = blkg_to_lat(blkg);
  		if (!iolat) {
  			blkg = blkg->parent;
  			continue;
  		}
  		rqw = &iolat->rq_wait;
  		inflight = atomic_dec_return(&rqw->inflight);
  		WARN_ON_ONCE(inflight < 0);
  		/*
  		 * If bi_status is BLK_STS_AGAIN, the bio wasn't actually
  		 * submitted, so do not account for it.
  		 */
  		if (iolat->min_lat_nsec && bio->bi_status != BLK_STS_AGAIN) {
  			iolatency_record_time(iolat, &bio->bi_issue, now,
  					      issue_as_root);
  			window_start = atomic64_read(&iolat->window_start);
  			if (now > window_start &&
  			    (now - window_start) >= iolat->cur_win_nsec) {
  				if (atomic64_cmpxchg(&iolat->window_start,
  					     window_start, now) == window_start)
  					iolatency_check_latencies(iolat, now);
  			}
  		}
  		wake_up(&rqw->wait);
  		blkg = blkg->parent;
  	}
  }
  
  static void blkcg_iolatency_exit(struct rq_qos *rqos)
  {
  	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
  
  	del_timer_sync(&blkiolat->timer);
  	blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
  	kfree(blkiolat);
  }
  
  static struct rq_qos_ops blkcg_iolatency_ops = {
  	.throttle = blkcg_iolatency_throttle,
  	.done_bio = blkcg_iolatency_done_bio,
  	.exit = blkcg_iolatency_exit,
  };
  
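/*
 * Periodic (roughly once a second) timer.  Walk all groups that are still
 * scaled down and start scaling them back up once the group that forced the
 * scale down has gone quiet.
 */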
  static void blkiolatency_timer_fn(struct timer_list *t)
  {
  	struct blk_iolatency *blkiolat = from_timer(blkiolat, t, timer);
  	struct blkcg_gq *blkg;
  	struct cgroup_subsys_state *pos_css;
  	u64 now = ktime_to_ns(ktime_get());
  
  	rcu_read_lock();
  	blkg_for_each_descendant_pre(blkg, pos_css,
  				     blkiolat->rqos.q->root_blkg) {
  		struct iolatency_grp *iolat;
  		struct child_latency_info *lat_info;
  		unsigned long flags;
  		u64 cookie;
  
  		/*
  		 * We could be exiting, don't access the pd unless we have a
  		 * ref on the blkg.
  		 */
  		if (!blkg_tryget(blkg))
  			continue;
  
  		iolat = blkg_to_lat(blkg);
  		if (!iolat)
  			goto next;
  
  		lat_info = &iolat->child_lat;
  		cookie = atomic_read(&lat_info->scale_cookie);
  
  		if (cookie >= DEFAULT_SCALE_COOKIE)
  			goto next;
  
  		spin_lock_irqsave(&lat_info->lock, flags);
  		if (lat_info->last_scale_event >= now)
  			goto next_lock;
  
  		/*
  		 * We scaled down but don't have a scale_grp, scale up and carry
  		 * on.
  		 */
  		if (lat_info->scale_grp == NULL) {
  			scale_cookie_change(iolat->blkiolat, lat_info, true);
  			goto next_lock;
  		}
  
  		/*
  		 * It's been 5 seconds since our last scale event, clear the
  		 * scale grp in case the group that needed the scale down isn't
  		 * doing any IO currently.
  		 */
  		if (now - lat_info->last_scale_event >=
  		    ((u64)NSEC_PER_SEC * 5))
  			lat_info->scale_grp = NULL;
  next_lock:
  		spin_unlock_irqrestore(&lat_info->lock, flags);
  next:
  		blkg_put(blkg);
  	}
  	rcu_read_unlock();
  }
  
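/*
 * Attach the iolatency rq_qos policy to a request queue and activate the
 * blkcg policy; the unthrottle timer is only armed from the throttle path.
 */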
  int blk_iolatency_init(struct request_queue *q)
  {
  	struct blk_iolatency *blkiolat;
  	struct rq_qos *rqos;
  	int ret;
  
  	blkiolat = kzalloc(sizeof(*blkiolat), GFP_KERNEL);
  	if (!blkiolat)
  		return -ENOMEM;
  
  	rqos = &blkiolat->rqos;
  	rqos->id = RQ_QOS_LATENCY;
  	rqos->ops = &blkcg_iolatency_ops;
  	rqos->q = q;
  
  	rq_qos_add(q, rqos);
  
  	ret = blkcg_activate_policy(q, &blkcg_policy_iolatency);
  	if (ret) {
  		rq_qos_del(q, rqos);
  		kfree(blkiolat);
  		return ret;
  	}
  
  	timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);
  
  	return 0;
  }
  /*
   * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise
   * return 0.
   */
  static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
  {
  	struct iolatency_grp *iolat = blkg_to_lat(blkg);
  	u64 oldval = iolat->min_lat_nsec;
  
  	iolat->min_lat_nsec = val;
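	/* Sampling windows are 16x the target, clamped between 100ms and 1s. */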
  	iolat->cur_win_nsec = max_t(u64, val << 4, BLKIOLATENCY_MIN_WIN_SIZE);
  	iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec,
  				    BLKIOLATENCY_MAX_WIN_SIZE);
  
  	if (!oldval && val)
  		return 1;
  	if (oldval && !val) {
  		blkcg_clear_delay(blkg);
  		return -1;
  	}
  	return 0;
  }
  
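/*
 * Reset the parent's scaling state so a changed latency target starts from a
 * clean slate instead of inheriting an old scale down.
 */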
  static void iolatency_clear_scaling(struct blkcg_gq *blkg)
  {
  	if (blkg->parent) {
  		struct iolatency_grp *iolat = blkg_to_lat(blkg->parent);
  		struct child_latency_info *lat_info;
  		if (!iolat)
  			return;
  
  		lat_info = &iolat->child_lat;
  		spin_lock(&lat_info->lock);
  		atomic_set(&lat_info->scale_cookie, DEFAULT_SCALE_COOKIE);
  		lat_info->last_scale_event = 0;
  		lat_info->scale_grp = NULL;
  		lat_info->scale_lat = 0;
  		spin_unlock(&lat_info->lock);
  	}
  }
  
  static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
  			     size_t nbytes, loff_t off)
  {
  	struct blkcg *blkcg = css_to_blkcg(of_css(of));
  	struct blkcg_gq *blkg;
  	struct blkg_conf_ctx ctx;
  	struct iolatency_grp *iolat;
  	char *p, *tok;
  	u64 lat_val = 0;
  	u64 oldval;
  	int ret;
  	int enable = 0;
  
  	ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
  	if (ret)
  		return ret;
  
  	iolat = blkg_to_lat(ctx.blkg);
  	p = ctx.body;
  
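	/* Only "target=<microseconds>" or "target=max" (no target) is accepted. */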
  	ret = -EINVAL;
  	while ((tok = strsep(&p, " "))) {
  		char key[16];
  		char val[21];	/* 18446744073709551616 */
  
  		if (sscanf(tok, "%15[^=]=%20s", key, val) != 2)
  			goto out;
  
  		if (!strcmp(key, "target")) {
  			u64 v;
  
  			if (!strcmp(val, "max"))
  				lat_val = 0;
  			else if (sscanf(val, "%llu", &v) == 1)
  				lat_val = v * NSEC_PER_USEC;
  			else
  				goto out;
  		} else {
  			goto out;
  		}
  	}
  
	/* Apply the new target; this may enable or disable iolatency on the queue. */
  	blkg = ctx.blkg;
  	oldval = iolat->min_lat_nsec;
  	enable = iolatency_set_min_lat_nsec(blkg, lat_val);
  	if (enable) {
  		WARN_ON_ONCE(!blk_get_queue(blkg->q));
  		blkg_get(blkg);
  	}
  	if (oldval != iolat->min_lat_nsec) {
  		iolatency_clear_scaling(blkg);
  	}
  
  	ret = 0;
  out:
  	blkg_conf_finish(&ctx);
  	if (ret == 0 && enable) {
  		struct iolatency_grp *tmp = blkg_to_lat(blkg);
  		struct blk_iolatency *blkiolat = tmp->blkiolat;
  
  		blk_mq_freeze_queue(blkg->q);
  
  		if (enable == 1)
  			atomic_inc(&blkiolat->enabled);
  		else if (enable == -1)
  			atomic_dec(&blkiolat->enabled);
  		else
  			WARN_ON_ONCE(1);
  
  		blk_mq_unfreeze_queue(blkg->q);
  
  		blkg_put(blkg);
  		blk_put_queue(blkg->q);
  	}
  	return ret ?: nbytes;
  }
  
  static u64 iolatency_prfill_limit(struct seq_file *sf,
  				  struct blkg_policy_data *pd, int off)
  {
  	struct iolatency_grp *iolat = pd_to_lat(pd);
  	const char *dname = blkg_dev_name(pd->blkg);
  
  	if (!dname || !iolat->min_lat_nsec)
  		return 0;
	seq_printf(sf, "%s target=%llu\n",
  		   dname, div_u64(iolat->min_lat_nsec, NSEC_PER_USEC));
  	return 0;
  }
  
  static int iolatency_print_limit(struct seq_file *sf, void *v)
  {
  	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
  			  iolatency_prfill_limit,
  			  &blkcg_policy_iolatency, seq_cft(sf)->private, false);
  	return 0;
  }
  static size_t iolatency_ssd_stat(struct iolatency_grp *iolat, char *buf,
  				 size_t size)
  {
  	struct latency_stat stat;
  	int cpu;
  
  	latency_stat_init(iolat, &stat);
  	preempt_disable();
  	for_each_online_cpu(cpu) {
  		struct latency_stat *s;
  		s = per_cpu_ptr(iolat->stats, cpu);
  		latency_stat_sum(iolat, &stat, s);
  	}
  	preempt_enable();
  
  	if (iolat->rq_depth.max_depth == UINT_MAX)
  		return scnprintf(buf, size, " missed=%llu total=%llu depth=max",
  				 (unsigned long long)stat.ps.missed,
  				 (unsigned long long)stat.ps.total);
  	return scnprintf(buf, size, " missed=%llu total=%llu depth=%u",
  			 (unsigned long long)stat.ps.missed,
  			 (unsigned long long)stat.ps.total,
  			 iolat->rq_depth.max_depth);
  }
  static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf,
  				size_t size)
  {
  	struct iolatency_grp *iolat = pd_to_lat(pd);
  	unsigned long long avg_lat;
  	unsigned long long cur_win;
  	if (!blkcg_debug_stats)
  		return 0;
  	if (iolat->ssd)
  		return iolatency_ssd_stat(iolat, buf, size);

  	avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC);
  	cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC);
  	if (iolat->rq_depth.max_depth == UINT_MAX)
  		return scnprintf(buf, size, " depth=max avg_lat=%llu win=%llu",
  				 avg_lat, cur_win);

  	return scnprintf(buf, size, " depth=%u avg_lat=%llu win=%llu",
  			 iolat->rq_depth.max_depth, avg_lat, cur_win);
  }
  static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
  						   struct request_queue *q,
  						   struct blkcg *blkcg)
  {
  	struct iolatency_grp *iolat;
  	iolat = kzalloc_node(sizeof(*iolat), gfp, q->node);
  	if (!iolat)
  		return NULL;
  	iolat->stats = __alloc_percpu_gfp(sizeof(struct latency_stat),
  				       __alignof__(struct latency_stat), gfp);
  	if (!iolat->stats) {
  		kfree(iolat);
  		return NULL;
  	}
  	return &iolat->pd;
  }
  
  static void iolatency_pd_init(struct blkg_policy_data *pd)
  {
  	struct iolatency_grp *iolat = pd_to_lat(pd);
  	struct blkcg_gq *blkg = lat_to_blkg(iolat);
  	struct rq_qos *rqos = blkcg_rq_qos(blkg->q);
  	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
  	u64 now = ktime_to_ns(ktime_get());
  	int cpu;
  	if (blk_queue_nonrot(blkg->q))
  		iolat->ssd = true;
  	else
  		iolat->ssd = false;
  	for_each_possible_cpu(cpu) {
  		struct latency_stat *stat;
  		stat = per_cpu_ptr(iolat->stats, cpu);
  		latency_stat_init(iolat, stat);
  	}
  	latency_stat_init(iolat, &iolat->cur_stat);
  	rq_wait_init(&iolat->rq_wait);
  	spin_lock_init(&iolat->child_lat.lock);
  	iolat->rq_depth.queue_depth = blkg->q->nr_requests;
  	iolat->rq_depth.max_depth = UINT_MAX;
  	iolat->rq_depth.default_depth = iolat->rq_depth.queue_depth;
  	iolat->blkiolat = blkiolat;
  	iolat->cur_win_nsec = 100 * NSEC_PER_MSEC;
  	atomic64_set(&iolat->window_start, now);
  
  	/*
  	 * We init things in list order, so the pd for the parent may not be
  	 * init'ed yet for whatever reason.
  	 */
  	if (blkg->parent && blkg_to_pd(blkg->parent, &blkcg_policy_iolatency)) {
  		struct iolatency_grp *parent = blkg_to_lat(blkg->parent);
  		atomic_set(&iolat->scale_cookie,
  			   atomic_read(&parent->child_lat.scale_cookie));
  	} else {
  		atomic_set(&iolat->scale_cookie, DEFAULT_SCALE_COOKIE);
  	}
  
  	atomic_set(&iolat->child_lat.scale_cookie, DEFAULT_SCALE_COOKIE);
  }
  
  static void iolatency_pd_offline(struct blkg_policy_data *pd)
  {
  	struct iolatency_grp *iolat = pd_to_lat(pd);
  	struct blkcg_gq *blkg = lat_to_blkg(iolat);
  	struct blk_iolatency *blkiolat = iolat->blkiolat;
  	int ret;

  	ret = iolatency_set_min_lat_nsec(blkg, 0);
  	if (ret == 1)
  		atomic_inc(&blkiolat->enabled);
  	if (ret == -1)
  		atomic_dec(&blkiolat->enabled);
  	iolatency_clear_scaling(blkg);
  }
  
  static void iolatency_pd_free(struct blkg_policy_data *pd)
  {
  	struct iolatency_grp *iolat = pd_to_lat(pd);
  	free_percpu(iolat->stats);
  	kfree(iolat);
  }
  
  static struct cftype iolatency_files[] = {
  	{
  		.name = "latency",
  		.flags = CFTYPE_NOT_ON_ROOT,
  		.seq_show = iolatency_print_limit,
  		.write = iolatency_set_limit,
  	},
  	{}
  };
  
  static struct blkcg_policy blkcg_policy_iolatency = {
  	.dfl_cftypes	= iolatency_files,
  	.pd_alloc_fn	= iolatency_pd_alloc,
  	.pd_init_fn	= iolatency_pd_init,
  	.pd_offline_fn	= iolatency_pd_offline,
  	.pd_free_fn	= iolatency_pd_free,
  	.pd_stat_fn	= iolatency_pd_stat,
  };
  
  static int __init iolatency_init(void)
  {
  	return blkcg_policy_register(&blkcg_policy_iolatency);
  }
  
  static void __exit iolatency_exit(void)
  {
  	return blkcg_policy_unregister(&blkcg_policy_iolatency);
  }
  
  module_init(iolatency_init);
  module_exit(iolatency_exit);