block/kyber-iosched.c

  // SPDX-License-Identifier: GPL-2.0
  /*
   * The Kyber I/O scheduler. Controls latency by throttling queue depths using
   * scalable techniques.
   *
   * Copyright (C) 2017 Facebook
   */
  
  #include <linux/kernel.h>
  #include <linux/blkdev.h>
  #include <linux/blk-mq.h>
  #include <linux/elevator.h>
  #include <linux/module.h>
  #include <linux/sbitmap.h>
  
  #include "blk.h"
  #include "blk-mq.h"
  #include "blk-mq-debugfs.h"
  #include "blk-mq-sched.h"
  #include "blk-mq-tag.h"

  #define CREATE_TRACE_POINTS
  #include <trace/events/kyber.h>
  /*
   * Scheduling domains: the device is divided into multiple domains based on the
   * request type.
   */
  enum {
  	KYBER_READ,
  	KYBER_WRITE,
  	KYBER_DISCARD,
  	KYBER_OTHER,
  	KYBER_NUM_DOMAINS,
  };
  static const char *kyber_domain_names[] = {
  	[KYBER_READ] = "READ",
  	[KYBER_WRITE] = "WRITE",
  	[KYBER_DISCARD] = "DISCARD",
  	[KYBER_OTHER] = "OTHER",
  };
  enum {
  	/*
  	 * In order to prevent starvation of synchronous requests by a flood of
  	 * asynchronous requests, we reserve 25% of requests for synchronous
  	 * operations.
  	 */
  	KYBER_ASYNC_PERCENT = 75,
  };
  
  /*
   * Maximum device-wide depth for each scheduling domain.
   *
   * Even for fast devices with lots of tags like NVMe, you can saturate the
   * device with only a fraction of the maximum possible queue depth. So, we cap
   * these to a reasonable value.
   */
  static const unsigned int kyber_depth[] = {
  	[KYBER_READ] = 256,
  	[KYBER_WRITE] = 128,
  	[KYBER_DISCARD] = 64,
  	[KYBER_OTHER] = 16,
  };
  
  /*
   * Default latency targets for each scheduling domain.
   */
  static const u64 kyber_latency_targets[] = {
  	[KYBER_READ] = 2ULL * NSEC_PER_MSEC,
  	[KYBER_WRITE] = 10ULL * NSEC_PER_MSEC,
  	[KYBER_DISCARD] = 5ULL * NSEC_PER_SEC,
  };
  
  /*
   * Batch size (number of requests we'll dispatch in a row) for each scheduling
   * domain.
   */
  static const unsigned int kyber_batch_size[] = {
  	[KYBER_READ] = 16,
  	[KYBER_WRITE] = 8,
  	[KYBER_DISCARD] = 1,
  	[KYBER_OTHER] = 1,
  };
  
  /*
   * Request latencies are recorded in a histogram with buckets defined relative
   * to the target latency:
   *
   * <= 1/4 * target latency
   * <= 1/2 * target latency
   * <= 3/4 * target latency
   * <= target latency
   * <= 1 1/4 * target latency
   * <= 1 1/2 * target latency
   * <= 1 3/4 * target latency
   * > 1 3/4 * target latency
   */
  enum {
  	/*
  	 * The width of the latency histogram buckets is
  	 * 1 / (1 << KYBER_LATENCY_SHIFT) * target latency.
  	 */
  	KYBER_LATENCY_SHIFT = 2,
  	/*
  	 * The first (1 << KYBER_LATENCY_SHIFT) buckets are <= target latency,
  	 * thus, "good".
  	 */
  	KYBER_GOOD_BUCKETS = 1 << KYBER_LATENCY_SHIFT,
  	/* There are also (1 << KYBER_LATENCY_SHIFT) "bad" buckets. */
  	KYBER_LATENCY_BUCKETS = 2 << KYBER_LATENCY_SHIFT,
  };
  
  /*
   * We measure both the total latency and the I/O latency (i.e., latency after
   * submitting to the device).
   */
  enum {
  	KYBER_TOTAL_LATENCY,
  	KYBER_IO_LATENCY,
  };
  static const char *kyber_latency_type_names[] = {
  	[KYBER_TOTAL_LATENCY] = "total",
  	[KYBER_IO_LATENCY] = "I/O",
  };
  /*
   * Per-cpu latency histograms: total latency and I/O latency for each scheduling
   * domain except for KYBER_OTHER.
   */
  struct kyber_cpu_latency {
  	atomic_t buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];
  };
  /*
   * The kcqs map to khd the same way that the ctxs map to hctx: we use
   * request->mq_ctx->index_hw to index the kcq within khd.
   */
  struct kyber_ctx_queue {
  	/*
  	 * Used to ensure that operations on rq_list and kcq_map are atomic.
  	 * Also protects the requests on rq_list during merges.
  	 */
  	spinlock_t lock;
  	struct list_head rq_list[KYBER_NUM_DOMAINS];
  } ____cacheline_aligned_in_smp;
  struct kyber_queue_data {
  	struct request_queue *q;
  	/*
  	 * Each scheduling domain has a limited number of in-flight requests
  	 * device-wide, limited by these tokens.
  	 */
  	struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS];
  
  	/*
  	 * Async request percentage, converted to per-word depth for
  	 * sbitmap_get_shallow().
  	 */
  	unsigned int async_depth;
  	struct kyber_cpu_latency __percpu *cpu_latency;
  
  	/* Timer for stats aggregation and adjusting domain tokens. */
  	struct timer_list timer;
  
  	unsigned int latency_buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];
  
  	unsigned long latency_timeout[KYBER_OTHER];
  
  	int domain_p99[KYBER_OTHER];
  	/* Target latencies in nanoseconds. */
  	u64 latency_targets[KYBER_OTHER];
  };
  
  struct kyber_hctx_data {
  	spinlock_t lock;
  	struct list_head rqs[KYBER_NUM_DOMAINS];
  	unsigned int cur_domain;
  	unsigned int batching;
  	struct kyber_ctx_queue *kcqs;
  	struct sbitmap kcq_map[KYBER_NUM_DOMAINS];
  	struct sbq_wait domain_wait[KYBER_NUM_DOMAINS];
  	struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
  	atomic_t wait_index[KYBER_NUM_DOMAINS];
  };
  static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
  			     void *key);
  static unsigned int kyber_sched_domain(unsigned int op)
  {
  	switch (op & REQ_OP_MASK) {
  	case REQ_OP_READ:
  		return KYBER_READ;
  	case REQ_OP_WRITE:
  		return KYBER_WRITE;
  	case REQ_OP_DISCARD:
  		return KYBER_DISCARD;
  	default:
  		return KYBER_OTHER;
  	}
  }
  static void flush_latency_buckets(struct kyber_queue_data *kqd,
  				  struct kyber_cpu_latency *cpu_latency,
  				  unsigned int sched_domain, unsigned int type)
  {
  	unsigned int *buckets = kqd->latency_buckets[sched_domain][type];
  	atomic_t *cpu_buckets = cpu_latency->buckets[sched_domain][type];
  	unsigned int bucket;

  	for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++)
  		buckets[bucket] += atomic_xchg(&cpu_buckets[bucket], 0);
  }
  
  /*
   * Calculate the histogram bucket with the given percentile rank, or -1 if there
   * aren't enough samples yet.
   */
  static int calculate_percentile(struct kyber_queue_data *kqd,
  				unsigned int sched_domain, unsigned int type,
  				unsigned int percentile)
  {
  	unsigned int *buckets = kqd->latency_buckets[sched_domain][type];
  	unsigned int bucket, samples = 0, percentile_samples;
  
  	for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++)
  		samples += buckets[bucket];
  
  	if (!samples)
  		return -1;
  
  	/*
  	 * We do the calculation once we have 500 samples or one second passes
  	 * since the first sample was recorded, whichever comes first.
  	 */
  	if (!kqd->latency_timeout[sched_domain])
  		kqd->latency_timeout[sched_domain] = max(jiffies + HZ, 1UL);
  	if (samples < 500 &&
  	    time_is_after_jiffies(kqd->latency_timeout[sched_domain])) {
  		return -1;
  	}
  	kqd->latency_timeout[sched_domain] = 0;

  	percentile_samples = DIV_ROUND_UP(samples * percentile, 100);
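  	/*
  	 * Walk the histogram until we reach the bucket that contains the
  	 * percentile_samples'th sample, e.g. the 900th of 1000 samples for
  	 * the p90.
  	 */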
  	for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS - 1; bucket++) {
  		if (buckets[bucket] >= percentile_samples)
  			break;
  		percentile_samples -= buckets[bucket];
  	}
  	memset(buckets, 0, sizeof(kqd->latency_buckets[sched_domain][type]));

  	trace_kyber_latency(kqd->q, kyber_domain_names[sched_domain],
  			    kyber_latency_type_names[type], percentile,
  			    bucket + 1, 1 << KYBER_LATENCY_SHIFT, samples);
  	return bucket;
  }
  
  static void kyber_resize_domain(struct kyber_queue_data *kqd,
  				unsigned int sched_domain, unsigned int depth)
  {
  	depth = clamp(depth, 1U, kyber_depth[sched_domain]);
  	if (depth != kqd->domain_tokens[sched_domain].sb.depth) {
  		sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth);
  		trace_kyber_adjust(kqd->q, kyber_domain_names[sched_domain],
  				   depth);
  	}
  }
  static void kyber_timer_fn(struct timer_list *t)
  {
  	struct kyber_queue_data *kqd = from_timer(kqd, t, timer);
  	unsigned int sched_domain;
  	int cpu;
  	bool bad = false;
  
  	/* Sum all of the per-cpu latency histograms. */
  	for_each_online_cpu(cpu) {
  		struct kyber_cpu_latency *cpu_latency;
  
  		cpu_latency = per_cpu_ptr(kqd->cpu_latency, cpu);
  		for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
  			flush_latency_buckets(kqd, cpu_latency, sched_domain,
  					      KYBER_TOTAL_LATENCY);
  			flush_latency_buckets(kqd, cpu_latency, sched_domain,
  					      KYBER_IO_LATENCY);
  		}
  	}
  	/*
  	 * Check if any domains have a high I/O latency, which might indicate
  	 * congestion in the device. Note that we use the p90; we don't want to
  	 * be too sensitive to outliers here.
  	 */
  	for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
  		int p90;

  		p90 = calculate_percentile(kqd, sched_domain, KYBER_IO_LATENCY,
  					   90);
  		if (p90 >= KYBER_GOOD_BUCKETS)
  			bad = true;
  	}
  
  	/*
  	 * Adjust the scheduling domain depths. If we determined that there was
  	 * congestion, we throttle all domains with good latencies. Either way,
  	 * we ease up on throttling domains with bad latencies.
  	 */
  	for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
  		unsigned int orig_depth, depth;
  		int p99;
  
  		p99 = calculate_percentile(kqd, sched_domain,
  					   KYBER_TOTAL_LATENCY, 99);
  		/*
  		 * This is kind of subtle: different domains will not
  		 * necessarily have enough samples to calculate the latency
  		 * percentiles during the same window, so we have to remember
  		 * the p99 for the next time we observe congestion; once we do,
  		 * we don't want to throttle again until we get more data, so we
  		 * reset it to -1.
  		 */
  		if (bad) {
  			if (p99 < 0)
  				p99 = kqd->domain_p99[sched_domain];
  			kqd->domain_p99[sched_domain] = -1;
  		} else if (p99 >= 0) {
  			kqd->domain_p99[sched_domain] = p99;
  		}
  		if (p99 < 0)
  			continue;
  
  		/*
  		 * If this domain has bad latency, throttle less. Otherwise,
  		 * throttle more iff we determined that there is congestion.
  		 *
  		 * The new depth is scaled linearly with the p99 latency vs the
  		 * latency target. E.g., if the p99 is 3/4 of the target, then
  		 * we throttle down to 3/4 of the current depth, and if the p99
  		 * is 2x the target, then we double the depth.
  		 */
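  		/*
  		 * Concretely, the scaling factor is (p99 bucket + 1) / 4:
  		 * bucket 3 (the "<= target" bucket) leaves the depth
  		 * unchanged, while bucket 7 (the worst bucket) doubles it.
  		 */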
  		if (bad || p99 >= KYBER_GOOD_BUCKETS) {
  			orig_depth = kqd->domain_tokens[sched_domain].sb.depth;
  			depth = (orig_depth * (p99 + 1)) >> KYBER_LATENCY_SHIFT;
  			kyber_resize_domain(kqd, sched_domain, depth);
  		}
  	}
  }
  static unsigned int kyber_sched_tags_shift(struct request_queue *q)
  {
  	/*
  	 * All of the hardware queues have the same depth, so we can just grab
  	 * the shift of the first one.
  	 */
  	return q->queue_hw_ctx[0]->sched_tags->bitmap_tags->sb.shift;
  }
  static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
  {
  	struct kyber_queue_data *kqd;
  	unsigned int shift;
  	int ret = -ENOMEM;
  	int i;
  	kqd = kzalloc_node(sizeof(*kqd), GFP_KERNEL, q->node);
  	if (!kqd)
  		goto err;

  	kqd->q = q;
  	kqd->cpu_latency = alloc_percpu_gfp(struct kyber_cpu_latency,
  					    GFP_KERNEL | __GFP_ZERO);
  	if (!kqd->cpu_latency)
  		goto err_kqd;
  	timer_setup(&kqd->timer, kyber_timer_fn, 0);
  	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
  		WARN_ON(!kyber_depth[i]);
  		WARN_ON(!kyber_batch_size[i]);
  		ret = sbitmap_queue_init_node(&kqd->domain_tokens[i],
  					      kyber_depth[i], -1, false,
  					      GFP_KERNEL, q->node);
  		if (ret) {
  			while (--i >= 0)
  				sbitmap_queue_free(&kqd->domain_tokens[i]);
  			goto err_buckets;
  		}
  	}
  	for (i = 0; i < KYBER_OTHER; i++) {
  		kqd->domain_p99[i] = -1;
  		kqd->latency_targets[i] = kyber_latency_targets[i];
  	}

  	shift = kyber_sched_tags_shift(q);
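  	/*
  	 * Worked example (assuming 64-bit sbitmap words, i.e. shift == 6):
  	 * async_depth = 64 * 75 / 100 = 48, so async requests may occupy at
  	 * most 48 of the 64 scheduler tags in each word.
  	 */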
  	kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
  
  	return kqd;
  err_buckets:
  	free_percpu(kqd->cpu_latency);
  err_kqd:
  	kfree(kqd);
  err:
  	return ERR_PTR(ret);
  }
  
  static int kyber_init_sched(struct request_queue *q, struct elevator_type *e)
  {
  	struct kyber_queue_data *kqd;
  	struct elevator_queue *eq;
  
  	eq = elevator_alloc(q, e);
  	if (!eq)
  		return -ENOMEM;
  
  	kqd = kyber_queue_data_alloc(q);
  	if (IS_ERR(kqd)) {
  		kobject_put(&eq->kobj);
  		return PTR_ERR(kqd);
  	}
  	blk_stat_enable_accounting(q);
  	eq->elevator_data = kqd;
  	q->elevator = eq;
  	return 0;
  }
  
  static void kyber_exit_sched(struct elevator_queue *e)
  {
  	struct kyber_queue_data *kqd = e->elevator_data;
  	int i;
  	del_timer_sync(&kqd->timer);
  
  	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
  		sbitmap_queue_free(&kqd->domain_tokens[i]);
  	free_percpu(kqd->cpu_latency);
  	kfree(kqd);
  }
  static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq)
  {
  	unsigned int i;
  
  	spin_lock_init(&kcq->lock);
  	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
  		INIT_LIST_HEAD(&kcq->rq_list[i]);
  }
  static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
  {
  	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
  	struct kyber_hctx_data *khd;
  	int i;
  
  	khd = kmalloc_node(sizeof(*khd), GFP_KERNEL, hctx->numa_node);
  	if (!khd)
  		return -ENOMEM;
  	khd->kcqs = kmalloc_array_node(hctx->nr_ctx,
  				       sizeof(struct kyber_ctx_queue),
  				       GFP_KERNEL, hctx->numa_node);
  	if (!khd->kcqs)
  		goto err_khd;
  
  	for (i = 0; i < hctx->nr_ctx; i++)
  		kyber_ctx_queue_init(&khd->kcqs[i]);
  
  	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
  		if (sbitmap_init_node(&khd->kcq_map[i], hctx->nr_ctx,
  				      ilog2(8), GFP_KERNEL, hctx->numa_node)) {
  			while (--i >= 0)
  				sbitmap_free(&khd->kcq_map[i]);
  			goto err_kcqs;
  		}
  	}
  	spin_lock_init(&khd->lock);
  
  	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
  		INIT_LIST_HEAD(&khd->rqs[i]);
  		khd->domain_wait[i].sbq = NULL;
  		init_waitqueue_func_entry(&khd->domain_wait[i].wait,
  					  kyber_domain_wake);
  		khd->domain_wait[i].wait.private = hctx;
  		INIT_LIST_HEAD(&khd->domain_wait[i].wait.entry);
  		atomic_set(&khd->wait_index[i], 0);
  	}
  
  	khd->cur_domain = 0;
  	khd->batching = 0;
  
  	hctx->sched_data = khd;
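  	/*
  	 * Tell the scheduler tag sbitmap the smallest shallow depth that will
  	 * be used (async_depth) so that its wakeup batching stays correct.
  	 */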
  	sbitmap_queue_min_shallow_depth(hctx->sched_tags->bitmap_tags,
  					kqd->async_depth);
  
  	return 0;
  
  err_kcqs:
  	kfree(khd->kcqs);
  err_khd:
  	kfree(khd);
  	return -ENOMEM;
  }
  
  static void kyber_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
  {
  	struct kyber_hctx_data *khd = hctx->sched_data;
  	int i;
  
  	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
  		sbitmap_free(&khd->kcq_map[i]);
  	kfree(khd->kcqs);
  	kfree(hctx->sched_data);
  }
  
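  /*
   * The domain token (an index into the domain's sbitmap_queue) is stashed in
   * rq->elv.priv[0]; -1 means no token is held (see kyber_prepare_request()).
   */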
  static int rq_get_domain_token(struct request *rq)
  {
  	return (long)rq->elv.priv[0];
  }
  
  static void rq_set_domain_token(struct request *rq, int token)
  {
  	rq->elv.priv[0] = (void *)(long)token;
  }
  
  static void rq_clear_domain_token(struct kyber_queue_data *kqd,
  				  struct request *rq)
  {
  	unsigned int sched_domain;
  	int nr;
  
  	nr = rq_get_domain_token(rq);
  	if (nr != -1) {
  		sched_domain = kyber_sched_domain(rq->cmd_flags);
  		sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr,
  				    rq->mq_ctx->cpu);
  	}
  }
  static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
  {
  	/*
  	 * We use the scheduler tags as per-hardware queue queueing tokens.
  	 * Async requests can be limited at this stage.
  	 */
  	if (!op_is_sync(op)) {
  		struct kyber_queue_data *kqd = data->q->elevator->elevator_data;
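  		/*
  		 * The shallow depth set here is applied per sbitmap word via
  		 * sbitmap_get_shallow() when the scheduler tag is allocated.
  		 */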
  		data->shallow_depth = kqd->async_depth;
  	}
  }

  static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
  		unsigned int nr_segs)
  {
  	struct kyber_hctx_data *khd = hctx->sched_data;
  	struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue);
  	struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]];
  	unsigned int sched_domain = kyber_sched_domain(bio->bi_opf);
  	struct list_head *rq_list = &kcq->rq_list[sched_domain];
  	bool merged;
  
  	spin_lock(&kcq->lock);
  	merged = blk_bio_list_merge(hctx->queue, rq_list, bio, nr_segs);
  	spin_unlock(&kcq->lock);
  
  	return merged;
  }
  static void kyber_prepare_request(struct request *rq)
  {
  	rq_set_domain_token(rq, -1);
  }
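
  /*
   * Inserted requests are queued per software queue (kcq), split by scheduling
   * domain; the matching bit in kcq_map marks the kcq as busy so that dispatch
   * only has to flush kcqs that actually hold requests.
   */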
  static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
  				  struct list_head *rq_list, bool at_head)
  {
  	struct kyber_hctx_data *khd = hctx->sched_data;
  	struct request *rq, *next;
  
  	list_for_each_entry_safe(rq, next, rq_list, queuelist) {
  		unsigned int sched_domain = kyber_sched_domain(rq->cmd_flags);
  		struct kyber_ctx_queue *kcq = &khd->kcqs[rq->mq_ctx->index_hw[hctx->type]];
  		struct list_head *head = &kcq->rq_list[sched_domain];
  
  		spin_lock(&kcq->lock);
  		if (at_head)
  			list_move(&rq->queuelist, head);
  		else
  			list_move_tail(&rq->queuelist, head);
  		sbitmap_set_bit(&khd->kcq_map[sched_domain],
  				rq->mq_ctx->index_hw[hctx->type]);
  		blk_mq_sched_request_inserted(rq);
  		spin_unlock(&kcq->lock);
  	}
  }
  static void kyber_finish_request(struct request *rq)
  {
  	struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
  
  	rq_clear_domain_token(kqd, rq);
  }
  static void add_latency_sample(struct kyber_cpu_latency *cpu_latency,
  			       unsigned int sched_domain, unsigned int type,
  			       u64 target, u64 latency)
  {
  	unsigned int bucket;
  	u64 divisor;

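  	/*
  	 * Example (assuming the default 2 ms read target): the divisor is
  	 * target >> KYBER_LATENCY_SHIFT = 500 us, so a 1.2 ms completion
  	 * lands in bucket (1200000 - 1) / 500000 = 2, the "<= 3/4 * target"
  	 * bucket.
  	 */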
  	if (latency > 0) {
  		divisor = max_t(u64, target >> KYBER_LATENCY_SHIFT, 1);
  		bucket = min_t(unsigned int, div64_u64(latency - 1, divisor),
  			       KYBER_LATENCY_BUCKETS - 1);
  	} else {
  		bucket = 0;
  	}
  	atomic_inc(&cpu_latency->buckets[sched_domain][type][bucket]);
  }

  static void kyber_completed_request(struct request *rq, u64 now)
  {
  	struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
  	struct kyber_cpu_latency *cpu_latency;
  	unsigned int sched_domain;
  	u64 target;
  
  	sched_domain = kyber_sched_domain(rq->cmd_flags);
  	if (sched_domain == KYBER_OTHER)
  		return;
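
  	/*
  	 * Record two samples for this domain: the total latency
  	 * (now - rq->start_time_ns) and the I/O latency after the request was
  	 * issued to the device (now - rq->io_start_time_ns).
  	 */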
  	cpu_latency = get_cpu_ptr(kqd->cpu_latency);
  	target = kqd->latency_targets[sched_domain];
  	add_latency_sample(cpu_latency, sched_domain, KYBER_TOTAL_LATENCY,
  			   target, now - rq->start_time_ns);
  	add_latency_sample(cpu_latency, sched_domain, KYBER_IO_LATENCY, target,
  			   now - rq->io_start_time_ns);
  	put_cpu_ptr(kqd->cpu_latency);

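  	/*
  	 * timer_reduce() arms the timer if it is idle and otherwise only moves
  	 * its expiry earlier, so stats aggregation runs within roughly 100 ms
  	 * of a completion.
  	 */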
  	timer_reduce(&kqd->timer, jiffies + HZ / 10);
  }
  struct flush_kcq_data {
  	struct kyber_hctx_data *khd;
  	unsigned int sched_domain;
  	struct list_head *list;
  };
  
  static bool flush_busy_kcq(struct sbitmap *sb, unsigned int bitnr, void *data)
  {
  	struct flush_kcq_data *flush_data = data;
  	struct kyber_ctx_queue *kcq = &flush_data->khd->kcqs[bitnr];

  	spin_lock(&kcq->lock);
  	list_splice_tail_init(&kcq->rq_list[flush_data->sched_domain],
  			      flush_data->list);
  	sbitmap_clear_bit(sb, bitnr);
  	spin_unlock(&kcq->lock);

  	return true;
  }
  
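  /*
   * Splice all requests for the given scheduling domain out of every busy kcq
   * onto the given list and clear the corresponding kcq_map bits. Flushes are
   * serialized by khd->lock.
   */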
  static void kyber_flush_busy_kcqs(struct kyber_hctx_data *khd,
  				  unsigned int sched_domain,
  				  struct list_head *list)
  {
  	struct flush_kcq_data data = {
  		.khd = khd,
  		.sched_domain = sched_domain,
  		.list = list,
  	};
  
  	sbitmap_for_each_set(&khd->kcq_map[sched_domain],
  			     flush_busy_kcq, &data);
  }
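
  /*
   * Wait queue callback: a domain token was freed, so take this hctx off the
   * sbitmap wait queue and rerun it so the throttled domain can dispatch again.
   */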
  static int kyber_domain_wake(wait_queue_entry_t *wqe, unsigned mode, int flags,
  			     void *key)
  {
  	struct blk_mq_hw_ctx *hctx = READ_ONCE(wqe->private);
  	struct sbq_wait *wait = container_of(wqe, struct sbq_wait, wait);

  	sbitmap_del_wait_queue(wait);
  	blk_mq_run_hw_queue(hctx, true);
  	return 1;
  }
  
  static int kyber_get_domain_token(struct kyber_queue_data *kqd,
  				  struct kyber_hctx_data *khd,
  				  struct blk_mq_hw_ctx *hctx)
  {
  	unsigned int sched_domain = khd->cur_domain;
  	struct sbitmap_queue *domain_tokens = &kqd->domain_tokens[sched_domain];
  	struct sbq_wait *wait = &khd->domain_wait[sched_domain];
  	struct sbq_wait_state *ws;
  	int nr;
  
  	nr = __sbitmap_queue_get(domain_tokens);
  
  	/*
  	 * If we failed to get a domain token, make sure the hardware queue is
  	 * run when one becomes available. Note that this is serialized on
  	 * khd->lock, but we still need to be careful about the waker.
  	 */
  	if (nr < 0 && list_empty_careful(&wait->wait.entry)) {
  		ws = sbq_wait_ptr(domain_tokens,
  				  &khd->wait_index[sched_domain]);
  		khd->domain_ws[sched_domain] = ws;
  		sbitmap_add_wait_queue(domain_tokens, ws, wait);
  
  		/*
  		 * Try again in case a token was freed before we got on the wait
  		 * queue.
  		 */
  		nr = __sbitmap_queue_get(domain_tokens);
  	}

  	/*
  	 * If we got a token while we were on the wait queue, remove ourselves
  	 * from the wait queue to ensure that all wake ups make forward
  	 * progress. It's possible that the waker already deleted the entry
  	 * between the !list_empty_careful() check and us grabbing the lock, but
  	 * list_del_init() is okay with that.
  	 */
  	if (nr >= 0 && !list_empty_careful(&wait->wait.entry)) {
  		ws = khd->domain_ws[sched_domain];
  		spin_lock_irq(&ws->wait.lock);
  		sbitmap_del_wait_queue(wait);
  		spin_unlock_irq(&ws->wait.lock);
  	}

  	return nr;
  }
  
  static struct request *
  kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
  			  struct kyber_hctx_data *khd,
  			  struct blk_mq_hw_ctx *hctx)
  {
  	struct list_head *rqs;
  	struct request *rq;
  	int nr;
  
  	rqs = &khd->rqs[khd->cur_domain];
  
  	/*
  	 * If we already have a flushed request, then we just need to get a
  	 * token for it. Otherwise, if there are pending requests in the kcqs,
  	 * flush the kcqs, but only if we can get a token. If not, we should
  	 * leave the requests in the kcqs so that they can be merged. Note that
  	 * khd->lock serializes the flushes, so if we observed any bit set in
  	 * the kcq_map, we will always get a request.
  	 */
  	rq = list_first_entry_or_null(rqs, struct request, queuelist);
  	if (rq) {
  		nr = kyber_get_domain_token(kqd, khd, hctx);
  		if (nr >= 0) {
  			khd->batching++;
  			rq_set_domain_token(rq, nr);
  			list_del_init(&rq->queuelist);
  			return rq;
  		} else {
  			trace_kyber_throttled(kqd->q,
  					      kyber_domain_names[khd->cur_domain]);
  		}
  	} else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) {
  		nr = kyber_get_domain_token(kqd, khd, hctx);
  		if (nr >= 0) {
  			kyber_flush_busy_kcqs(khd, khd->cur_domain, rqs);
  			rq = list_first_entry(rqs, struct request, queuelist);
  			khd->batching++;
  			rq_set_domain_token(rq, nr);
  			list_del_init(&rq->queuelist);
  			return rq;
  		} else {
  			trace_kyber_throttled(kqd->q,
  					      kyber_domain_names[khd->cur_domain]);
  		}
  	}
  
  	/* There were either no pending requests or no tokens. */
  	return NULL;
  }
  
  static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
  {
  	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
  	struct kyber_hctx_data *khd = hctx->sched_data;
  	struct request *rq;
  	int i;
  
  	spin_lock(&khd->lock);
  
  	/*
  	 * First, if we are still entitled to batch, try to dispatch a request
  	 * from the batch.
  	 */
  	if (khd->batching < kyber_batch_size[khd->cur_domain]) {
  		rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
  		if (rq)
  			goto out;
  	}
  
  	/*
  	 * Either,
  	 * 1. We were no longer entitled to a batch.
  	 * 2. The domain we were batching didn't have any requests.
  	 * 3. The domain we were batching was out of tokens.
  	 *
  	 * Start another batch. Note that this wraps back around to the original
  	 * domain if no other domains have requests or tokens.
  	 */
  	khd->batching = 0;
  	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
  		if (khd->cur_domain == KYBER_NUM_DOMAINS - 1)
  			khd->cur_domain = 0;
  		else
  			khd->cur_domain++;
  		rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
  		if (rq)
  			goto out;
  	}
  
  	rq = NULL;
  out:
  	spin_unlock(&khd->lock);
  	return rq;
  }
  
  static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
  {
  	struct kyber_hctx_data *khd = hctx->sched_data;
  	int i;
  
  	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
  		if (!list_empty_careful(&khd->rqs[i]) ||
  		    sbitmap_any_bit_set(&khd->kcq_map[i]))
  			return true;
  	}
  
  	return false;
  }
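
  /*
   * sysfs interface: read_lat_nsec and write_lat_nsec expose the read and
   * write latency targets, in nanoseconds.
   */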
  #define KYBER_LAT_SHOW_STORE(domain, name)				\
  static ssize_t kyber_##name##_lat_show(struct elevator_queue *e,	\
  				       char *page)			\
  {									\
  	struct kyber_queue_data *kqd = e->elevator_data;		\
  									\
  	return sprintf(page, "%llu\n", kqd->latency_targets[domain]);	\
  }									\
  									\
  static ssize_t kyber_##name##_lat_store(struct elevator_queue *e,	\
  					const char *page, size_t count)	\
  {									\
  	struct kyber_queue_data *kqd = e->elevator_data;		\
  	unsigned long long nsec;					\
  	int ret;							\
  									\
  	ret = kstrtoull(page, 10, &nsec);				\
  	if (ret)							\
  		return ret;						\
  									\
  	kqd->latency_targets[domain] = nsec;				\
  									\
  	return count;							\
  }
  KYBER_LAT_SHOW_STORE(KYBER_READ, read);
  KYBER_LAT_SHOW_STORE(KYBER_WRITE, write);
  #undef KYBER_LAT_SHOW_STORE
  
  #define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store)
  static struct elv_fs_entry kyber_sched_attrs[] = {
  	KYBER_LAT_ATTR(read),
  	KYBER_LAT_ATTR(write),
  	__ATTR_NULL
  };
  #undef KYBER_LAT_ATTR
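
  /*
   * debugfs attributes: per-domain token sbitmaps and pending request lists,
   * per-domain "waiting" flags, async_depth, the current dispatch domain, and
   * the batching count.
   */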
  #ifdef CONFIG_BLK_DEBUG_FS
  #define KYBER_DEBUGFS_DOMAIN_ATTRS(domain, name)			\
  static int kyber_##name##_tokens_show(void *data, struct seq_file *m)	\
  {									\
  	struct request_queue *q = data;					\
  	struct kyber_queue_data *kqd = q->elevator->elevator_data;	\
  									\
  	sbitmap_queue_show(&kqd->domain_tokens[domain], m);		\
  	return 0;							\
  }									\
  									\
  static void *kyber_##name##_rqs_start(struct seq_file *m, loff_t *pos)	\
  	__acquires(&khd->lock)						\
  {									\
  	struct blk_mq_hw_ctx *hctx = m->private;			\
  	struct kyber_hctx_data *khd = hctx->sched_data;			\
  									\
  	spin_lock(&khd->lock);						\
  	return seq_list_start(&khd->rqs[domain], *pos);			\
  }									\
  									\
  static void *kyber_##name##_rqs_next(struct seq_file *m, void *v,	\
  				     loff_t *pos)			\
  {									\
  	struct blk_mq_hw_ctx *hctx = m->private;			\
  	struct kyber_hctx_data *khd = hctx->sched_data;			\
  									\
  	return seq_list_next(v, &khd->rqs[domain], pos);		\
  }									\
  									\
  static void kyber_##name##_rqs_stop(struct seq_file *m, void *v)	\
  	__releases(&khd->lock)						\
  {									\
  	struct blk_mq_hw_ctx *hctx = m->private;			\
  	struct kyber_hctx_data *khd = hctx->sched_data;			\
  									\
  	spin_unlock(&khd->lock);					\
  }									\
  									\
  static const struct seq_operations kyber_##name##_rqs_seq_ops = {	\
  	.start	= kyber_##name##_rqs_start,				\
  	.next	= kyber_##name##_rqs_next,				\
  	.stop	= kyber_##name##_rqs_stop,				\
  	.show	= blk_mq_debugfs_rq_show,				\
  };									\
  									\
  static int kyber_##name##_waiting_show(void *data, struct seq_file *m)	\
  {									\
  	struct blk_mq_hw_ctx *hctx = data;				\
  	struct kyber_hctx_data *khd = hctx->sched_data;			\
  	wait_queue_entry_t *wait = &khd->domain_wait[domain].wait;	\
  									\
  	seq_printf(m, "%d\n", !list_empty_careful(&wait->entry));	\
  	return 0;							\
  }
  KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read)
  KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_WRITE, write)
  KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_DISCARD, discard)
  KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other)
  #undef KYBER_DEBUGFS_DOMAIN_ATTRS
  
  static int kyber_async_depth_show(void *data, struct seq_file *m)
  {
  	struct request_queue *q = data;
  	struct kyber_queue_data *kqd = q->elevator->elevator_data;
  
  	seq_printf(m, "%u\n", kqd->async_depth);
  	return 0;
  }
  
  static int kyber_cur_domain_show(void *data, struct seq_file *m)
  {
  	struct blk_mq_hw_ctx *hctx = data;
  	struct kyber_hctx_data *khd = hctx->sched_data;
  	seq_printf(m, "%s\n", kyber_domain_names[khd->cur_domain]);
  	return 0;
  }
  
  static int kyber_batching_show(void *data, struct seq_file *m)
  {
  	struct blk_mq_hw_ctx *hctx = data;
  	struct kyber_hctx_data *khd = hctx->sched_data;
  
  	seq_printf(m, "%u\n", khd->batching);
  	return 0;
  }
  
  #define KYBER_QUEUE_DOMAIN_ATTRS(name)	\
  	{#name "_tokens", 0400, kyber_##name##_tokens_show}
  static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = {
  	KYBER_QUEUE_DOMAIN_ATTRS(read),
  	KYBER_QUEUE_DOMAIN_ATTRS(write),
  	KYBER_QUEUE_DOMAIN_ATTRS(discard),
  	KYBER_QUEUE_DOMAIN_ATTRS(other),
  	{"async_depth", 0400, kyber_async_depth_show},
  	{},
  };
  #undef KYBER_QUEUE_DOMAIN_ATTRS
  
  #define KYBER_HCTX_DOMAIN_ATTRS(name)					\
  	{#name "_rqs", 0400, .seq_ops = &kyber_##name##_rqs_seq_ops},	\
  	{#name "_waiting", 0400, kyber_##name##_waiting_show}
  static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = {
  	KYBER_HCTX_DOMAIN_ATTRS(read),
  	KYBER_HCTX_DOMAIN_ATTRS(write),
  	KYBER_HCTX_DOMAIN_ATTRS(discard),
  	KYBER_HCTX_DOMAIN_ATTRS(other),
  	{"cur_domain", 0400, kyber_cur_domain_show},
  	{"batching", 0400, kyber_batching_show},
  	{},
  };
  #undef KYBER_HCTX_DOMAIN_ATTRS
  #endif
  static struct elevator_type kyber_sched = {
  	.ops = {
  		.init_sched = kyber_init_sched,
  		.exit_sched = kyber_exit_sched,
  		.init_hctx = kyber_init_hctx,
  		.exit_hctx = kyber_exit_hctx,
  		.limit_depth = kyber_limit_depth,
  		.bio_merge = kyber_bio_merge,
  		.prepare_request = kyber_prepare_request,
  		.insert_requests = kyber_insert_requests,
  		.finish_request = kyber_finish_request,
  		.requeue_request = kyber_finish_request,
  		.completed_request = kyber_completed_request,
  		.dispatch_request = kyber_dispatch_request,
  		.has_work = kyber_has_work,
  	},
  #ifdef CONFIG_BLK_DEBUG_FS
  	.queue_debugfs_attrs = kyber_queue_debugfs_attrs,
  	.hctx_debugfs_attrs = kyber_hctx_debugfs_attrs,
  #endif
  	.elevator_attrs = kyber_sched_attrs,
  	.elevator_name = "kyber",
  	.elevator_owner = THIS_MODULE,
  };
  
  static int __init kyber_init(void)
  {
  	return elv_register(&kyber_sched);
  }
  
  static void __exit kyber_exit(void)
  {
  	elv_unregister(&kyber_sched);
  }
  
  module_init(kyber_init);
  module_exit(kyber_exit);
  
  MODULE_AUTHOR("Omar Sandoval");
  MODULE_LICENSE("GPL");
  MODULE_DESCRIPTION("Kyber I/O scheduler");