Blame view

drivers/md/dm-service-time.c 8.78 KB
f392ba889   Kiyoshi Ueda   dm mpath: add ser...
1
2
3
4
5
6
7
8
9
10
11
12
  /*
   * Copyright (C) 2007-2009 NEC Corporation.  All Rights Reserved.
   *
   * Module Author: Kiyoshi Ueda
   *
   * This file is released under the GPL.
   *
   * Throughput oriented path selector.
   */
  
  #include "dm.h"
  #include "dm-path-selector.h"
5a0e3ad6a   Tejun Heo   include cleanup: ...
13
  #include <linux/slab.h>
056075c76   Paul Gortmaker   md: Add module.h ...
14
  #include <linux/module.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
15

f392ba889   Kiyoshi Ueda   dm mpath: add ser...
16
17
18
19
20
  /* NOTE(review): presumably the prefix the DM* logging macros prepend — confirm */
  #define DM_MSG_PREFIX	"multipath service-time"
  /* Default repeat_count used by st_add_path() when no argument is given */
  #define ST_MIN_IO	1
  /* Largest relative_throughput value st_add_path() accepts */
  #define ST_MAX_RELATIVE_THROUGHPUT	100
  /* 2^7 = 128, the smallest power of two >= ST_MAX_RELATIVE_THROUGHPUT */
  #define ST_MAX_RELATIVE_THROUGHPUT_SHIFT	7
  /*
   * Sizes below this limit can be multiplied by any valid relative_throughput
   * without overflowing size_t; st_compare_load() shifts larger sizes down
   * before multiplying.
   */
  #define ST_MAX_INFLIGHT_SIZE	((size_t)-1 >> ST_MAX_RELATIVE_THROUGHPUT_SHIFT)
21136f89d   Mike Snitzer   dm mpath: remove ...
21
  /* Version string printed by dm_st_init() when the module loads */
  #define ST_VERSION	"0.3.0"
f392ba889   Kiyoshi Ueda   dm mpath: add ser...
22
23
24
25
  
  struct selector {
  	struct list_head valid_paths;
  	struct list_head failed_paths;
9659f8114   Mike Snitzer   dm mpath: push pa...
26
  	spinlock_t lock;
f392ba889   Kiyoshi Ueda   dm mpath: add ser...
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
  };
  
  /* Per-path selector state; st_add_path() stores it in path->pscontext. */
  struct path_info {
  	struct list_head list;		/* entry in selector valid_paths or failed_paths */
  	struct dm_path *path;		/* the path this context was created for */
  	unsigned repeat_count;		/* parsed table argument, reported by st_status() */
  	unsigned relative_throughput;	/* weight compared by st_compare_load() */
  	atomic_t in_flight_size;	/* Total size of in-flight I/Os */
  };
  
  static struct selector *alloc_selector(void)
  {
  	struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);
  
  	if (s) {
  		INIT_LIST_HEAD(&s->valid_paths);
  		INIT_LIST_HEAD(&s->failed_paths);
9659f8114   Mike Snitzer   dm mpath: push pa...
44
  		spin_lock_init(&s->lock);
f392ba889   Kiyoshi Ueda   dm mpath: add ser...
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
  	}
  
  	return s;
  }
  
  /*
   * Create the selector context for @ps.
   *
   * @argc and @argv are not used.  Returns 0 on success, or -ENOMEM when the
   * selector cannot be allocated (ps->context is left untouched in that case).
   */
  static int st_create(struct path_selector *ps, unsigned argc, char **argv)
  {
  	struct selector *sel;
  
  	sel = alloc_selector();
  	if (sel) {
  		ps->context = sel;
  		return 0;
  	}
  
  	return -ENOMEM;
  }
  
  /*
   * Unlink and free every path_info on @paths.
   *
   * The _safe iterator is needed because each entry is released while the
   * list is still being walked.
   */
  static void free_paths(struct list_head *paths)
  {
  	struct path_info *entry;
  	struct path_info *tmp;
  
  	list_for_each_entry_safe(entry, tmp, paths, list) {
  		list_del(&entry->list);
  		kfree(entry);
  	}
  }
  
  /*
   * Tear down the selector attached to @ps: free the path contexts on both
   * lists, free the selector itself and clear ps->context.
   */
  static void st_destroy(struct path_selector *ps)
  {
  	struct selector *sel = ps->context;
  
  	free_paths(&sel->valid_paths);
  	free_paths(&sel->failed_paths);
  
  	kfree(sel);
  	ps->context = NULL;
  }
  
  /*
   * Emit status text for the selector (@path == NULL) or for one path.
   *
   * Selector:          "0 "
   * STATUSTYPE_INFO:   "<in_flight_size> <relative_throughput> "
   * STATUSTYPE_TABLE:  "<repeat_count> <relative_throughput> "
   *
   * Returns the value of 'sz' after the DMEMIT() calls.
   * NOTE(review): DMEMIT() is defined outside this file; it appears to rely
   * on the local names 'sz', 'result' and 'maxlen', so they must not be
   * renamed — confirm against its definition.
   */
  static int st_status(struct path_selector *ps, struct dm_path *path,
  		     status_type_t type, char *result, unsigned maxlen)
  {
  	unsigned sz = 0;
  	struct path_info *pi;
  
  	if (!path) {
  		DMEMIT("0 ");
  		return sz;
  	}
  
  	pi = path->pscontext;
  
  	if (type == STATUSTYPE_INFO) {
  		DMEMIT("%d %u ", atomic_read(&pi->in_flight_size),
  		       pi->relative_throughput);
  	} else if (type == STATUSTYPE_TABLE) {
  		DMEMIT("%u %u ", pi->repeat_count,
  		       pi->relative_throughput);
  	}
  
  	return sz;
  }
  
  static int st_add_path(struct path_selector *ps, struct dm_path *path,
  		       int argc, char **argv, char **error)
  {
  	struct selector *s = ps->context;
  	struct path_info *pi;
  	unsigned repeat_count = ST_MIN_IO;
  	unsigned relative_throughput = 1;
31998ef19   Mikulas Patocka   dm: reject traili...
114
  	char dummy;
9659f8114   Mike Snitzer   dm mpath: push pa...
115
  	unsigned long flags;
f392ba889   Kiyoshi Ueda   dm mpath: add ser...
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
  
  	/*
  	 * Arguments: [<repeat_count> [<relative_throughput>]]
  	 * 	<repeat_count>: The number of I/Os before switching path.
  	 * 			If not given, default (ST_MIN_IO) is used.
  	 * 	<relative_throughput>: The relative throughput value of
  	 *			the path among all paths in the path-group.
  	 * 			The valid range: 0-<ST_MAX_RELATIVE_THROUGHPUT>
  	 *			If not given, minimum value '1' is used.
  	 *			If '0' is given, the path isn't selected while
  	 * 			other paths having a positive value are
  	 * 			available.
  	 */
  	if (argc > 2) {
  		*error = "service-time ps: incorrect number of arguments";
  		return -EINVAL;
  	}
31998ef19   Mikulas Patocka   dm: reject traili...
133
  	if (argc && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) {
f392ba889   Kiyoshi Ueda   dm mpath: add ser...
134
135
136
  		*error = "service-time ps: invalid repeat count";
  		return -EINVAL;
  	}
21136f89d   Mike Snitzer   dm mpath: remove ...
137
138
139
140
  	if (repeat_count > 1) {
  		DMWARN_LIMIT("repeat_count > 1 is deprecated, using 1 instead");
  		repeat_count = 1;
  	}
f392ba889   Kiyoshi Ueda   dm mpath: add ser...
141
  	if ((argc == 2) &&
31998ef19   Mikulas Patocka   dm: reject traili...
142
  	    (sscanf(argv[1], "%u%c", &relative_throughput, &dummy) != 1 ||
f392ba889   Kiyoshi Ueda   dm mpath: add ser...
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
  	     relative_throughput > ST_MAX_RELATIVE_THROUGHPUT)) {
  		*error = "service-time ps: invalid relative_throughput value";
  		return -EINVAL;
  	}
  
  	/* allocate the path */
  	pi = kmalloc(sizeof(*pi), GFP_KERNEL);
  	if (!pi) {
  		*error = "service-time ps: Error allocating path context";
  		return -ENOMEM;
  	}
  
  	pi->path = path;
  	pi->repeat_count = repeat_count;
  	pi->relative_throughput = relative_throughput;
  	atomic_set(&pi->in_flight_size, 0);
  
  	path->pscontext = pi;
9659f8114   Mike Snitzer   dm mpath: push pa...
161
  	spin_lock_irqsave(&s->lock, flags);
f392ba889   Kiyoshi Ueda   dm mpath: add ser...
162
  	list_add_tail(&pi->list, &s->valid_paths);
9659f8114   Mike Snitzer   dm mpath: push pa...
163
  	spin_unlock_irqrestore(&s->lock, flags);
f392ba889   Kiyoshi Ueda   dm mpath: add ser...
164
165
166
167
168
169
170
171
  
  	return 0;
  }
  
  static void st_fail_path(struct path_selector *ps, struct dm_path *path)
  {
  	struct selector *s = ps->context;
  	struct path_info *pi = path->pscontext;
9659f8114   Mike Snitzer   dm mpath: push pa...
172
  	unsigned long flags;
f392ba889   Kiyoshi Ueda   dm mpath: add ser...
173

9659f8114   Mike Snitzer   dm mpath: push pa...
174
  	spin_lock_irqsave(&s->lock, flags);
f392ba889   Kiyoshi Ueda   dm mpath: add ser...
175
  	list_move(&pi->list, &s->failed_paths);
9659f8114   Mike Snitzer   dm mpath: push pa...
176
  	spin_unlock_irqrestore(&s->lock, flags);
f392ba889   Kiyoshi Ueda   dm mpath: add ser...
177
178
179
180
181
182
  }
  
  static int st_reinstate_path(struct path_selector *ps, struct dm_path *path)
  {
  	struct selector *s = ps->context;
  	struct path_info *pi = path->pscontext;
9659f8114   Mike Snitzer   dm mpath: push pa...
183
  	unsigned long flags;
f392ba889   Kiyoshi Ueda   dm mpath: add ser...
184

9659f8114   Mike Snitzer   dm mpath: push pa...
185
  	spin_lock_irqsave(&s->lock, flags);
f392ba889   Kiyoshi Ueda   dm mpath: add ser...
186
  	list_move_tail(&pi->list, &s->valid_paths);
9659f8114   Mike Snitzer   dm mpath: push pa...
187
  	spin_unlock_irqrestore(&s->lock, flags);
f392ba889   Kiyoshi Ueda   dm mpath: add ser...
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
  
  	return 0;
  }
  
  /*
   * Compare the estimated service time of 2 paths, pi1 and pi2,
   * for the incoming I/O.
   *
   * Returns:
   * < 0 : pi1 is better
   * 0   : no difference between pi1 and pi2
   * > 0 : pi2 is better
   *
   * Description:
   * Basically, the service time is estimated by:
   *     ('pi->in-flight-size' + 'incoming') / 'pi->relative_throughput'
   * To reduce the calculation, some optimizations are made.
   * (See comments inline)
   *
   * The result is always derived from an explicit comparison, never from a
   * subtraction: the operands are size_t/unsigned, and narrowing their
   * difference to int can flip the sign (or yield 0) once the difference
   * no longer fits in an int, which would select the worse path.
   */
  static int st_compare_load(struct path_info *pi1, struct path_info *pi2,
  			   size_t incoming)
  {
  	size_t sz1, sz2, st1, st2;
  	unsigned tp1 = pi1->relative_throughput;
  	unsigned tp2 = pi2->relative_throughput;
  
  	sz1 = atomic_read(&pi1->in_flight_size);
  	sz2 = atomic_read(&pi2->in_flight_size);
  
  	/*
  	 * Case 1: Both have same throughput value. Choose less loaded path.
  	 */
  	if (tp1 == tp2)
  		return (sz1 > sz2) - (sz1 < sz2);
  
  	/*
  	 * Case 2a: Both have same load. Choose higher throughput path.
  	 * Case 2b: One path has no throughput value. Choose the other one.
  	 */
  	if (sz1 == sz2 || !tp1 || !tp2)
  		return (tp2 > tp1) - (tp2 < tp1);
  
  	/*
  	 * Case 3: Calculate service time. Choose faster path.
  	 *         Service time using pi1:
  	 *             st1 = (sz1 + incoming) / pi1->relative_throughput
  	 *         Service time using pi2:
  	 *             st2 = (sz2 + incoming) / pi2->relative_throughput
  	 *
  	 *         To avoid the division, transform the expression to use
  	 *         multiplication.
  	 *         Because ->relative_throughput > 0 here, if st1 < st2,
  	 *         the expressions below are the same meaning:
  	 *             (sz1 + incoming) / pi1->relative_throughput <
  	 *                 (sz2 + incoming) / pi2->relative_throughput
  	 *             (sz1 + incoming) * pi2->relative_throughput <
  	 *                 (sz2 + incoming) * pi1->relative_throughput
  	 *         So use the latter one.
  	 */
  	sz1 += incoming;
  	sz2 += incoming;
  	if (unlikely(sz1 >= ST_MAX_INFLIGHT_SIZE ||
  		     sz2 >= ST_MAX_INFLIGHT_SIZE)) {
  		/*
  		 * Size may be too big for multiplying pi->relative_throughput
  		 * and overflow.
  		 * To avoid the overflow and mis-selection, shift down both.
  		 */
  		sz1 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT;
  		sz2 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT;
  	}
  	st1 = sz1 * tp2;
  	st2 = sz2 * tp1;
  	if (st1 != st2)
  		return st1 < st2 ? -1 : 1;
  
  	/*
  	 * Case 4: Service time is equal. Choose higher throughput path.
  	 */
  	return (tp2 > tp1) - (tp2 < tp1);
  }
90a4323cc   Mike Snitzer   dm path selector:...
268
  /*
   * Pick the valid path with the best estimated service time for an I/O of
   * @nr_bytes, as judged by st_compare_load().
   *
   * The chosen entry is moved to the tail of valid_paths; since a later
   * entry only replaces the current best when it compares strictly better,
   * ties then favour paths that were used less recently.
   *
   * Returns the selected path, or NULL when there is no valid path.
   */
  static struct dm_path *st_select_path(struct path_selector *ps, size_t nr_bytes)
  {
  	struct selector *sel = ps->context;
  	struct path_info *cur = NULL;
  	struct path_info *best = NULL;
  	struct dm_path *chosen = NULL;
  	unsigned long irq_flags;
  
  	spin_lock_irqsave(&sel->lock, irq_flags);
  
  	if (!list_empty(&sel->valid_paths)) {
  		list_for_each_entry(cur, &sel->valid_paths, list) {
  			if (!best || (st_compare_load(cur, best, nr_bytes) < 0))
  				best = cur;
  		}
  
  		if (best) {
  			/* Move most recently used to least preferred to evenly balance. */
  			list_move_tail(&best->list, &sel->valid_paths);
  			chosen = best->path;
  		}
  	}
  
  	spin_unlock_irqrestore(&sel->lock, irq_flags);
  
  	return chosen;
  }
  
  /*
   * Account for an I/O of @nr_bytes being issued on @path by adding its size
   * to the path's in-flight counter.  Always returns 0.
   */
  static int st_start_io(struct path_selector *ps, struct dm_path *path,
  		       size_t nr_bytes)
  {
  	struct path_info *info;
  
  	info = path->pscontext;
  	atomic_add(nr_bytes, &info->in_flight_size);
  
  	return 0;
  }
  
  static int st_end_io(struct path_selector *ps, struct dm_path *path,
087615bf3   Gabriel Krisman Bertazi   dm mpath: pass IO...
305
  		     size_t nr_bytes, u64 start_time)
f392ba889   Kiyoshi Ueda   dm mpath: add ser...
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
  {
  	struct path_info *pi = path->pscontext;
  
  	atomic_sub(nr_bytes, &pi->in_flight_size);
  
  	return 0;
  }
  
  /*
   * Path selector type registered by dm_st_init() under the name
   * "service-time".  st_status() emits two per-path values for both the
   * table and the info status types, matching table_args/info_args.
   */
  static struct path_selector_type st_ps = {
  	.name		= "service-time",
  	.module		= THIS_MODULE,
  	.table_args	= 2,
  	.info_args	= 2,
  	.create		= st_create,
  	.destroy	= st_destroy,
  	.status		= st_status,
  	.add_path	= st_add_path,
  	.fail_path	= st_fail_path,
  	.reinstate_path	= st_reinstate_path,
  	.select_path	= st_select_path,
  	.start_io	= st_start_io,
  	.end_io		= st_end_io,
  };
  
  /*
   * Module init: register the service-time path selector.
   *
   * Returns the result of dm_register_path_selector().  The "loaded" message
   * is printed only when registration succeeded, so a failed load does not
   * log a misleading success line after the error.
   */
  static int __init dm_st_init(void)
  {
  	int r = dm_register_path_selector(&st_ps);
  
  	if (r < 0) {
  		DMERR("register failed %d", r);
  		return r;
  	}
  
  	DMINFO("version " ST_VERSION " loaded");
  
  	return r;
  }
  
  /*
   * Module exit: unregister the service-time path selector and log an error
   * if unregistration reports a failure.
   */
  static void __exit dm_st_exit(void)
  {
  	int ret;
  
  	ret = dm_unregister_path_selector(&st_ps);
  	if (ret < 0)
  		DMERR("unregister failed %d", ret);
  }
  
  /* Module entry and exit points, followed by the module metadata. */
  module_init(dm_st_init);
  module_exit(dm_st_exit);
  
  MODULE_DESCRIPTION(DM_NAME " throughput oriented path selector");
  MODULE_AUTHOR("Kiyoshi Ueda <k-ueda@ct.jp.nec.com>");
  MODULE_LICENSE("GPL");