Blame view

block/as-iosched.c 37.7 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
   *  Anticipatory & deadline i/o scheduler.
   *
0fe234795   Jens Axboe   [PATCH] Update ax...
4
   *  Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
f5b3db001   Nick Piggin   [PATCH] as: coope...
5
   *                     Nick Piggin <nickpiggin@yahoo.com.au>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
6
7
8
9
10
11
12
   *
   */
  #include <linux/kernel.h>
  #include <linux/fs.h>
  #include <linux/blkdev.h>
  #include <linux/elevator.h>
  #include <linux/bio.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
14
15
16
  #include <linux/module.h>
  #include <linux/slab.h>
  #include <linux/init.h>
  #include <linux/compiler.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
  #include <linux/rbtree.h>
  #include <linux/interrupt.h>
  
  #define REQ_SYNC	1
  #define REQ_ASYNC	0
  
  /*
   * See Documentation/block/as-iosched.txt
   */
  
  /*
   * max time before a read is submitted.
   */
  #define default_read_expire (HZ / 8)
  
  /*
   * ditto for writes, these limits are not hard, even
   * if the disk is capable of satisfying them.
   */
  #define default_write_expire (HZ / 4)
  
  /*
   * read_batch_expire describes how long we will allow a stream of reads to
   * persist before looking to see whether it is time to switch over to writes.
   */
  #define default_read_batch_expire (HZ / 2)
  
  /*
   * write_batch_expire describes how long we want a stream of writes to run for.
   * This is not a hard limit, but a target we set for the auto-tuning thingy.
   * See, the problem is: we can send a lot of writes to disk cache / TCQ in
   * a short amount of time...
   */
  #define default_write_batch_expire (HZ / 8)
  
  /*
   * max time we may wait to anticipate a read (default around 6ms)
   */
  #define default_antic_expire ((HZ / 150) ? HZ / 150 : 1)
  
  /*
   * Keep track of up to 20ms thinktimes. We can go as big as we like here,
   * however huge values tend to interfere and not decay fast enough. A program
   * might be in a non-io phase of operation. Waiting on user input for example,
   * or doing a lengthy computation. A small penalty can be justified there, and
   * will still catch out those processes that constantly have large thinktimes.
   */
  #define MAX_THINKTIME (HZ/50UL)
  
  /*
   * Bit numbers used in as_io_context.state (manipulated with
   * test_bit/set_bit/clear_bit).
   */
  enum as_io_states {
  	AS_TASK_RUNNING   = 0,	/* Process has not exited */
  	AS_TASK_IOSTARTED = 1,	/* Process has started some IO */
  	AS_TASK_IORUNNING = 2,	/* Process has completed some IO */
  };
  
  /* State machine of the anticipation logic (as_data.antic_status). */
  enum anticipation_status {
  	/* Not anticipating (normal operation) */
  	ANTIC_OFF	= 0,
  	/* The last read has not yet completed */
  	ANTIC_WAIT_REQ	= 1,
  	/* Currently anticipating a request vs last read (which has completed) */
  	ANTIC_WAIT_NEXT	= 2,
  	/* Anticipating but have found a candidate or timed out */
  	ANTIC_FINISHED	= 3,
  };
  
  struct as_data {
  	/*
  	 * run time data
  	 */
  
  	struct request_queue *q;	/* the "owner" queue */
  
  	/*
  	 * requests (as_rq s) are present on both sort_list and fifo_list
  	 */
  	struct rb_root sort_list[2];
  	struct list_head fifo_list[2];
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
94
  	struct request *next_rq[2];	/* next in sort order */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
95
  	sector_t last_sector[2];	/* last REQ_SYNC & REQ_ASYNC sectors */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
96
97
98
  
  	unsigned long exit_prob;	/* probability a task will exit while
  					   being waited on */
f5b3db001   Nick Piggin   [PATCH] as: coope...
99
100
101
  	unsigned long exit_no_coop;	/* probablility an exited task will
  					   not be part of a later cooperating
  					   request */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
102
103
104
105
106
107
108
109
110
111
112
113
114
  	unsigned long new_ttime_total; 	/* mean thinktime on new proc */
  	unsigned long new_ttime_mean;
  	u64 new_seek_total;		/* mean seek on new proc */
  	sector_t new_seek_mean;
  
  	unsigned long current_batch_expires;
  	unsigned long last_check_fifo[2];
  	int changed_batch;		/* 1: waiting for old batch to end */
  	int new_batch;			/* 1: waiting on first read complete */
  	int batch_data_dir;		/* current batch REQ_SYNC / REQ_ASYNC */
  	int write_batch_count;		/* max # of reqs in a write batch */
  	int current_write_count;	/* how many requests left this batch */
  	int write_batch_idled;		/* has the write batch gone idle? */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
  
  	enum anticipation_status antic_status;
  	unsigned long antic_start;	/* jiffies: when it started */
  	struct timer_list antic_timer;	/* anticipatory scheduling timer */
  	struct work_struct antic_work;	/* Deferred unplugging */
  	struct io_context *io_context;	/* Identify the expected process */
  	int ioc_finished; /* IO associated with io_context is finished */
  	int nr_dispatched;
  
  	/*
  	 * settings that change how the i/o scheduler behaves
  	 */
  	unsigned long fifo_expire[2];
  	unsigned long batch_expire[2];
  	unsigned long antic_expire;
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
131
132
133
134
135
136
137
138
139
140
141
142
143
144
  /*
   * per-request data.
   */
  enum arq_state {
  	/* New - not referenced and not on any lists */
  	AS_RQ_NEW	 = 0,
  	/* In the request queue. It belongs to the scheduler */
  	AS_RQ_QUEUED	 = 1,
  	/* On the dispatch list. It belongs to the driver now */
  	AS_RQ_DISPATCHED = 2,
  	/*
  	 * The remaining values are debug poisoning, used to catch
  	 * requests being used when they shouldn't be.
  	 */
  	AS_RQ_PRESCHED	 = 3,
  	AS_RQ_REMOVED	 = 4,
  	AS_RQ_MERGED	 = 5,
  	AS_RQ_POSTSCHED	 = 6,
  };
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
145
146
147
  /*
   * Per-request scheduler data is stashed in the two elevator_private
   * pointers of the request: the io_context in elevator_private and the
   * enum arq_state (stored as a pointer-sized value) in elevator_private2.
   * Every macro argument is parenthesised so that expression arguments are
   * cast/dereferenced as a whole.
   */
  #define RQ_IOC(rq)	((struct io_context *) (rq)->elevator_private)
  #define RQ_STATE(rq)	((enum arq_state)(rq)->elevator_private2)
  #define RQ_SET_STATE(rq, state)	((rq)->elevator_private2 = (void *) (state))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
148

e4313dd42   Jens Axboe   [PATCH] as-iosche...
149
  static DEFINE_PER_CPU(unsigned long, ioc_count);
334e94de9   Al Viro   [PATCH] deal with...
150
  static struct completion *ioc_gone;
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
151
  static void as_move_to_dispatch(struct as_data *ad, struct request *rq);
ef9be1d33   Tejun Heo   [BLOCK] as-iosche...
152
  static void as_antic_stop(struct as_data *ad);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
153
154
155
156
157
158
159
160
  /*
   * IO Context helper functions
   */
  
  /*
   * Release an as_io_context, drop it from the ioc_count accounting and,
   * if someone is waiting on ioc_gone and the count has reached zero,
   * complete that waiter.
   */
  static void free_as_io_context(struct as_io_context *aic)
  {
  	kfree(aic);
  	elv_ioc_count_dec(ioc_count);
  
  	/* nobody is waiting for the contexts to drain */
  	if (!ioc_gone)
  		return;
  
  	if (elv_ioc_count_read(ioc_count) == 0)
  		complete(ioc_gone);
  }
e17a9489b   Al Viro   [PATCH] stop elv_...
165
166
  static void as_trim(struct io_context *ioc)
  {
334e94de9   Al Viro   [PATCH] deal with...
167
168
  	if (ioc->aic)
  		free_as_io_context(ioc->aic);
e17a9489b   Al Viro   [PATCH] stop elv_...
169
170
  	ioc->aic = NULL;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
  /*
   * Called when the task exits: clears AS_TASK_RUNNING in @aic->state,
   * warning if the bit was not set beforehand.
   */
  static void exit_as_io_context(struct as_io_context *aic)
  {
  	const int was_running = test_bit(AS_TASK_RUNNING, &aic->state);
  
  	WARN_ON(!was_running);
  	clear_bit(AS_TASK_RUNNING, &aic->state);
  }
  
  static struct as_io_context *alloc_as_io_context(void)
  {
  	struct as_io_context *ret;
  
  	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
  	if (ret) {
  		ret->dtor = free_as_io_context;
  		ret->exit = exit_as_io_context;
  		ret->state = 1 << AS_TASK_RUNNING;
  		atomic_set(&ret->nr_queued, 0);
  		atomic_set(&ret->nr_dispatched, 0);
  		spin_lock_init(&ret->lock);
  		ret->ttime_total = 0;
  		ret->ttime_samples = 0;
  		ret->ttime_mean = 0;
  		ret->seek_total = 0;
  		ret->seek_samples = 0;
  		ret->seek_mean = 0;
e4313dd42   Jens Axboe   [PATCH] as-iosche...
196
  		elv_ioc_count_inc(ioc_count);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
197
198
199
200
201
202
203
204
205
  	}
  
  	return ret;
  }
  
  /*
   * If the current task has no AS IO context then create one and initialise it.
   * Then take a ref on the task's io context and return it.
   */
b5deef901   Jens Axboe   [PATCH] Make sure...
206
  static struct io_context *as_get_io_context(int node)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
207
  {
b5deef901   Jens Axboe   [PATCH] Make sure...
208
  	struct io_context *ioc = get_io_context(GFP_ATOMIC, node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
209
210
211
212
213
214
215
216
217
  	if (ioc && !ioc->aic) {
  		ioc->aic = alloc_as_io_context();
  		if (!ioc->aic) {
  			put_io_context(ioc);
  			ioc = NULL;
  		}
  	}
  	return ioc;
  }
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
218
  static void as_put_io_context(struct request *rq)
b4878f245   Jens Axboe   [PATCH] 02/05: up...
219
220
  {
  	struct as_io_context *aic;
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
221
  	if (unlikely(!RQ_IOC(rq)))
b4878f245   Jens Axboe   [PATCH] 02/05: up...
222
  		return;
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
223
  	aic = RQ_IOC(rq)->aic;
b4878f245   Jens Axboe   [PATCH] 02/05: up...
224

8a8e674cb   Jens Axboe   [PATCH] as-iosche...
225
  	if (rq_is_sync(rq) && aic) {
b4878f245   Jens Axboe   [PATCH] 02/05: up...
226
227
228
229
230
  		spin_lock(&aic->lock);
  		set_bit(AS_TASK_IORUNNING, &aic->state);
  		aic->last_end_request = jiffies;
  		spin_unlock(&aic->lock);
  	}
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
231
  	put_io_context(RQ_IOC(rq));
b4878f245   Jens Axboe   [PATCH] 02/05: up...
232
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
233
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
234
235
   * rb tree support functions
   */
9e2585a8a   Jens Axboe   [PATCH] as-iosche...
236
  #define RQ_RB_ROOT(ad, rq)	(&(ad)->sort_list[rq_is_sync((rq))])
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
237

8a8e674cb   Jens Axboe   [PATCH] as-iosche...
238
  /*
   * Insert @rq into the sort tree for its direction. While elv_rb_add()
   * reports an alias already in the tree, that alias is moved to the
   * dispatch list, anticipation is stopped, and the insert is retried.
   */
  static void as_add_rq_rb(struct as_data *ad, struct request *rq)
  {
  	for (;;) {
  		struct request *alias = elv_rb_add(RQ_RB_ROOT(ad, rq), rq);
  
  		if (likely(!alias))
  			break;
  
  		as_move_to_dispatch(ad, alias);
  		as_antic_stop(ad);
  	}
  }
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
247
  /* Remove @rq from the sort tree that matches its direction. */
  static inline void as_del_rq_rb(struct as_data *ad, struct request *rq)
  {
  	struct rb_root *root = RQ_RB_ROOT(ad, rq);
  
  	elv_rb_del(root, rq);
  }
  
  /*
   * IO Scheduler proper
   */
  
  #define MAXBACK (1024 * 1024)	/*
  				 * Maximum distance the disk will go backward
  				 * for a request.
  				 */
  
  #define BACK_PENALTY	2
  
  /*
   * as_choose_req selects the preferred one of two requests of the same data_dir
   * ignoring time - eg. timeouts, which is the job of as_dispatch_request
   */
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
267
268
  static struct request *
  as_choose_req(struct as_data *ad, struct request *rq1, struct request *rq2)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
269
270
271
272
273
  {
  	int data_dir;
  	sector_t last, s1, s2, d1, d2;
  	int r1_wrap=0, r2_wrap=0;	/* requests are behind the disk head */
  	const sector_t maxback = MAXBACK;
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
274
275
276
277
  	if (rq1 == NULL || rq1 == rq2)
  		return rq2;
  	if (rq2 == NULL)
  		return rq1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
278

8a8e674cb   Jens Axboe   [PATCH] as-iosche...
279
  	data_dir = rq_is_sync(rq1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
280
281
  
  	last = ad->last_sector[data_dir];
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
282
283
  	s1 = rq1->sector;
  	s2 = rq2->sector;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
284

8a8e674cb   Jens Axboe   [PATCH] as-iosche...
285
  	BUG_ON(data_dir != rq_is_sync(rq2));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
  
  	/*
  	 * Strict one way elevator _except_ in the case where we allow
  	 * short backward seeks which are biased as twice the cost of a
  	 * similar forward seek.
  	 */
  	if (s1 >= last)
  		d1 = s1 - last;
  	else if (s1+maxback >= last)
  		d1 = (last - s1)*BACK_PENALTY;
  	else {
  		r1_wrap = 1;
  		d1 = 0; /* shut up, gcc */
  	}
  
  	if (s2 >= last)
  		d2 = s2 - last;
  	else if (s2+maxback >= last)
  		d2 = (last - s2)*BACK_PENALTY;
  	else {
  		r2_wrap = 1;
  		d2 = 0;
  	}
  
  	/* Found required data */
  	if (!r1_wrap && r2_wrap)
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
312
  		return rq1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
313
  	else if (!r2_wrap && r1_wrap)
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
314
  		return rq2;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
315
316
317
  	else if (r1_wrap && r2_wrap) {
  		/* both behind the head */
  		if (s1 <= s2)
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
318
  			return rq1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
319
  		else
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
320
  			return rq2;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
321
322
323
324
  	}
  
  	/* Both requests in front of the head */
  	if (d1 < d2)
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
325
  		return rq1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
326
  	else if (d2 < d1)
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
327
  		return rq2;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
328
329
  	else {
  		if (s1 >= s2)
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
330
  			return rq1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
331
  		else
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
332
  			return rq2;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
333
334
335
336
  	}
  }
  
  /*
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
337
   * as_find_next_rq finds the next request after @prev in elevator order.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
338
339
340
   * this with as_choose_req form the basis for how the scheduler chooses
   * what request to process next. Anticipation works on top of this.
   */
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
341
342
  static struct request *
  as_find_next_rq(struct as_data *ad, struct request *last)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
343
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
344
345
  	struct rb_node *rbnext = rb_next(&last->rb_node);
  	struct rb_node *rbprev = rb_prev(&last->rb_node);
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
346
  	struct request *next = NULL, *prev = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
347

e37f346e3   Jens Axboe   [PATCH] as-iosche...
348
  	BUG_ON(RB_EMPTY_NODE(&last->rb_node));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
349
350
  
  	if (rbprev)
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
351
  		prev = rb_entry_rq(rbprev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
352
353
  
  	if (rbnext)
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
354
  		next = rb_entry_rq(rbnext);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
355
  	else {
9e2585a8a   Jens Axboe   [PATCH] as-iosche...
356
  		const int data_dir = rq_is_sync(last);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
357

e37f346e3   Jens Axboe   [PATCH] as-iosche...
358
359
  		rbnext = rb_first(&ad->sort_list[data_dir]);
  		if (rbnext && rbnext != &last->rb_node)
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
360
  			next = rb_entry_rq(rbnext);
e37f346e3   Jens Axboe   [PATCH] as-iosche...
361
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
362

e37f346e3   Jens Axboe   [PATCH] as-iosche...
363
  	return as_choose_req(ad, next, prev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
  }
  
  /*
   * anticipatory scheduling functions follow
   */
  
  /*
   * as_antic_expired tells us when we have anticipated too long.
   * The elapsed time is taken as an absolute difference so that jiffy wraps,
   * and disks which have been idle for 0x80000000 jiffies, are handled.
   *
   * Returns 1 once at least antic_expire jiffies have elapsed since
   * antic_start, 0 otherwise.
   */
  static int as_antic_expired(struct as_data *ad)
  {
  	long elapsed = jiffies - ad->antic_start;
  
  	if (unlikely(elapsed < 0))
  		elapsed = -elapsed;
  
  	return elapsed >= ad->antic_expire;
  }
  
  /*
   * as_antic_waitnext starts anticipating that a nice request will soon be
   * submitted. See also as_antic_waitreq
   */
  static void as_antic_waitnext(struct as_data *ad)
  {
  	unsigned long timeout;
  
  	BUG_ON(ad->antic_status != ANTIC_OFF
  			&& ad->antic_status != ANTIC_WAIT_REQ);
  
  	timeout = ad->antic_start + ad->antic_expire;
  
  	mod_timer(&ad->antic_timer, timeout);
  
  	ad->antic_status = ANTIC_WAIT_NEXT;
  }
  
  /*
   * as_antic_waitreq starts anticipating. We don't start timing the anticipation
   * until the request that we're anticipating on has finished. This means we
   * are timing from when the candidate process wakes up hopefully.
   *
   * Only acts when the status is ANTIC_OFF: if there is an io_context whose
   * IO has not finished yet we wait for it (ANTIC_WAIT_REQ), otherwise the
   * timed wait is started right away.
   */
  static void as_antic_waitreq(struct as_data *ad)
  {
  	BUG_ON(ad->antic_status == ANTIC_FINISHED);
  
  	if (ad->antic_status != ANTIC_OFF)
  		return;
  
  	if (ad->io_context && !ad->ioc_finished)
  		ad->antic_status = ANTIC_WAIT_REQ;
  	else
  		as_antic_waitnext(ad);
  }
  
  /*
   * This is called directly by the functions in this file to stop anticipation.
   * We kill the timer and schedule a call to the request_fn asap.
   *
   * Nothing happens unless we are in one of the two waiting states.
   */
  static void as_antic_stop(struct as_data *ad)
  {
  	switch (ad->antic_status) {
  	case ANTIC_WAIT_NEXT:
  		/* the timer is only pending while waiting for the next request */
  		del_timer(&ad->antic_timer);
  		/* fall through */
  	case ANTIC_WAIT_REQ:
  		ad->antic_status = ANTIC_FINISHED;
  		/* see as_work_handler */
  		kblockd_schedule_work(&ad->antic_work);
  		break;
  	default:
  		break;
  	}
  }
  
  /*
   * as_antic_timeout is the timer function set by as_antic_waitnext.
   */
  static void as_antic_timeout(unsigned long data)
  {
  	struct request_queue *q = (struct request_queue *)data;
  	struct as_data *ad = q->elevator->elevator_data;
  	unsigned long flags;
  
  	spin_lock_irqsave(q->queue_lock, flags);
  	if (ad->antic_status == ANTIC_WAIT_REQ
  			|| ad->antic_status == ANTIC_WAIT_NEXT) {
  		struct as_io_context *aic = ad->io_context->aic;
  
  		ad->antic_status = ANTIC_FINISHED;
  		kblockd_schedule_work(&ad->antic_work);
  
  		if (aic->ttime_samples == 0) {
f5b3db001   Nick Piggin   [PATCH] as: coope...
457
  			/* process anticipated on has exited or timed out*/
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
458
459
  			ad->exit_prob = (7*ad->exit_prob + 256)/8;
  		}
f5b3db001   Nick Piggin   [PATCH] as: coope...
460
461
462
463
  		if (!test_bit(AS_TASK_RUNNING, &aic->state)) {
  			/* process not "saved" by a cooperating request */
  			ad->exit_no_coop = (7*ad->exit_no_coop + 256)/8;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
464
465
466
  	}
  	spin_unlock_irqrestore(q->queue_lock, flags);
  }
f5b3db001   Nick Piggin   [PATCH] as: coope...
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
  /*
   * Fold a new thinktime sample @ttime (jiffies) into the decaying averages
   * kept in @aic. If this is the first sample for @aic, the queue-wide
   * "new process" average is updated as well and exit_prob is decayed.
   *
   * All averages are 7/8-weighted, fixed point: 1.0 == 1<<8.
   */
  static void as_update_thinktime(struct as_data *ad, struct as_io_context *aic,
  				unsigned long ttime)
  {
  	const unsigned long scaled = 256 * ttime;
  
  	if (aic->ttime_samples == 0) {
  		ad->new_ttime_total = (7 * ad->new_ttime_total + scaled) / 8;
  		ad->new_ttime_mean = ad->new_ttime_total / 256;
  
  		ad->exit_prob = (7 * ad->exit_prob) / 8;
  	}
  
  	aic->ttime_samples = (7 * aic->ttime_samples + 256) / 8;
  	aic->ttime_total = (7 * aic->ttime_total + scaled) / 8;
  	/* +128 rounds to nearest */
  	aic->ttime_mean = (aic->ttime_total + 128) / aic->ttime_samples;
  }
  
  /*
   * Fold a new seek distance sample @sdist (sectors) into the decaying
   * averages kept in @aic. If this is the first sample for @aic, the
   * queue-wide "new process" seek average is updated with the raw value.
   */
  static void as_update_seekdist(struct as_data *ad, struct as_io_context *aic,
  				sector_t sdist)
  {
  	sector_t cap;
  	u64 mean;
  
  	if (aic->seek_samples == 0) {
  		ad->new_seek_total = (7*ad->new_seek_total + 256*(u64)sdist)/8;
  		ad->new_seek_mean = ad->new_seek_total / 256;
  	}
  
  	/*
  	 * Don't allow the seek distance to get too large from the
  	 * odd fragment, pagein, etc: clamp it to 4x the current mean plus
  	 * a slack that is larger while few samples exist
  	 * (second & third seek).
  	 */
  	cap = aic->seek_mean * 4;
  	cap += (aic->seek_samples <= 60) ? 2*1024*1024 : 2*1024*64;
  	if (cap < sdist)
  		sdist = cap;
  
  	aic->seek_samples = (7*aic->seek_samples + 256) / 8;
  	aic->seek_total = (7*aic->seek_total + (u64)256*sdist) / 8;
  
  	/* rounded 64-bit division: mean = total / samples */
  	mean = aic->seek_total + (aic->seek_samples/2);
  	do_div(mean, aic->seek_samples);
  	aic->seek_mean = (sector_t)mean;
  }
  
  /*
   * as_update_iohist keeps a decaying histogram of IO thinktimes, and
   * updates @aic->ttime_mean based on that. It is called when a new
   * request is queued.
   *
   * Only sync requests are accounted; a NULL @aic is ignored. Seek distance
   * statistics are updated alongside, and the end position of @rq is
   * remembered for the next call.
   */
  static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
  				struct request *rq)
  {
  	unsigned long in_flight;
  	unsigned long thinktime = 0;
  	sector_t seek_dist;
  	int io_completed;
  
  	if (aic == NULL)
  		return;
  	if (rq_is_sync(rq) != REQ_SYNC)
  		return;
  
  	in_flight = atomic_read(&aic->nr_queued)
  			+ atomic_read(&aic->nr_dispatched);
  
  	spin_lock(&aic->lock);
  
  	io_completed = test_bit(AS_TASK_IORUNNING, &aic->state);
  	if (io_completed || test_bit(AS_TASK_IOSTARTED, &aic->state)) {
  		/* Calculate read -> read thinktime */
  		if (io_completed && in_flight == 0) {
  			thinktime = jiffies - aic->last_end_request;
  			thinktime = min(thinktime, MAX_THINKTIME-1);
  		}
  		as_update_thinktime(ad, aic, thinktime);
  
  		/* Calculate read -> read seek distance */
  		seek_dist = (aic->last_request_pos < rq->sector)
  				? rq->sector - aic->last_request_pos
  				: aic->last_request_pos - rq->sector;
  		as_update_seekdist(ad, aic, seek_dist);
  	}
  
  	aic->last_request_pos = rq->sector + rq->nr_sectors;
  	set_bit(AS_TASK_IOSTARTED, &aic->state);
  
  	spin_unlock(&aic->lock);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
549
550
551
552
  /*
   * as_close_req decides if one request is considered "close" to the
   * previous one issued.
   */
f5b3db001   Nick Piggin   [PATCH] as: coope...
553
  static int as_close_req(struct as_data *ad, struct as_io_context *aic,
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
554
  			struct request *rq)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
555
  {
c6a632a2b   Nick Piggin   as: fix antic_exp...
556
  	unsigned long delay;	/* jiffies */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
557
  	sector_t last = ad->last_sector[ad->batch_data_dir];
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
558
  	sector_t next = rq->sector;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
559
  	sector_t delta; /* acceptable close offset (in sectors) */
f5b3db001   Nick Piggin   [PATCH] as: coope...
560
  	sector_t s;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
561
562
563
564
  
  	if (ad->antic_status == ANTIC_OFF || !ad->ioc_finished)
  		delay = 0;
  	else
c6a632a2b   Nick Piggin   as: fix antic_exp...
565
  		delay = jiffies - ad->antic_start;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
566

f5b3db001   Nick Piggin   [PATCH] as: coope...
567
568
  	if (delay == 0)
  		delta = 8192;
c6a632a2b   Nick Piggin   as: fix antic_exp...
569
  	else if (delay <= (20 * HZ / 1000) && delay <= ad->antic_expire)
f5b3db001   Nick Piggin   [PATCH] as: coope...
570
  		delta = 8192 << delay;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
571
572
  	else
  		return 1;
f5b3db001   Nick Piggin   [PATCH] as: coope...
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
  	if ((last <= next + (delta>>1)) && (next <= last + delta))
  		return 1;
  
  	if (last < next)
  		s = next - last;
  	else
  		s = last - next;
  
  	if (aic->seek_samples == 0) {
  		/*
  		 * Process has just started IO. Use past statistics to
  		 * gauge success possibility
  		 */
  		if (ad->new_seek_mean > s) {
  			/* this request is better than what we're expecting */
  			return 1;
  		}
  
  	} else {
  		if (aic->seek_mean > s) {
  			/* this request is better than what we're expecting */
  			return 1;
  		}
  	}
  
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
599
600
601
602
603
604
605
606
607
608
609
  }
  
  /*
   * as_can_break_anticipation returns true if we have been anticipating this
   * request.
   *
   * It also returns true if the process against which we are anticipating
   * submits a write - that's presumably an fsync, O_SYNC write, etc. We want to
   * dispatch it ASAP, because we know that application will not be submitting
   * any new reads.
   *
f5b3db001   Nick Piggin   [PATCH] as: coope...
610
   * If the task which has submitted the request has exited, break anticipation.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
611
612
613
   *
   * If this task has queued some other IO, do not enter enticipation.
   */
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
614
  static int as_can_break_anticipation(struct as_data *ad, struct request *rq)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
615
616
617
  {
  	struct io_context *ioc;
  	struct as_io_context *aic;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
618
619
620
  
  	ioc = ad->io_context;
  	BUG_ON(!ioc);
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
621
  	if (rq && ioc == RQ_IOC(rq)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
  		/* request from same process */
  		return 1;
  	}
  
  	if (ad->ioc_finished && as_antic_expired(ad)) {
  		/*
  		 * In this situation status should really be FINISHED,
  		 * however the timer hasn't had the chance to run yet.
  		 */
  		return 1;
  	}
  
  	aic = ioc->aic;
  	if (!aic)
  		return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
637
638
639
640
641
642
643
644
645
  	if (atomic_read(&aic->nr_queued) > 0) {
  		/* process has more requests queued */
  		return 1;
  	}
  
  	if (atomic_read(&aic->nr_dispatched) > 0) {
  		/* process has more requests dispatched */
  		return 1;
  	}
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
646
  	if (rq && rq_is_sync(rq) && as_close_req(ad, aic, rq)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
647
648
649
  		/*
  		 * Found a close request that is not one of ours.
  		 *
f5b3db001   Nick Piggin   [PATCH] as: coope...
650
651
  		 * This makes close requests from another process update
  		 * our IO history. Is generally useful when there are
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
652
653
654
  		 * two or more cooperating processes working in the same
  		 * area.
  		 */
f5b3db001   Nick Piggin   [PATCH] as: coope...
655
656
657
658
659
660
  		if (!test_bit(AS_TASK_RUNNING, &aic->state)) {
  			if (aic->ttime_samples == 0)
  				ad->exit_prob = (7*ad->exit_prob + 256)/8;
  
  			ad->exit_no_coop = (7*ad->exit_no_coop)/8;
  		}
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
661
  		as_update_iohist(ad, aic, rq);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
662
663
  		return 1;
  	}
f5b3db001   Nick Piggin   [PATCH] as: coope...
664
665
666
667
668
669
670
671
  	if (!test_bit(AS_TASK_RUNNING, &aic->state)) {
  		/* process anticipated on has exited */
  		if (aic->ttime_samples == 0)
  			ad->exit_prob = (7*ad->exit_prob + 256)/8;
  
  		if (ad->exit_no_coop > 128)
  			return 1;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
672
673
674
675
  
  	if (aic->ttime_samples == 0) {
  		if (ad->new_ttime_mean > ad->antic_expire)
  			return 1;
f5b3db001   Nick Piggin   [PATCH] as: coope...
676
  		if (ad->exit_prob * ad->exit_no_coop > 128*256)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
677
678
679
680
681
  			return 1;
  	} else if (aic->ttime_mean > ad->antic_expire) {
  		/* the process thinks too much between requests */
  		return 1;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
682
683
684
685
  	return 0;
  }
  
  /*
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
686
   * as_can_anticipate indicates whether we should either run rq
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
687
688
   * or keep anticipating a better request.
   */
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
689
  static int as_can_anticipate(struct as_data *ad, struct request *rq)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
690
691
692
693
694
695
696
697
698
699
700
701
  {
  	if (!ad->io_context)
  		/*
  		 * Last request submitted was a write
  		 */
  		return 0;
  
  	if (ad->antic_status == ANTIC_FINISHED)
  		/*
  		 * Don't restart if we have just finished. Run the next request
  		 */
  		return 0;
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
702
  	if (as_can_break_anticipation(ad, rq))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
703
704
705
706
707
708
709
710
711
712
713
  		/*
  		 * This request is a good candidate. Don't keep anticipating,
  		 * run it.
  		 */
  		return 0;
  
  	/*
  	 * OK from here, we haven't finished, and don't have a decent request!
  	 * Status is either ANTIC_OFF so start waiting,
  	 * ANTIC_WAIT_REQ so continue waiting for request to finish
  	 * or ANTIC_WAIT_NEXT so continue waiting for an acceptable request.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
714
715
716
717
  	 */
  
  	return 1;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
718
  /*
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
719
   * as_update_rq must be called whenever a request (rq) is added to
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
720
721
722
   * the sort_list. This function keeps caches up to date, and checks if the
   * request might be one we are "anticipating"
   */
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
723
  static void as_update_rq(struct as_data *ad, struct request *rq)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
724
  {
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
725
  	const int data_dir = rq_is_sync(rq);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
726

8a8e674cb   Jens Axboe   [PATCH] as-iosche...
727
728
  	/* keep the next_rq cache up to date */
  	ad->next_rq[data_dir] = as_choose_req(ad, rq, ad->next_rq[data_dir]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
729
730
731
732
733
734
735
736
  
  	/*
  	 * have we been anticipating this request?
  	 * or does it come from the same process as the one we are anticipating
  	 * for?
  	 */
  	if (ad->antic_status == ANTIC_WAIT_REQ
  			|| ad->antic_status == ANTIC_WAIT_NEXT) {
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
737
  		if (as_can_break_anticipation(ad, rq))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
  			as_antic_stop(ad);
  	}
  }
  
  /*
   * update_write_batch - auto-tune the number of requests in a write batch
   * @ad: scheduler instance data
   *
   * Derives how long the batch took from jiffies, current_batch_expires and
   * the async batch_expire target, then shrinks write_batch_count when the
   * batch overran the target (unless it was flagged as idled) or grows it when
   * the batch finished early with its whole quota used.  A factor-of-three miss
   * halves/doubles the count, a smaller miss nudges it by one.  The count
   * never drops below 1.
   */
  static void update_write_batch(struct as_data *ad)
  {
  	unsigned long target = ad->batch_expire[REQ_ASYNC];
  	long took = (jiffies - ad->current_batch_expires) + target;

  	/* a negative duration is treated as "no time at all" */
  	if (took < 0)
  		took = 0;

  	if (took > target && !ad->write_batch_idled) {
  		/* overran the target: back off, hard if we missed by 3x */
  		if (took > target * 3)
  			ad->write_batch_count /= 2;
  		else
  			ad->write_batch_count -= 1;
  	} else if (took < target && ad->current_write_count == 0) {
  		/* quota exhausted ahead of time: allow more writes */
  		if (target > took * 3)
  			ad->write_batch_count *= 2;
  		else
  			ad->write_batch_count += 1;
  	}

  	if (ad->write_batch_count < 1)
  		ad->write_batch_count = 1;
  }
  
  /*
   * as_completed_request - account for a request the driver has finished
   * @q:  request queue the request belongs to
   * @rq: the completed request
   *
   * To be called when a request has completed and returned something to the
   * requesting process, be it an error or data.  A request that is not in
   * the AS_RQ_REMOVED state is reported and only has its state advanced.
   */
  static void as_completed_request(request_queue_t *q, struct request *rq)
  {
  	struct as_data *ad = q->elevator->elevator_data;
  	struct io_context *rq_ioc;

  	WARN_ON(!list_empty(&rq->queuelist));

  	if (RQ_STATE(rq) != AS_RQ_REMOVED) {
  		printk("rq->state %d\n", RQ_STATE(rq));
  		WARN_ON(1);
  		goto out;
  	}

  	/*
  	 * This is the last outstanding request of the previous batch: the
  	 * pending batch change can go ahead, so kick the queue from kblockd.
  	 */
  	if (ad->changed_batch && ad->nr_dispatched == 1) {
  		kblockd_schedule_work(&ad->antic_work);
  		ad->changed_batch = 0;

  		if (ad->batch_data_dir == REQ_SYNC)
  			ad->new_batch = 1;
  	}

  	WARN_ON(ad->nr_dispatched == 0);
  	ad->nr_dispatched -= 1;

  	/*
  	 * Start counting the batch from when a request of that direction is
  	 * actually serviced. This should help devices with big TCQ windows
  	 * and writeback caches
  	 */
  	if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) {
  		update_write_batch(ad);
  		ad->current_batch_expires =
  			jiffies + ad->batch_expire[REQ_SYNC];
  		ad->new_batch = 0;
  	}

  	/* did this request come from the context we are tracking? */
  	rq_ioc = RQ_IOC(rq);
  	if (ad->io_context == rq_ioc && ad->io_context) {
  		ad->antic_start = jiffies;
  		ad->ioc_finished = 1;

  		/* we were waiting on this request, now anticipate the next */
  		if (ad->antic_status == ANTIC_WAIT_REQ)
  			as_antic_waitnext(ad);
  	}

  	as_put_io_context(rq);

  out:
  	RQ_SET_STATE(rq, AS_RQ_POSTSCHED);
  }
  
  /*
   * as_remove_queued_request - take a request off the pre-dispatch structures
   * @q:  request queue
   * @rq: request to remove; expected to be in the AS_RQ_QUEUED state
   *
   * Removes @rq from the fifo and from the sort tree without updating
   * refcounts. It is expected the caller will drop the reference unless it
   * re-inserts the request somewhere else in the elevator (i.e. the dispatch
   * queue).
   */
  static void as_remove_queued_request(request_queue_t *q, struct request *rq)
  {
  	struct as_data *ad = q->elevator->elevator_data;
  	struct io_context *owner = RQ_IOC(rq);
  	const int dir = rq_is_sync(rq);

  	WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED);

  	/* one request fewer queued on behalf of the owning context */
  	if (owner != NULL && owner->aic != NULL) {
  		BUG_ON(atomic_read(&owner->aic->nr_queued) == 0);
  		atomic_dec(&owner->aic->nr_queued);
  	}

  	/* the cached "next_rq" must not keep pointing at a removed request */
  	if (rq == ad->next_rq[dir])
  		ad->next_rq[dir] = as_find_next_rq(ad, rq);

  	rq_fifo_clear(rq);
  	as_del_rq_rb(ad, rq);
  }
  
  /*
   * as_fifo_expired - has the oldest request on a fifo passed its deadline?
   * @ad:   scheduler instance data
   * @adir: direction (fifo index) to check
   *
   * Returns 0 if there is no expired request at the head of the fifo, nonzero
   * otherwise.  The check is ratelimited so that it is only performed once per
   * `fifo_expire' interval; within that interval 0 is returned without looking
   * at the fifo.  Otherwise a large number of expired requests would create a
   * hopeless seekstorm.
   *
   * See as_antic_expired comment.
   */
  static int as_fifo_expired(struct as_data *ad, int adir)
  {
  	struct request *oldest;
  	long since_check = jiffies - ad->last_check_fifo[adir];

  	if (unlikely(since_check < 0))
  		since_check = -since_check;

  	/* still inside the ratelimit window */
  	if (since_check < ad->fifo_expire[adir])
  		return 0;

  	ad->last_check_fifo[adir] = jiffies;

  	if (!list_empty(&ad->fifo_list[adir])) {
  		oldest = rq_entry_fifo(ad->fifo_list[adir].next);
  		return time_after(jiffies, rq_fifo_time(oldest));
  	}

  	return 0;
  }
  
  /*
   * as_batch_expired - has the current batch run its course?
   * @ad: scheduler instance data
   *
   * A batch is a set of reads or a set of writes.  While a batch change is
   * pending (changed_batch) or a new batch has not started being timed yet
   * (new_batch) the batch is never reported as expired.  A REQ_SYNC batch
   * expires on time only; any other batch also expires once its write quota
   * (current_write_count) has reached zero.
   */
  static inline int as_batch_expired(struct as_data *ad)
  {
  	int timed_out;

  	if (ad->changed_batch || ad->new_batch)
  		return 0;

  	timed_out = time_after(jiffies, ad->current_batch_expires);

  	/* TODO! add a check so a complete fifo gets written? */
  	if (ad->batch_data_dir == REQ_SYNC)
  		return timed_out;

  	return timed_out || ad->current_write_count == 0;
  }
  
  /*
   * move an entry to dispatch queue
   */
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
901
  static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
902
  {
9e2585a8a   Jens Axboe   [PATCH] as-iosche...
903
  	const int data_dir = rq_is_sync(rq);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
904

e37f346e3   Jens Axboe   [PATCH] as-iosche...
905
  	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
906
907
908
909
910
911
  
  	as_antic_stop(ad);
  	ad->antic_status = ANTIC_OFF;
  
  	/*
  	 * This has to be set in order to be correctly updated by
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
912
  	 * as_find_next_rq
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
913
914
915
916
  	 */
  	ad->last_sector[data_dir] = rq->sector + rq->nr_sectors;
  
  	if (data_dir == REQ_SYNC) {
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
917
  		struct io_context *ioc = RQ_IOC(rq);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
918
  		/* In case we have to anticipate after this */
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
919
  		copy_io_context(&ad->io_context, &ioc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
920
921
922
923
924
925
926
927
928
929
  	} else {
  		if (ad->io_context) {
  			put_io_context(ad->io_context);
  			ad->io_context = NULL;
  		}
  
  		if (ad->current_write_count != 0)
  			ad->current_write_count--;
  	}
  	ad->ioc_finished = 0;
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
930
  	ad->next_rq[data_dir] = as_find_next_rq(ad, rq);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
931
932
933
934
  
  	/*
  	 * take it off the sort and fifo list, add to dispatch queue
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
935
  	as_remove_queued_request(ad->q, rq);
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
936
  	WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
937

b4878f245   Jens Axboe   [PATCH] 02/05: up...
938
  	elv_dispatch_sort(ad->q, rq);
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
939
940
941
  	RQ_SET_STATE(rq, AS_RQ_DISPATCHED);
  	if (RQ_IOC(rq) && RQ_IOC(rq)->aic)
  		atomic_inc(&RQ_IOC(rq)->aic->nr_dispatched);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
942
943
944
945
946
947
948
949
  	ad->nr_dispatched++;
  }
  
  /*
   * as_dispatch_request selects the best request according to
   * read/write expire, batch expire, etc, and moves it to the dispatch
   * queue. Returns 1 if a request was found, 0 otherwise.
   */
b4878f245   Jens Axboe   [PATCH] 02/05: up...
950
  static int as_dispatch_request(request_queue_t *q, int force)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
951
  {
b4878f245   Jens Axboe   [PATCH] 02/05: up...
952
  	struct as_data *ad = q->elevator->elevator_data;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
953
954
  	const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
  	const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
955
  	struct request *rq;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
956

b4878f245   Jens Axboe   [PATCH] 02/05: up...
957
958
959
960
961
962
963
964
965
966
967
968
969
  	if (unlikely(force)) {
  		/*
  		 * Forced dispatch, accounting is useless.  Reset
  		 * accounting states and dump fifo_lists.  Note that
  		 * batch_data_dir is reset to REQ_SYNC to avoid
  		 * screwing write batch accounting as write batch
  		 * accounting occurs on W->R transition.
  		 */
  		int dispatched = 0;
  
  		ad->batch_data_dir = REQ_SYNC;
  		ad->changed_batch = 0;
  		ad->new_batch = 0;
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
970
971
  		while (ad->next_rq[REQ_SYNC]) {
  			as_move_to_dispatch(ad, ad->next_rq[REQ_SYNC]);
b4878f245   Jens Axboe   [PATCH] 02/05: up...
972
973
974
  			dispatched++;
  		}
  		ad->last_check_fifo[REQ_SYNC] = jiffies;
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
975
976
  		while (ad->next_rq[REQ_ASYNC]) {
  			as_move_to_dispatch(ad, ad->next_rq[REQ_ASYNC]);
b4878f245   Jens Axboe   [PATCH] 02/05: up...
977
978
979
980
981
982
  			dispatched++;
  		}
  		ad->last_check_fifo[REQ_ASYNC] = jiffies;
  
  		return dispatched;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
983
984
985
986
987
988
989
990
991
992
993
  	/* Signal that the write batch was uncontended, so we can't time it */
  	if (ad->batch_data_dir == REQ_ASYNC && !reads) {
  		if (ad->current_write_count == 0 || !writes)
  			ad->write_batch_idled = 1;
  	}
  
  	if (!(reads || writes)
  		|| ad->antic_status == ANTIC_WAIT_REQ
  		|| ad->antic_status == ANTIC_WAIT_NEXT
  		|| ad->changed_batch)
  		return 0;
f5b3db001   Nick Piggin   [PATCH] as: coope...
994
  	if (!(reads && writes && as_batch_expired(ad))) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
995
996
997
  		/*
  		 * batch is still running or no reads or no writes
  		 */
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
998
  		rq = ad->next_rq[ad->batch_data_dir];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
999
1000
1001
1002
  
  		if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) {
  			if (as_fifo_expired(ad, REQ_SYNC))
  				goto fifo_expired;
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
1003
  			if (as_can_anticipate(ad, rq)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1004
1005
1006
1007
  				as_antic_waitreq(ad);
  				return 0;
  			}
  		}
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
1008
  		if (rq) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
  			/* we have a "next request" */
  			if (reads && !writes)
  				ad->current_batch_expires =
  					jiffies + ad->batch_expire[REQ_SYNC];
  			goto dispatch_request;
  		}
  	}
  
  	/*
  	 * at this point we are not running a batch. select the appropriate
  	 * data direction (read / write)
  	 */
  
  	if (reads) {
dd67d0515   Jens Axboe   [PATCH] rbtree: s...
1023
  		BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[REQ_SYNC]));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
  
  		if (writes && ad->batch_data_dir == REQ_SYNC)
  			/*
  			 * Last batch was a read, switch to writes
  			 */
  			goto dispatch_writes;
  
  		if (ad->batch_data_dir == REQ_ASYNC) {
  			WARN_ON(ad->new_batch);
  			ad->changed_batch = 1;
  		}
  		ad->batch_data_dir = REQ_SYNC;
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
1036
  		rq = rq_entry_fifo(ad->fifo_list[REQ_SYNC].next);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
  		ad->last_check_fifo[ad->batch_data_dir] = jiffies;
  		goto dispatch_request;
  	}
  
  	/*
  	 * the last batch was a read
  	 */
  
  	if (writes) {
  dispatch_writes:
dd67d0515   Jens Axboe   [PATCH] rbtree: s...
1047
  		BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[REQ_ASYNC]));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
  
  		if (ad->batch_data_dir == REQ_SYNC) {
  			ad->changed_batch = 1;
  
  			/*
  			 * new_batch might be 1 when the queue runs out of
  			 * reads. A subsequent submission of a write might
  			 * cause a change of batch before the read is finished.
  			 */
  			ad->new_batch = 0;
  		}
  		ad->batch_data_dir = REQ_ASYNC;
  		ad->current_write_count = ad->write_batch_count;
  		ad->write_batch_idled = 0;
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
1062
  		rq = ad->next_rq[ad->batch_data_dir];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
  		goto dispatch_request;
  	}
  
  	BUG();
  	return 0;
  
  dispatch_request:
  	/*
  	 * If a request has expired, service it.
  	 */
  
  	if (as_fifo_expired(ad, ad->batch_data_dir)) {
  fifo_expired:
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
1076
  		rq = rq_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
  	}
  
  	if (ad->changed_batch) {
  		WARN_ON(ad->new_batch);
  
  		if (ad->nr_dispatched)
  			return 0;
  
  		if (ad->batch_data_dir == REQ_ASYNC)
  			ad->current_batch_expires = jiffies +
  					ad->batch_expire[REQ_ASYNC];
  		else
  			ad->new_batch = 1;
  
  		ad->changed_batch = 0;
  	}
  
  	/*
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
1095
  	 * rq is the selected appropriate request.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1096
  	 */
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
1097
  	as_move_to_dispatch(ad, rq);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1098
1099
1100
  
  	return 1;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1101
  /*
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
1102
   * add rq to rbtree and fifo
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1103
   */
b4878f245   Jens Axboe   [PATCH] 02/05: up...
1104
  static void as_add_request(request_queue_t *q, struct request *rq)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1105
  {
b4878f245   Jens Axboe   [PATCH] 02/05: up...
1106
  	struct as_data *ad = q->elevator->elevator_data;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1107
  	int data_dir;
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
1108
  	RQ_SET_STATE(rq, AS_RQ_NEW);
b4878f245   Jens Axboe   [PATCH] 02/05: up...
1109

9e2585a8a   Jens Axboe   [PATCH] as-iosche...
1110
  	data_dir = rq_is_sync(rq);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1111

b5deef901   Jens Axboe   [PATCH] Make sure...
1112
  	rq->elevator_private = as_get_io_context(q->node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1113

8a8e674cb   Jens Axboe   [PATCH] as-iosche...
1114
1115
1116
  	if (RQ_IOC(rq)) {
  		as_update_iohist(ad, RQ_IOC(rq)->aic, rq);
  		atomic_inc(&RQ_IOC(rq)->aic->nr_queued);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1117
  	}
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
1118
  	as_add_rq_rb(ad, rq);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1119

ef9be1d33   Tejun Heo   [BLOCK] as-iosche...
1120
1121
1122
  	/*
  	 * set expire time (only used for reads) and add to fifo list
  	 */
d4f2f4629   Jens Axboe   [PATCH] as-iosche...
1123
1124
  	rq_set_fifo_time(rq, jiffies + ad->fifo_expire[data_dir]);
  	list_add_tail(&rq->queuelist, &ad->fifo_list[data_dir]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1125

8a8e674cb   Jens Axboe   [PATCH] as-iosche...
1126
1127
  	as_update_rq(ad, rq); /* keep state machine up to date */
  	RQ_SET_STATE(rq, AS_RQ_QUEUED);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1128
  }
b4878f245   Jens Axboe   [PATCH] 02/05: up...
1129
  /*
   * as_activate_request - a dispatched request is being taken by the driver
   *
   * Moves @rq from AS_RQ_DISPATCHED to AS_RQ_REMOVED and drops the
   * per-context count of dispatched requests.
   */
  static void as_activate_request(request_queue_t *q, struct request *rq)
  {
  	struct io_context *owner;

  	WARN_ON(RQ_STATE(rq) != AS_RQ_DISPATCHED);
  	RQ_SET_STATE(rq, AS_RQ_REMOVED);

  	owner = RQ_IOC(rq);
  	if (owner != NULL && owner->aic != NULL)
  		atomic_dec(&owner->aic->nr_dispatched);
  }
b4878f245   Jens Axboe   [PATCH] 02/05: up...
1136
  /*
   * as_deactivate_request - undo as_activate_request()
   *
   * Moves @rq from AS_RQ_REMOVED back to AS_RQ_DISPATCHED and re-takes the
   * per-context count of dispatched requests.
   */
  static void as_deactivate_request(request_queue_t *q, struct request *rq)
  {
  	struct io_context *owner;

  	WARN_ON(RQ_STATE(rq) != AS_RQ_REMOVED);
  	RQ_SET_STATE(rq, AS_RQ_DISPATCHED);

  	owner = RQ_IOC(rq);
  	if (owner != NULL && owner->aic != NULL)
  		atomic_inc(&owner->aic->nr_dispatched);
  }
  
  /*
   * as_queue_empty tells us if there are requests left in the device. It may
   * not be the case that a driver can get the next request even if the queue
   * is not empty - it is used in the block layer to check for plugging and
   * merging opportunities
   *
   * Returns nonzero only when both the async and the sync fifo are empty.
   */
  static int as_queue_empty(request_queue_t *q)
  {
  	struct as_data *ad = q->elevator->elevator_data;

  	if (!list_empty(&ad->fifo_list[REQ_ASYNC]))
  		return 0;

  	return list_empty(&ad->fifo_list[REQ_SYNC]) != 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1156
1157
1158
1159
1160
1161
  /*
   * as_merge - look for a queued request that @bio can be front-merged into
   * @q:   request queue
   * @req: out parameter, set to the merge candidate on success
   * @bio: bio that is about to be queued
   *
   * Searches the sort tree for a request keyed at the sector right after the
   * end of @bio.  Returns ELEVATOR_FRONT_MERGE (and sets *@req) when such a
   * request exists and elv_rq_merge_ok() allows the merge, otherwise
   * ELEVATOR_NO_MERGE.
   *
   * NOTE(review): the tree is selected with bio_data_dir() here, while the
   * rest of this file derives directions from rq_is_sync() - confirm that
   * both index sort_list[] the same way.
   */
  static int
  as_merge(request_queue_t *q, struct request **req, struct bio *bio)
  {
  	struct as_data *ad = q->elevator->elevator_data;
  	sector_t bio_end = bio->bi_sector + bio_sectors(bio);
  	struct request *candidate;

  	/*
  	 * check for front merge
  	 */
  	candidate = elv_rb_find(&ad->sort_list[bio_data_dir(bio)], bio_end);
  	if (!candidate || !elv_rq_merge_ok(candidate, bio))
  		return ELEVATOR_NO_MERGE;

  	*req = candidate;
  	return ELEVATOR_FRONT_MERGE;
  }
e37f346e3   Jens Axboe   [PATCH] as-iosche...
1174
  /*
   * as_merged_request - a bio has been merged into @req
   * @q:    request queue
   * @req:  request that grew
   * @type: kind of merge that happened
   *
   * Only a front merge requires work: the request is taken out of the sort
   * tree and inserted again so that it gets repositioned.
   */
  static void as_merged_request(request_queue_t *q, struct request *req, int type)
  {
  	struct as_data *ad = q->elevator->elevator_data;

  	if (type != ELEVATOR_FRONT_MERGE)
  		return;

  	/*
  	 * if the merge was a front merge, we need to reposition request
  	 */
  	as_del_rq_rb(ad, req);
  	as_add_rq_rb(ad, req);

  	/*
  	 * Note! At this stage of this and the next function, our next
  	 * request may not be optimal - eg the request may have "grown"
  	 * behind the disk head. We currently don't bother adjusting.
  	 */
  }
f5b3db001   Nick Piggin   [PATCH] as: coope...
1191
1192
  /*
   * as_merged_requests - @next has been merged into @req and is going away
   * @q:    request queue
   * @req:  surviving request
   * @next: request that was absorbed; removed from the scheduler here
   *
   * If both requests sit on a fifo and @next would expire first, @req takes
   * over @next's fifo position and expire time.  @next is then removed, its
   * io context reference is dropped and it is marked AS_RQ_MERGED.
   */
  static void as_merged_requests(request_queue_t *q, struct request *req,
  				struct request *next)
  {
  	const int both_on_fifo = !list_empty(&req->queuelist) &&
  				 !list_empty(&next->queuelist);

  	/*
  	 * if next expires before req, assign its expire time to req
  	 * and move into next position (next will be deleted) in fifo
  	 */
  	if (both_on_fifo &&
  	    time_before(rq_fifo_time(next), rq_fifo_time(req))) {
  		struct io_context *rioc = RQ_IOC(req);
  		struct io_context *nioc = RQ_IOC(next);

  		list_move(&req->queuelist, &next->queuelist);
  		rq_set_fifo_time(req, rq_fifo_time(next));

  		/*
  		 * Don't copy here but swap, because when next is
  		 * removed below, it must contain the unused context
  		 *
  		 * NOTE(review): the addresses passed are those of the two
  		 * locals above, which are not read again - confirm that
  		 * this reaches the contexts stored in the requests.
  		 */
  		swap_io_context(&rioc, &nioc);
  	}

  	/*
  	 * kill knowledge of next, this one is a goner
  	 */
  	as_remove_queued_request(q, next);
  	as_put_io_context(next);

  	RQ_SET_STATE(next, AS_RQ_MERGED);
  }
  
  /*
   * This is executed in a "deferred" process context, by kblockd. It calls the
   * driver's request_fn so the driver can submit that request.
   *
   * IMPORTANT! This guy will reenter the elevator, so set up all queue global
   * state before calling, and don't rely on any state over calls.
   *
   * FIXME! dispatch queue is not a queue at all!
   */
65f27f384   David Howells   WorkStruct: Pass ...
1230
  static void as_work_handler(struct work_struct *work)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1231
  {
65f27f384   David Howells   WorkStruct: Pass ...
1232
1233
  	struct as_data *ad = container_of(work, struct as_data, antic_work);
  	struct request_queue *q = ad->q;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1234
1235
1236
  	unsigned long flags;
  
  	spin_lock_irqsave(q->queue_lock, flags);
dc72ef4ae   Jens Axboe   [PATCH] Add blk_s...
1237
  	blk_start_queueing(q);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1238
1239
  	spin_unlock_irqrestore(q->queue_lock, flags);
  }
cb78b285c   Jens Axboe   [PATCH] Drop usel...
1240
  /*
   * as_may_queue - tell the block layer how urgently a request is wanted
   * @q:  request queue
   * @rw: unused here
   *
   * Returns ELV_MQUEUE_MUST when we are in an anticipation wait state and the
   * io context obtained from as_get_io_context() is the one being tracked in
   * ad->io_context; ELV_MQUEUE_MAY in every other case.
   */
  static int as_may_queue(request_queue_t *q, int rw)
  {
  	struct as_data *ad = q->elevator->elevator_data;
  	struct io_context *ioc;
  	int verdict;

  	if (ad->antic_status != ANTIC_WAIT_REQ &&
  	    ad->antic_status != ANTIC_WAIT_NEXT)
  		return ELV_MQUEUE_MAY;

  	ioc = as_get_io_context(q->node);
  	verdict = (ad->io_context == ioc) ? ELV_MQUEUE_MUST : ELV_MQUEUE_MAY;
  	/* drop the reference taken by the lookup above */
  	put_io_context(ioc);

  	return verdict;
  }
  
  /*
   * as_exit_queue - tear down the scheduler data attached to an elevator
   * @e: elevator whose elevator_data is the as_data to release
   *
   * Both fifos must already be empty.
   */
  static void as_exit_queue(elevator_t *e)
  {
  	struct as_data *ad = e->elevator_data;

  	/* quiesce the anticipation timer and its deferred work first */
  	del_timer_sync(&ad->antic_timer);
  	kblockd_flush_work(&ad->antic_work);

  	BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
  	BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));

  	/* release the tracked io context, then the data itself */
  	put_io_context(ad->io_context);
  	kfree(ad);
  }
  
  /*
8a8e674cb   Jens Axboe   [PATCH] as-iosche...
1270
   * initialize elevator private data (as_data).
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1271
   */
bb37b94c6   Jens Axboe   [BLOCK] Cleanup u...
1272
  static void *as_init_queue(request_queue_t *q)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1273
1274
  {
  	struct as_data *ad;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1275

94f6030ca   Christoph Lameter   Slab allocators: ...
1276
  	ad = kmalloc_node(sizeof(*ad), GFP_KERNEL | __GFP_ZERO, q->node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1277
  	if (!ad)
bc1c11697   Jens Axboe   [PATCH] elevator ...
1278
  		return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1279
1280
  
  	ad->q = q; /* Identify what queue the data belongs to */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1281
1282
1283
1284
  	/* anticipatory scheduling helpers */
  	ad->antic_timer.function = as_antic_timeout;
  	ad->antic_timer.data = (unsigned long)q;
  	init_timer(&ad->antic_timer);
65f27f384   David Howells   WorkStruct: Pass ...
1285
  	INIT_WORK(&ad->antic_work, as_work_handler);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1286

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1287
1288
1289
1290
  	INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
  	INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
  	ad->sort_list[REQ_SYNC] = RB_ROOT;
  	ad->sort_list[REQ_ASYNC] = RB_ROOT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1291
1292
1293
1294
1295
  	ad->fifo_expire[REQ_SYNC] = default_read_expire;
  	ad->fifo_expire[REQ_ASYNC] = default_write_expire;
  	ad->antic_expire = default_antic_expire;
  	ad->batch_expire[REQ_SYNC] = default_read_batch_expire;
  	ad->batch_expire[REQ_ASYNC] = default_write_batch_expire;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1296
1297
1298
1299
1300
  
  	ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC];
  	ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10;
  	if (ad->write_batch_count < 2)
  		ad->write_batch_count = 2;
bc1c11697   Jens Axboe   [PATCH] elevator ...
1301
  	return ad;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1302
1303
1304
1305
1306
  }
  
  /*
   * sysfs parts below
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1307
1308
1309
1310
  
  /*
   * as_var_show - format one unsigned tunable for sysfs
   * @var:  value to print
   * @page: destination buffer
   *
   * Writes @var in decimal followed by a newline and returns the number of
   * characters written.  The value is unsigned, so it is printed with %u;
   * %d would show values above INT_MAX as negative numbers.
   */
  static ssize_t
  as_var_show(unsigned int var, char *page)
  {
  	return sprintf(page, "%u\n", var);
  }
  
  /*
   * as_var_store - parse one unsigned tunable written through sysfs
   * @var:   where to store the parsed value
   * @page:  user supplied text
   * @count: number of bytes supplied
   *
   * The text is parsed as a base-10 number by simple_strtoul(); whatever
   * follows the number is ignored.  Always returns @count.
   */
  static ssize_t
  as_var_store(unsigned long *var, const char *page, size_t count)
  {
  	char *rest;

  	*var = simple_strtoul(page, &rest, 10);

  	return count;
  }
e572ec7e4   Al Viro   [PATCH] fix rmmod...
1322
  /*
   * est_time_show - sysfs read handler printing the scheduler's estimates
   * @e:    elevator whose as_data is reported
   * @page: destination buffer
   *
   * Emits four lines: the two exit probabilities (stored values scaled by
   * 100/256 for display as a percentage), the new-process think time and the
   * new-process seek distance.  Returns the number of characters written.
   */
  static ssize_t est_time_show(elevator_t *e, char *page)
  {
  	struct as_data *ad = e->elevator_data;
  	char *out = page;

  	out += sprintf(out, "%lu %% exit probability\n",
  				100*ad->exit_prob/256);
  	out += sprintf(out, "%lu %% probability of exiting without a "
  				"cooperating process submitting IO\n",
  				100*ad->exit_no_coop/256);
  	out += sprintf(out, "%lu ms new thinktime\n", ad->new_ttime_mean);
  	out += sprintf(out, "%llu sectors new seek distance\n",
  				(unsigned long long)ad->new_seek_mean);

  	return out - page;
  }
  
  #define SHOW_FUNCTION(__FUNC, __VAR)				\
3d1ab40f4   Al Viro   [PATCH] elevator_...
1343
  static ssize_t __FUNC(elevator_t *e, char *page)		\
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1344
  {								\
3d1ab40f4   Al Viro   [PATCH] elevator_...
1345
  	struct as_data *ad = e->elevator_data;			\
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1346
1347
  	return as_var_show(jiffies_to_msecs((__VAR)), (page));	\
  }
e572ec7e4   Al Viro   [PATCH] fix rmmod...
1348
1349
1350
1351
1352
  SHOW_FUNCTION(as_read_expire_show, ad->fifo_expire[REQ_SYNC]);
  SHOW_FUNCTION(as_write_expire_show, ad->fifo_expire[REQ_ASYNC]);
  SHOW_FUNCTION(as_antic_expire_show, ad->antic_expire);
  SHOW_FUNCTION(as_read_batch_expire_show, ad->batch_expire[REQ_SYNC]);
  SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1353
1354
1355
  #undef SHOW_FUNCTION
  
  #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX)				\
3d1ab40f4   Al Viro   [PATCH] elevator_...
1356
  static ssize_t __FUNC(elevator_t *e, const char *page, size_t count)	\
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1357
  {									\
3d1ab40f4   Al Viro   [PATCH] elevator_...
1358
1359
  	struct as_data *ad = e->elevator_data;				\
  	int ret = as_var_store(__PTR, (page), count);			\
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1360
1361
1362
1363
1364
1365
1366
  	if (*(__PTR) < (MIN))						\
  		*(__PTR) = (MIN);					\
  	else if (*(__PTR) > (MAX))					\
  		*(__PTR) = (MAX);					\
  	*(__PTR) = msecs_to_jiffies(*(__PTR));				\
  	return ret;							\
  }
e572ec7e4   Al Viro   [PATCH] fix rmmod...
1367
1368
1369
1370
  STORE_FUNCTION(as_read_expire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX);
  STORE_FUNCTION(as_write_expire_store, &ad->fifo_expire[REQ_ASYNC], 0, INT_MAX);
  STORE_FUNCTION(as_antic_expire_store, &ad->antic_expire, 0, INT_MAX);
  STORE_FUNCTION(as_read_batch_expire_store,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1371
  			&ad->batch_expire[REQ_SYNC], 0, INT_MAX);
e572ec7e4   Al Viro   [PATCH] fix rmmod...
1372
  STORE_FUNCTION(as_write_batch_expire_store,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1373
1374
  			&ad->batch_expire[REQ_ASYNC], 0, INT_MAX);
  #undef STORE_FUNCTION
e572ec7e4   Al Viro   [PATCH] fix rmmod...
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
  /*
   * AS_ATTR(name) declares a world-readable, owner-writable attribute served
   * by as_<name>_show() and as_<name>_store().
   */
  #define AS_ATTR(name) \
  	__ATTR(name, S_IRUGO|S_IWUSR, as_##name##_show, as_##name##_store)

  /* sysfs attributes exported by this scheduler; __ATTR_NULL ends the list */
  static struct elv_fs_entry as_attrs[] = {
  	/* read-only statistics */
  	__ATTR_RO(est_time),
  	/* tunables */
  	AS_ATTR(read_expire),
  	AS_ATTR(write_expire),
  	AS_ATTR(antic_expire),
  	AS_ATTR(read_batch_expire),
  	AS_ATTR(write_batch_expire),
  	__ATTR_NULL
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1387
1388
1389
1390
1391
  static struct elevator_type iosched_as = {
  	.ops = {
  		.elevator_merge_fn = 		as_merge,
  		.elevator_merged_fn =		as_merged_request,
  		.elevator_merge_req_fn =	as_merged_requests,
b4878f245   Jens Axboe   [PATCH] 02/05: up...
1392
1393
1394
  		.elevator_dispatch_fn =		as_dispatch_request,
  		.elevator_add_req_fn =		as_add_request,
  		.elevator_activate_req_fn =	as_activate_request,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1395
1396
1397
  		.elevator_deactivate_req_fn = 	as_deactivate_request,
  		.elevator_queue_empty_fn =	as_queue_empty,
  		.elevator_completed_req_fn =	as_completed_request,
e37f346e3   Jens Axboe   [PATCH] as-iosche...
1398
1399
  		.elevator_former_req_fn =	elv_rb_former_request,
  		.elevator_latter_req_fn =	elv_rb_latter_request,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1400
1401
1402
  		.elevator_may_queue_fn =	as_may_queue,
  		.elevator_init_fn =		as_init_queue,
  		.elevator_exit_fn =		as_exit_queue,
e17a9489b   Al Viro   [PATCH] stop elv_...
1403
  		.trim =				as_trim,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1404
  	},
3d1ab40f4   Al Viro   [PATCH] elevator_...
1405
  	.elevator_attrs = as_attrs,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1406
1407
1408
1409
1410
1411
  	.elevator_name = "anticipatory",
  	.elevator_owner = THIS_MODULE,
  };
  
  /*
   * Module entry point: register the anticipatory elevator type and hand
   * elv_register()'s result back to the module loader.
   */
  static int __init as_init(void)
  {
  	int ret = elv_register(&iosched_as);

  	return ret;
  }
  
  /*
   * Module exit point: unregister the elevator type, then wait on the
   * ioc_gone completion if elv_ioc_count_read() still reports io contexts
   * before letting the module go.
   */
  static void __exit as_exit(void)
  {
  	DECLARE_COMPLETION_ONSTACK(all_gone);

  	elv_unregister(&iosched_as);

  	ioc_gone = &all_gone;
  	/* ioc_gone's update must be visible before reading ioc_count */
  	smp_wmb();

  	if (elv_ioc_count_read(ioc_count))
  		wait_for_completion(ioc_gone);

  	synchronize_rcu();
  }
  
  module_init(as_init);
  module_exit(as_exit);
  
  MODULE_AUTHOR("Nick Piggin");
  MODULE_LICENSE("GPL");
  MODULE_DESCRIPTION("anticipatory IO scheduler");