Commit 64100099ed22f71cce656c5c2caecf5c9cf255dc
Committed by Jens Axboe
1 parent 80cfd548ee
Exists in master and in 7 other branches
[BLOCK] mark some block/ variables const
The patch below marks various read-only variables in block/* as const, so that gcc can optimize their use; e.g. gcc will now substitute the value directly at each use site and can even drop the memory backing these variables.

Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Jens Axboe <axboe@suse.de>
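As a minimal, hypothetical sketch of the optimization the message describes (the names below are illustrative, not from the patch): when a file-scope variable is declared static const and its address is never taken, gcc can fold its value into every user and elide the variable's storage, whereas a plain static int forces a memory load at each use.

/* const_fold_sketch.c - illustrative only, not part of this commit */
static int tunable_rw = 4;		/* stays in .data; each use reloads it      */
static const int tunable_ro = 4;	/* foldable; storage can be elided          */

int scaled_rw(int x)
{
	return x * tunable_rw;		/* typically a load plus a multiply         */
}

int scaled_ro(int x)
{
	return x * tunable_ro;		/* typically compiled as x * 4 (a shift)    */
}

Note that in the hunk below, cfq_slice_async and cfq_slice_idle are left non-const.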
Showing 4 changed files with 14 additions and 14 deletions
block/cfq-iosched.c
1 | /* | 1 | /* |
2 | * CFQ, or complete fairness queueing, disk scheduler. | 2 | * CFQ, or complete fairness queueing, disk scheduler. |
3 | * | 3 | * |
4 | * Based on ideas from a previously unfinished io | 4 | * Based on ideas from a previously unfinished io |
5 | * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. | 5 | * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. |
6 | * | 6 | * |
7 | * Copyright (C) 2003 Jens Axboe <axboe@suse.de> | 7 | * Copyright (C) 2003 Jens Axboe <axboe@suse.de> |
8 | */ | 8 | */ |
9 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
10 | #include <linux/fs.h> | 10 | #include <linux/fs.h> |
11 | #include <linux/blkdev.h> | 11 | #include <linux/blkdev.h> |
12 | #include <linux/elevator.h> | 12 | #include <linux/elevator.h> |
13 | #include <linux/bio.h> | 13 | #include <linux/bio.h> |
14 | #include <linux/config.h> | 14 | #include <linux/config.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/init.h> | 17 | #include <linux/init.h> |
18 | #include <linux/compiler.h> | 18 | #include <linux/compiler.h> |
19 | #include <linux/hash.h> | 19 | #include <linux/hash.h> |
20 | #include <linux/rbtree.h> | 20 | #include <linux/rbtree.h> |
21 | #include <linux/mempool.h> | 21 | #include <linux/mempool.h> |
22 | #include <linux/ioprio.h> | 22 | #include <linux/ioprio.h> |
23 | #include <linux/writeback.h> | 23 | #include <linux/writeback.h> |
24 | 24 | ||
25 | /* | 25 | /* |
26 | * tunables | 26 | * tunables |
27 | */ | 27 | */ |
28 | static int cfq_quantum = 4; /* max queue in one round of service */ | 28 | static const int cfq_quantum = 4; /* max queue in one round of service */ |
29 | static int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ | 29 | static const int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ |
30 | static int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; | 30 | static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; |
31 | static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ | 31 | static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ |
32 | static int cfq_back_penalty = 2; /* penalty of a backwards seek */ | 32 | static const int cfq_back_penalty = 2; /* penalty of a backwards seek */ |
33 | 33 | ||
34 | static int cfq_slice_sync = HZ / 10; | 34 | static const int cfq_slice_sync = HZ / 10; |
35 | static int cfq_slice_async = HZ / 25; | 35 | static int cfq_slice_async = HZ / 25; |
36 | static int cfq_slice_async_rq = 2; | 36 | static const int cfq_slice_async_rq = 2; |
37 | static int cfq_slice_idle = HZ / 100; | 37 | static int cfq_slice_idle = HZ / 100; |
38 | 38 | ||
39 | #define CFQ_IDLE_GRACE (HZ / 10) | 39 | #define CFQ_IDLE_GRACE (HZ / 10) |
40 | #define CFQ_SLICE_SCALE (5) | 40 | #define CFQ_SLICE_SCALE (5) |
41 | 41 | ||
42 | #define CFQ_KEY_ASYNC (0) | 42 | #define CFQ_KEY_ASYNC (0) |
43 | #define CFQ_KEY_ANY (0xffff) | 43 | #define CFQ_KEY_ANY (0xffff) |
44 | 44 | ||
45 | /* | 45 | /* |
46 | * disable queueing at the driver/hardware level | 46 | * disable queueing at the driver/hardware level |
47 | */ | 47 | */ |
48 | static int cfq_max_depth = 2; | 48 | static const int cfq_max_depth = 2; |
49 | 49 | ||
50 | /* | 50 | /* |
51 | * for the hash of cfqq inside the cfqd | 51 | * for the hash of cfqq inside the cfqd |
52 | */ | 52 | */ |
53 | #define CFQ_QHASH_SHIFT 6 | 53 | #define CFQ_QHASH_SHIFT 6 |
54 | #define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT) | 54 | #define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT) |
55 | #define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash) | 55 | #define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash) |
56 | 56 | ||
57 | /* | 57 | /* |
58 | * for the hash of crq inside the cfqq | 58 | * for the hash of crq inside the cfqq |
59 | */ | 59 | */ |
60 | #define CFQ_MHASH_SHIFT 6 | 60 | #define CFQ_MHASH_SHIFT 6 |
61 | #define CFQ_MHASH_BLOCK(sec) ((sec) >> 3) | 61 | #define CFQ_MHASH_BLOCK(sec) ((sec) >> 3) |
62 | #define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT) | 62 | #define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT) |
63 | #define CFQ_MHASH_FN(sec) hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT) | 63 | #define CFQ_MHASH_FN(sec) hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT) |
64 | #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) | 64 | #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) |
65 | #define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash) | 65 | #define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash) |
66 | 66 | ||
67 | #define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list) | 67 | #define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list) |
68 | #define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) | 68 | #define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) |
69 | 69 | ||
70 | #define RQ_DATA(rq) (rq)->elevator_private | 70 | #define RQ_DATA(rq) (rq)->elevator_private |
71 | 71 | ||
72 | /* | 72 | /* |
73 | * rb-tree defines | 73 | * rb-tree defines |
74 | */ | 74 | */ |
75 | #define RB_NONE (2) | 75 | #define RB_NONE (2) |
76 | #define RB_EMPTY(node) ((node)->rb_node == NULL) | 76 | #define RB_EMPTY(node) ((node)->rb_node == NULL) |
77 | #define RB_CLEAR_COLOR(node) (node)->rb_color = RB_NONE | 77 | #define RB_CLEAR_COLOR(node) (node)->rb_color = RB_NONE |
78 | #define RB_CLEAR(node) do { \ | 78 | #define RB_CLEAR(node) do { \ |
79 | (node)->rb_parent = NULL; \ | 79 | (node)->rb_parent = NULL; \ |
80 | RB_CLEAR_COLOR((node)); \ | 80 | RB_CLEAR_COLOR((node)); \ |
81 | (node)->rb_right = NULL; \ | 81 | (node)->rb_right = NULL; \ |
82 | (node)->rb_left = NULL; \ | 82 | (node)->rb_left = NULL; \ |
83 | } while (0) | 83 | } while (0) |
84 | #define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL) | 84 | #define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL) |
85 | #define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node) | 85 | #define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node) |
86 | #define rq_rb_key(rq) (rq)->sector | 86 | #define rq_rb_key(rq) (rq)->sector |
87 | 87 | ||
88 | static kmem_cache_t *crq_pool; | 88 | static kmem_cache_t *crq_pool; |
89 | static kmem_cache_t *cfq_pool; | 89 | static kmem_cache_t *cfq_pool; |
90 | static kmem_cache_t *cfq_ioc_pool; | 90 | static kmem_cache_t *cfq_ioc_pool; |
91 | 91 | ||
92 | #define CFQ_PRIO_LISTS IOPRIO_BE_NR | 92 | #define CFQ_PRIO_LISTS IOPRIO_BE_NR |
93 | #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) | 93 | #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) |
94 | #define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE) | 94 | #define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE) |
95 | #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) | 95 | #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) |
96 | 96 | ||
97 | #define ASYNC (0) | 97 | #define ASYNC (0) |
98 | #define SYNC (1) | 98 | #define SYNC (1) |
99 | 99 | ||
100 | #define cfq_cfqq_dispatched(cfqq) \ | 100 | #define cfq_cfqq_dispatched(cfqq) \ |
101 | ((cfqq)->on_dispatch[ASYNC] + (cfqq)->on_dispatch[SYNC]) | 101 | ((cfqq)->on_dispatch[ASYNC] + (cfqq)->on_dispatch[SYNC]) |
102 | 102 | ||
103 | #define cfq_cfqq_class_sync(cfqq) ((cfqq)->key != CFQ_KEY_ASYNC) | 103 | #define cfq_cfqq_class_sync(cfqq) ((cfqq)->key != CFQ_KEY_ASYNC) |
104 | 104 | ||
105 | #define cfq_cfqq_sync(cfqq) \ | 105 | #define cfq_cfqq_sync(cfqq) \ |
106 | (cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC]) | 106 | (cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC]) |
107 | 107 | ||
108 | /* | 108 | /* |
109 | * Per block device queue structure | 109 | * Per block device queue structure |
110 | */ | 110 | */ |
111 | struct cfq_data { | 111 | struct cfq_data { |
112 | atomic_t ref; | 112 | atomic_t ref; |
113 | request_queue_t *queue; | 113 | request_queue_t *queue; |
114 | 114 | ||
115 | /* | 115 | /* |
116 | * rr list of queues with requests and the count of them | 116 | * rr list of queues with requests and the count of them |
117 | */ | 117 | */ |
118 | struct list_head rr_list[CFQ_PRIO_LISTS]; | 118 | struct list_head rr_list[CFQ_PRIO_LISTS]; |
119 | struct list_head busy_rr; | 119 | struct list_head busy_rr; |
120 | struct list_head cur_rr; | 120 | struct list_head cur_rr; |
121 | struct list_head idle_rr; | 121 | struct list_head idle_rr; |
122 | unsigned int busy_queues; | 122 | unsigned int busy_queues; |
123 | 123 | ||
124 | /* | 124 | /* |
125 | * non-ordered list of empty cfqq's | 125 | * non-ordered list of empty cfqq's |
126 | */ | 126 | */ |
127 | struct list_head empty_list; | 127 | struct list_head empty_list; |
128 | 128 | ||
129 | /* | 129 | /* |
130 | * cfqq lookup hash | 130 | * cfqq lookup hash |
131 | */ | 131 | */ |
132 | struct hlist_head *cfq_hash; | 132 | struct hlist_head *cfq_hash; |
133 | 133 | ||
134 | /* | 134 | /* |
135 | * global crq hash for all queues | 135 | * global crq hash for all queues |
136 | */ | 136 | */ |
137 | struct hlist_head *crq_hash; | 137 | struct hlist_head *crq_hash; |
138 | 138 | ||
139 | unsigned int max_queued; | 139 | unsigned int max_queued; |
140 | 140 | ||
141 | mempool_t *crq_pool; | 141 | mempool_t *crq_pool; |
142 | 142 | ||
143 | int rq_in_driver; | 143 | int rq_in_driver; |
144 | 144 | ||
145 | /* | 145 | /* |
146 | * schedule slice state info | 146 | * schedule slice state info |
147 | */ | 147 | */ |
148 | /* | 148 | /* |
149 | * idle window management | 149 | * idle window management |
150 | */ | 150 | */ |
151 | struct timer_list idle_slice_timer; | 151 | struct timer_list idle_slice_timer; |
152 | struct work_struct unplug_work; | 152 | struct work_struct unplug_work; |
153 | 153 | ||
154 | struct cfq_queue *active_queue; | 154 | struct cfq_queue *active_queue; |
155 | struct cfq_io_context *active_cic; | 155 | struct cfq_io_context *active_cic; |
156 | int cur_prio, cur_end_prio; | 156 | int cur_prio, cur_end_prio; |
157 | unsigned int dispatch_slice; | 157 | unsigned int dispatch_slice; |
158 | 158 | ||
159 | struct timer_list idle_class_timer; | 159 | struct timer_list idle_class_timer; |
160 | 160 | ||
161 | sector_t last_sector; | 161 | sector_t last_sector; |
162 | unsigned long last_end_request; | 162 | unsigned long last_end_request; |
163 | 163 | ||
164 | unsigned int rq_starved; | 164 | unsigned int rq_starved; |
165 | 165 | ||
166 | /* | 166 | /* |
167 | * tunables, see top of file | 167 | * tunables, see top of file |
168 | */ | 168 | */ |
169 | unsigned int cfq_quantum; | 169 | unsigned int cfq_quantum; |
170 | unsigned int cfq_queued; | 170 | unsigned int cfq_queued; |
171 | unsigned int cfq_fifo_expire[2]; | 171 | unsigned int cfq_fifo_expire[2]; |
172 | unsigned int cfq_back_penalty; | 172 | unsigned int cfq_back_penalty; |
173 | unsigned int cfq_back_max; | 173 | unsigned int cfq_back_max; |
174 | unsigned int cfq_slice[2]; | 174 | unsigned int cfq_slice[2]; |
175 | unsigned int cfq_slice_async_rq; | 175 | unsigned int cfq_slice_async_rq; |
176 | unsigned int cfq_slice_idle; | 176 | unsigned int cfq_slice_idle; |
177 | unsigned int cfq_max_depth; | 177 | unsigned int cfq_max_depth; |
178 | }; | 178 | }; |
179 | 179 | ||
180 | /* | 180 | /* |
181 | * Per process-grouping structure | 181 | * Per process-grouping structure |
182 | */ | 182 | */ |
183 | struct cfq_queue { | 183 | struct cfq_queue { |
184 | /* reference count */ | 184 | /* reference count */ |
185 | atomic_t ref; | 185 | atomic_t ref; |
186 | /* parent cfq_data */ | 186 | /* parent cfq_data */ |
187 | struct cfq_data *cfqd; | 187 | struct cfq_data *cfqd; |
188 | /* cfqq lookup hash */ | 188 | /* cfqq lookup hash */ |
189 | struct hlist_node cfq_hash; | 189 | struct hlist_node cfq_hash; |
190 | /* hash key */ | 190 | /* hash key */ |
191 | unsigned int key; | 191 | unsigned int key; |
192 | /* on either rr or empty list of cfqd */ | 192 | /* on either rr or empty list of cfqd */ |
193 | struct list_head cfq_list; | 193 | struct list_head cfq_list; |
194 | /* sorted list of pending requests */ | 194 | /* sorted list of pending requests */ |
195 | struct rb_root sort_list; | 195 | struct rb_root sort_list; |
196 | /* if fifo isn't expired, next request to serve */ | 196 | /* if fifo isn't expired, next request to serve */ |
197 | struct cfq_rq *next_crq; | 197 | struct cfq_rq *next_crq; |
198 | /* requests queued in sort_list */ | 198 | /* requests queued in sort_list */ |
199 | int queued[2]; | 199 | int queued[2]; |
200 | /* currently allocated requests */ | 200 | /* currently allocated requests */ |
201 | int allocated[2]; | 201 | int allocated[2]; |
202 | /* fifo list of requests in sort_list */ | 202 | /* fifo list of requests in sort_list */ |
203 | struct list_head fifo; | 203 | struct list_head fifo; |
204 | 204 | ||
205 | unsigned long slice_start; | 205 | unsigned long slice_start; |
206 | unsigned long slice_end; | 206 | unsigned long slice_end; |
207 | unsigned long slice_left; | 207 | unsigned long slice_left; |
208 | unsigned long service_last; | 208 | unsigned long service_last; |
209 | 209 | ||
210 | /* number of requests that are on the dispatch list */ | 210 | /* number of requests that are on the dispatch list */ |
211 | int on_dispatch[2]; | 211 | int on_dispatch[2]; |
212 | 212 | ||
213 | /* io prio of this group */ | 213 | /* io prio of this group */ |
214 | unsigned short ioprio, org_ioprio; | 214 | unsigned short ioprio, org_ioprio; |
215 | unsigned short ioprio_class, org_ioprio_class; | 215 | unsigned short ioprio_class, org_ioprio_class; |
216 | 216 | ||
217 | /* various state flags, see below */ | 217 | /* various state flags, see below */ |
218 | unsigned int flags; | 218 | unsigned int flags; |
219 | }; | 219 | }; |
220 | 220 | ||
221 | struct cfq_rq { | 221 | struct cfq_rq { |
222 | struct rb_node rb_node; | 222 | struct rb_node rb_node; |
223 | sector_t rb_key; | 223 | sector_t rb_key; |
224 | struct request *request; | 224 | struct request *request; |
225 | struct hlist_node hash; | 225 | struct hlist_node hash; |
226 | 226 | ||
227 | struct cfq_queue *cfq_queue; | 227 | struct cfq_queue *cfq_queue; |
228 | struct cfq_io_context *io_context; | 228 | struct cfq_io_context *io_context; |
229 | 229 | ||
230 | unsigned int crq_flags; | 230 | unsigned int crq_flags; |
231 | }; | 231 | }; |
232 | 232 | ||
233 | enum cfqq_state_flags { | 233 | enum cfqq_state_flags { |
234 | CFQ_CFQQ_FLAG_on_rr = 0, | 234 | CFQ_CFQQ_FLAG_on_rr = 0, |
235 | CFQ_CFQQ_FLAG_wait_request, | 235 | CFQ_CFQQ_FLAG_wait_request, |
236 | CFQ_CFQQ_FLAG_must_alloc, | 236 | CFQ_CFQQ_FLAG_must_alloc, |
237 | CFQ_CFQQ_FLAG_must_alloc_slice, | 237 | CFQ_CFQQ_FLAG_must_alloc_slice, |
238 | CFQ_CFQQ_FLAG_must_dispatch, | 238 | CFQ_CFQQ_FLAG_must_dispatch, |
239 | CFQ_CFQQ_FLAG_fifo_expire, | 239 | CFQ_CFQQ_FLAG_fifo_expire, |
240 | CFQ_CFQQ_FLAG_idle_window, | 240 | CFQ_CFQQ_FLAG_idle_window, |
241 | CFQ_CFQQ_FLAG_prio_changed, | 241 | CFQ_CFQQ_FLAG_prio_changed, |
242 | CFQ_CFQQ_FLAG_expired, | 242 | CFQ_CFQQ_FLAG_expired, |
243 | }; | 243 | }; |
244 | 244 | ||
245 | #define CFQ_CFQQ_FNS(name) \ | 245 | #define CFQ_CFQQ_FNS(name) \ |
246 | static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \ | 246 | static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \ |
247 | { \ | 247 | { \ |
248 | cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name); \ | 248 | cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name); \ |
249 | } \ | 249 | } \ |
250 | static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \ | 250 | static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \ |
251 | { \ | 251 | { \ |
252 | cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \ | 252 | cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \ |
253 | } \ | 253 | } \ |
254 | static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \ | 254 | static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \ |
255 | { \ | 255 | { \ |
256 | return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \ | 256 | return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \ |
257 | } | 257 | } |
258 | 258 | ||
259 | CFQ_CFQQ_FNS(on_rr); | 259 | CFQ_CFQQ_FNS(on_rr); |
260 | CFQ_CFQQ_FNS(wait_request); | 260 | CFQ_CFQQ_FNS(wait_request); |
261 | CFQ_CFQQ_FNS(must_alloc); | 261 | CFQ_CFQQ_FNS(must_alloc); |
262 | CFQ_CFQQ_FNS(must_alloc_slice); | 262 | CFQ_CFQQ_FNS(must_alloc_slice); |
263 | CFQ_CFQQ_FNS(must_dispatch); | 263 | CFQ_CFQQ_FNS(must_dispatch); |
264 | CFQ_CFQQ_FNS(fifo_expire); | 264 | CFQ_CFQQ_FNS(fifo_expire); |
265 | CFQ_CFQQ_FNS(idle_window); | 265 | CFQ_CFQQ_FNS(idle_window); |
266 | CFQ_CFQQ_FNS(prio_changed); | 266 | CFQ_CFQQ_FNS(prio_changed); |
267 | CFQ_CFQQ_FNS(expired); | 267 | CFQ_CFQQ_FNS(expired); |
268 | #undef CFQ_CFQQ_FNS | 268 | #undef CFQ_CFQQ_FNS |
269 | 269 | ||
270 | enum cfq_rq_state_flags { | 270 | enum cfq_rq_state_flags { |
271 | CFQ_CRQ_FLAG_is_sync = 0, | 271 | CFQ_CRQ_FLAG_is_sync = 0, |
272 | }; | 272 | }; |
273 | 273 | ||
274 | #define CFQ_CRQ_FNS(name) \ | 274 | #define CFQ_CRQ_FNS(name) \ |
275 | static inline void cfq_mark_crq_##name(struct cfq_rq *crq) \ | 275 | static inline void cfq_mark_crq_##name(struct cfq_rq *crq) \ |
276 | { \ | 276 | { \ |
277 | crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name); \ | 277 | crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name); \ |
278 | } \ | 278 | } \ |
279 | static inline void cfq_clear_crq_##name(struct cfq_rq *crq) \ | 279 | static inline void cfq_clear_crq_##name(struct cfq_rq *crq) \ |
280 | { \ | 280 | { \ |
281 | crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name); \ | 281 | crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name); \ |
282 | } \ | 282 | } \ |
283 | static inline int cfq_crq_##name(const struct cfq_rq *crq) \ | 283 | static inline int cfq_crq_##name(const struct cfq_rq *crq) \ |
284 | { \ | 284 | { \ |
285 | return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \ | 285 | return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \ |
286 | } | 286 | } |
287 | 287 | ||
288 | CFQ_CRQ_FNS(is_sync); | 288 | CFQ_CRQ_FNS(is_sync); |
289 | #undef CFQ_CRQ_FNS | 289 | #undef CFQ_CRQ_FNS |
290 | 290 | ||
291 | static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short); | 291 | static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short); |
292 | static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *); | 292 | static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *); |
293 | static void cfq_put_cfqd(struct cfq_data *cfqd); | 293 | static void cfq_put_cfqd(struct cfq_data *cfqd); |
294 | 294 | ||
295 | #define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE) | 295 | #define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE) |
296 | 296 | ||
297 | /* | 297 | /* |
298 | * lots of deadline iosched dupes, can be abstracted later... | 298 | * lots of deadline iosched dupes, can be abstracted later... |
299 | */ | 299 | */ |
300 | static inline void cfq_del_crq_hash(struct cfq_rq *crq) | 300 | static inline void cfq_del_crq_hash(struct cfq_rq *crq) |
301 | { | 301 | { |
302 | hlist_del_init(&crq->hash); | 302 | hlist_del_init(&crq->hash); |
303 | } | 303 | } |
304 | 304 | ||
305 | static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq) | 305 | static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq) |
306 | { | 306 | { |
307 | const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request)); | 307 | const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request)); |
308 | 308 | ||
309 | hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]); | 309 | hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]); |
310 | } | 310 | } |
311 | 311 | ||
312 | static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset) | 312 | static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset) |
313 | { | 313 | { |
314 | struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)]; | 314 | struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)]; |
315 | struct hlist_node *entry, *next; | 315 | struct hlist_node *entry, *next; |
316 | 316 | ||
317 | hlist_for_each_safe(entry, next, hash_list) { | 317 | hlist_for_each_safe(entry, next, hash_list) { |
318 | struct cfq_rq *crq = list_entry_hash(entry); | 318 | struct cfq_rq *crq = list_entry_hash(entry); |
319 | struct request *__rq = crq->request; | 319 | struct request *__rq = crq->request; |
320 | 320 | ||
321 | if (!rq_mergeable(__rq)) { | 321 | if (!rq_mergeable(__rq)) { |
322 | cfq_del_crq_hash(crq); | 322 | cfq_del_crq_hash(crq); |
323 | continue; | 323 | continue; |
324 | } | 324 | } |
325 | 325 | ||
326 | if (rq_hash_key(__rq) == offset) | 326 | if (rq_hash_key(__rq) == offset) |
327 | return __rq; | 327 | return __rq; |
328 | } | 328 | } |
329 | 329 | ||
330 | return NULL; | 330 | return NULL; |
331 | } | 331 | } |
332 | 332 | ||
333 | /* | 333 | /* |
334 | * scheduler run of queue, if there are requests pending and no one in the | 334 | * scheduler run of queue, if there are requests pending and no one in the |
335 | * driver that will restart queueing | 335 | * driver that will restart queueing |
336 | */ | 336 | */ |
337 | static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) | 337 | static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) |
338 | { | 338 | { |
339 | if (!cfqd->rq_in_driver && cfqd->busy_queues) | 339 | if (!cfqd->rq_in_driver && cfqd->busy_queues) |
340 | kblockd_schedule_work(&cfqd->unplug_work); | 340 | kblockd_schedule_work(&cfqd->unplug_work); |
341 | } | 341 | } |
342 | 342 | ||
343 | static int cfq_queue_empty(request_queue_t *q) | 343 | static int cfq_queue_empty(request_queue_t *q) |
344 | { | 344 | { |
345 | struct cfq_data *cfqd = q->elevator->elevator_data; | 345 | struct cfq_data *cfqd = q->elevator->elevator_data; |
346 | 346 | ||
347 | return !cfqd->busy_queues; | 347 | return !cfqd->busy_queues; |
348 | } | 348 | } |
349 | 349 | ||
350 | /* | 350 | /* |
351 | * Lifted from AS - choose which of crq1 and crq2 that is best served now. | 351 | * Lifted from AS - choose which of crq1 and crq2 that is best served now. |
352 | * We choose the request that is closest to the head right now. Distance | 352 | * We choose the request that is closest to the head right now. Distance |
353 | * behind the head are penalized and only allowed to a certain extent. | 353 | * behind the head are penalized and only allowed to a certain extent. |
354 | */ | 354 | */ |
355 | static struct cfq_rq * | 355 | static struct cfq_rq * |
356 | cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) | 356 | cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) |
357 | { | 357 | { |
358 | sector_t last, s1, s2, d1 = 0, d2 = 0; | 358 | sector_t last, s1, s2, d1 = 0, d2 = 0; |
359 | int r1_wrap = 0, r2_wrap = 0; /* requests are behind the disk head */ | 359 | int r1_wrap = 0, r2_wrap = 0; /* requests are behind the disk head */ |
360 | unsigned long back_max; | 360 | unsigned long back_max; |
361 | 361 | ||
362 | if (crq1 == NULL || crq1 == crq2) | 362 | if (crq1 == NULL || crq1 == crq2) |
363 | return crq2; | 363 | return crq2; |
364 | if (crq2 == NULL) | 364 | if (crq2 == NULL) |
365 | return crq1; | 365 | return crq1; |
366 | 366 | ||
367 | if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2)) | 367 | if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2)) |
368 | return crq1; | 368 | return crq1; |
369 | else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1)) | 369 | else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1)) |
370 | return crq2; | 370 | return crq2; |
371 | 371 | ||
372 | s1 = crq1->request->sector; | 372 | s1 = crq1->request->sector; |
373 | s2 = crq2->request->sector; | 373 | s2 = crq2->request->sector; |
374 | 374 | ||
375 | last = cfqd->last_sector; | 375 | last = cfqd->last_sector; |
376 | 376 | ||
377 | /* | 377 | /* |
378 | * by definition, 1KiB is 2 sectors | 378 | * by definition, 1KiB is 2 sectors |
379 | */ | 379 | */ |
380 | back_max = cfqd->cfq_back_max * 2; | 380 | back_max = cfqd->cfq_back_max * 2; |
381 | 381 | ||
382 | /* | 382 | /* |
383 | * Strict one way elevator _except_ in the case where we allow | 383 | * Strict one way elevator _except_ in the case where we allow |
384 | * short backward seeks which are biased as twice the cost of a | 384 | * short backward seeks which are biased as twice the cost of a |
385 | * similar forward seek. | 385 | * similar forward seek. |
386 | */ | 386 | */ |
387 | if (s1 >= last) | 387 | if (s1 >= last) |
388 | d1 = s1 - last; | 388 | d1 = s1 - last; |
389 | else if (s1 + back_max >= last) | 389 | else if (s1 + back_max >= last) |
390 | d1 = (last - s1) * cfqd->cfq_back_penalty; | 390 | d1 = (last - s1) * cfqd->cfq_back_penalty; |
391 | else | 391 | else |
392 | r1_wrap = 1; | 392 | r1_wrap = 1; |
393 | 393 | ||
394 | if (s2 >= last) | 394 | if (s2 >= last) |
395 | d2 = s2 - last; | 395 | d2 = s2 - last; |
396 | else if (s2 + back_max >= last) | 396 | else if (s2 + back_max >= last) |
397 | d2 = (last - s2) * cfqd->cfq_back_penalty; | 397 | d2 = (last - s2) * cfqd->cfq_back_penalty; |
398 | else | 398 | else |
399 | r2_wrap = 1; | 399 | r2_wrap = 1; |
400 | 400 | ||
401 | /* Found required data */ | 401 | /* Found required data */ |
402 | if (!r1_wrap && r2_wrap) | 402 | if (!r1_wrap && r2_wrap) |
403 | return crq1; | 403 | return crq1; |
404 | else if (!r2_wrap && r1_wrap) | 404 | else if (!r2_wrap && r1_wrap) |
405 | return crq2; | 405 | return crq2; |
406 | else if (r1_wrap && r2_wrap) { | 406 | else if (r1_wrap && r2_wrap) { |
407 | /* both behind the head */ | 407 | /* both behind the head */ |
408 | if (s1 <= s2) | 408 | if (s1 <= s2) |
409 | return crq1; | 409 | return crq1; |
410 | else | 410 | else |
411 | return crq2; | 411 | return crq2; |
412 | } | 412 | } |
413 | 413 | ||
414 | /* Both requests in front of the head */ | 414 | /* Both requests in front of the head */ |
415 | if (d1 < d2) | 415 | if (d1 < d2) |
416 | return crq1; | 416 | return crq1; |
417 | else if (d2 < d1) | 417 | else if (d2 < d1) |
418 | return crq2; | 418 | return crq2; |
419 | else { | 419 | else { |
420 | if (s1 >= s2) | 420 | if (s1 >= s2) |
421 | return crq1; | 421 | return crq1; |
422 | else | 422 | else |
423 | return crq2; | 423 | return crq2; |
424 | } | 424 | } |
425 | } | 425 | } |
426 | 426 | ||
427 | /* | 427 | /* |
428 | * would be nice to take fifo expire time into account as well | 428 | * would be nice to take fifo expire time into account as well |
429 | */ | 429 | */ |
430 | static struct cfq_rq * | 430 | static struct cfq_rq * |
431 | cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 431 | cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
432 | struct cfq_rq *last) | 432 | struct cfq_rq *last) |
433 | { | 433 | { |
434 | struct cfq_rq *crq_next = NULL, *crq_prev = NULL; | 434 | struct cfq_rq *crq_next = NULL, *crq_prev = NULL; |
435 | struct rb_node *rbnext, *rbprev; | 435 | struct rb_node *rbnext, *rbprev; |
436 | 436 | ||
437 | if (!(rbnext = rb_next(&last->rb_node))) { | 437 | if (!(rbnext = rb_next(&last->rb_node))) { |
438 | rbnext = rb_first(&cfqq->sort_list); | 438 | rbnext = rb_first(&cfqq->sort_list); |
439 | if (rbnext == &last->rb_node) | 439 | if (rbnext == &last->rb_node) |
440 | rbnext = NULL; | 440 | rbnext = NULL; |
441 | } | 441 | } |
442 | 442 | ||
443 | rbprev = rb_prev(&last->rb_node); | 443 | rbprev = rb_prev(&last->rb_node); |
444 | 444 | ||
445 | if (rbprev) | 445 | if (rbprev) |
446 | crq_prev = rb_entry_crq(rbprev); | 446 | crq_prev = rb_entry_crq(rbprev); |
447 | if (rbnext) | 447 | if (rbnext) |
448 | crq_next = rb_entry_crq(rbnext); | 448 | crq_next = rb_entry_crq(rbnext); |
449 | 449 | ||
450 | return cfq_choose_req(cfqd, crq_next, crq_prev); | 450 | return cfq_choose_req(cfqd, crq_next, crq_prev); |
451 | } | 451 | } |
452 | 452 | ||
453 | static void cfq_update_next_crq(struct cfq_rq *crq) | 453 | static void cfq_update_next_crq(struct cfq_rq *crq) |
454 | { | 454 | { |
455 | struct cfq_queue *cfqq = crq->cfq_queue; | 455 | struct cfq_queue *cfqq = crq->cfq_queue; |
456 | 456 | ||
457 | if (cfqq->next_crq == crq) | 457 | if (cfqq->next_crq == crq) |
458 | cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq); | 458 | cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq); |
459 | } | 459 | } |
460 | 460 | ||
461 | static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) | 461 | static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) |
462 | { | 462 | { |
463 | struct cfq_data *cfqd = cfqq->cfqd; | 463 | struct cfq_data *cfqd = cfqq->cfqd; |
464 | struct list_head *list, *entry; | 464 | struct list_head *list, *entry; |
465 | 465 | ||
466 | BUG_ON(!cfq_cfqq_on_rr(cfqq)); | 466 | BUG_ON(!cfq_cfqq_on_rr(cfqq)); |
467 | 467 | ||
468 | list_del(&cfqq->cfq_list); | 468 | list_del(&cfqq->cfq_list); |
469 | 469 | ||
470 | if (cfq_class_rt(cfqq)) | 470 | if (cfq_class_rt(cfqq)) |
471 | list = &cfqd->cur_rr; | 471 | list = &cfqd->cur_rr; |
472 | else if (cfq_class_idle(cfqq)) | 472 | else if (cfq_class_idle(cfqq)) |
473 | list = &cfqd->idle_rr; | 473 | list = &cfqd->idle_rr; |
474 | else { | 474 | else { |
475 | /* | 475 | /* |
476 | * if cfqq has requests in flight, don't allow it to be | 476 | * if cfqq has requests in flight, don't allow it to be |
477 | * found in cfq_set_active_queue before it has finished them. | 477 | * found in cfq_set_active_queue before it has finished them. |
478 | * this is done to increase fairness between a process that | 478 | * this is done to increase fairness between a process that |
479 | * has lots of io pending vs one that only generates one | 479 | * has lots of io pending vs one that only generates one |
480 | * sporadically or synchronously | 480 | * sporadically or synchronously |
481 | */ | 481 | */ |
482 | if (cfq_cfqq_dispatched(cfqq)) | 482 | if (cfq_cfqq_dispatched(cfqq)) |
483 | list = &cfqd->busy_rr; | 483 | list = &cfqd->busy_rr; |
484 | else | 484 | else |
485 | list = &cfqd->rr_list[cfqq->ioprio]; | 485 | list = &cfqd->rr_list[cfqq->ioprio]; |
486 | } | 486 | } |
487 | 487 | ||
488 | /* | 488 | /* |
489 | * if queue was preempted, just add to front to be fair. busy_rr | 489 | * if queue was preempted, just add to front to be fair. busy_rr |
490 | * isn't sorted. | 490 | * isn't sorted. |
491 | */ | 491 | */ |
492 | if (preempted || list == &cfqd->busy_rr) { | 492 | if (preempted || list == &cfqd->busy_rr) { |
493 | list_add(&cfqq->cfq_list, list); | 493 | list_add(&cfqq->cfq_list, list); |
494 | return; | 494 | return; |
495 | } | 495 | } |
496 | 496 | ||
497 | /* | 497 | /* |
498 | * sort by when queue was last serviced | 498 | * sort by when queue was last serviced |
499 | */ | 499 | */ |
500 | entry = list; | 500 | entry = list; |
501 | while ((entry = entry->prev) != list) { | 501 | while ((entry = entry->prev) != list) { |
502 | struct cfq_queue *__cfqq = list_entry_cfqq(entry); | 502 | struct cfq_queue *__cfqq = list_entry_cfqq(entry); |
503 | 503 | ||
504 | if (!__cfqq->service_last) | 504 | if (!__cfqq->service_last) |
505 | break; | 505 | break; |
506 | if (time_before(__cfqq->service_last, cfqq->service_last)) | 506 | if (time_before(__cfqq->service_last, cfqq->service_last)) |
507 | break; | 507 | break; |
508 | } | 508 | } |
509 | 509 | ||
510 | list_add(&cfqq->cfq_list, entry); | 510 | list_add(&cfqq->cfq_list, entry); |
511 | } | 511 | } |
512 | 512 | ||
513 | /* | 513 | /* |
514 | * add to busy list of queues for service, trying to be fair in ordering | 514 | * add to busy list of queues for service, trying to be fair in ordering |
515 | * the pending list according to last request service | 515 | * the pending list according to last request service |
516 | */ | 516 | */ |
517 | static inline void | 517 | static inline void |
518 | cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 518 | cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
519 | { | 519 | { |
520 | BUG_ON(cfq_cfqq_on_rr(cfqq)); | 520 | BUG_ON(cfq_cfqq_on_rr(cfqq)); |
521 | cfq_mark_cfqq_on_rr(cfqq); | 521 | cfq_mark_cfqq_on_rr(cfqq); |
522 | cfqd->busy_queues++; | 522 | cfqd->busy_queues++; |
523 | 523 | ||
524 | cfq_resort_rr_list(cfqq, 0); | 524 | cfq_resort_rr_list(cfqq, 0); |
525 | } | 525 | } |
526 | 526 | ||
527 | static inline void | 527 | static inline void |
528 | cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 528 | cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
529 | { | 529 | { |
530 | BUG_ON(!cfq_cfqq_on_rr(cfqq)); | 530 | BUG_ON(!cfq_cfqq_on_rr(cfqq)); |
531 | cfq_clear_cfqq_on_rr(cfqq); | 531 | cfq_clear_cfqq_on_rr(cfqq); |
532 | list_move(&cfqq->cfq_list, &cfqd->empty_list); | 532 | list_move(&cfqq->cfq_list, &cfqd->empty_list); |
533 | 533 | ||
534 | BUG_ON(!cfqd->busy_queues); | 534 | BUG_ON(!cfqd->busy_queues); |
535 | cfqd->busy_queues--; | 535 | cfqd->busy_queues--; |
536 | } | 536 | } |
537 | 537 | ||
538 | /* | 538 | /* |
539 | * rb tree support functions | 539 | * rb tree support functions |
540 | */ | 540 | */ |
541 | static inline void cfq_del_crq_rb(struct cfq_rq *crq) | 541 | static inline void cfq_del_crq_rb(struct cfq_rq *crq) |
542 | { | 542 | { |
543 | struct cfq_queue *cfqq = crq->cfq_queue; | 543 | struct cfq_queue *cfqq = crq->cfq_queue; |
544 | struct cfq_data *cfqd = cfqq->cfqd; | 544 | struct cfq_data *cfqd = cfqq->cfqd; |
545 | const int sync = cfq_crq_is_sync(crq); | 545 | const int sync = cfq_crq_is_sync(crq); |
546 | 546 | ||
547 | BUG_ON(!cfqq->queued[sync]); | 547 | BUG_ON(!cfqq->queued[sync]); |
548 | cfqq->queued[sync]--; | 548 | cfqq->queued[sync]--; |
549 | 549 | ||
550 | cfq_update_next_crq(crq); | 550 | cfq_update_next_crq(crq); |
551 | 551 | ||
552 | rb_erase(&crq->rb_node, &cfqq->sort_list); | 552 | rb_erase(&crq->rb_node, &cfqq->sort_list); |
553 | RB_CLEAR_COLOR(&crq->rb_node); | 553 | RB_CLEAR_COLOR(&crq->rb_node); |
554 | 554 | ||
555 | if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list)) | 555 | if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list)) |
556 | cfq_del_cfqq_rr(cfqd, cfqq); | 556 | cfq_del_cfqq_rr(cfqd, cfqq); |
557 | } | 557 | } |
558 | 558 | ||
559 | static struct cfq_rq * | 559 | static struct cfq_rq * |
560 | __cfq_add_crq_rb(struct cfq_rq *crq) | 560 | __cfq_add_crq_rb(struct cfq_rq *crq) |
561 | { | 561 | { |
562 | struct rb_node **p = &crq->cfq_queue->sort_list.rb_node; | 562 | struct rb_node **p = &crq->cfq_queue->sort_list.rb_node; |
563 | struct rb_node *parent = NULL; | 563 | struct rb_node *parent = NULL; |
564 | struct cfq_rq *__crq; | 564 | struct cfq_rq *__crq; |
565 | 565 | ||
566 | while (*p) { | 566 | while (*p) { |
567 | parent = *p; | 567 | parent = *p; |
568 | __crq = rb_entry_crq(parent); | 568 | __crq = rb_entry_crq(parent); |
569 | 569 | ||
570 | if (crq->rb_key < __crq->rb_key) | 570 | if (crq->rb_key < __crq->rb_key) |
571 | p = &(*p)->rb_left; | 571 | p = &(*p)->rb_left; |
572 | else if (crq->rb_key > __crq->rb_key) | 572 | else if (crq->rb_key > __crq->rb_key) |
573 | p = &(*p)->rb_right; | 573 | p = &(*p)->rb_right; |
574 | else | 574 | else |
575 | return __crq; | 575 | return __crq; |
576 | } | 576 | } |
577 | 577 | ||
578 | rb_link_node(&crq->rb_node, parent, p); | 578 | rb_link_node(&crq->rb_node, parent, p); |
579 | return NULL; | 579 | return NULL; |
580 | } | 580 | } |
581 | 581 | ||
582 | static void cfq_add_crq_rb(struct cfq_rq *crq) | 582 | static void cfq_add_crq_rb(struct cfq_rq *crq) |
583 | { | 583 | { |
584 | struct cfq_queue *cfqq = crq->cfq_queue; | 584 | struct cfq_queue *cfqq = crq->cfq_queue; |
585 | struct cfq_data *cfqd = cfqq->cfqd; | 585 | struct cfq_data *cfqd = cfqq->cfqd; |
586 | struct request *rq = crq->request; | 586 | struct request *rq = crq->request; |
587 | struct cfq_rq *__alias; | 587 | struct cfq_rq *__alias; |
588 | 588 | ||
589 | crq->rb_key = rq_rb_key(rq); | 589 | crq->rb_key = rq_rb_key(rq); |
590 | cfqq->queued[cfq_crq_is_sync(crq)]++; | 590 | cfqq->queued[cfq_crq_is_sync(crq)]++; |
591 | 591 | ||
592 | /* | 592 | /* |
593 | * looks a little odd, but the first insert might return an alias. | 593 | * looks a little odd, but the first insert might return an alias. |
594 | * if that happens, put the alias on the dispatch list | 594 | * if that happens, put the alias on the dispatch list |
595 | */ | 595 | */ |
596 | while ((__alias = __cfq_add_crq_rb(crq)) != NULL) | 596 | while ((__alias = __cfq_add_crq_rb(crq)) != NULL) |
597 | cfq_dispatch_insert(cfqd->queue, __alias); | 597 | cfq_dispatch_insert(cfqd->queue, __alias); |
598 | 598 | ||
599 | rb_insert_color(&crq->rb_node, &cfqq->sort_list); | 599 | rb_insert_color(&crq->rb_node, &cfqq->sort_list); |
600 | 600 | ||
601 | if (!cfq_cfqq_on_rr(cfqq)) | 601 | if (!cfq_cfqq_on_rr(cfqq)) |
602 | cfq_add_cfqq_rr(cfqd, cfqq); | 602 | cfq_add_cfqq_rr(cfqd, cfqq); |
603 | 603 | ||
604 | /* | 604 | /* |
605 | * check if this request is a better next-serve candidate | 605 | * check if this request is a better next-serve candidate |
606 | */ | 606 | */ |
607 | cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); | 607 | cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); |
608 | } | 608 | } |
609 | 609 | ||
610 | static inline void | 610 | static inline void |
611 | cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) | 611 | cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) |
612 | { | 612 | { |
613 | rb_erase(&crq->rb_node, &cfqq->sort_list); | 613 | rb_erase(&crq->rb_node, &cfqq->sort_list); |
614 | cfqq->queued[cfq_crq_is_sync(crq)]--; | 614 | cfqq->queued[cfq_crq_is_sync(crq)]--; |
615 | 615 | ||
616 | cfq_add_crq_rb(crq); | 616 | cfq_add_crq_rb(crq); |
617 | } | 617 | } |
618 | 618 | ||
619 | static struct request *cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector) | 619 | static struct request *cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector) |
620 | 620 | ||
621 | { | 621 | { |
622 | struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid, CFQ_KEY_ANY); | 622 | struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid, CFQ_KEY_ANY); |
623 | struct rb_node *n; | 623 | struct rb_node *n; |
624 | 624 | ||
625 | if (!cfqq) | 625 | if (!cfqq) |
626 | goto out; | 626 | goto out; |
627 | 627 | ||
628 | n = cfqq->sort_list.rb_node; | 628 | n = cfqq->sort_list.rb_node; |
629 | while (n) { | 629 | while (n) { |
630 | struct cfq_rq *crq = rb_entry_crq(n); | 630 | struct cfq_rq *crq = rb_entry_crq(n); |
631 | 631 | ||
632 | if (sector < crq->rb_key) | 632 | if (sector < crq->rb_key) |
633 | n = n->rb_left; | 633 | n = n->rb_left; |
634 | else if (sector > crq->rb_key) | 634 | else if (sector > crq->rb_key) |
635 | n = n->rb_right; | 635 | n = n->rb_right; |
636 | else | 636 | else |
637 | return crq->request; | 637 | return crq->request; |
638 | } | 638 | } |
639 | 639 | ||
640 | out: | 640 | out: |
641 | return NULL; | 641 | return NULL; |
642 | } | 642 | } |
643 | 643 | ||
644 | static void cfq_activate_request(request_queue_t *q, struct request *rq) | 644 | static void cfq_activate_request(request_queue_t *q, struct request *rq) |
645 | { | 645 | { |
646 | struct cfq_data *cfqd = q->elevator->elevator_data; | 646 | struct cfq_data *cfqd = q->elevator->elevator_data; |
647 | 647 | ||
648 | cfqd->rq_in_driver++; | 648 | cfqd->rq_in_driver++; |
649 | } | 649 | } |
650 | 650 | ||
651 | static void cfq_deactivate_request(request_queue_t *q, struct request *rq) | 651 | static void cfq_deactivate_request(request_queue_t *q, struct request *rq) |
652 | { | 652 | { |
653 | struct cfq_data *cfqd = q->elevator->elevator_data; | 653 | struct cfq_data *cfqd = q->elevator->elevator_data; |
654 | 654 | ||
655 | WARN_ON(!cfqd->rq_in_driver); | 655 | WARN_ON(!cfqd->rq_in_driver); |
656 | cfqd->rq_in_driver--; | 656 | cfqd->rq_in_driver--; |
657 | } | 657 | } |
658 | 658 | ||
659 | static void cfq_remove_request(struct request *rq) | 659 | static void cfq_remove_request(struct request *rq) |
660 | { | 660 | { |
661 | struct cfq_rq *crq = RQ_DATA(rq); | 661 | struct cfq_rq *crq = RQ_DATA(rq); |
662 | 662 | ||
663 | list_del_init(&rq->queuelist); | 663 | list_del_init(&rq->queuelist); |
664 | cfq_del_crq_rb(crq); | 664 | cfq_del_crq_rb(crq); |
665 | cfq_del_crq_hash(crq); | 665 | cfq_del_crq_hash(crq); |
666 | } | 666 | } |
667 | 667 | ||
668 | static int | 668 | static int |
669 | cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) | 669 | cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) |
670 | { | 670 | { |
671 | struct cfq_data *cfqd = q->elevator->elevator_data; | 671 | struct cfq_data *cfqd = q->elevator->elevator_data; |
672 | struct request *__rq; | 672 | struct request *__rq; |
673 | int ret; | 673 | int ret; |
674 | 674 | ||
675 | __rq = cfq_find_rq_hash(cfqd, bio->bi_sector); | 675 | __rq = cfq_find_rq_hash(cfqd, bio->bi_sector); |
676 | if (__rq && elv_rq_merge_ok(__rq, bio)) { | 676 | if (__rq && elv_rq_merge_ok(__rq, bio)) { |
677 | ret = ELEVATOR_BACK_MERGE; | 677 | ret = ELEVATOR_BACK_MERGE; |
678 | goto out; | 678 | goto out; |
679 | } | 679 | } |
680 | 680 | ||
681 | __rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio)); | 681 | __rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio)); |
682 | if (__rq && elv_rq_merge_ok(__rq, bio)) { | 682 | if (__rq && elv_rq_merge_ok(__rq, bio)) { |
683 | ret = ELEVATOR_FRONT_MERGE; | 683 | ret = ELEVATOR_FRONT_MERGE; |
684 | goto out; | 684 | goto out; |
685 | } | 685 | } |
686 | 686 | ||
687 | return ELEVATOR_NO_MERGE; | 687 | return ELEVATOR_NO_MERGE; |
688 | out: | 688 | out: |
689 | *req = __rq; | 689 | *req = __rq; |
690 | return ret; | 690 | return ret; |
691 | } | 691 | } |
692 | 692 | ||
693 | static void cfq_merged_request(request_queue_t *q, struct request *req) | 693 | static void cfq_merged_request(request_queue_t *q, struct request *req) |
694 | { | 694 | { |
695 | struct cfq_data *cfqd = q->elevator->elevator_data; | 695 | struct cfq_data *cfqd = q->elevator->elevator_data; |
696 | struct cfq_rq *crq = RQ_DATA(req); | 696 | struct cfq_rq *crq = RQ_DATA(req); |
697 | 697 | ||
698 | cfq_del_crq_hash(crq); | 698 | cfq_del_crq_hash(crq); |
699 | cfq_add_crq_hash(cfqd, crq); | 699 | cfq_add_crq_hash(cfqd, crq); |
700 | 700 | ||
701 | if (rq_rb_key(req) != crq->rb_key) { | 701 | if (rq_rb_key(req) != crq->rb_key) { |
702 | struct cfq_queue *cfqq = crq->cfq_queue; | 702 | struct cfq_queue *cfqq = crq->cfq_queue; |
703 | 703 | ||
704 | cfq_update_next_crq(crq); | 704 | cfq_update_next_crq(crq); |
705 | cfq_reposition_crq_rb(cfqq, crq); | 705 | cfq_reposition_crq_rb(cfqq, crq); |
706 | } | 706 | } |
707 | } | 707 | } |
708 | 708 | ||
709 | static void | 709 | static void |
710 | cfq_merged_requests(request_queue_t *q, struct request *rq, | 710 | cfq_merged_requests(request_queue_t *q, struct request *rq, |
711 | struct request *next) | 711 | struct request *next) |
712 | { | 712 | { |
713 | cfq_merged_request(q, rq); | 713 | cfq_merged_request(q, rq); |
714 | 714 | ||
715 | /* | 715 | /* |
716 | * reposition in fifo if next is older than rq | 716 | * reposition in fifo if next is older than rq |
717 | */ | 717 | */ |
718 | if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && | 718 | if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && |
719 | time_before(next->start_time, rq->start_time)) | 719 | time_before(next->start_time, rq->start_time)) |
720 | list_move(&rq->queuelist, &next->queuelist); | 720 | list_move(&rq->queuelist, &next->queuelist); |
721 | 721 | ||
722 | cfq_remove_request(next); | 722 | cfq_remove_request(next); |
723 | } | 723 | } |
724 | 724 | ||
725 | static inline void | 725 | static inline void |
726 | __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 726 | __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
727 | { | 727 | { |
728 | if (cfqq) { | 728 | if (cfqq) { |
729 | /* | 729 | /* |
730 | * stop potential idle class queues waiting service | 730 | * stop potential idle class queues waiting service |
731 | */ | 731 | */ |
732 | del_timer(&cfqd->idle_class_timer); | 732 | del_timer(&cfqd->idle_class_timer); |
733 | 733 | ||
734 | cfqq->slice_start = jiffies; | 734 | cfqq->slice_start = jiffies; |
735 | cfqq->slice_end = 0; | 735 | cfqq->slice_end = 0; |
736 | cfqq->slice_left = 0; | 736 | cfqq->slice_left = 0; |
737 | cfq_clear_cfqq_must_alloc_slice(cfqq); | 737 | cfq_clear_cfqq_must_alloc_slice(cfqq); |
738 | cfq_clear_cfqq_fifo_expire(cfqq); | 738 | cfq_clear_cfqq_fifo_expire(cfqq); |
739 | cfq_clear_cfqq_expired(cfqq); | 739 | cfq_clear_cfqq_expired(cfqq); |
740 | } | 740 | } |
741 | 741 | ||
742 | cfqd->active_queue = cfqq; | 742 | cfqd->active_queue = cfqq; |
743 | } | 743 | } |
744 | 744 | ||
745 | /* | 745 | /* |
746 | * 0 | 746 | * 0 |
747 | * 0,1 | 747 | * 0,1 |
748 | * 0,1,2 | 748 | * 0,1,2 |
749 | * 0,1,2,3 | 749 | * 0,1,2,3 |
750 | * 0,1,2,3,4 | 750 | * 0,1,2,3,4 |
751 | * 0,1,2,3,4,5 | 751 | * 0,1,2,3,4,5 |
752 | * 0,1,2,3,4,5,6 | 752 | * 0,1,2,3,4,5,6 |
753 | * 0,1,2,3,4,5,6,7 | 753 | * 0,1,2,3,4,5,6,7 |
754 | */ | 754 | */ |
755 | static int cfq_get_next_prio_level(struct cfq_data *cfqd) | 755 | static int cfq_get_next_prio_level(struct cfq_data *cfqd) |
756 | { | 756 | { |
757 | int prio, wrap; | 757 | int prio, wrap; |
758 | 758 | ||
759 | prio = -1; | 759 | prio = -1; |
760 | wrap = 0; | 760 | wrap = 0; |
761 | do { | 761 | do { |
762 | int p; | 762 | int p; |
763 | 763 | ||
764 | for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) { | 764 | for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) { |
765 | if (!list_empty(&cfqd->rr_list[p])) { | 765 | if (!list_empty(&cfqd->rr_list[p])) { |
766 | prio = p; | 766 | prio = p; |
767 | break; | 767 | break; |
768 | } | 768 | } |
769 | } | 769 | } |
770 | 770 | ||
771 | if (prio != -1) | 771 | if (prio != -1) |
772 | break; | 772 | break; |
773 | cfqd->cur_prio = 0; | 773 | cfqd->cur_prio = 0; |
774 | if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) { | 774 | if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) { |
775 | cfqd->cur_end_prio = 0; | 775 | cfqd->cur_end_prio = 0; |
776 | if (wrap) | 776 | if (wrap) |
777 | break; | 777 | break; |
778 | wrap = 1; | 778 | wrap = 1; |
779 | } | 779 | } |
780 | } while (1); | 780 | } while (1); |
781 | 781 | ||
782 | if (unlikely(prio == -1)) | 782 | if (unlikely(prio == -1)) |
783 | return -1; | 783 | return -1; |
784 | 784 | ||
785 | BUG_ON(prio >= CFQ_PRIO_LISTS); | 785 | BUG_ON(prio >= CFQ_PRIO_LISTS); |
786 | 786 | ||
787 | list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr); | 787 | list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr); |
788 | 788 | ||
789 | cfqd->cur_prio = prio + 1; | 789 | cfqd->cur_prio = prio + 1; |
790 | if (cfqd->cur_prio > cfqd->cur_end_prio) { | 790 | if (cfqd->cur_prio > cfqd->cur_end_prio) { |
791 | cfqd->cur_end_prio = cfqd->cur_prio; | 791 | cfqd->cur_end_prio = cfqd->cur_prio; |
792 | cfqd->cur_prio = 0; | 792 | cfqd->cur_prio = 0; |
793 | } | 793 | } |
794 | if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) { | 794 | if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) { |
795 | cfqd->cur_prio = 0; | 795 | cfqd->cur_prio = 0; |
796 | cfqd->cur_end_prio = 0; | 796 | cfqd->cur_end_prio = 0; |
797 | } | 797 | } |
798 | 798 | ||
799 | return prio; | 799 | return prio; |
800 | } | 800 | } |
801 | 801 | ||
802 | static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) | 802 | static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) |
803 | { | 803 | { |
804 | struct cfq_queue *cfqq; | 804 | struct cfq_queue *cfqq; |
805 | 805 | ||
806 | /* | 806 | /* |
807 | * if current queue is expired but not done with its requests yet, | 807 | * if current queue is expired but not done with its requests yet, |
808 | * wait for that to happen | 808 | * wait for that to happen |
809 | */ | 809 | */ |
810 | if ((cfqq = cfqd->active_queue) != NULL) { | 810 | if ((cfqq = cfqd->active_queue) != NULL) { |
811 | if (cfq_cfqq_expired(cfqq) && cfq_cfqq_dispatched(cfqq)) | 811 | if (cfq_cfqq_expired(cfqq) && cfq_cfqq_dispatched(cfqq)) |
812 | return NULL; | 812 | return NULL; |
813 | } | 813 | } |
814 | 814 | ||
815 | /* | 815 | /* |
816 | * if current list is non-empty, grab first entry. if it is empty, | 816 | * if current list is non-empty, grab first entry. if it is empty, |
817 | * get next prio level and grab first entry then if any are spliced | 817 | * get next prio level and grab first entry then if any are spliced |
818 | */ | 818 | */ |
819 | if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) | 819 | if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) |
820 | cfqq = list_entry_cfqq(cfqd->cur_rr.next); | 820 | cfqq = list_entry_cfqq(cfqd->cur_rr.next); |
821 | 821 | ||
822 | /* | 822 | /* |
823 | * if we have idle queues and no rt or be queues had pending | 823 | * if we have idle queues and no rt or be queues had pending |
824 | * requests, either allow immediate service if the grace period | 824 | * requests, either allow immediate service if the grace period |
825 | * has passed or arm the idle grace timer | 825 | * has passed or arm the idle grace timer |
826 | */ | 826 | */ |
827 | if (!cfqq && !list_empty(&cfqd->idle_rr)) { | 827 | if (!cfqq && !list_empty(&cfqd->idle_rr)) { |
828 | unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE; | 828 | unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE; |
829 | 829 | ||
830 | if (time_after_eq(jiffies, end)) | 830 | if (time_after_eq(jiffies, end)) |
831 | cfqq = list_entry_cfqq(cfqd->idle_rr.next); | 831 | cfqq = list_entry_cfqq(cfqd->idle_rr.next); |
832 | else | 832 | else |
833 | mod_timer(&cfqd->idle_class_timer, end); | 833 | mod_timer(&cfqd->idle_class_timer, end); |
834 | } | 834 | } |
835 | 835 | ||
836 | __cfq_set_active_queue(cfqd, cfqq); | 836 | __cfq_set_active_queue(cfqd, cfqq); |
837 | return cfqq; | 837 | return cfqq; |
838 | } | 838 | } |
839 | 839 | ||
840 | /* | 840 | /* |
841 | * current cfqq expired its slice (or was too idle), select new one | 841 | * current cfqq expired its slice (or was too idle), select new one |
842 | */ | 842 | */ |
843 | static void | 843 | static void |
844 | __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 844 | __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
845 | int preempted) | 845 | int preempted) |
846 | { | 846 | { |
847 | unsigned long now = jiffies; | 847 | unsigned long now = jiffies; |
848 | 848 | ||
849 | if (cfq_cfqq_wait_request(cfqq)) | 849 | if (cfq_cfqq_wait_request(cfqq)) |
850 | del_timer(&cfqd->idle_slice_timer); | 850 | del_timer(&cfqd->idle_slice_timer); |
851 | 851 | ||
852 | if (!preempted && !cfq_cfqq_dispatched(cfqq)) | 852 | if (!preempted && !cfq_cfqq_dispatched(cfqq)) |
853 | cfqq->service_last = now; | 853 | cfqq->service_last = now; |
854 | 854 | ||
855 | cfq_clear_cfqq_must_dispatch(cfqq); | 855 | cfq_clear_cfqq_must_dispatch(cfqq); |
856 | cfq_clear_cfqq_wait_request(cfqq); | 856 | cfq_clear_cfqq_wait_request(cfqq); |
857 | 857 | ||
858 | /* | 858 | /* |
859 | * store what was left of this slice, if the queue idled out | 859 | * store what was left of this slice, if the queue idled out |
860 | * or was preempted | 860 | * or was preempted |
861 | */ | 861 | */ |
862 | if (time_after(cfqq->slice_end, now)) | 862 | if (time_after(cfqq->slice_end, now)) |
863 | cfqq->slice_left = cfqq->slice_end - now; | 863 | cfqq->slice_left = cfqq->slice_end - now; |
864 | else | 864 | else |
865 | cfqq->slice_left = 0; | 865 | cfqq->slice_left = 0; |
866 | 866 | ||
867 | if (cfq_cfqq_on_rr(cfqq)) | 867 | if (cfq_cfqq_on_rr(cfqq)) |
868 | cfq_resort_rr_list(cfqq, preempted); | 868 | cfq_resort_rr_list(cfqq, preempted); |
869 | 869 | ||
870 | if (cfqq == cfqd->active_queue) | 870 | if (cfqq == cfqd->active_queue) |
871 | cfqd->active_queue = NULL; | 871 | cfqd->active_queue = NULL; |
872 | 872 | ||
873 | if (cfqd->active_cic) { | 873 | if (cfqd->active_cic) { |
874 | put_io_context(cfqd->active_cic->ioc); | 874 | put_io_context(cfqd->active_cic->ioc); |
875 | cfqd->active_cic = NULL; | 875 | cfqd->active_cic = NULL; |
876 | } | 876 | } |
877 | 877 | ||
878 | cfqd->dispatch_slice = 0; | 878 | cfqd->dispatch_slice = 0; |
879 | } | 879 | } |
880 | 880 | ||
881 | static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted) | 881 | static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted) |
882 | { | 882 | { |
883 | struct cfq_queue *cfqq = cfqd->active_queue; | 883 | struct cfq_queue *cfqq = cfqd->active_queue; |
884 | 884 | ||
885 | if (cfqq) { | 885 | if (cfqq) { |
886 | /* | 886 | /* |
887 | * use deferred expiry, if there are requests in progress as | 887 | * use deferred expiry, if there are requests in progress as |
888 | * not to disturb the slice of the next queue | 888 | * not to disturb the slice of the next queue |
889 | */ | 889 | */ |
890 | if (cfq_cfqq_dispatched(cfqq)) | 890 | if (cfq_cfqq_dispatched(cfqq)) |
891 | cfq_mark_cfqq_expired(cfqq); | 891 | cfq_mark_cfqq_expired(cfqq); |
892 | else | 892 | else |
893 | __cfq_slice_expired(cfqd, cfqq, preempted); | 893 | __cfq_slice_expired(cfqd, cfqq, preempted); |
894 | } | 894 | } |
895 | } | 895 | } |
896 | 896 | ||
897 | static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 897 | static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
898 | 898 | ||
899 | { | 899 | { |
900 | WARN_ON(!RB_EMPTY(&cfqq->sort_list)); | 900 | WARN_ON(!RB_EMPTY(&cfqq->sort_list)); |
901 | WARN_ON(cfqq != cfqd->active_queue); | 901 | WARN_ON(cfqq != cfqd->active_queue); |
902 | 902 | ||
903 | /* | 903 | /* |
904 | * idle is disabled, either manually or by past process history | 904 | * idle is disabled, either manually or by past process history |
905 | */ | 905 | */ |
906 | if (!cfqd->cfq_slice_idle) | 906 | if (!cfqd->cfq_slice_idle) |
907 | return 0; | 907 | return 0; |
908 | if (!cfq_cfqq_idle_window(cfqq)) | 908 | if (!cfq_cfqq_idle_window(cfqq)) |
909 | return 0; | 909 | return 0; |
910 | /* | 910 | /* |
911 | * task has exited, don't wait | 911 | * task has exited, don't wait |
912 | */ | 912 | */ |
913 | if (cfqd->active_cic && !cfqd->active_cic->ioc->task) | 913 | if (cfqd->active_cic && !cfqd->active_cic->ioc->task) |
914 | return 0; | 914 | return 0; |
915 | 915 | ||
916 | cfq_mark_cfqq_must_dispatch(cfqq); | 916 | cfq_mark_cfqq_must_dispatch(cfqq); |
917 | cfq_mark_cfqq_wait_request(cfqq); | 917 | cfq_mark_cfqq_wait_request(cfqq); |
918 | 918 | ||
919 | if (!timer_pending(&cfqd->idle_slice_timer)) { | 919 | if (!timer_pending(&cfqd->idle_slice_timer)) { |
920 | unsigned long slice_left = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle); | 920 | unsigned long slice_left = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle); |
921 | 921 | ||
922 | cfqd->idle_slice_timer.expires = jiffies + slice_left; | 922 | cfqd->idle_slice_timer.expires = jiffies + slice_left; |
923 | add_timer(&cfqd->idle_slice_timer); | 923 | add_timer(&cfqd->idle_slice_timer); |
924 | } | 924 | } |
925 | 925 | ||
926 | return 1; | 926 | return 1; |
927 | } | 927 | } |
928 | 928 | ||
929 | static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq) | 929 | static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq) |
930 | { | 930 | { |
931 | struct cfq_data *cfqd = q->elevator->elevator_data; | 931 | struct cfq_data *cfqd = q->elevator->elevator_data; |
932 | struct cfq_queue *cfqq = crq->cfq_queue; | 932 | struct cfq_queue *cfqq = crq->cfq_queue; |
933 | 933 | ||
934 | cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq); | 934 | cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq); |
935 | cfq_remove_request(crq->request); | 935 | cfq_remove_request(crq->request); |
936 | cfqq->on_dispatch[cfq_crq_is_sync(crq)]++; | 936 | cfqq->on_dispatch[cfq_crq_is_sync(crq)]++; |
937 | elv_dispatch_sort(q, crq->request); | 937 | elv_dispatch_sort(q, crq->request); |
938 | } | 938 | } |
939 | 939 | ||
940 | /* | 940 | /* |
941 | * return expired entry, or NULL to just start from scratch in rbtree | 941 | * return expired entry, or NULL to just start from scratch in rbtree |
942 | */ | 942 | */ |
943 | static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq) | 943 | static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq) |
944 | { | 944 | { |
945 | struct cfq_data *cfqd = cfqq->cfqd; | 945 | struct cfq_data *cfqd = cfqq->cfqd; |
946 | struct request *rq; | 946 | struct request *rq; |
947 | struct cfq_rq *crq; | 947 | struct cfq_rq *crq; |
948 | 948 | ||
949 | if (cfq_cfqq_fifo_expire(cfqq)) | 949 | if (cfq_cfqq_fifo_expire(cfqq)) |
950 | return NULL; | 950 | return NULL; |
951 | 951 | ||
952 | if (!list_empty(&cfqq->fifo)) { | 952 | if (!list_empty(&cfqq->fifo)) { |
953 | int fifo = cfq_cfqq_class_sync(cfqq); | 953 | int fifo = cfq_cfqq_class_sync(cfqq); |
954 | 954 | ||
955 | crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next)); | 955 | crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next)); |
956 | rq = crq->request; | 956 | rq = crq->request; |
957 | if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) { | 957 | if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) { |
958 | cfq_mark_cfqq_fifo_expire(cfqq); | 958 | cfq_mark_cfqq_fifo_expire(cfqq); |
959 | return crq; | 959 | return crq; |
960 | } | 960 | } |
961 | } | 961 | } |
962 | 962 | ||
963 | return NULL; | 963 | return NULL; |
964 | } | 964 | } |
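
cfq_check_fifo() only ever looks at the request at the head of the per-queue FIFO: once that request's start time plus the class-specific expiry has passed, it is handed back so it gets dispatched ahead of the sector-sorted choice. Below is a minimal user-space sketch of that age test; the tick values, the expiry numbers and the hand-rolled wrap-safe comparison are assumptions standing in for jiffies, the fifo-expire tunables and time_after(), not the scheduler's actual values.

/*
 * Standalone sketch (user space): the "is the oldest request too old?"
 * test from cfq_check_fifo(), with a hand-rolled wrap-safe comparison
 * standing in for the kernel's time_after().  All numbers are made up
 * for the illustration.
 */
#include <stdio.h>

/* true if tick count a is after b, tolerant of counter wrap-around */
static int ticks_after(unsigned long a, unsigned long b)
{
	return (long)(b - a) < 0;
}

int main(void)
{
	/* assumed per-class expiry in ticks, indexed like above: [1] = sync */
	const unsigned long fifo_expire[2] = { 250, 125 };
	unsigned long now = 1000;	/* pretend current tick count */
	unsigned long start_time = 700;	/* when the head request was queued */
	int sync = 1;			/* pretend the queue is sync */

	if (ticks_after(now, start_time + fifo_expire[sync]))
		printf("head request expired, dispatch it before anything else\n");
	else
		printf("head request still fresh, pick the next one by position\n");

	return 0;
}

The wrap-safe comparison matters because the tick counter can overflow; the signed subtraction keeps the test correct across the wrap, which is the property time_after() provides in the kernel.
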
965 | 965 | ||
966 | /* | 966 | /* |
967 | * Scale schedule slice based on io priority. Use the sync time slice only | 967 | * Scale schedule slice based on io priority. Use the sync time slice only |
968 | * if a queue is marked sync and has sync io queued. A sync queue with async | 968 | * if a queue is marked sync and has sync io queued. A sync queue with async |
969 | * io only should not get the full sync slice length. | 969 | * io only should not get the full sync slice length. |
970 | */ | 970 | */ |
971 | static inline int | 971 | static inline int |
972 | cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 972 | cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
973 | { | 973 | { |
974 | const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)]; | 974 | const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)]; |
975 | 975 | ||
976 | WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); | 976 | WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); |
977 | 977 | ||
978 | return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio)); | 978 | return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio)); |
979 | } | 979 | } |
980 | 980 | ||
981 | static inline void | 981 | static inline void |
982 | cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 982 | cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
983 | { | 983 | { |
984 | cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies; | 984 | cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies; |
985 | } | 985 | } |
986 | 986 | ||
987 | static inline int | 987 | static inline int |
988 | cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 988 | cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
989 | { | 989 | { |
990 | const int base_rq = cfqd->cfq_slice_async_rq; | 990 | const int base_rq = cfqd->cfq_slice_async_rq; |
991 | 991 | ||
992 | WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); | 992 | WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); |
993 | 993 | ||
994 | return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio)); | 994 | return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio)); |
995 | } | 995 | } |
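
cfq_prio_to_slice() and cfq_prio_to_maxrq() above are pure arithmetic on the queue's ioprio level, so their effect is easy to tabulate outside the kernel. The sketch below is a standalone illustration only: the base slice length, the base async request count, the scale factor and the number of priority levels are assumed example values, not the driver's tunables.

/*
 * Standalone sketch (user space, not kernel code): shows how a slice
 * length and an async request budget scale with the ioprio level,
 * using the same shape of formula as cfq_prio_to_slice() and
 * cfq_prio_to_maxrq() above.  The concrete numbers below are
 * assumptions for the example only.
 */
#include <stdio.h>

#define EXAMPLE_PRIO_LEVELS	8	/* assumed number of best-effort levels */
#define EXAMPLE_SLICE_SCALE	5	/* assumed scale factor */

static int example_prio_to_slice(int base_slice, int ioprio)
{
	/* higher priority (lower ioprio value) gets a longer slice */
	return base_slice + (base_slice / EXAMPLE_SLICE_SCALE * (4 - ioprio));
}

static int example_prio_to_maxrq(int base_rq, int ioprio)
{
	/* the async dispatch budget also grows with priority */
	return 2 * (base_rq + base_rq * (EXAMPLE_PRIO_LEVELS - 1 - ioprio));
}

int main(void)
{
	const int base_slice = 100;	/* assumed base slice, in ms */
	const int base_rq = 2;		/* assumed base async request count */
	int prio;

	for (prio = 0; prio < EXAMPLE_PRIO_LEVELS; prio++)
		printf("ioprio %d: slice %3d ms, max async rq %2d\n",
		       prio, example_prio_to_slice(base_slice, prio),
		       example_prio_to_maxrq(base_rq, prio));

	return 0;
}

Higher-priority queues (lower ioprio values) end up with both a longer time slice and a larger async dispatch budget, which is the scaling the comment above describes.
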
996 | 996 | ||
997 | /* | 997 | /* |
998 | * get next queue for service | 998 | * get next queue for service |
999 | */ | 999 | */ |
1000 | static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) | 1000 | static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) |
1001 | { | 1001 | { |
1002 | unsigned long now = jiffies; | 1002 | unsigned long now = jiffies; |
1003 | struct cfq_queue *cfqq; | 1003 | struct cfq_queue *cfqq; |
1004 | 1004 | ||
1005 | cfqq = cfqd->active_queue; | 1005 | cfqq = cfqd->active_queue; |
1006 | if (!cfqq) | 1006 | if (!cfqq) |
1007 | goto new_queue; | 1007 | goto new_queue; |
1008 | 1008 | ||
1009 | if (cfq_cfqq_expired(cfqq)) | 1009 | if (cfq_cfqq_expired(cfqq)) |
1010 | goto new_queue; | 1010 | goto new_queue; |
1011 | 1011 | ||
1012 | /* | 1012 | /* |
1013 | * slice has expired | 1013 | * slice has expired |
1014 | */ | 1014 | */ |
1015 | if (!cfq_cfqq_must_dispatch(cfqq) && time_after(now, cfqq->slice_end)) | 1015 | if (!cfq_cfqq_must_dispatch(cfqq) && time_after(now, cfqq->slice_end)) |
1016 | goto expire; | 1016 | goto expire; |
1017 | 1017 | ||
1018 | /* | 1018 | /* |
1019 | * if queue has requests, dispatch one. if not, check if | 1019 | * if queue has requests, dispatch one. if not, check if |
1020 | * enough slice is left to wait for one | 1020 | * enough slice is left to wait for one |
1021 | */ | 1021 | */ |
1022 | if (!RB_EMPTY(&cfqq->sort_list)) | 1022 | if (!RB_EMPTY(&cfqq->sort_list)) |
1023 | goto keep_queue; | 1023 | goto keep_queue; |
1024 | else if (cfq_cfqq_class_sync(cfqq) && | 1024 | else if (cfq_cfqq_class_sync(cfqq) && |
1025 | time_before(now, cfqq->slice_end)) { | 1025 | time_before(now, cfqq->slice_end)) { |
1026 | if (cfq_arm_slice_timer(cfqd, cfqq)) | 1026 | if (cfq_arm_slice_timer(cfqd, cfqq)) |
1027 | return NULL; | 1027 | return NULL; |
1028 | } | 1028 | } |
1029 | 1029 | ||
1030 | expire: | 1030 | expire: |
1031 | cfq_slice_expired(cfqd, 0); | 1031 | cfq_slice_expired(cfqd, 0); |
1032 | new_queue: | 1032 | new_queue: |
1033 | cfqq = cfq_set_active_queue(cfqd); | 1033 | cfqq = cfq_set_active_queue(cfqd); |
1034 | keep_queue: | 1034 | keep_queue: |
1035 | return cfqq; | 1035 | return cfqq; |
1036 | } | 1036 | } |
1037 | 1037 | ||
1038 | static int | 1038 | static int |
1039 | __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 1039 | __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
1040 | int max_dispatch) | 1040 | int max_dispatch) |
1041 | { | 1041 | { |
1042 | int dispatched = 0; | 1042 | int dispatched = 0; |
1043 | 1043 | ||
1044 | BUG_ON(RB_EMPTY(&cfqq->sort_list)); | 1044 | BUG_ON(RB_EMPTY(&cfqq->sort_list)); |
1045 | 1045 | ||
1046 | do { | 1046 | do { |
1047 | struct cfq_rq *crq; | 1047 | struct cfq_rq *crq; |
1048 | 1048 | ||
1049 | /* | 1049 | /* |
1050 | * follow expired path, else get first next available | 1050 | * follow expired path, else get first next available |
1051 | */ | 1051 | */ |
1052 | if ((crq = cfq_check_fifo(cfqq)) == NULL) | 1052 | if ((crq = cfq_check_fifo(cfqq)) == NULL) |
1053 | crq = cfqq->next_crq; | 1053 | crq = cfqq->next_crq; |
1054 | 1054 | ||
1055 | /* | 1055 | /* |
1056 | * finally, insert request into driver dispatch list | 1056 | * finally, insert request into driver dispatch list |
1057 | */ | 1057 | */ |
1058 | cfq_dispatch_insert(cfqd->queue, crq); | 1058 | cfq_dispatch_insert(cfqd->queue, crq); |
1059 | 1059 | ||
1060 | cfqd->dispatch_slice++; | 1060 | cfqd->dispatch_slice++; |
1061 | dispatched++; | 1061 | dispatched++; |
1062 | 1062 | ||
1063 | if (!cfqd->active_cic) { | 1063 | if (!cfqd->active_cic) { |
1064 | atomic_inc(&crq->io_context->ioc->refcount); | 1064 | atomic_inc(&crq->io_context->ioc->refcount); |
1065 | cfqd->active_cic = crq->io_context; | 1065 | cfqd->active_cic = crq->io_context; |
1066 | } | 1066 | } |
1067 | 1067 | ||
1068 | if (RB_EMPTY(&cfqq->sort_list)) | 1068 | if (RB_EMPTY(&cfqq->sort_list)) |
1069 | break; | 1069 | break; |
1070 | 1070 | ||
1071 | } while (dispatched < max_dispatch); | 1071 | } while (dispatched < max_dispatch); |
1072 | 1072 | ||
1073 | /* | 1073 | /* |
1074 | * if slice end isn't set yet, set it. if at least one request was | 1074 | * if slice end isn't set yet, set it. if at least one request was |
1075 | * sync, use the sync time slice value | 1075 | * sync, use the sync time slice value |
1076 | */ | 1076 | */ |
1077 | if (!cfqq->slice_end) | 1077 | if (!cfqq->slice_end) |
1078 | cfq_set_prio_slice(cfqd, cfqq); | 1078 | cfq_set_prio_slice(cfqd, cfqq); |
1079 | 1079 | ||
1080 | /* | 1080 | /* |
1081 | * expire an async queue immediately if it has used up its slice. idle | 1081 | * expire an async queue immediately if it has used up its slice. idle |
1082 | 	 * queues always expire after 1 dispatch round. | 1082 | 	 * queues always expire after 1 dispatch round. |
1083 | */ | 1083 | */ |
1084 | if ((!cfq_cfqq_sync(cfqq) && | 1084 | if ((!cfq_cfqq_sync(cfqq) && |
1085 | cfqd->dispatch_slice >= cfq_prio_to_maxrq(cfqd, cfqq)) || | 1085 | cfqd->dispatch_slice >= cfq_prio_to_maxrq(cfqd, cfqq)) || |
1086 | cfq_class_idle(cfqq)) | 1086 | cfq_class_idle(cfqq)) |
1087 | cfq_slice_expired(cfqd, 0); | 1087 | cfq_slice_expired(cfqd, 0); |
1088 | 1088 | ||
1089 | return dispatched; | 1089 | return dispatched; |
1090 | } | 1090 | } |
1091 | 1091 | ||
1092 | static int | 1092 | static int |
1093 | cfq_forced_dispatch_cfqqs(struct list_head *list) | 1093 | cfq_forced_dispatch_cfqqs(struct list_head *list) |
1094 | { | 1094 | { |
1095 | int dispatched = 0; | 1095 | int dispatched = 0; |
1096 | struct cfq_queue *cfqq, *next; | 1096 | struct cfq_queue *cfqq, *next; |
1097 | struct cfq_rq *crq; | 1097 | struct cfq_rq *crq; |
1098 | 1098 | ||
1099 | list_for_each_entry_safe(cfqq, next, list, cfq_list) { | 1099 | list_for_each_entry_safe(cfqq, next, list, cfq_list) { |
1100 | while ((crq = cfqq->next_crq)) { | 1100 | while ((crq = cfqq->next_crq)) { |
1101 | cfq_dispatch_insert(cfqq->cfqd->queue, crq); | 1101 | cfq_dispatch_insert(cfqq->cfqd->queue, crq); |
1102 | dispatched++; | 1102 | dispatched++; |
1103 | } | 1103 | } |
1104 | BUG_ON(!list_empty(&cfqq->fifo)); | 1104 | BUG_ON(!list_empty(&cfqq->fifo)); |
1105 | } | 1105 | } |
1106 | return dispatched; | 1106 | return dispatched; |
1107 | } | 1107 | } |
1108 | 1108 | ||
1109 | static int | 1109 | static int |
1110 | cfq_forced_dispatch(struct cfq_data *cfqd) | 1110 | cfq_forced_dispatch(struct cfq_data *cfqd) |
1111 | { | 1111 | { |
1112 | int i, dispatched = 0; | 1112 | int i, dispatched = 0; |
1113 | 1113 | ||
1114 | for (i = 0; i < CFQ_PRIO_LISTS; i++) | 1114 | for (i = 0; i < CFQ_PRIO_LISTS; i++) |
1115 | dispatched += cfq_forced_dispatch_cfqqs(&cfqd->rr_list[i]); | 1115 | dispatched += cfq_forced_dispatch_cfqqs(&cfqd->rr_list[i]); |
1116 | 1116 | ||
1117 | dispatched += cfq_forced_dispatch_cfqqs(&cfqd->busy_rr); | 1117 | dispatched += cfq_forced_dispatch_cfqqs(&cfqd->busy_rr); |
1118 | dispatched += cfq_forced_dispatch_cfqqs(&cfqd->cur_rr); | 1118 | dispatched += cfq_forced_dispatch_cfqqs(&cfqd->cur_rr); |
1119 | dispatched += cfq_forced_dispatch_cfqqs(&cfqd->idle_rr); | 1119 | dispatched += cfq_forced_dispatch_cfqqs(&cfqd->idle_rr); |
1120 | 1120 | ||
1121 | cfq_slice_expired(cfqd, 0); | 1121 | cfq_slice_expired(cfqd, 0); |
1122 | 1122 | ||
1123 | BUG_ON(cfqd->busy_queues); | 1123 | BUG_ON(cfqd->busy_queues); |
1124 | 1124 | ||
1125 | return dispatched; | 1125 | return dispatched; |
1126 | } | 1126 | } |
1127 | 1127 | ||
1128 | static int | 1128 | static int |
1129 | cfq_dispatch_requests(request_queue_t *q, int force) | 1129 | cfq_dispatch_requests(request_queue_t *q, int force) |
1130 | { | 1130 | { |
1131 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1131 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1132 | struct cfq_queue *cfqq; | 1132 | struct cfq_queue *cfqq; |
1133 | 1133 | ||
1134 | if (!cfqd->busy_queues) | 1134 | if (!cfqd->busy_queues) |
1135 | return 0; | 1135 | return 0; |
1136 | 1136 | ||
1137 | if (unlikely(force)) | 1137 | if (unlikely(force)) |
1138 | return cfq_forced_dispatch(cfqd); | 1138 | return cfq_forced_dispatch(cfqd); |
1139 | 1139 | ||
1140 | cfqq = cfq_select_queue(cfqd); | 1140 | cfqq = cfq_select_queue(cfqd); |
1141 | if (cfqq) { | 1141 | if (cfqq) { |
1142 | int max_dispatch; | 1142 | int max_dispatch; |
1143 | 1143 | ||
1144 | /* | 1144 | /* |
1145 | * if idle window is disabled, allow queue buildup | 1145 | * if idle window is disabled, allow queue buildup |
1146 | */ | 1146 | */ |
1147 | if (!cfq_cfqq_idle_window(cfqq) && | 1147 | if (!cfq_cfqq_idle_window(cfqq) && |
1148 | cfqd->rq_in_driver >= cfqd->cfq_max_depth) | 1148 | cfqd->rq_in_driver >= cfqd->cfq_max_depth) |
1149 | return 0; | 1149 | return 0; |
1150 | 1150 | ||
1151 | cfq_clear_cfqq_must_dispatch(cfqq); | 1151 | cfq_clear_cfqq_must_dispatch(cfqq); |
1152 | cfq_clear_cfqq_wait_request(cfqq); | 1152 | cfq_clear_cfqq_wait_request(cfqq); |
1153 | del_timer(&cfqd->idle_slice_timer); | 1153 | del_timer(&cfqd->idle_slice_timer); |
1154 | 1154 | ||
1155 | max_dispatch = cfqd->cfq_quantum; | 1155 | max_dispatch = cfqd->cfq_quantum; |
1156 | if (cfq_class_idle(cfqq)) | 1156 | if (cfq_class_idle(cfqq)) |
1157 | max_dispatch = 1; | 1157 | max_dispatch = 1; |
1158 | 1158 | ||
1159 | return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); | 1159 | return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); |
1160 | } | 1160 | } |
1161 | 1161 | ||
1162 | return 0; | 1162 | return 0; |
1163 | } | 1163 | } |
1164 | 1164 | ||
1165 | /* | 1165 | /* |
1166 | * task holds one reference to the queue, dropped when task exits. each crq | 1166 | * task holds one reference to the queue, dropped when task exits. each crq |
1167 | * in-flight on this queue also holds a reference, dropped when crq is freed. | 1167 | * in-flight on this queue also holds a reference, dropped when crq is freed. |
1168 | * | 1168 | * |
1169 | * queue lock must be held here. | 1169 | * queue lock must be held here. |
1170 | */ | 1170 | */ |
1171 | static void cfq_put_queue(struct cfq_queue *cfqq) | 1171 | static void cfq_put_queue(struct cfq_queue *cfqq) |
1172 | { | 1172 | { |
1173 | struct cfq_data *cfqd = cfqq->cfqd; | 1173 | struct cfq_data *cfqd = cfqq->cfqd; |
1174 | 1174 | ||
1175 | BUG_ON(atomic_read(&cfqq->ref) <= 0); | 1175 | BUG_ON(atomic_read(&cfqq->ref) <= 0); |
1176 | 1176 | ||
1177 | if (!atomic_dec_and_test(&cfqq->ref)) | 1177 | if (!atomic_dec_and_test(&cfqq->ref)) |
1178 | return; | 1178 | return; |
1179 | 1179 | ||
1180 | BUG_ON(rb_first(&cfqq->sort_list)); | 1180 | BUG_ON(rb_first(&cfqq->sort_list)); |
1181 | BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); | 1181 | BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); |
1182 | BUG_ON(cfq_cfqq_on_rr(cfqq)); | 1182 | BUG_ON(cfq_cfqq_on_rr(cfqq)); |
1183 | 1183 | ||
1184 | if (unlikely(cfqd->active_queue == cfqq)) { | 1184 | if (unlikely(cfqd->active_queue == cfqq)) { |
1185 | __cfq_slice_expired(cfqd, cfqq, 0); | 1185 | __cfq_slice_expired(cfqd, cfqq, 0); |
1186 | cfq_schedule_dispatch(cfqd); | 1186 | cfq_schedule_dispatch(cfqd); |
1187 | } | 1187 | } |
1188 | 1188 | ||
1189 | cfq_put_cfqd(cfqq->cfqd); | 1189 | cfq_put_cfqd(cfqq->cfqd); |
1190 | 1190 | ||
1191 | /* | 1191 | /* |
1192 | * it's on the empty list and still hashed | 1192 | * it's on the empty list and still hashed |
1193 | */ | 1193 | */ |
1194 | list_del(&cfqq->cfq_list); | 1194 | list_del(&cfqq->cfq_list); |
1195 | hlist_del(&cfqq->cfq_hash); | 1195 | hlist_del(&cfqq->cfq_hash); |
1196 | kmem_cache_free(cfq_pool, cfqq); | 1196 | kmem_cache_free(cfq_pool, cfqq); |
1197 | } | 1197 | } |
1198 | 1198 | ||
1199 | static inline struct cfq_queue * | 1199 | static inline struct cfq_queue * |
1200 | __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio, | 1200 | __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio, |
1201 | const int hashval) | 1201 | const int hashval) |
1202 | { | 1202 | { |
1203 | struct hlist_head *hash_list = &cfqd->cfq_hash[hashval]; | 1203 | struct hlist_head *hash_list = &cfqd->cfq_hash[hashval]; |
1204 | struct hlist_node *entry, *next; | 1204 | struct hlist_node *entry, *next; |
1205 | 1205 | ||
1206 | hlist_for_each_safe(entry, next, hash_list) { | 1206 | hlist_for_each_safe(entry, next, hash_list) { |
1207 | struct cfq_queue *__cfqq = list_entry_qhash(entry); | 1207 | struct cfq_queue *__cfqq = list_entry_qhash(entry); |
1208 | const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->ioprio_class, __cfqq->ioprio); | 1208 | const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->ioprio_class, __cfqq->ioprio); |
1209 | 1209 | ||
1210 | if (__cfqq->key == key && (__p == prio || prio == CFQ_KEY_ANY)) | 1210 | if (__cfqq->key == key && (__p == prio || prio == CFQ_KEY_ANY)) |
1211 | return __cfqq; | 1211 | return __cfqq; |
1212 | } | 1212 | } |
1213 | 1213 | ||
1214 | return NULL; | 1214 | return NULL; |
1215 | } | 1215 | } |
1216 | 1216 | ||
1217 | static struct cfq_queue * | 1217 | static struct cfq_queue * |
1218 | cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio) | 1218 | cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio) |
1219 | { | 1219 | { |
1220 | return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT)); | 1220 | return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT)); |
1221 | } | 1221 | } |
1222 | 1222 | ||
1223 | static void cfq_free_io_context(struct cfq_io_context *cic) | 1223 | static void cfq_free_io_context(struct cfq_io_context *cic) |
1224 | { | 1224 | { |
1225 | struct cfq_io_context *__cic; | 1225 | struct cfq_io_context *__cic; |
1226 | struct list_head *entry, *next; | 1226 | struct list_head *entry, *next; |
1227 | 1227 | ||
1228 | list_for_each_safe(entry, next, &cic->list) { | 1228 | list_for_each_safe(entry, next, &cic->list) { |
1229 | __cic = list_entry(entry, struct cfq_io_context, list); | 1229 | __cic = list_entry(entry, struct cfq_io_context, list); |
1230 | kmem_cache_free(cfq_ioc_pool, __cic); | 1230 | kmem_cache_free(cfq_ioc_pool, __cic); |
1231 | } | 1231 | } |
1232 | 1232 | ||
1233 | kmem_cache_free(cfq_ioc_pool, cic); | 1233 | kmem_cache_free(cfq_ioc_pool, cic); |
1234 | } | 1234 | } |
1235 | 1235 | ||
1236 | /* | 1236 | /* |
1237 | * Called with interrupts disabled | 1237 | * Called with interrupts disabled |
1238 | */ | 1238 | */ |
1239 | static void cfq_exit_single_io_context(struct cfq_io_context *cic) | 1239 | static void cfq_exit_single_io_context(struct cfq_io_context *cic) |
1240 | { | 1240 | { |
1241 | struct cfq_data *cfqd = cic->cfqq->cfqd; | 1241 | struct cfq_data *cfqd = cic->cfqq->cfqd; |
1242 | request_queue_t *q = cfqd->queue; | 1242 | request_queue_t *q = cfqd->queue; |
1243 | 1243 | ||
1244 | WARN_ON(!irqs_disabled()); | 1244 | WARN_ON(!irqs_disabled()); |
1245 | 1245 | ||
1246 | spin_lock(q->queue_lock); | 1246 | spin_lock(q->queue_lock); |
1247 | 1247 | ||
1248 | if (unlikely(cic->cfqq == cfqd->active_queue)) { | 1248 | if (unlikely(cic->cfqq == cfqd->active_queue)) { |
1249 | __cfq_slice_expired(cfqd, cic->cfqq, 0); | 1249 | __cfq_slice_expired(cfqd, cic->cfqq, 0); |
1250 | cfq_schedule_dispatch(cfqd); | 1250 | cfq_schedule_dispatch(cfqd); |
1251 | } | 1251 | } |
1252 | 1252 | ||
1253 | cfq_put_queue(cic->cfqq); | 1253 | cfq_put_queue(cic->cfqq); |
1254 | cic->cfqq = NULL; | 1254 | cic->cfqq = NULL; |
1255 | spin_unlock(q->queue_lock); | 1255 | spin_unlock(q->queue_lock); |
1256 | } | 1256 | } |
1257 | 1257 | ||
1258 | /* | 1258 | /* |
1259 | * Another task may update the task cic list, if it is doing a queue lookup | 1259 | * Another task may update the task cic list, if it is doing a queue lookup |
1260 | * on its behalf. cfq_cic_lock excludes such concurrent updates | 1260 | * on its behalf. cfq_cic_lock excludes such concurrent updates |
1261 | */ | 1261 | */ |
1262 | static void cfq_exit_io_context(struct cfq_io_context *cic) | 1262 | static void cfq_exit_io_context(struct cfq_io_context *cic) |
1263 | { | 1263 | { |
1264 | struct cfq_io_context *__cic; | 1264 | struct cfq_io_context *__cic; |
1265 | struct list_head *entry; | 1265 | struct list_head *entry; |
1266 | unsigned long flags; | 1266 | unsigned long flags; |
1267 | 1267 | ||
1268 | local_irq_save(flags); | 1268 | local_irq_save(flags); |
1269 | 1269 | ||
1270 | /* | 1270 | /* |
1271 | * put the reference this task is holding to the various queues | 1271 | * put the reference this task is holding to the various queues |
1272 | */ | 1272 | */ |
1273 | list_for_each(entry, &cic->list) { | 1273 | list_for_each(entry, &cic->list) { |
1274 | __cic = list_entry(entry, struct cfq_io_context, list); | 1274 | __cic = list_entry(entry, struct cfq_io_context, list); |
1275 | cfq_exit_single_io_context(__cic); | 1275 | cfq_exit_single_io_context(__cic); |
1276 | } | 1276 | } |
1277 | 1277 | ||
1278 | cfq_exit_single_io_context(cic); | 1278 | cfq_exit_single_io_context(cic); |
1279 | local_irq_restore(flags); | 1279 | local_irq_restore(flags); |
1280 | } | 1280 | } |
1281 | 1281 | ||
1282 | static struct cfq_io_context * | 1282 | static struct cfq_io_context * |
1283 | cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | 1283 | cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) |
1284 | { | 1284 | { |
1285 | struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); | 1285 | struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); |
1286 | 1286 | ||
1287 | if (cic) { | 1287 | if (cic) { |
1288 | INIT_LIST_HEAD(&cic->list); | 1288 | INIT_LIST_HEAD(&cic->list); |
1289 | cic->cfqq = NULL; | 1289 | cic->cfqq = NULL; |
1290 | cic->key = NULL; | 1290 | cic->key = NULL; |
1291 | cic->last_end_request = jiffies; | 1291 | cic->last_end_request = jiffies; |
1292 | cic->ttime_total = 0; | 1292 | cic->ttime_total = 0; |
1293 | cic->ttime_samples = 0; | 1293 | cic->ttime_samples = 0; |
1294 | cic->ttime_mean = 0; | 1294 | cic->ttime_mean = 0; |
1295 | cic->dtor = cfq_free_io_context; | 1295 | cic->dtor = cfq_free_io_context; |
1296 | cic->exit = cfq_exit_io_context; | 1296 | cic->exit = cfq_exit_io_context; |
1297 | } | 1297 | } |
1298 | 1298 | ||
1299 | return cic; | 1299 | return cic; |
1300 | } | 1300 | } |
1301 | 1301 | ||
1302 | static void cfq_init_prio_data(struct cfq_queue *cfqq) | 1302 | static void cfq_init_prio_data(struct cfq_queue *cfqq) |
1303 | { | 1303 | { |
1304 | struct task_struct *tsk = current; | 1304 | struct task_struct *tsk = current; |
1305 | int ioprio_class; | 1305 | int ioprio_class; |
1306 | 1306 | ||
1307 | if (!cfq_cfqq_prio_changed(cfqq)) | 1307 | if (!cfq_cfqq_prio_changed(cfqq)) |
1308 | return; | 1308 | return; |
1309 | 1309 | ||
1310 | ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio); | 1310 | ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio); |
1311 | switch (ioprio_class) { | 1311 | switch (ioprio_class) { |
1312 | default: | 1312 | default: |
1313 | printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); | 1313 | printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); |
1314 | case IOPRIO_CLASS_NONE: | 1314 | case IOPRIO_CLASS_NONE: |
1315 | /* | 1315 | /* |
1316 | * no prio set, place us in the middle of the BE classes | 1316 | * no prio set, place us in the middle of the BE classes |
1317 | */ | 1317 | */ |
1318 | cfqq->ioprio = task_nice_ioprio(tsk); | 1318 | cfqq->ioprio = task_nice_ioprio(tsk); |
1319 | cfqq->ioprio_class = IOPRIO_CLASS_BE; | 1319 | cfqq->ioprio_class = IOPRIO_CLASS_BE; |
1320 | break; | 1320 | break; |
1321 | case IOPRIO_CLASS_RT: | 1321 | case IOPRIO_CLASS_RT: |
1322 | cfqq->ioprio = task_ioprio(tsk); | 1322 | cfqq->ioprio = task_ioprio(tsk); |
1323 | cfqq->ioprio_class = IOPRIO_CLASS_RT; | 1323 | cfqq->ioprio_class = IOPRIO_CLASS_RT; |
1324 | break; | 1324 | break; |
1325 | case IOPRIO_CLASS_BE: | 1325 | case IOPRIO_CLASS_BE: |
1326 | cfqq->ioprio = task_ioprio(tsk); | 1326 | cfqq->ioprio = task_ioprio(tsk); |
1327 | cfqq->ioprio_class = IOPRIO_CLASS_BE; | 1327 | cfqq->ioprio_class = IOPRIO_CLASS_BE; |
1328 | break; | 1328 | break; |
1329 | case IOPRIO_CLASS_IDLE: | 1329 | case IOPRIO_CLASS_IDLE: |
1330 | cfqq->ioprio_class = IOPRIO_CLASS_IDLE; | 1330 | cfqq->ioprio_class = IOPRIO_CLASS_IDLE; |
1331 | cfqq->ioprio = 7; | 1331 | cfqq->ioprio = 7; |
1332 | cfq_clear_cfqq_idle_window(cfqq); | 1332 | cfq_clear_cfqq_idle_window(cfqq); |
1333 | break; | 1333 | break; |
1334 | } | 1334 | } |
1335 | 1335 | ||
1336 | /* | 1336 | /* |
1337 | * keep track of original prio settings in case we have to temporarily | 1337 | * keep track of original prio settings in case we have to temporarily |
1338 | * elevate the priority of this queue | 1338 | * elevate the priority of this queue |
1339 | */ | 1339 | */ |
1340 | cfqq->org_ioprio = cfqq->ioprio; | 1340 | cfqq->org_ioprio = cfqq->ioprio; |
1341 | cfqq->org_ioprio_class = cfqq->ioprio_class; | 1341 | cfqq->org_ioprio_class = cfqq->ioprio_class; |
1342 | 1342 | ||
1343 | if (cfq_cfqq_on_rr(cfqq)) | 1343 | if (cfq_cfqq_on_rr(cfqq)) |
1344 | cfq_resort_rr_list(cfqq, 0); | 1344 | cfq_resort_rr_list(cfqq, 0); |
1345 | 1345 | ||
1346 | cfq_clear_cfqq_prio_changed(cfqq); | 1346 | cfq_clear_cfqq_prio_changed(cfqq); |
1347 | } | 1347 | } |
1348 | 1348 | ||
1349 | static inline void changed_ioprio(struct cfq_queue *cfqq) | 1349 | static inline void changed_ioprio(struct cfq_queue *cfqq) |
1350 | { | 1350 | { |
1351 | if (cfqq) { | 1351 | if (cfqq) { |
1352 | struct cfq_data *cfqd = cfqq->cfqd; | 1352 | struct cfq_data *cfqd = cfqq->cfqd; |
1353 | 1353 | ||
1354 | spin_lock(cfqd->queue->queue_lock); | 1354 | spin_lock(cfqd->queue->queue_lock); |
1355 | cfq_mark_cfqq_prio_changed(cfqq); | 1355 | cfq_mark_cfqq_prio_changed(cfqq); |
1356 | cfq_init_prio_data(cfqq); | 1356 | cfq_init_prio_data(cfqq); |
1357 | spin_unlock(cfqd->queue->queue_lock); | 1357 | spin_unlock(cfqd->queue->queue_lock); |
1358 | } | 1358 | } |
1359 | } | 1359 | } |
1360 | 1360 | ||
1361 | /* | 1361 | /* |
1362 | * callback from sys_ioprio_set, irqs are disabled | 1362 | * callback from sys_ioprio_set, irqs are disabled |
1363 | */ | 1363 | */ |
1364 | static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) | 1364 | static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) |
1365 | { | 1365 | { |
1366 | struct cfq_io_context *cic = ioc->cic; | 1366 | struct cfq_io_context *cic = ioc->cic; |
1367 | 1367 | ||
1368 | changed_ioprio(cic->cfqq); | 1368 | changed_ioprio(cic->cfqq); |
1369 | 1369 | ||
1370 | list_for_each_entry(cic, &cic->list, list) | 1370 | list_for_each_entry(cic, &cic->list, list) |
1371 | changed_ioprio(cic->cfqq); | 1371 | changed_ioprio(cic->cfqq); |
1372 | 1372 | ||
1373 | return 0; | 1373 | return 0; |
1374 | } | 1374 | } |
1375 | 1375 | ||
1376 | static struct cfq_queue * | 1376 | static struct cfq_queue * |
1377 | cfq_get_queue(struct cfq_data *cfqd, unsigned int key, unsigned short ioprio, | 1377 | cfq_get_queue(struct cfq_data *cfqd, unsigned int key, unsigned short ioprio, |
1378 | gfp_t gfp_mask) | 1378 | gfp_t gfp_mask) |
1379 | { | 1379 | { |
1380 | const int hashval = hash_long(key, CFQ_QHASH_SHIFT); | 1380 | const int hashval = hash_long(key, CFQ_QHASH_SHIFT); |
1381 | struct cfq_queue *cfqq, *new_cfqq = NULL; | 1381 | struct cfq_queue *cfqq, *new_cfqq = NULL; |
1382 | 1382 | ||
1383 | retry: | 1383 | retry: |
1384 | cfqq = __cfq_find_cfq_hash(cfqd, key, ioprio, hashval); | 1384 | cfqq = __cfq_find_cfq_hash(cfqd, key, ioprio, hashval); |
1385 | 1385 | ||
1386 | if (!cfqq) { | 1386 | if (!cfqq) { |
1387 | if (new_cfqq) { | 1387 | if (new_cfqq) { |
1388 | cfqq = new_cfqq; | 1388 | cfqq = new_cfqq; |
1389 | new_cfqq = NULL; | 1389 | new_cfqq = NULL; |
1390 | } else if (gfp_mask & __GFP_WAIT) { | 1390 | } else if (gfp_mask & __GFP_WAIT) { |
1391 | spin_unlock_irq(cfqd->queue->queue_lock); | 1391 | spin_unlock_irq(cfqd->queue->queue_lock); |
1392 | new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); | 1392 | new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); |
1393 | spin_lock_irq(cfqd->queue->queue_lock); | 1393 | spin_lock_irq(cfqd->queue->queue_lock); |
1394 | goto retry; | 1394 | goto retry; |
1395 | } else { | 1395 | } else { |
1396 | cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); | 1396 | cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); |
1397 | if (!cfqq) | 1397 | if (!cfqq) |
1398 | goto out; | 1398 | goto out; |
1399 | } | 1399 | } |
1400 | 1400 | ||
1401 | memset(cfqq, 0, sizeof(*cfqq)); | 1401 | memset(cfqq, 0, sizeof(*cfqq)); |
1402 | 1402 | ||
1403 | INIT_HLIST_NODE(&cfqq->cfq_hash); | 1403 | INIT_HLIST_NODE(&cfqq->cfq_hash); |
1404 | INIT_LIST_HEAD(&cfqq->cfq_list); | 1404 | INIT_LIST_HEAD(&cfqq->cfq_list); |
1405 | RB_CLEAR_ROOT(&cfqq->sort_list); | 1405 | RB_CLEAR_ROOT(&cfqq->sort_list); |
1406 | INIT_LIST_HEAD(&cfqq->fifo); | 1406 | INIT_LIST_HEAD(&cfqq->fifo); |
1407 | 1407 | ||
1408 | cfqq->key = key; | 1408 | cfqq->key = key; |
1409 | hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); | 1409 | hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); |
1410 | atomic_set(&cfqq->ref, 0); | 1410 | atomic_set(&cfqq->ref, 0); |
1411 | cfqq->cfqd = cfqd; | 1411 | cfqq->cfqd = cfqd; |
1412 | atomic_inc(&cfqd->ref); | 1412 | atomic_inc(&cfqd->ref); |
1413 | cfqq->service_last = 0; | 1413 | cfqq->service_last = 0; |
1414 | /* | 1414 | /* |
1415 | * set ->slice_left to allow preemption for a new process | 1415 | * set ->slice_left to allow preemption for a new process |
1416 | */ | 1416 | */ |
1417 | cfqq->slice_left = 2 * cfqd->cfq_slice_idle; | 1417 | cfqq->slice_left = 2 * cfqd->cfq_slice_idle; |
1418 | cfq_mark_cfqq_idle_window(cfqq); | 1418 | cfq_mark_cfqq_idle_window(cfqq); |
1419 | cfq_mark_cfqq_prio_changed(cfqq); | 1419 | cfq_mark_cfqq_prio_changed(cfqq); |
1420 | cfq_init_prio_data(cfqq); | 1420 | cfq_init_prio_data(cfqq); |
1421 | } | 1421 | } |
1422 | 1422 | ||
1423 | if (new_cfqq) | 1423 | if (new_cfqq) |
1424 | kmem_cache_free(cfq_pool, new_cfqq); | 1424 | kmem_cache_free(cfq_pool, new_cfqq); |
1425 | 1425 | ||
1426 | atomic_inc(&cfqq->ref); | 1426 | atomic_inc(&cfqq->ref); |
1427 | out: | 1427 | out: |
1428 | WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq); | 1428 | WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq); |
1429 | return cfqq; | 1429 | return cfqq; |
1430 | } | 1430 | } |
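
cfq_get_queue() uses a classic lock-drop pattern for the blocking case: if the lookup misses and the allocation is allowed to sleep, it releases the queue lock, allocates, retakes the lock and repeats the lookup, because another task may have created the same queue while the lock was dropped; the spare allocation is freed if that race is lost. Below is a hedged user-space sketch of that shape, with made-up names and a toy list standing in for the real hash.

/*
 * Standalone sketch (user space, pthreads): the unlock/allocate/relock/
 * re-lookup retry used by cfq_get_queue() when the allocation may sleep.
 * All names and types here are made up for the illustration.
 */
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>

struct example_queue {
	unsigned int key;
	struct example_queue *next;
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static struct example_queue *table;	/* toy single-bucket "hash" */

static struct example_queue *lookup(unsigned int key)
{
	struct example_queue *q;

	for (q = table; q; q = q->next)
		if (q->key == key)
			return q;
	return NULL;
}

/* caller holds table_lock on entry and on return */
static struct example_queue *get_queue(unsigned int key)
{
	struct example_queue *q, *new_q = NULL;

retry:
	q = lookup(key);
	if (!q) {
		if (new_q) {
			/* use the queue we allocated while unlocked */
			q = new_q;
			new_q = NULL;
		} else {
			/* drop the lock across the (possibly slow) allocation */
			pthread_mutex_unlock(&table_lock);
			new_q = calloc(1, sizeof(*new_q));
			pthread_mutex_lock(&table_lock);
			if (!new_q)
				return NULL;
			/* someone else may have added it meanwhile */
			goto retry;
		}
		q->key = key;
		q->next = table;
		table = q;
	}

	/* drop the spare allocation if the lookup won (free(NULL) is a no-op) */
	free(new_q);
	return q;
}

int main(void)
{
	pthread_mutex_lock(&table_lock);
	printf("got queue for key 42: %p\n", (void *)get_queue(42));
	pthread_mutex_unlock(&table_lock);
	return 0;
}

The non-blocking path of the real function (allocate while still holding the lock and simply fail if that allocation cannot be satisfied) is left out to keep the sketch short.
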
1431 | 1431 | ||
1432 | /* | 1432 | /* |
1433 | * Set up general io context and cfq io context. There can be several cfq | 1433 | * Set up general io context and cfq io context. There can be several cfq |
1434 | * io contexts per general io context, if this process is doing io to more | 1434 | * io contexts per general io context, if this process is doing io to more |
1435 | * than one device managed by cfq. Note that caller is holding a reference to | 1435 | * than one device managed by cfq. Note that caller is holding a reference to |
1436 | * cfqq, so we don't need to worry about it disappearing | 1436 | * cfqq, so we don't need to worry about it disappearing |
1437 | */ | 1437 | */ |
1438 | static struct cfq_io_context * | 1438 | static struct cfq_io_context * |
1439 | cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask) | 1439 | cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask) |
1440 | { | 1440 | { |
1441 | struct io_context *ioc = NULL; | 1441 | struct io_context *ioc = NULL; |
1442 | struct cfq_io_context *cic; | 1442 | struct cfq_io_context *cic; |
1443 | 1443 | ||
1444 | might_sleep_if(gfp_mask & __GFP_WAIT); | 1444 | might_sleep_if(gfp_mask & __GFP_WAIT); |
1445 | 1445 | ||
1446 | ioc = get_io_context(gfp_mask); | 1446 | ioc = get_io_context(gfp_mask); |
1447 | if (!ioc) | 1447 | if (!ioc) |
1448 | return NULL; | 1448 | return NULL; |
1449 | 1449 | ||
1450 | if ((cic = ioc->cic) == NULL) { | 1450 | if ((cic = ioc->cic) == NULL) { |
1451 | cic = cfq_alloc_io_context(cfqd, gfp_mask); | 1451 | cic = cfq_alloc_io_context(cfqd, gfp_mask); |
1452 | 1452 | ||
1453 | if (cic == NULL) | 1453 | if (cic == NULL) |
1454 | goto err; | 1454 | goto err; |
1455 | 1455 | ||
1456 | /* | 1456 | /* |
1457 | * manually increment generic io_context usage count, it | 1457 | * manually increment generic io_context usage count, it |
1458 | * cannot go away since we are already holding one ref to it | 1458 | * cannot go away since we are already holding one ref to it |
1459 | */ | 1459 | */ |
1460 | ioc->cic = cic; | 1460 | ioc->cic = cic; |
1461 | ioc->set_ioprio = cfq_ioc_set_ioprio; | 1461 | ioc->set_ioprio = cfq_ioc_set_ioprio; |
1462 | cic->ioc = ioc; | 1462 | cic->ioc = ioc; |
1463 | cic->key = cfqd; | 1463 | cic->key = cfqd; |
1464 | atomic_inc(&cfqd->ref); | 1464 | atomic_inc(&cfqd->ref); |
1465 | } else { | 1465 | } else { |
1466 | struct cfq_io_context *__cic; | 1466 | struct cfq_io_context *__cic; |
1467 | 1467 | ||
1468 | /* | 1468 | /* |
1469 | * the first cic on the list is actually the head itself | 1469 | * the first cic on the list is actually the head itself |
1470 | */ | 1470 | */ |
1471 | if (cic->key == cfqd) | 1471 | if (cic->key == cfqd) |
1472 | goto out; | 1472 | goto out; |
1473 | 1473 | ||
1474 | /* | 1474 | /* |
1475 | * cic exists, check if we already are there. linear search | 1475 | * cic exists, check if we already are there. linear search |
1476 | * should be ok here, the list will usually not be more than | 1476 | * should be ok here, the list will usually not be more than |
1477 | * 1 or a few entries long | 1477 | * 1 or a few entries long |
1478 | */ | 1478 | */ |
1479 | list_for_each_entry(__cic, &cic->list, list) { | 1479 | list_for_each_entry(__cic, &cic->list, list) { |
1480 | /* | 1480 | /* |
1481 | * this process is already holding a reference to | 1481 | * this process is already holding a reference to |
1482 | * this queue, so no need to get one more | 1482 | * this queue, so no need to get one more |
1483 | */ | 1483 | */ |
1484 | if (__cic->key == cfqd) { | 1484 | if (__cic->key == cfqd) { |
1485 | cic = __cic; | 1485 | cic = __cic; |
1486 | goto out; | 1486 | goto out; |
1487 | } | 1487 | } |
1488 | } | 1488 | } |
1489 | 1489 | ||
1490 | /* | 1490 | /* |
1491 | 		 * nope, process doesn't have a cic associated with this | 1491 | 		 * nope, process doesn't have a cic associated with this |
1492 | * cfqq yet. get a new one and add to list | 1492 | * cfqq yet. get a new one and add to list |
1493 | */ | 1493 | */ |
1494 | __cic = cfq_alloc_io_context(cfqd, gfp_mask); | 1494 | __cic = cfq_alloc_io_context(cfqd, gfp_mask); |
1495 | if (__cic == NULL) | 1495 | if (__cic == NULL) |
1496 | goto err; | 1496 | goto err; |
1497 | 1497 | ||
1498 | __cic->ioc = ioc; | 1498 | __cic->ioc = ioc; |
1499 | __cic->key = cfqd; | 1499 | __cic->key = cfqd; |
1500 | atomic_inc(&cfqd->ref); | 1500 | atomic_inc(&cfqd->ref); |
1501 | list_add(&__cic->list, &cic->list); | 1501 | list_add(&__cic->list, &cic->list); |
1502 | cic = __cic; | 1502 | cic = __cic; |
1503 | } | 1503 | } |
1504 | 1504 | ||
1505 | out: | 1505 | out: |
1506 | return cic; | 1506 | return cic; |
1507 | err: | 1507 | err: |
1508 | put_io_context(ioc); | 1508 | put_io_context(ioc); |
1509 | return NULL; | 1509 | return NULL; |
1510 | } | 1510 | } |
1511 | 1511 | ||
1512 | static void | 1512 | static void |
1513 | cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) | 1513 | cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) |
1514 | { | 1514 | { |
1515 | unsigned long elapsed, ttime; | 1515 | unsigned long elapsed, ttime; |
1516 | 1516 | ||
1517 | /* | 1517 | /* |
1518 | * if this context already has stuff queued, thinktime is from | 1518 | * if this context already has stuff queued, thinktime is from |
1519 | * last queue not last end | 1519 | * last queue not last end |
1520 | */ | 1520 | */ |
1521 | #if 0 | 1521 | #if 0 |
1522 | if (time_after(cic->last_end_request, cic->last_queue)) | 1522 | if (time_after(cic->last_end_request, cic->last_queue)) |
1523 | elapsed = jiffies - cic->last_end_request; | 1523 | elapsed = jiffies - cic->last_end_request; |
1524 | else | 1524 | else |
1525 | elapsed = jiffies - cic->last_queue; | 1525 | elapsed = jiffies - cic->last_queue; |
1526 | #else | 1526 | #else |
1527 | elapsed = jiffies - cic->last_end_request; | 1527 | elapsed = jiffies - cic->last_end_request; |
1528 | #endif | 1528 | #endif |
1529 | 1529 | ||
1530 | ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle); | 1530 | ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle); |
1531 | 1531 | ||
1532 | cic->ttime_samples = (7*cic->ttime_samples + 256) / 8; | 1532 | cic->ttime_samples = (7*cic->ttime_samples + 256) / 8; |
1533 | cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8; | 1533 | cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8; |
1534 | cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples; | 1534 | cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples; |
1535 | } | 1535 | } |
1536 | 1536 | ||
1537 | #define sample_valid(samples) ((samples) > 80) | 1537 | #define sample_valid(samples) ((samples) > 80) |
1538 | 1538 | ||
1539 | /* | 1539 | /* |
1540 | * Disable idle window if the process thinks too long or seeks so much that | 1540 | * Disable idle window if the process thinks too long or seeks so much that |
1541 | * it doesn't matter | 1541 | * it doesn't matter |
1542 | */ | 1542 | */ |
1543 | static void | 1543 | static void |
1544 | cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 1544 | cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
1545 | struct cfq_io_context *cic) | 1545 | struct cfq_io_context *cic) |
1546 | { | 1546 | { |
1547 | int enable_idle = cfq_cfqq_idle_window(cfqq); | 1547 | int enable_idle = cfq_cfqq_idle_window(cfqq); |
1548 | 1548 | ||
1549 | if (!cic->ioc->task || !cfqd->cfq_slice_idle) | 1549 | if (!cic->ioc->task || !cfqd->cfq_slice_idle) |
1550 | enable_idle = 0; | 1550 | enable_idle = 0; |
1551 | else if (sample_valid(cic->ttime_samples)) { | 1551 | else if (sample_valid(cic->ttime_samples)) { |
1552 | if (cic->ttime_mean > cfqd->cfq_slice_idle) | 1552 | if (cic->ttime_mean > cfqd->cfq_slice_idle) |
1553 | enable_idle = 0; | 1553 | enable_idle = 0; |
1554 | else | 1554 | else |
1555 | enable_idle = 1; | 1555 | enable_idle = 1; |
1556 | } | 1556 | } |
1557 | 1557 | ||
1558 | if (enable_idle) | 1558 | if (enable_idle) |
1559 | cfq_mark_cfqq_idle_window(cfqq); | 1559 | cfq_mark_cfqq_idle_window(cfqq); |
1560 | else | 1560 | else |
1561 | cfq_clear_cfqq_idle_window(cfqq); | 1561 | cfq_clear_cfqq_idle_window(cfqq); |
1562 | } | 1562 | } |
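
cfq_update_io_thinktime() keeps a fixed-point, exponentially decayed average of how long the process "thinks" between requests, and cfq_update_idle_window() then trusts that mean only once enough weight has accumulated (the sample_valid() test). The sketch below reproduces the same decay in user space; the clamp value and the think-time samples fed in are invented for the illustration.

/*
 * Standalone sketch (user space): the 7/8 exponential decay used by
 * cfq_update_io_thinktime(), with 256 as the fixed-point weight and the
 * same rounded mean.  The clamp and the think-time samples are made up
 * for the example.
 */
#include <stdio.h>

struct ttime_stats {
	unsigned long samples;	/* decayed sample weight, fixed point */
	unsigned long total;	/* decayed total think time */
	unsigned long mean;	/* rounded mean think time */
};

static void update_thinktime(struct ttime_stats *ts, unsigned long elapsed,
			     unsigned long clamp)
{
	unsigned long ttime = elapsed < clamp ? elapsed : clamp;

	ts->samples = (7 * ts->samples + 256) / 8;
	ts->total = (7 * ts->total + 256 * ttime) / 8;
	ts->mean = (ts->total + 128) / ts->samples;
}

int main(void)
{
	/* invented think times between requests, in ticks */
	const unsigned long think[] = { 2, 3, 1, 40, 2, 2, 3 };
	const unsigned long clamp = 20;		/* assumed 2 * slice_idle */
	struct ttime_stats ts = { 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < sizeof(think) / sizeof(think[0]); i++) {
		update_thinktime(&ts, think[i], clamp);
		printf("sample %2lu -> weight %3lu, mean %2lu, trusted: %s\n",
		       think[i], ts.samples, ts.mean,
		       ts.samples > 80 ? "yes" : "not yet");
	}

	return 0;
}

With this decay the weight crosses the sample_valid() threshold of 80 after the third update, so the idle-window decision starts reacting to the measured mean fairly quickly.
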
1563 | 1563 | ||
1564 | 1564 | ||
1565 | /* | 1565 | /* |
1566 | * Check if new_cfqq should preempt the currently active queue. Return 0 for | 1566 | * Check if new_cfqq should preempt the currently active queue. Return 0 for |
1567 | * no or if we aren't sure; a 1 will cause a preempt. | 1567 | * no or if we aren't sure; a 1 will cause a preempt. |
1568 | */ | 1568 | */ |
1569 | static int | 1569 | static int |
1570 | cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, | 1570 | cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, |
1571 | struct cfq_rq *crq) | 1571 | struct cfq_rq *crq) |
1572 | { | 1572 | { |
1573 | struct cfq_queue *cfqq = cfqd->active_queue; | 1573 | struct cfq_queue *cfqq = cfqd->active_queue; |
1574 | 1574 | ||
1575 | if (cfq_class_idle(new_cfqq)) | 1575 | if (cfq_class_idle(new_cfqq)) |
1576 | return 0; | 1576 | return 0; |
1577 | 1577 | ||
1578 | if (!cfqq) | 1578 | if (!cfqq) |
1579 | return 1; | 1579 | return 1; |
1580 | 1580 | ||
1581 | if (cfq_class_idle(cfqq)) | 1581 | if (cfq_class_idle(cfqq)) |
1582 | return 1; | 1582 | return 1; |
1583 | if (!cfq_cfqq_wait_request(new_cfqq)) | 1583 | if (!cfq_cfqq_wait_request(new_cfqq)) |
1584 | return 0; | 1584 | return 0; |
1585 | /* | 1585 | /* |
1586 | * if it doesn't have slice left, forget it | 1586 | * if it doesn't have slice left, forget it |
1587 | */ | 1587 | */ |
1588 | if (new_cfqq->slice_left < cfqd->cfq_slice_idle) | 1588 | if (new_cfqq->slice_left < cfqd->cfq_slice_idle) |
1589 | return 0; | 1589 | return 0; |
1590 | if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq)) | 1590 | if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq)) |
1591 | return 1; | 1591 | return 1; |
1592 | 1592 | ||
1593 | return 0; | 1593 | return 0; |
1594 | } | 1594 | } |
1595 | 1595 | ||
1596 | /* | 1596 | /* |
1597 | * cfqq preempts the active queue. if we allowed preempt with no slice left, | 1597 | * cfqq preempts the active queue. if we allowed preempt with no slice left, |
1598 | * let it have half of its nominal slice. | 1598 | * let it have half of its nominal slice. |
1599 | */ | 1599 | */ |
1600 | static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 1600 | static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
1601 | { | 1601 | { |
1602 | struct cfq_queue *__cfqq, *next; | 1602 | struct cfq_queue *__cfqq, *next; |
1603 | 1603 | ||
1604 | list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list) | 1604 | list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list) |
1605 | cfq_resort_rr_list(__cfqq, 1); | 1605 | cfq_resort_rr_list(__cfqq, 1); |
1606 | 1606 | ||
1607 | if (!cfqq->slice_left) | 1607 | if (!cfqq->slice_left) |
1608 | cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2; | 1608 | cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2; |
1609 | 1609 | ||
1610 | cfqq->slice_end = cfqq->slice_left + jiffies; | 1610 | cfqq->slice_end = cfqq->slice_left + jiffies; |
1611 | __cfq_slice_expired(cfqd, cfqq, 1); | 1611 | __cfq_slice_expired(cfqd, cfqq, 1); |
1612 | __cfq_set_active_queue(cfqd, cfqq); | 1612 | __cfq_set_active_queue(cfqd, cfqq); |
1613 | } | 1613 | } |
1614 | 1614 | ||
1615 | /* | 1615 | /* |
1616 | * should really be a ll_rw_blk.c helper | 1616 | * should really be a ll_rw_blk.c helper |
1617 | */ | 1617 | */ |
1618 | static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 1618 | static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
1619 | { | 1619 | { |
1620 | request_queue_t *q = cfqd->queue; | 1620 | request_queue_t *q = cfqd->queue; |
1621 | 1621 | ||
1622 | if (!blk_queue_plugged(q)) | 1622 | if (!blk_queue_plugged(q)) |
1623 | q->request_fn(q); | 1623 | q->request_fn(q); |
1624 | else | 1624 | else |
1625 | __generic_unplug_device(q); | 1625 | __generic_unplug_device(q); |
1626 | } | 1626 | } |
1627 | 1627 | ||
1628 | /* | 1628 | /* |
1629 | * Called when a new fs request (crq) is added (to cfqq). Check if there's | 1629 | * Called when a new fs request (crq) is added (to cfqq). Check if there's |
1630 | * something we should do about it | 1630 | * something we should do about it |
1631 | */ | 1631 | */ |
1632 | static void | 1632 | static void |
1633 | cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 1633 | cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
1634 | struct cfq_rq *crq) | 1634 | struct cfq_rq *crq) |
1635 | { | 1635 | { |
1636 | struct cfq_io_context *cic; | 1636 | struct cfq_io_context *cic; |
1637 | 1637 | ||
1638 | cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); | 1638 | cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); |
1639 | 1639 | ||
1640 | /* | 1640 | /* |
1641 | * we never wait for an async request and we don't allow preemption | 1641 | * we never wait for an async request and we don't allow preemption |
1642 | * of an async request. so just return early | 1642 | * of an async request. so just return early |
1643 | */ | 1643 | */ |
1644 | if (!cfq_crq_is_sync(crq)) | 1644 | if (!cfq_crq_is_sync(crq)) |
1645 | return; | 1645 | return; |
1646 | 1646 | ||
1647 | cic = crq->io_context; | 1647 | cic = crq->io_context; |
1648 | 1648 | ||
1649 | cfq_update_io_thinktime(cfqd, cic); | 1649 | cfq_update_io_thinktime(cfqd, cic); |
1650 | cfq_update_idle_window(cfqd, cfqq, cic); | 1650 | cfq_update_idle_window(cfqd, cfqq, cic); |
1651 | 1651 | ||
1652 | cic->last_queue = jiffies; | 1652 | cic->last_queue = jiffies; |
1653 | 1653 | ||
1654 | if (cfqq == cfqd->active_queue) { | 1654 | if (cfqq == cfqd->active_queue) { |
1655 | /* | 1655 | /* |
1656 | * if we are waiting for a request for this queue, let it rip | 1656 | * if we are waiting for a request for this queue, let it rip |
1657 | * immediately and flag that we must not expire this queue | 1657 | * immediately and flag that we must not expire this queue |
1658 | * just now | 1658 | * just now |
1659 | */ | 1659 | */ |
1660 | if (cfq_cfqq_wait_request(cfqq)) { | 1660 | if (cfq_cfqq_wait_request(cfqq)) { |
1661 | cfq_mark_cfqq_must_dispatch(cfqq); | 1661 | cfq_mark_cfqq_must_dispatch(cfqq); |
1662 | del_timer(&cfqd->idle_slice_timer); | 1662 | del_timer(&cfqd->idle_slice_timer); |
1663 | cfq_start_queueing(cfqd, cfqq); | 1663 | cfq_start_queueing(cfqd, cfqq); |
1664 | } | 1664 | } |
1665 | } else if (cfq_should_preempt(cfqd, cfqq, crq)) { | 1665 | } else if (cfq_should_preempt(cfqd, cfqq, crq)) { |
1666 | /* | 1666 | /* |
1667 | * not the active queue - expire current slice if it is | 1667 | * not the active queue - expire current slice if it is |
1668 | 		 * idle and has expired its mean thinktime, or this new queue | 1668 | 		 * idle and has expired its mean thinktime, or this new queue |
1669 | * has some old slice time left and is of higher priority | 1669 | * has some old slice time left and is of higher priority |
1670 | */ | 1670 | */ |
1671 | cfq_preempt_queue(cfqd, cfqq); | 1671 | cfq_preempt_queue(cfqd, cfqq); |
1672 | cfq_mark_cfqq_must_dispatch(cfqq); | 1672 | cfq_mark_cfqq_must_dispatch(cfqq); |
1673 | cfq_start_queueing(cfqd, cfqq); | 1673 | cfq_start_queueing(cfqd, cfqq); |
1674 | } | 1674 | } |
1675 | } | 1675 | } |
1676 | 1676 | ||
1677 | static void cfq_insert_request(request_queue_t *q, struct request *rq) | 1677 | static void cfq_insert_request(request_queue_t *q, struct request *rq) |
1678 | { | 1678 | { |
1679 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1679 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1680 | struct cfq_rq *crq = RQ_DATA(rq); | 1680 | struct cfq_rq *crq = RQ_DATA(rq); |
1681 | struct cfq_queue *cfqq = crq->cfq_queue; | 1681 | struct cfq_queue *cfqq = crq->cfq_queue; |
1682 | 1682 | ||
1683 | cfq_init_prio_data(cfqq); | 1683 | cfq_init_prio_data(cfqq); |
1684 | 1684 | ||
1685 | cfq_add_crq_rb(crq); | 1685 | cfq_add_crq_rb(crq); |
1686 | 1686 | ||
1687 | list_add_tail(&rq->queuelist, &cfqq->fifo); | 1687 | list_add_tail(&rq->queuelist, &cfqq->fifo); |
1688 | 1688 | ||
1689 | if (rq_mergeable(rq)) | 1689 | if (rq_mergeable(rq)) |
1690 | cfq_add_crq_hash(cfqd, crq); | 1690 | cfq_add_crq_hash(cfqd, crq); |
1691 | 1691 | ||
1692 | cfq_crq_enqueued(cfqd, cfqq, crq); | 1692 | cfq_crq_enqueued(cfqd, cfqq, crq); |
1693 | } | 1693 | } |
1694 | 1694 | ||
1695 | static void cfq_completed_request(request_queue_t *q, struct request *rq) | 1695 | static void cfq_completed_request(request_queue_t *q, struct request *rq) |
1696 | { | 1696 | { |
1697 | struct cfq_rq *crq = RQ_DATA(rq); | 1697 | struct cfq_rq *crq = RQ_DATA(rq); |
1698 | struct cfq_queue *cfqq = crq->cfq_queue; | 1698 | struct cfq_queue *cfqq = crq->cfq_queue; |
1699 | struct cfq_data *cfqd = cfqq->cfqd; | 1699 | struct cfq_data *cfqd = cfqq->cfqd; |
1700 | const int sync = cfq_crq_is_sync(crq); | 1700 | const int sync = cfq_crq_is_sync(crq); |
1701 | unsigned long now; | 1701 | unsigned long now; |
1702 | 1702 | ||
1703 | now = jiffies; | 1703 | now = jiffies; |
1704 | 1704 | ||
1705 | WARN_ON(!cfqd->rq_in_driver); | 1705 | WARN_ON(!cfqd->rq_in_driver); |
1706 | WARN_ON(!cfqq->on_dispatch[sync]); | 1706 | WARN_ON(!cfqq->on_dispatch[sync]); |
1707 | cfqd->rq_in_driver--; | 1707 | cfqd->rq_in_driver--; |
1708 | cfqq->on_dispatch[sync]--; | 1708 | cfqq->on_dispatch[sync]--; |
1709 | 1709 | ||
1710 | if (!cfq_class_idle(cfqq)) | 1710 | if (!cfq_class_idle(cfqq)) |
1711 | cfqd->last_end_request = now; | 1711 | cfqd->last_end_request = now; |
1712 | 1712 | ||
1713 | if (!cfq_cfqq_dispatched(cfqq)) { | 1713 | if (!cfq_cfqq_dispatched(cfqq)) { |
1714 | if (cfq_cfqq_on_rr(cfqq)) { | 1714 | if (cfq_cfqq_on_rr(cfqq)) { |
1715 | cfqq->service_last = now; | 1715 | cfqq->service_last = now; |
1716 | cfq_resort_rr_list(cfqq, 0); | 1716 | cfq_resort_rr_list(cfqq, 0); |
1717 | } | 1717 | } |
1718 | if (cfq_cfqq_expired(cfqq)) { | 1718 | if (cfq_cfqq_expired(cfqq)) { |
1719 | __cfq_slice_expired(cfqd, cfqq, 0); | 1719 | __cfq_slice_expired(cfqd, cfqq, 0); |
1720 | cfq_schedule_dispatch(cfqd); | 1720 | cfq_schedule_dispatch(cfqd); |
1721 | } | 1721 | } |
1722 | } | 1722 | } |
1723 | 1723 | ||
1724 | if (cfq_crq_is_sync(crq)) | 1724 | if (cfq_crq_is_sync(crq)) |
1725 | crq->io_context->last_end_request = now; | 1725 | crq->io_context->last_end_request = now; |
1726 | } | 1726 | } |
1727 | 1727 | ||
1728 | static struct request * | 1728 | static struct request * |
1729 | cfq_former_request(request_queue_t *q, struct request *rq) | 1729 | cfq_former_request(request_queue_t *q, struct request *rq) |
1730 | { | 1730 | { |
1731 | struct cfq_rq *crq = RQ_DATA(rq); | 1731 | struct cfq_rq *crq = RQ_DATA(rq); |
1732 | struct rb_node *rbprev = rb_prev(&crq->rb_node); | 1732 | struct rb_node *rbprev = rb_prev(&crq->rb_node); |
1733 | 1733 | ||
1734 | if (rbprev) | 1734 | if (rbprev) |
1735 | return rb_entry_crq(rbprev)->request; | 1735 | return rb_entry_crq(rbprev)->request; |
1736 | 1736 | ||
1737 | return NULL; | 1737 | return NULL; |
1738 | } | 1738 | } |
1739 | 1739 | ||
1740 | static struct request * | 1740 | static struct request * |
1741 | cfq_latter_request(request_queue_t *q, struct request *rq) | 1741 | cfq_latter_request(request_queue_t *q, struct request *rq) |
1742 | { | 1742 | { |
1743 | struct cfq_rq *crq = RQ_DATA(rq); | 1743 | struct cfq_rq *crq = RQ_DATA(rq); |
1744 | struct rb_node *rbnext = rb_next(&crq->rb_node); | 1744 | struct rb_node *rbnext = rb_next(&crq->rb_node); |
1745 | 1745 | ||
1746 | if (rbnext) | 1746 | if (rbnext) |
1747 | return rb_entry_crq(rbnext)->request; | 1747 | return rb_entry_crq(rbnext)->request; |
1748 | 1748 | ||
1749 | return NULL; | 1749 | return NULL; |
1750 | } | 1750 | } |
1751 | 1751 | ||
1752 | /* | 1752 | /* |
1753 | * we temporarily boost lower priority queues if they are holding fs exclusive | 1753 | * we temporarily boost lower priority queues if they are holding fs exclusive |
1754 | * resources. they are boosted to normal prio (CLASS_BE/4) | 1754 | * resources. they are boosted to normal prio (CLASS_BE/4) |
1755 | */ | 1755 | */ |
1756 | static void cfq_prio_boost(struct cfq_queue *cfqq) | 1756 | static void cfq_prio_boost(struct cfq_queue *cfqq) |
1757 | { | 1757 | { |
1758 | const int ioprio_class = cfqq->ioprio_class; | 1758 | const int ioprio_class = cfqq->ioprio_class; |
1759 | const int ioprio = cfqq->ioprio; | 1759 | const int ioprio = cfqq->ioprio; |
1760 | 1760 | ||
1761 | if (has_fs_excl()) { | 1761 | if (has_fs_excl()) { |
1762 | /* | 1762 | /* |
1763 | * boost idle prio on transactions that would lock out other | 1763 | * boost idle prio on transactions that would lock out other |
1764 | * users of the filesystem | 1764 | * users of the filesystem |
1765 | */ | 1765 | */ |
1766 | if (cfq_class_idle(cfqq)) | 1766 | if (cfq_class_idle(cfqq)) |
1767 | cfqq->ioprio_class = IOPRIO_CLASS_BE; | 1767 | cfqq->ioprio_class = IOPRIO_CLASS_BE; |
1768 | if (cfqq->ioprio > IOPRIO_NORM) | 1768 | if (cfqq->ioprio > IOPRIO_NORM) |
1769 | cfqq->ioprio = IOPRIO_NORM; | 1769 | cfqq->ioprio = IOPRIO_NORM; |
1770 | } else { | 1770 | } else { |
1771 | /* | 1771 | /* |
1772 | * check if we need to unboost the queue | 1772 | * check if we need to unboost the queue |
1773 | */ | 1773 | */ |
1774 | if (cfqq->ioprio_class != cfqq->org_ioprio_class) | 1774 | if (cfqq->ioprio_class != cfqq->org_ioprio_class) |
1775 | cfqq->ioprio_class = cfqq->org_ioprio_class; | 1775 | cfqq->ioprio_class = cfqq->org_ioprio_class; |
1776 | if (cfqq->ioprio != cfqq->org_ioprio) | 1776 | if (cfqq->ioprio != cfqq->org_ioprio) |
1777 | cfqq->ioprio = cfqq->org_ioprio; | 1777 | cfqq->ioprio = cfqq->org_ioprio; |
1778 | } | 1778 | } |
1779 | 1779 | ||
1780 | /* | 1780 | /* |
1781 | * refile between round-robin lists if we moved the priority class | 1781 | * refile between round-robin lists if we moved the priority class |
1782 | */ | 1782 | */ |
1783 | if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio) && | 1783 | if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio) && |
1784 | cfq_cfqq_on_rr(cfqq)) | 1784 | cfq_cfqq_on_rr(cfqq)) |
1785 | cfq_resort_rr_list(cfqq, 0); | 1785 | cfq_resort_rr_list(cfqq, 0); |
1786 | } | 1786 | } |
1787 | 1787 | ||
1788 | static inline pid_t cfq_queue_pid(struct task_struct *task, int rw) | 1788 | static inline pid_t cfq_queue_pid(struct task_struct *task, int rw) |
1789 | { | 1789 | { |
1790 | if (rw == READ || process_sync(task)) | 1790 | if (rw == READ || process_sync(task)) |
1791 | return task->pid; | 1791 | return task->pid; |
1792 | 1792 | ||
1793 | return CFQ_KEY_ASYNC; | 1793 | return CFQ_KEY_ASYNC; |
1794 | } | 1794 | } |
1795 | 1795 | ||
1796 | static inline int | 1796 | static inline int |
1797 | __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 1797 | __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
1798 | struct task_struct *task, int rw) | 1798 | struct task_struct *task, int rw) |
1799 | { | 1799 | { |
1800 | #if 1 | 1800 | #if 1 |
1801 | if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) && | 1801 | if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) && |
1802 | !cfq_cfqq_must_alloc_slice(cfqq)) { | 1802 | !cfq_cfqq_must_alloc_slice(cfqq)) { |
1803 | cfq_mark_cfqq_must_alloc_slice(cfqq); | 1803 | cfq_mark_cfqq_must_alloc_slice(cfqq); |
1804 | return ELV_MQUEUE_MUST; | 1804 | return ELV_MQUEUE_MUST; |
1805 | } | 1805 | } |
1806 | 1806 | ||
1807 | return ELV_MQUEUE_MAY; | 1807 | return ELV_MQUEUE_MAY; |
1808 | #else | 1808 | #else |
1809 | if (!cfqq || task->flags & PF_MEMALLOC) | 1809 | if (!cfqq || task->flags & PF_MEMALLOC) |
1810 | return ELV_MQUEUE_MAY; | 1810 | return ELV_MQUEUE_MAY; |
1811 | if (!cfqq->allocated[rw] || cfq_cfqq_must_alloc(cfqq)) { | 1811 | if (!cfqq->allocated[rw] || cfq_cfqq_must_alloc(cfqq)) { |
1812 | if (cfq_cfqq_wait_request(cfqq)) | 1812 | if (cfq_cfqq_wait_request(cfqq)) |
1813 | return ELV_MQUEUE_MUST; | 1813 | return ELV_MQUEUE_MUST; |
1814 | 1814 | ||
1815 | /* | 1815 | /* |
1816 | * only allow 1 ELV_MQUEUE_MUST per slice, otherwise we | 1816 | * only allow 1 ELV_MQUEUE_MUST per slice, otherwise we |
1817 | * can quickly flood the queue with writes from a single task | 1817 | * can quickly flood the queue with writes from a single task |
1818 | */ | 1818 | */ |
1819 | if (rw == READ || !cfq_cfqq_must_alloc_slice(cfqq)) { | 1819 | if (rw == READ || !cfq_cfqq_must_alloc_slice(cfqq)) { |
1820 | cfq_mark_cfqq_must_alloc_slice(cfqq); | 1820 | cfq_mark_cfqq_must_alloc_slice(cfqq); |
1821 | return ELV_MQUEUE_MUST; | 1821 | return ELV_MQUEUE_MUST; |
1822 | } | 1822 | } |
1823 | 1823 | ||
1824 | return ELV_MQUEUE_MAY; | 1824 | return ELV_MQUEUE_MAY; |
1825 | } | 1825 | } |
1826 | if (cfq_class_idle(cfqq)) | 1826 | if (cfq_class_idle(cfqq)) |
1827 | return ELV_MQUEUE_NO; | 1827 | return ELV_MQUEUE_NO; |
1828 | if (cfqq->allocated[rw] >= cfqd->max_queued) { | 1828 | if (cfqq->allocated[rw] >= cfqd->max_queued) { |
1829 | struct io_context *ioc = get_io_context(GFP_ATOMIC); | 1829 | struct io_context *ioc = get_io_context(GFP_ATOMIC); |
1830 | int ret = ELV_MQUEUE_NO; | 1830 | int ret = ELV_MQUEUE_NO; |
1831 | 1831 | ||
1832 | if (ioc && ioc->nr_batch_requests) | 1832 | if (ioc && ioc->nr_batch_requests) |
1833 | ret = ELV_MQUEUE_MAY; | 1833 | ret = ELV_MQUEUE_MAY; |
1834 | 1834 | ||
1835 | put_io_context(ioc); | 1835 | put_io_context(ioc); |
1836 | return ret; | 1836 | return ret; |
1837 | } | 1837 | } |
1838 | 1838 | ||
1839 | return ELV_MQUEUE_MAY; | 1839 | return ELV_MQUEUE_MAY; |
1840 | #endif | 1840 | #endif |
1841 | } | 1841 | } |
1842 | 1842 | ||
1843 | static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio) | 1843 | static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio) |
1844 | { | 1844 | { |
1845 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1845 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1846 | struct task_struct *tsk = current; | 1846 | struct task_struct *tsk = current; |
1847 | struct cfq_queue *cfqq; | 1847 | struct cfq_queue *cfqq; |
1848 | 1848 | ||
1849 | /* | 1849 | /* |
1850 | * don't force setup of a queue from here, as a call to may_queue | 1850 | * don't force setup of a queue from here, as a call to may_queue |
1851 | * does not necessarily imply that a request actually will be queued. | 1851 | * does not necessarily imply that a request actually will be queued. |
1852 | 	 * so just look up a possibly existing queue, or return 'may queue' | 1852 | 	 * so just look up a possibly existing queue, or return 'may queue' |
1853 | * if that fails | 1853 | * if that fails |
1854 | */ | 1854 | */ |
1855 | cfqq = cfq_find_cfq_hash(cfqd, cfq_queue_pid(tsk, rw), tsk->ioprio); | 1855 | cfqq = cfq_find_cfq_hash(cfqd, cfq_queue_pid(tsk, rw), tsk->ioprio); |
1856 | if (cfqq) { | 1856 | if (cfqq) { |
1857 | cfq_init_prio_data(cfqq); | 1857 | cfq_init_prio_data(cfqq); |
1858 | cfq_prio_boost(cfqq); | 1858 | cfq_prio_boost(cfqq); |
1859 | 1859 | ||
1860 | return __cfq_may_queue(cfqd, cfqq, tsk, rw); | 1860 | return __cfq_may_queue(cfqd, cfqq, tsk, rw); |
1861 | } | 1861 | } |
1862 | 1862 | ||
1863 | return ELV_MQUEUE_MAY; | 1863 | return ELV_MQUEUE_MAY; |
1864 | } | 1864 | } |
1865 | 1865 | ||
1866 | static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq) | 1866 | static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq) |
1867 | { | 1867 | { |
1868 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1868 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1869 | struct request_list *rl = &q->rq; | 1869 | struct request_list *rl = &q->rq; |
1870 | 1870 | ||
1871 | if (cfqq->allocated[READ] <= cfqd->max_queued || cfqd->rq_starved) { | 1871 | if (cfqq->allocated[READ] <= cfqd->max_queued || cfqd->rq_starved) { |
1872 | smp_mb(); | 1872 | smp_mb(); |
1873 | if (waitqueue_active(&rl->wait[READ])) | 1873 | if (waitqueue_active(&rl->wait[READ])) |
1874 | wake_up(&rl->wait[READ]); | 1874 | wake_up(&rl->wait[READ]); |
1875 | } | 1875 | } |
1876 | 1876 | ||
1877 | if (cfqq->allocated[WRITE] <= cfqd->max_queued || cfqd->rq_starved) { | 1877 | if (cfqq->allocated[WRITE] <= cfqd->max_queued || cfqd->rq_starved) { |
1878 | smp_mb(); | 1878 | smp_mb(); |
1879 | if (waitqueue_active(&rl->wait[WRITE])) | 1879 | if (waitqueue_active(&rl->wait[WRITE])) |
1880 | wake_up(&rl->wait[WRITE]); | 1880 | wake_up(&rl->wait[WRITE]); |
1881 | } | 1881 | } |
1882 | } | 1882 | } |
1883 | 1883 | ||
1884 | /* | 1884 | /* |
1885 | * queue lock held here | 1885 | * queue lock held here |
1886 | */ | 1886 | */ |
1887 | static void cfq_put_request(request_queue_t *q, struct request *rq) | 1887 | static void cfq_put_request(request_queue_t *q, struct request *rq) |
1888 | { | 1888 | { |
1889 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1889 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1890 | struct cfq_rq *crq = RQ_DATA(rq); | 1890 | struct cfq_rq *crq = RQ_DATA(rq); |
1891 | 1891 | ||
1892 | if (crq) { | 1892 | if (crq) { |
1893 | struct cfq_queue *cfqq = crq->cfq_queue; | 1893 | struct cfq_queue *cfqq = crq->cfq_queue; |
1894 | const int rw = rq_data_dir(rq); | 1894 | const int rw = rq_data_dir(rq); |
1895 | 1895 | ||
1896 | BUG_ON(!cfqq->allocated[rw]); | 1896 | BUG_ON(!cfqq->allocated[rw]); |
1897 | cfqq->allocated[rw]--; | 1897 | cfqq->allocated[rw]--; |
1898 | 1898 | ||
1899 | put_io_context(crq->io_context->ioc); | 1899 | put_io_context(crq->io_context->ioc); |
1900 | 1900 | ||
1901 | mempool_free(crq, cfqd->crq_pool); | 1901 | mempool_free(crq, cfqd->crq_pool); |
1902 | rq->elevator_private = NULL; | 1902 | rq->elevator_private = NULL; |
1903 | 1903 | ||
1904 | cfq_check_waiters(q, cfqq); | 1904 | cfq_check_waiters(q, cfqq); |
1905 | cfq_put_queue(cfqq); | 1905 | cfq_put_queue(cfqq); |
1906 | } | 1906 | } |
1907 | } | 1907 | } |
1908 | 1908 | ||
1909 | /* | 1909 | /* |
1910 | * Allocate cfq data structures associated with this request. | 1910 | * Allocate cfq data structures associated with this request. |
1911 | */ | 1911 | */ |
1912 | static int | 1912 | static int |
1913 | cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, | 1913 | cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, |
1914 | gfp_t gfp_mask) | 1914 | gfp_t gfp_mask) |
1915 | { | 1915 | { |
1916 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1916 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1917 | struct task_struct *tsk = current; | 1917 | struct task_struct *tsk = current; |
1918 | struct cfq_io_context *cic; | 1918 | struct cfq_io_context *cic; |
1919 | const int rw = rq_data_dir(rq); | 1919 | const int rw = rq_data_dir(rq); |
1920 | pid_t key = cfq_queue_pid(tsk, rw); | 1920 | pid_t key = cfq_queue_pid(tsk, rw); |
1921 | struct cfq_queue *cfqq; | 1921 | struct cfq_queue *cfqq; |
1922 | struct cfq_rq *crq; | 1922 | struct cfq_rq *crq; |
1923 | unsigned long flags; | 1923 | unsigned long flags; |
1924 | 1924 | ||
1925 | might_sleep_if(gfp_mask & __GFP_WAIT); | 1925 | might_sleep_if(gfp_mask & __GFP_WAIT); |
1926 | 1926 | ||
1927 | cic = cfq_get_io_context(cfqd, key, gfp_mask); | 1927 | cic = cfq_get_io_context(cfqd, key, gfp_mask); |
1928 | 1928 | ||
1929 | spin_lock_irqsave(q->queue_lock, flags); | 1929 | spin_lock_irqsave(q->queue_lock, flags); |
1930 | 1930 | ||
1931 | if (!cic) | 1931 | if (!cic) |
1932 | goto queue_fail; | 1932 | goto queue_fail; |
1933 | 1933 | ||
1934 | if (!cic->cfqq) { | 1934 | if (!cic->cfqq) { |
1935 | cfqq = cfq_get_queue(cfqd, key, tsk->ioprio, gfp_mask); | 1935 | cfqq = cfq_get_queue(cfqd, key, tsk->ioprio, gfp_mask); |
1936 | if (!cfqq) | 1936 | if (!cfqq) |
1937 | goto queue_fail; | 1937 | goto queue_fail; |
1938 | 1938 | ||
1939 | cic->cfqq = cfqq; | 1939 | cic->cfqq = cfqq; |
1940 | } else | 1940 | } else |
1941 | cfqq = cic->cfqq; | 1941 | cfqq = cic->cfqq; |
1942 | 1942 | ||
1943 | cfqq->allocated[rw]++; | 1943 | cfqq->allocated[rw]++; |
1944 | cfq_clear_cfqq_must_alloc(cfqq); | 1944 | cfq_clear_cfqq_must_alloc(cfqq); |
1945 | cfqd->rq_starved = 0; | 1945 | cfqd->rq_starved = 0; |
1946 | atomic_inc(&cfqq->ref); | 1946 | atomic_inc(&cfqq->ref); |
1947 | spin_unlock_irqrestore(q->queue_lock, flags); | 1947 | spin_unlock_irqrestore(q->queue_lock, flags); |
1948 | 1948 | ||
1949 | crq = mempool_alloc(cfqd->crq_pool, gfp_mask); | 1949 | crq = mempool_alloc(cfqd->crq_pool, gfp_mask); |
1950 | if (crq) { | 1950 | if (crq) { |
1951 | RB_CLEAR(&crq->rb_node); | 1951 | RB_CLEAR(&crq->rb_node); |
1952 | crq->rb_key = 0; | 1952 | crq->rb_key = 0; |
1953 | crq->request = rq; | 1953 | crq->request = rq; |
1954 | INIT_HLIST_NODE(&crq->hash); | 1954 | INIT_HLIST_NODE(&crq->hash); |
1955 | crq->cfq_queue = cfqq; | 1955 | crq->cfq_queue = cfqq; |
1956 | crq->io_context = cic; | 1956 | crq->io_context = cic; |
1957 | 1957 | ||
1958 | if (rw == READ || process_sync(tsk)) | 1958 | if (rw == READ || process_sync(tsk)) |
1959 | cfq_mark_crq_is_sync(crq); | 1959 | cfq_mark_crq_is_sync(crq); |
1960 | else | 1960 | else |
1961 | cfq_clear_crq_is_sync(crq); | 1961 | cfq_clear_crq_is_sync(crq); |
1962 | 1962 | ||
1963 | rq->elevator_private = crq; | 1963 | rq->elevator_private = crq; |
1964 | return 0; | 1964 | return 0; |
1965 | } | 1965 | } |
1966 | 1966 | ||
1967 | spin_lock_irqsave(q->queue_lock, flags); | 1967 | spin_lock_irqsave(q->queue_lock, flags); |
1968 | cfqq->allocated[rw]--; | 1968 | cfqq->allocated[rw]--; |
1969 | if (!(cfqq->allocated[0] + cfqq->allocated[1])) | 1969 | if (!(cfqq->allocated[0] + cfqq->allocated[1])) |
1970 | cfq_mark_cfqq_must_alloc(cfqq); | 1970 | cfq_mark_cfqq_must_alloc(cfqq); |
1971 | cfq_put_queue(cfqq); | 1971 | cfq_put_queue(cfqq); |
1972 | queue_fail: | 1972 | queue_fail: |
1973 | if (cic) | 1973 | if (cic) |
1974 | put_io_context(cic->ioc); | 1974 | put_io_context(cic->ioc); |
1975 | /* | 1975 | /* |
1976 | * mark us rq allocation starved. we need to kickstart the process | 1976 | * mark us rq allocation starved. we need to kickstart the process |
1977 | * ourselves if there are no pending requests that can do it for us. | 1977 | * ourselves if there are no pending requests that can do it for us. |
1978 | * that would be an extremely rare OOM situation | 1978 | * that would be an extremely rare OOM situation |
1979 | */ | 1979 | */ |
1980 | cfqd->rq_starved = 1; | 1980 | cfqd->rq_starved = 1; |
1981 | cfq_schedule_dispatch(cfqd); | 1981 | cfq_schedule_dispatch(cfqd); |
1982 | spin_unlock_irqrestore(q->queue_lock, flags); | 1982 | spin_unlock_irqrestore(q->queue_lock, flags); |
1983 | return 1; | 1983 | return 1; |
1984 | } | 1984 | } |
1985 | 1985 | ||
1986 | static void cfq_kick_queue(void *data) | 1986 | static void cfq_kick_queue(void *data) |
1987 | { | 1987 | { |
1988 | request_queue_t *q = data; | 1988 | request_queue_t *q = data; |
1989 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1989 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1990 | unsigned long flags; | 1990 | unsigned long flags; |
1991 | 1991 | ||
1992 | spin_lock_irqsave(q->queue_lock, flags); | 1992 | spin_lock_irqsave(q->queue_lock, flags); |
1993 | 1993 | ||
1994 | if (cfqd->rq_starved) { | 1994 | if (cfqd->rq_starved) { |
1995 | struct request_list *rl = &q->rq; | 1995 | struct request_list *rl = &q->rq; |
1996 | 1996 | ||
1997 | /* | 1997 | /* |
1998 | * we aren't guaranteed to get a request after this, but we | 1998 | * we aren't guaranteed to get a request after this, but we |
1999 | * have to be opportunistic | 1999 | * have to be opportunistic |
2000 | */ | 2000 | */ |
2001 | smp_mb(); | 2001 | smp_mb(); |
2002 | if (waitqueue_active(&rl->wait[READ])) | 2002 | if (waitqueue_active(&rl->wait[READ])) |
2003 | wake_up(&rl->wait[READ]); | 2003 | wake_up(&rl->wait[READ]); |
2004 | if (waitqueue_active(&rl->wait[WRITE])) | 2004 | if (waitqueue_active(&rl->wait[WRITE])) |
2005 | wake_up(&rl->wait[WRITE]); | 2005 | wake_up(&rl->wait[WRITE]); |
2006 | } | 2006 | } |
2007 | 2007 | ||
2008 | blk_remove_plug(q); | 2008 | blk_remove_plug(q); |
2009 | q->request_fn(q); | 2009 | q->request_fn(q); |
2010 | spin_unlock_irqrestore(q->queue_lock, flags); | 2010 | spin_unlock_irqrestore(q->queue_lock, flags); |
2011 | } | 2011 | } |
2012 | 2012 | ||
2013 | /* | 2013 | /* |
2014 | * Timer running if the active_queue is currently idling inside its time slice | 2014 | * Timer running if the active_queue is currently idling inside its time slice |
2015 | */ | 2015 | */ |
2016 | static void cfq_idle_slice_timer(unsigned long data) | 2016 | static void cfq_idle_slice_timer(unsigned long data) |
2017 | { | 2017 | { |
2018 | struct cfq_data *cfqd = (struct cfq_data *) data; | 2018 | struct cfq_data *cfqd = (struct cfq_data *) data; |
2019 | struct cfq_queue *cfqq; | 2019 | struct cfq_queue *cfqq; |
2020 | unsigned long flags; | 2020 | unsigned long flags; |
2021 | 2021 | ||
2022 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); | 2022 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); |
2023 | 2023 | ||
2024 | if ((cfqq = cfqd->active_queue) != NULL) { | 2024 | if ((cfqq = cfqd->active_queue) != NULL) { |
2025 | unsigned long now = jiffies; | 2025 | unsigned long now = jiffies; |
2026 | 2026 | ||
2027 | /* | 2027 | /* |
2028 | * expired | 2028 | * expired |
2029 | */ | 2029 | */ |
2030 | if (time_after(now, cfqq->slice_end)) | 2030 | if (time_after(now, cfqq->slice_end)) |
2031 | goto expire; | 2031 | goto expire; |
2032 | 2032 | ||
2033 | /* | 2033 | /* |
2034 | * only expire and reinvoke request handler, if there are | 2034 | * only expire and reinvoke request handler, if there are |
2035 | * other queues with pending requests | 2035 | * other queues with pending requests |
2036 | */ | 2036 | */ |
2037 | if (!cfqd->busy_queues) { | 2037 | if (!cfqd->busy_queues) { |
2038 | cfqd->idle_slice_timer.expires = min(now + cfqd->cfq_slice_idle, cfqq->slice_end); | 2038 | cfqd->idle_slice_timer.expires = min(now + cfqd->cfq_slice_idle, cfqq->slice_end); |
2039 | add_timer(&cfqd->idle_slice_timer); | 2039 | add_timer(&cfqd->idle_slice_timer); |
2040 | goto out_cont; | 2040 | goto out_cont; |
2041 | } | 2041 | } |
2042 | 2042 | ||
2043 | /* | 2043 | /* |
2044 | * not expired and it has a request pending, let it dispatch | 2044 | * not expired and it has a request pending, let it dispatch |
2045 | */ | 2045 | */ |
2046 | if (!RB_EMPTY(&cfqq->sort_list)) { | 2046 | if (!RB_EMPTY(&cfqq->sort_list)) { |
2047 | cfq_mark_cfqq_must_dispatch(cfqq); | 2047 | cfq_mark_cfqq_must_dispatch(cfqq); |
2048 | goto out_kick; | 2048 | goto out_kick; |
2049 | } | 2049 | } |
2050 | } | 2050 | } |
2051 | expire: | 2051 | expire: |
2052 | cfq_slice_expired(cfqd, 0); | 2052 | cfq_slice_expired(cfqd, 0); |
2053 | out_kick: | 2053 | out_kick: |
2054 | cfq_schedule_dispatch(cfqd); | 2054 | cfq_schedule_dispatch(cfqd); |
2055 | out_cont: | 2055 | out_cont: |
2056 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | 2056 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); |
2057 | } | 2057 | } |
2058 | 2058 | ||
2059 | /* | 2059 | /* |
2060 | * Timer running if an idle class queue is waiting for service | 2060 | * Timer running if an idle class queue is waiting for service |
2061 | */ | 2061 | */ |
2062 | static void cfq_idle_class_timer(unsigned long data) | 2062 | static void cfq_idle_class_timer(unsigned long data) |
2063 | { | 2063 | { |
2064 | struct cfq_data *cfqd = (struct cfq_data *) data; | 2064 | struct cfq_data *cfqd = (struct cfq_data *) data; |
2065 | unsigned long flags, end; | 2065 | unsigned long flags, end; |
2066 | 2066 | ||
2067 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); | 2067 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); |
2068 | 2068 | ||
2069 | /* | 2069 | /* |
2070 | * race with a non-idle queue, reset timer | 2070 | * race with a non-idle queue, reset timer |
2071 | */ | 2071 | */ |
2072 | end = cfqd->last_end_request + CFQ_IDLE_GRACE; | 2072 | end = cfqd->last_end_request + CFQ_IDLE_GRACE; |
2073 | if (!time_after_eq(jiffies, end)) { | 2073 | if (!time_after_eq(jiffies, end)) { |
2074 | cfqd->idle_class_timer.expires = end; | 2074 | cfqd->idle_class_timer.expires = end; |
2075 | add_timer(&cfqd->idle_class_timer); | 2075 | add_timer(&cfqd->idle_class_timer); |
2076 | } else | 2076 | } else |
2077 | cfq_schedule_dispatch(cfqd); | 2077 | cfq_schedule_dispatch(cfqd); |
2078 | 2078 | ||
2079 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | 2079 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); |
2080 | } | 2080 | } |
2081 | 2081 | ||
2082 | static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) | 2082 | static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) |
2083 | { | 2083 | { |
2084 | del_timer_sync(&cfqd->idle_slice_timer); | 2084 | del_timer_sync(&cfqd->idle_slice_timer); |
2085 | del_timer_sync(&cfqd->idle_class_timer); | 2085 | del_timer_sync(&cfqd->idle_class_timer); |
2086 | blk_sync_queue(cfqd->queue); | 2086 | blk_sync_queue(cfqd->queue); |
2087 | } | 2087 | } |
2088 | 2088 | ||
2089 | static void cfq_put_cfqd(struct cfq_data *cfqd) | 2089 | static void cfq_put_cfqd(struct cfq_data *cfqd) |
2090 | { | 2090 | { |
2091 | request_queue_t *q = cfqd->queue; | 2091 | request_queue_t *q = cfqd->queue; |
2092 | 2092 | ||
2093 | if (!atomic_dec_and_test(&cfqd->ref)) | 2093 | if (!atomic_dec_and_test(&cfqd->ref)) |
2094 | return; | 2094 | return; |
2095 | 2095 | ||
2096 | cfq_shutdown_timer_wq(cfqd); | 2096 | cfq_shutdown_timer_wq(cfqd); |
2097 | blk_put_queue(q); | 2097 | blk_put_queue(q); |
2098 | 2098 | ||
2099 | mempool_destroy(cfqd->crq_pool); | 2099 | mempool_destroy(cfqd->crq_pool); |
2100 | kfree(cfqd->crq_hash); | 2100 | kfree(cfqd->crq_hash); |
2101 | kfree(cfqd->cfq_hash); | 2101 | kfree(cfqd->cfq_hash); |
2102 | kfree(cfqd); | 2102 | kfree(cfqd); |
2103 | } | 2103 | } |
2104 | 2104 | ||
2105 | static void cfq_exit_queue(elevator_t *e) | 2105 | static void cfq_exit_queue(elevator_t *e) |
2106 | { | 2106 | { |
2107 | struct cfq_data *cfqd = e->elevator_data; | 2107 | struct cfq_data *cfqd = e->elevator_data; |
2108 | 2108 | ||
2109 | cfq_shutdown_timer_wq(cfqd); | 2109 | cfq_shutdown_timer_wq(cfqd); |
2110 | cfq_put_cfqd(cfqd); | 2110 | cfq_put_cfqd(cfqd); |
2111 | } | 2111 | } |
2112 | 2112 | ||
2113 | static int cfq_init_queue(request_queue_t *q, elevator_t *e) | 2113 | static int cfq_init_queue(request_queue_t *q, elevator_t *e) |
2114 | { | 2114 | { |
2115 | struct cfq_data *cfqd; | 2115 | struct cfq_data *cfqd; |
2116 | int i; | 2116 | int i; |
2117 | 2117 | ||
2118 | cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL); | 2118 | cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL); |
2119 | if (!cfqd) | 2119 | if (!cfqd) |
2120 | return -ENOMEM; | 2120 | return -ENOMEM; |
2121 | 2121 | ||
2122 | memset(cfqd, 0, sizeof(*cfqd)); | 2122 | memset(cfqd, 0, sizeof(*cfqd)); |
2123 | 2123 | ||
2124 | for (i = 0; i < CFQ_PRIO_LISTS; i++) | 2124 | for (i = 0; i < CFQ_PRIO_LISTS; i++) |
2125 | INIT_LIST_HEAD(&cfqd->rr_list[i]); | 2125 | INIT_LIST_HEAD(&cfqd->rr_list[i]); |
2126 | 2126 | ||
2127 | INIT_LIST_HEAD(&cfqd->busy_rr); | 2127 | INIT_LIST_HEAD(&cfqd->busy_rr); |
2128 | INIT_LIST_HEAD(&cfqd->cur_rr); | 2128 | INIT_LIST_HEAD(&cfqd->cur_rr); |
2129 | INIT_LIST_HEAD(&cfqd->idle_rr); | 2129 | INIT_LIST_HEAD(&cfqd->idle_rr); |
2130 | INIT_LIST_HEAD(&cfqd->empty_list); | 2130 | INIT_LIST_HEAD(&cfqd->empty_list); |
2131 | 2131 | ||
2132 | cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL); | 2132 | cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL); |
2133 | if (!cfqd->crq_hash) | 2133 | if (!cfqd->crq_hash) |
2134 | goto out_crqhash; | 2134 | goto out_crqhash; |
2135 | 2135 | ||
2136 | cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL); | 2136 | cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL); |
2137 | if (!cfqd->cfq_hash) | 2137 | if (!cfqd->cfq_hash) |
2138 | goto out_cfqhash; | 2138 | goto out_cfqhash; |
2139 | 2139 | ||
2140 | cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, crq_pool); | 2140 | cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, crq_pool); |
2141 | if (!cfqd->crq_pool) | 2141 | if (!cfqd->crq_pool) |
2142 | goto out_crqpool; | 2142 | goto out_crqpool; |
2143 | 2143 | ||
2144 | for (i = 0; i < CFQ_MHASH_ENTRIES; i++) | 2144 | for (i = 0; i < CFQ_MHASH_ENTRIES; i++) |
2145 | INIT_HLIST_HEAD(&cfqd->crq_hash[i]); | 2145 | INIT_HLIST_HEAD(&cfqd->crq_hash[i]); |
2146 | for (i = 0; i < CFQ_QHASH_ENTRIES; i++) | 2146 | for (i = 0; i < CFQ_QHASH_ENTRIES; i++) |
2147 | INIT_HLIST_HEAD(&cfqd->cfq_hash[i]); | 2147 | INIT_HLIST_HEAD(&cfqd->cfq_hash[i]); |
2148 | 2148 | ||
2149 | e->elevator_data = cfqd; | 2149 | e->elevator_data = cfqd; |
2150 | 2150 | ||
2151 | cfqd->queue = q; | 2151 | cfqd->queue = q; |
2152 | atomic_inc(&q->refcnt); | 2152 | atomic_inc(&q->refcnt); |
2153 | 2153 | ||
2154 | cfqd->max_queued = q->nr_requests / 4; | 2154 | cfqd->max_queued = q->nr_requests / 4; |
2155 | q->nr_batching = cfq_queued; | 2155 | q->nr_batching = cfq_queued; |
2156 | 2156 | ||
2157 | init_timer(&cfqd->idle_slice_timer); | 2157 | init_timer(&cfqd->idle_slice_timer); |
2158 | cfqd->idle_slice_timer.function = cfq_idle_slice_timer; | 2158 | cfqd->idle_slice_timer.function = cfq_idle_slice_timer; |
2159 | cfqd->idle_slice_timer.data = (unsigned long) cfqd; | 2159 | cfqd->idle_slice_timer.data = (unsigned long) cfqd; |
2160 | 2160 | ||
2161 | init_timer(&cfqd->idle_class_timer); | 2161 | init_timer(&cfqd->idle_class_timer); |
2162 | cfqd->idle_class_timer.function = cfq_idle_class_timer; | 2162 | cfqd->idle_class_timer.function = cfq_idle_class_timer; |
2163 | cfqd->idle_class_timer.data = (unsigned long) cfqd; | 2163 | cfqd->idle_class_timer.data = (unsigned long) cfqd; |
2164 | 2164 | ||
2165 | INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q); | 2165 | INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q); |
2166 | 2166 | ||
2167 | atomic_set(&cfqd->ref, 1); | 2167 | atomic_set(&cfqd->ref, 1); |
2168 | 2168 | ||
2169 | cfqd->cfq_queued = cfq_queued; | 2169 | cfqd->cfq_queued = cfq_queued; |
2170 | cfqd->cfq_quantum = cfq_quantum; | 2170 | cfqd->cfq_quantum = cfq_quantum; |
2171 | cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; | 2171 | cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; |
2172 | cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1]; | 2172 | cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1]; |
2173 | cfqd->cfq_back_max = cfq_back_max; | 2173 | cfqd->cfq_back_max = cfq_back_max; |
2174 | cfqd->cfq_back_penalty = cfq_back_penalty; | 2174 | cfqd->cfq_back_penalty = cfq_back_penalty; |
2175 | cfqd->cfq_slice[0] = cfq_slice_async; | 2175 | cfqd->cfq_slice[0] = cfq_slice_async; |
2176 | cfqd->cfq_slice[1] = cfq_slice_sync; | 2176 | cfqd->cfq_slice[1] = cfq_slice_sync; |
2177 | cfqd->cfq_slice_async_rq = cfq_slice_async_rq; | 2177 | cfqd->cfq_slice_async_rq = cfq_slice_async_rq; |
2178 | cfqd->cfq_slice_idle = cfq_slice_idle; | 2178 | cfqd->cfq_slice_idle = cfq_slice_idle; |
2179 | cfqd->cfq_max_depth = cfq_max_depth; | 2179 | cfqd->cfq_max_depth = cfq_max_depth; |
2180 | 2180 | ||
2181 | return 0; | 2181 | return 0; |
2182 | out_crqpool: | 2182 | out_crqpool: |
2183 | kfree(cfqd->cfq_hash); | 2183 | kfree(cfqd->cfq_hash); |
2184 | out_cfqhash: | 2184 | out_cfqhash: |
2185 | kfree(cfqd->crq_hash); | 2185 | kfree(cfqd->crq_hash); |
2186 | out_crqhash: | 2186 | out_crqhash: |
2187 | kfree(cfqd); | 2187 | kfree(cfqd); |
2188 | return -ENOMEM; | 2188 | return -ENOMEM; |
2189 | } | 2189 | } |
2190 | 2190 | ||
2191 | static void cfq_slab_kill(void) | 2191 | static void cfq_slab_kill(void) |
2192 | { | 2192 | { |
2193 | if (crq_pool) | 2193 | if (crq_pool) |
2194 | kmem_cache_destroy(crq_pool); | 2194 | kmem_cache_destroy(crq_pool); |
2195 | if (cfq_pool) | 2195 | if (cfq_pool) |
2196 | kmem_cache_destroy(cfq_pool); | 2196 | kmem_cache_destroy(cfq_pool); |
2197 | if (cfq_ioc_pool) | 2197 | if (cfq_ioc_pool) |
2198 | kmem_cache_destroy(cfq_ioc_pool); | 2198 | kmem_cache_destroy(cfq_ioc_pool); |
2199 | } | 2199 | } |
2200 | 2200 | ||
2201 | static int __init cfq_slab_setup(void) | 2201 | static int __init cfq_slab_setup(void) |
2202 | { | 2202 | { |
2203 | crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0, | 2203 | crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0, |
2204 | NULL, NULL); | 2204 | NULL, NULL); |
2205 | if (!crq_pool) | 2205 | if (!crq_pool) |
2206 | goto fail; | 2206 | goto fail; |
2207 | 2207 | ||
2208 | cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0, | 2208 | cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0, |
2209 | NULL, NULL); | 2209 | NULL, NULL); |
2210 | if (!cfq_pool) | 2210 | if (!cfq_pool) |
2211 | goto fail; | 2211 | goto fail; |
2212 | 2212 | ||
2213 | cfq_ioc_pool = kmem_cache_create("cfq_ioc_pool", | 2213 | cfq_ioc_pool = kmem_cache_create("cfq_ioc_pool", |
2214 | sizeof(struct cfq_io_context), 0, 0, NULL, NULL); | 2214 | sizeof(struct cfq_io_context), 0, 0, NULL, NULL); |
2215 | if (!cfq_ioc_pool) | 2215 | if (!cfq_ioc_pool) |
2216 | goto fail; | 2216 | goto fail; |
2217 | 2217 | ||
2218 | return 0; | 2218 | return 0; |
2219 | fail: | 2219 | fail: |
2220 | cfq_slab_kill(); | 2220 | cfq_slab_kill(); |
2221 | return -ENOMEM; | 2221 | return -ENOMEM; |
2222 | } | 2222 | } |
2223 | 2223 | ||
2224 | /* | 2224 | /* |
2225 | * sysfs parts below --> | 2225 | * sysfs parts below --> |
2226 | */ | 2226 | */ |
2227 | struct cfq_fs_entry { | 2227 | struct cfq_fs_entry { |
2228 | struct attribute attr; | 2228 | struct attribute attr; |
2229 | ssize_t (*show)(struct cfq_data *, char *); | 2229 | ssize_t (*show)(struct cfq_data *, char *); |
2230 | ssize_t (*store)(struct cfq_data *, const char *, size_t); | 2230 | ssize_t (*store)(struct cfq_data *, const char *, size_t); |
2231 | }; | 2231 | }; |
2232 | 2232 | ||
2233 | static ssize_t | 2233 | static ssize_t |
2234 | cfq_var_show(unsigned int var, char *page) | 2234 | cfq_var_show(unsigned int var, char *page) |
2235 | { | 2235 | { |
2236 | return sprintf(page, "%d\n", var); | 2236 | return sprintf(page, "%d\n", var); |
2237 | } | 2237 | } |
2238 | 2238 | ||
2239 | static ssize_t | 2239 | static ssize_t |
2240 | cfq_var_store(unsigned int *var, const char *page, size_t count) | 2240 | cfq_var_store(unsigned int *var, const char *page, size_t count) |
2241 | { | 2241 | { |
2242 | char *p = (char *) page; | 2242 | char *p = (char *) page; |
2243 | 2243 | ||
2244 | *var = simple_strtoul(p, &p, 10); | 2244 | *var = simple_strtoul(p, &p, 10); |
2245 | return count; | 2245 | return count; |
2246 | } | 2246 | } |
2247 | 2247 | ||
2248 | #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ | 2248 | #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ |
2249 | static ssize_t __FUNC(struct cfq_data *cfqd, char *page) \ | 2249 | static ssize_t __FUNC(struct cfq_data *cfqd, char *page) \ |
2250 | { \ | 2250 | { \ |
2251 | unsigned int __data = __VAR; \ | 2251 | unsigned int __data = __VAR; \ |
2252 | if (__CONV) \ | 2252 | if (__CONV) \ |
2253 | __data = jiffies_to_msecs(__data); \ | 2253 | __data = jiffies_to_msecs(__data); \ |
2254 | return cfq_var_show(__data, (page)); \ | 2254 | return cfq_var_show(__data, (page)); \ |
2255 | } | 2255 | } |
2256 | SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); | 2256 | SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); |
2257 | SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0); | 2257 | SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0); |
2258 | SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1); | 2258 | SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1); |
2259 | SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1); | 2259 | SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1); |
2260 | SHOW_FUNCTION(cfq_back_max_show, cfqd->cfq_back_max, 0); | 2260 | SHOW_FUNCTION(cfq_back_max_show, cfqd->cfq_back_max, 0); |
2261 | SHOW_FUNCTION(cfq_back_penalty_show, cfqd->cfq_back_penalty, 0); | 2261 | SHOW_FUNCTION(cfq_back_penalty_show, cfqd->cfq_back_penalty, 0); |
2262 | SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); | 2262 | SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); |
2263 | SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); | 2263 | SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); |
2264 | SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); | 2264 | SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); |
2265 | SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); | 2265 | SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); |
2266 | SHOW_FUNCTION(cfq_max_depth_show, cfqd->cfq_max_depth, 0); | 2266 | SHOW_FUNCTION(cfq_max_depth_show, cfqd->cfq_max_depth, 0); |
2267 | #undef SHOW_FUNCTION | 2267 | #undef SHOW_FUNCTION |
2268 | 2268 | ||
2269 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ | 2269 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ |
2270 | static ssize_t __FUNC(struct cfq_data *cfqd, const char *page, size_t count) \ | 2270 | static ssize_t __FUNC(struct cfq_data *cfqd, const char *page, size_t count) \ |
2271 | { \ | 2271 | { \ |
2272 | unsigned int __data; \ | 2272 | unsigned int __data; \ |
2273 | int ret = cfq_var_store(&__data, (page), count); \ | 2273 | int ret = cfq_var_store(&__data, (page), count); \ |
2274 | if (__data < (MIN)) \ | 2274 | if (__data < (MIN)) \ |
2275 | __data = (MIN); \ | 2275 | __data = (MIN); \ |
2276 | else if (__data > (MAX)) \ | 2276 | else if (__data > (MAX)) \ |
2277 | __data = (MAX); \ | 2277 | __data = (MAX); \ |
2278 | if (__CONV) \ | 2278 | if (__CONV) \ |
2279 | *(__PTR) = msecs_to_jiffies(__data); \ | 2279 | *(__PTR) = msecs_to_jiffies(__data); \ |
2280 | else \ | 2280 | else \ |
2281 | *(__PTR) = __data; \ | 2281 | *(__PTR) = __data; \ |
2282 | return ret; \ | 2282 | return ret; \ |
2283 | } | 2283 | } |
2284 | STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0); | 2284 | STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0); |
2285 | STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0); | 2285 | STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0); |
2286 | STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1); | 2286 | STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1); |
2287 | STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1); | 2287 | STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1); |
2288 | STORE_FUNCTION(cfq_back_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); | 2288 | STORE_FUNCTION(cfq_back_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); |
2289 | STORE_FUNCTION(cfq_back_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0); | 2289 | STORE_FUNCTION(cfq_back_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0); |
2290 | STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1); | 2290 | STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1); |
2291 | STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); | 2291 | STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); |
2292 | STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); | 2292 | STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); |
2293 | STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0); | 2293 | STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0); |
2294 | STORE_FUNCTION(cfq_max_depth_store, &cfqd->cfq_max_depth, 1, UINT_MAX, 0); | 2294 | STORE_FUNCTION(cfq_max_depth_store, &cfqd->cfq_max_depth, 1, UINT_MAX, 0); |
2295 | #undef STORE_FUNCTION | 2295 | #undef STORE_FUNCTION |
2296 | 2296 | ||
2297 | static struct cfq_fs_entry cfq_quantum_entry = { | 2297 | static struct cfq_fs_entry cfq_quantum_entry = { |
2298 | .attr = {.name = "quantum", .mode = S_IRUGO | S_IWUSR }, | 2298 | .attr = {.name = "quantum", .mode = S_IRUGO | S_IWUSR }, |
2299 | .show = cfq_quantum_show, | 2299 | .show = cfq_quantum_show, |
2300 | .store = cfq_quantum_store, | 2300 | .store = cfq_quantum_store, |
2301 | }; | 2301 | }; |
2302 | static struct cfq_fs_entry cfq_queued_entry = { | 2302 | static struct cfq_fs_entry cfq_queued_entry = { |
2303 | .attr = {.name = "queued", .mode = S_IRUGO | S_IWUSR }, | 2303 | .attr = {.name = "queued", .mode = S_IRUGO | S_IWUSR }, |
2304 | .show = cfq_queued_show, | 2304 | .show = cfq_queued_show, |
2305 | .store = cfq_queued_store, | 2305 | .store = cfq_queued_store, |
2306 | }; | 2306 | }; |
2307 | static struct cfq_fs_entry cfq_fifo_expire_sync_entry = { | 2307 | static struct cfq_fs_entry cfq_fifo_expire_sync_entry = { |
2308 | .attr = {.name = "fifo_expire_sync", .mode = S_IRUGO | S_IWUSR }, | 2308 | .attr = {.name = "fifo_expire_sync", .mode = S_IRUGO | S_IWUSR }, |
2309 | .show = cfq_fifo_expire_sync_show, | 2309 | .show = cfq_fifo_expire_sync_show, |
2310 | .store = cfq_fifo_expire_sync_store, | 2310 | .store = cfq_fifo_expire_sync_store, |
2311 | }; | 2311 | }; |
2312 | static struct cfq_fs_entry cfq_fifo_expire_async_entry = { | 2312 | static struct cfq_fs_entry cfq_fifo_expire_async_entry = { |
2313 | .attr = {.name = "fifo_expire_async", .mode = S_IRUGO | S_IWUSR }, | 2313 | .attr = {.name = "fifo_expire_async", .mode = S_IRUGO | S_IWUSR }, |
2314 | .show = cfq_fifo_expire_async_show, | 2314 | .show = cfq_fifo_expire_async_show, |
2315 | .store = cfq_fifo_expire_async_store, | 2315 | .store = cfq_fifo_expire_async_store, |
2316 | }; | 2316 | }; |
2317 | static struct cfq_fs_entry cfq_back_max_entry = { | 2317 | static struct cfq_fs_entry cfq_back_max_entry = { |
2318 | .attr = {.name = "back_seek_max", .mode = S_IRUGO | S_IWUSR }, | 2318 | .attr = {.name = "back_seek_max", .mode = S_IRUGO | S_IWUSR }, |
2319 | .show = cfq_back_max_show, | 2319 | .show = cfq_back_max_show, |
2320 | .store = cfq_back_max_store, | 2320 | .store = cfq_back_max_store, |
2321 | }; | 2321 | }; |
2322 | static struct cfq_fs_entry cfq_back_penalty_entry = { | 2322 | static struct cfq_fs_entry cfq_back_penalty_entry = { |
2323 | .attr = {.name = "back_seek_penalty", .mode = S_IRUGO | S_IWUSR }, | 2323 | .attr = {.name = "back_seek_penalty", .mode = S_IRUGO | S_IWUSR }, |
2324 | .show = cfq_back_penalty_show, | 2324 | .show = cfq_back_penalty_show, |
2325 | .store = cfq_back_penalty_store, | 2325 | .store = cfq_back_penalty_store, |
2326 | }; | 2326 | }; |
2327 | static struct cfq_fs_entry cfq_slice_sync_entry = { | 2327 | static struct cfq_fs_entry cfq_slice_sync_entry = { |
2328 | .attr = {.name = "slice_sync", .mode = S_IRUGO | S_IWUSR }, | 2328 | .attr = {.name = "slice_sync", .mode = S_IRUGO | S_IWUSR }, |
2329 | .show = cfq_slice_sync_show, | 2329 | .show = cfq_slice_sync_show, |
2330 | .store = cfq_slice_sync_store, | 2330 | .store = cfq_slice_sync_store, |
2331 | }; | 2331 | }; |
2332 | static struct cfq_fs_entry cfq_slice_async_entry = { | 2332 | static struct cfq_fs_entry cfq_slice_async_entry = { |
2333 | .attr = {.name = "slice_async", .mode = S_IRUGO | S_IWUSR }, | 2333 | .attr = {.name = "slice_async", .mode = S_IRUGO | S_IWUSR }, |
2334 | .show = cfq_slice_async_show, | 2334 | .show = cfq_slice_async_show, |
2335 | .store = cfq_slice_async_store, | 2335 | .store = cfq_slice_async_store, |
2336 | }; | 2336 | }; |
2337 | static struct cfq_fs_entry cfq_slice_async_rq_entry = { | 2337 | static struct cfq_fs_entry cfq_slice_async_rq_entry = { |
2338 | .attr = {.name = "slice_async_rq", .mode = S_IRUGO | S_IWUSR }, | 2338 | .attr = {.name = "slice_async_rq", .mode = S_IRUGO | S_IWUSR }, |
2339 | .show = cfq_slice_async_rq_show, | 2339 | .show = cfq_slice_async_rq_show, |
2340 | .store = cfq_slice_async_rq_store, | 2340 | .store = cfq_slice_async_rq_store, |
2341 | }; | 2341 | }; |
2342 | static struct cfq_fs_entry cfq_slice_idle_entry = { | 2342 | static struct cfq_fs_entry cfq_slice_idle_entry = { |
2343 | .attr = {.name = "slice_idle", .mode = S_IRUGO | S_IWUSR }, | 2343 | .attr = {.name = "slice_idle", .mode = S_IRUGO | S_IWUSR }, |
2344 | .show = cfq_slice_idle_show, | 2344 | .show = cfq_slice_idle_show, |
2345 | .store = cfq_slice_idle_store, | 2345 | .store = cfq_slice_idle_store, |
2346 | }; | 2346 | }; |
2347 | static struct cfq_fs_entry cfq_max_depth_entry = { | 2347 | static struct cfq_fs_entry cfq_max_depth_entry = { |
2348 | .attr = {.name = "max_depth", .mode = S_IRUGO | S_IWUSR }, | 2348 | .attr = {.name = "max_depth", .mode = S_IRUGO | S_IWUSR }, |
2349 | .show = cfq_max_depth_show, | 2349 | .show = cfq_max_depth_show, |
2350 | .store = cfq_max_depth_store, | 2350 | .store = cfq_max_depth_store, |
2351 | }; | 2351 | }; |
2352 | 2352 | ||
2353 | static struct attribute *default_attrs[] = { | 2353 | static struct attribute *default_attrs[] = { |
2354 | &cfq_quantum_entry.attr, | 2354 | &cfq_quantum_entry.attr, |
2355 | &cfq_queued_entry.attr, | 2355 | &cfq_queued_entry.attr, |
2356 | &cfq_fifo_expire_sync_entry.attr, | 2356 | &cfq_fifo_expire_sync_entry.attr, |
2357 | &cfq_fifo_expire_async_entry.attr, | 2357 | &cfq_fifo_expire_async_entry.attr, |
2358 | &cfq_back_max_entry.attr, | 2358 | &cfq_back_max_entry.attr, |
2359 | &cfq_back_penalty_entry.attr, | 2359 | &cfq_back_penalty_entry.attr, |
2360 | &cfq_slice_sync_entry.attr, | 2360 | &cfq_slice_sync_entry.attr, |
2361 | &cfq_slice_async_entry.attr, | 2361 | &cfq_slice_async_entry.attr, |
2362 | &cfq_slice_async_rq_entry.attr, | 2362 | &cfq_slice_async_rq_entry.attr, |
2363 | &cfq_slice_idle_entry.attr, | 2363 | &cfq_slice_idle_entry.attr, |
2364 | &cfq_max_depth_entry.attr, | 2364 | &cfq_max_depth_entry.attr, |
2365 | NULL, | 2365 | NULL, |
2366 | }; | 2366 | }; |
2367 | 2367 | ||
2368 | #define to_cfq(atr) container_of((atr), struct cfq_fs_entry, attr) | 2368 | #define to_cfq(atr) container_of((atr), struct cfq_fs_entry, attr) |
2369 | 2369 | ||
2370 | static ssize_t | 2370 | static ssize_t |
2371 | cfq_attr_show(struct kobject *kobj, struct attribute *attr, char *page) | 2371 | cfq_attr_show(struct kobject *kobj, struct attribute *attr, char *page) |
2372 | { | 2372 | { |
2373 | elevator_t *e = container_of(kobj, elevator_t, kobj); | 2373 | elevator_t *e = container_of(kobj, elevator_t, kobj); |
2374 | struct cfq_fs_entry *entry = to_cfq(attr); | 2374 | struct cfq_fs_entry *entry = to_cfq(attr); |
2375 | 2375 | ||
2376 | if (!entry->show) | 2376 | if (!entry->show) |
2377 | return -EIO; | 2377 | return -EIO; |
2378 | 2378 | ||
2379 | return entry->show(e->elevator_data, page); | 2379 | return entry->show(e->elevator_data, page); |
2380 | } | 2380 | } |
2381 | 2381 | ||
2382 | static ssize_t | 2382 | static ssize_t |
2383 | cfq_attr_store(struct kobject *kobj, struct attribute *attr, | 2383 | cfq_attr_store(struct kobject *kobj, struct attribute *attr, |
2384 | const char *page, size_t length) | 2384 | const char *page, size_t length) |
2385 | { | 2385 | { |
2386 | elevator_t *e = container_of(kobj, elevator_t, kobj); | 2386 | elevator_t *e = container_of(kobj, elevator_t, kobj); |
2387 | struct cfq_fs_entry *entry = to_cfq(attr); | 2387 | struct cfq_fs_entry *entry = to_cfq(attr); |
2388 | 2388 | ||
2389 | if (!entry->store) | 2389 | if (!entry->store) |
2390 | return -EIO; | 2390 | return -EIO; |
2391 | 2391 | ||
2392 | return entry->store(e->elevator_data, page, length); | 2392 | return entry->store(e->elevator_data, page, length); |
2393 | } | 2393 | } |
2394 | 2394 | ||
2395 | static struct sysfs_ops cfq_sysfs_ops = { | 2395 | static struct sysfs_ops cfq_sysfs_ops = { |
2396 | .show = cfq_attr_show, | 2396 | .show = cfq_attr_show, |
2397 | .store = cfq_attr_store, | 2397 | .store = cfq_attr_store, |
2398 | }; | 2398 | }; |
2399 | 2399 | ||
2400 | static struct kobj_type cfq_ktype = { | 2400 | static struct kobj_type cfq_ktype = { |
2401 | .sysfs_ops = &cfq_sysfs_ops, | 2401 | .sysfs_ops = &cfq_sysfs_ops, |
2402 | .default_attrs = default_attrs, | 2402 | .default_attrs = default_attrs, |
2403 | }; | 2403 | }; |
2404 | 2404 | ||
2405 | static struct elevator_type iosched_cfq = { | 2405 | static struct elevator_type iosched_cfq = { |
2406 | .ops = { | 2406 | .ops = { |
2407 | .elevator_merge_fn = cfq_merge, | 2407 | .elevator_merge_fn = cfq_merge, |
2408 | .elevator_merged_fn = cfq_merged_request, | 2408 | .elevator_merged_fn = cfq_merged_request, |
2409 | .elevator_merge_req_fn = cfq_merged_requests, | 2409 | .elevator_merge_req_fn = cfq_merged_requests, |
2410 | .elevator_dispatch_fn = cfq_dispatch_requests, | 2410 | .elevator_dispatch_fn = cfq_dispatch_requests, |
2411 | .elevator_add_req_fn = cfq_insert_request, | 2411 | .elevator_add_req_fn = cfq_insert_request, |
2412 | .elevator_activate_req_fn = cfq_activate_request, | 2412 | .elevator_activate_req_fn = cfq_activate_request, |
2413 | .elevator_deactivate_req_fn = cfq_deactivate_request, | 2413 | .elevator_deactivate_req_fn = cfq_deactivate_request, |
2414 | .elevator_queue_empty_fn = cfq_queue_empty, | 2414 | .elevator_queue_empty_fn = cfq_queue_empty, |
2415 | .elevator_completed_req_fn = cfq_completed_request, | 2415 | .elevator_completed_req_fn = cfq_completed_request, |
2416 | .elevator_former_req_fn = cfq_former_request, | 2416 | .elevator_former_req_fn = cfq_former_request, |
2417 | .elevator_latter_req_fn = cfq_latter_request, | 2417 | .elevator_latter_req_fn = cfq_latter_request, |
2418 | .elevator_set_req_fn = cfq_set_request, | 2418 | .elevator_set_req_fn = cfq_set_request, |
2419 | .elevator_put_req_fn = cfq_put_request, | 2419 | .elevator_put_req_fn = cfq_put_request, |
2420 | .elevator_may_queue_fn = cfq_may_queue, | 2420 | .elevator_may_queue_fn = cfq_may_queue, |
2421 | .elevator_init_fn = cfq_init_queue, | 2421 | .elevator_init_fn = cfq_init_queue, |
2422 | .elevator_exit_fn = cfq_exit_queue, | 2422 | .elevator_exit_fn = cfq_exit_queue, |
2423 | }, | 2423 | }, |
2424 | .elevator_ktype = &cfq_ktype, | 2424 | .elevator_ktype = &cfq_ktype, |
2425 | .elevator_name = "cfq", | 2425 | .elevator_name = "cfq", |
2426 | .elevator_owner = THIS_MODULE, | 2426 | .elevator_owner = THIS_MODULE, |
2427 | }; | 2427 | }; |
2428 | 2428 | ||
2429 | static int __init cfq_init(void) | 2429 | static int __init cfq_init(void) |
2430 | { | 2430 | { |
2431 | int ret; | 2431 | int ret; |
2432 | 2432 | ||
2433 | /* | 2433 | /* |
2434 | * could be 0 on HZ < 1000 setups | 2434 | * could be 0 on HZ < 1000 setups |
2435 | */ | 2435 | */ |
2436 | if (!cfq_slice_async) | 2436 | if (!cfq_slice_async) |
2437 | cfq_slice_async = 1; | 2437 | cfq_slice_async = 1; |
2438 | if (!cfq_slice_idle) | 2438 | if (!cfq_slice_idle) |
2439 | cfq_slice_idle = 1; | 2439 | cfq_slice_idle = 1; |
2440 | 2440 | ||
2441 | if (cfq_slab_setup()) | 2441 | if (cfq_slab_setup()) |
2442 | return -ENOMEM; | 2442 | return -ENOMEM; |
2443 | 2443 | ||
2444 | ret = elv_register(&iosched_cfq); | 2444 | ret = elv_register(&iosched_cfq); |
2445 | if (ret) | 2445 | if (ret) |
2446 | cfq_slab_kill(); | 2446 | cfq_slab_kill(); |
2447 | 2447 | ||
2448 | return ret; | 2448 | return ret; |
2449 | } | 2449 | } |
2450 | 2450 | ||
2451 | static void __exit cfq_exit(void) | 2451 | static void __exit cfq_exit(void) |
2452 | { | 2452 | { |
2453 | elv_unregister(&iosched_cfq); | 2453 | elv_unregister(&iosched_cfq); |
2454 | cfq_slab_kill(); | 2454 | cfq_slab_kill(); |
2455 | } | 2455 | } |
2456 | 2456 | ||
2457 | module_init(cfq_init); | 2457 | module_init(cfq_init); |
2458 | module_exit(cfq_exit); | 2458 | module_exit(cfq_exit); |
2459 | 2459 | ||
2460 | MODULE_AUTHOR("Jens Axboe"); | 2460 | MODULE_AUTHOR("Jens Axboe"); |
2461 | MODULE_LICENSE("GPL"); | 2461 | MODULE_LICENSE("GPL"); |
2462 | MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler"); | 2462 | MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler"); |
2463 | 2463 | ||
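The sysfs hunk above exposes the cfq tunables through macro-generated show/store hooks: SHOW_FUNCTION prints the field (converting jiffies to milliseconds when __CONV is set), and STORE_FUNCTION parses a decimal string, clamps it to the [MIN, MAX] bounds and converts milliseconds back to jiffies before writing the field. Below is a minimal user-space C sketch of that clamp-and-convert pattern; it assumes HZ = 1000, and ms_to_jiffies()/store_tunable() are illustrative stand-ins, not kernel APIs.

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#define HZ 1000	/* assumed tick rate, for illustration only */

/* simplified stand-in for msecs_to_jiffies() at the assumed HZ */
static unsigned long ms_to_jiffies(unsigned int ms)
{
	return ((unsigned long)ms * HZ + 999) / 1000;
}

/*
 * Mirrors the STORE_FUNCTION pattern in the hunk above: parse a decimal
 * value, clamp it to [min, max], and optionally convert milliseconds to
 * jiffies before it is stored.
 */
static unsigned int store_tunable(const char *page, unsigned int min,
				  unsigned int max, int conv)
{
	unsigned int val = (unsigned int)strtoul(page, NULL, 10);

	if (val < min)
		val = min;
	else if (val > max)
		val = max;

	return conv ? (unsigned int)ms_to_jiffies(val) : val;
}

int main(void)
{
	/* slice_idle is stored with __CONV = 1, so "8" means 8 ms */
	printf("slice_idle -> %u jiffies\n", store_tunable("8", 0, UINT_MAX, 1));
	/* back_seek_penalty uses __CONV = 0 and a minimum of 1 */
	printf("back_seek_penalty -> %u\n", store_tunable("0", 1, UINT_MAX, 0));
	return 0;
}

As in the macro-generated hooks, out-of-range writes are silently clamped rather than rejected, and the store still reports the full byte count as consumed.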
block/deadline-iosched.c
1 | /* | 1 | /* |
2 | * Deadline i/o scheduler. | 2 | * Deadline i/o scheduler. |
3 | * | 3 | * |
4 | * Copyright (C) 2002 Jens Axboe <axboe@suse.de> | 4 | * Copyright (C) 2002 Jens Axboe <axboe@suse.de> |
5 | */ | 5 | */ |
6 | #include <linux/kernel.h> | 6 | #include <linux/kernel.h> |
7 | #include <linux/fs.h> | 7 | #include <linux/fs.h> |
8 | #include <linux/blkdev.h> | 8 | #include <linux/blkdev.h> |
9 | #include <linux/elevator.h> | 9 | #include <linux/elevator.h> |
10 | #include <linux/bio.h> | 10 | #include <linux/bio.h> |
11 | #include <linux/config.h> | 11 | #include <linux/config.h> |
12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
13 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/compiler.h> | 15 | #include <linux/compiler.h> |
16 | #include <linux/hash.h> | 16 | #include <linux/hash.h> |
17 | #include <linux/rbtree.h> | 17 | #include <linux/rbtree.h> |
18 | 18 | ||
19 | /* | 19 | /* |
20 | * See Documentation/block/deadline-iosched.txt | 20 | * See Documentation/block/deadline-iosched.txt |
21 | */ | 21 | */ |
22 | static int read_expire = HZ / 2; /* max time before a read is submitted. */ | 22 | static const int read_expire = HZ / 2; /* max time before a read is submitted. */ |
23 | static int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ | 23 | static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ |
24 | static int writes_starved = 2; /* max times reads can starve a write */ | 24 | static const int writes_starved = 2; /* max times reads can starve a write */ |
25 | static int fifo_batch = 16; /* # of sequential requests treated as one | 25 | static const int fifo_batch = 16; /* # of sequential requests treated as one |
26 | by the above parameters. For throughput. */ | 26 | by the above parameters. For throughput. */ |
27 | 27 | ||
28 | static const int deadline_hash_shift = 5; | 28 | static const int deadline_hash_shift = 5; |
29 | #define DL_HASH_BLOCK(sec) ((sec) >> 3) | 29 | #define DL_HASH_BLOCK(sec) ((sec) >> 3) |
30 | #define DL_HASH_FN(sec) (hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift)) | 30 | #define DL_HASH_FN(sec) (hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift)) |
31 | #define DL_HASH_ENTRIES (1 << deadline_hash_shift) | 31 | #define DL_HASH_ENTRIES (1 << deadline_hash_shift) |
32 | #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) | 32 | #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) |
33 | #define list_entry_hash(ptr) list_entry((ptr), struct deadline_rq, hash) | 33 | #define list_entry_hash(ptr) list_entry((ptr), struct deadline_rq, hash) |
34 | #define ON_HASH(drq) (drq)->on_hash | 34 | #define ON_HASH(drq) (drq)->on_hash |
35 | 35 | ||
36 | struct deadline_data { | 36 | struct deadline_data { |
37 | /* | 37 | /* |
38 | * run time data | 38 | * run time data |
39 | */ | 39 | */ |
40 | 40 | ||
41 | /* | 41 | /* |
42 | * requests (deadline_rq s) are present on both sort_list and fifo_list | 42 | * requests (deadline_rq s) are present on both sort_list and fifo_list |
43 | */ | 43 | */ |
44 | struct rb_root sort_list[2]; | 44 | struct rb_root sort_list[2]; |
45 | struct list_head fifo_list[2]; | 45 | struct list_head fifo_list[2]; |
46 | 46 | ||
47 | /* | 47 | /* |
48 | * next in sort order. read, write or both are NULL | 48 | * next in sort order. read, write or both are NULL |
49 | */ | 49 | */ |
50 | struct deadline_rq *next_drq[2]; | 50 | struct deadline_rq *next_drq[2]; |
51 | struct list_head *hash; /* request hash */ | 51 | struct list_head *hash; /* request hash */ |
52 | unsigned int batching; /* number of sequential requests made */ | 52 | unsigned int batching; /* number of sequential requests made */ |
53 | sector_t last_sector; /* head position */ | 53 | sector_t last_sector; /* head position */ |
54 | unsigned int starved; /* times reads have starved writes */ | 54 | unsigned int starved; /* times reads have starved writes */ |
55 | 55 | ||
56 | /* | 56 | /* |
57 | * settings that change how the i/o scheduler behaves | 57 | * settings that change how the i/o scheduler behaves |
58 | */ | 58 | */ |
59 | int fifo_expire[2]; | 59 | int fifo_expire[2]; |
60 | int fifo_batch; | 60 | int fifo_batch; |
61 | int writes_starved; | 61 | int writes_starved; |
62 | int front_merges; | 62 | int front_merges; |
63 | 63 | ||
64 | mempool_t *drq_pool; | 64 | mempool_t *drq_pool; |
65 | }; | 65 | }; |
66 | 66 | ||
67 | /* | 67 | /* |
68 | * pre-request data. | 68 | * pre-request data. |
69 | */ | 69 | */ |
70 | struct deadline_rq { | 70 | struct deadline_rq { |
71 | /* | 71 | /* |
72 | * rbtree index, key is the starting offset | 72 | * rbtree index, key is the starting offset |
73 | */ | 73 | */ |
74 | struct rb_node rb_node; | 74 | struct rb_node rb_node; |
75 | sector_t rb_key; | 75 | sector_t rb_key; |
76 | 76 | ||
77 | struct request *request; | 77 | struct request *request; |
78 | 78 | ||
79 | /* | 79 | /* |
80 | * request hash, key is the ending offset (for back merge lookup) | 80 | * request hash, key is the ending offset (for back merge lookup) |
81 | */ | 81 | */ |
82 | struct list_head hash; | 82 | struct list_head hash; |
83 | char on_hash; | 83 | char on_hash; |
84 | 84 | ||
85 | /* | 85 | /* |
86 | * expire fifo | 86 | * expire fifo |
87 | */ | 87 | */ |
88 | struct list_head fifo; | 88 | struct list_head fifo; |
89 | unsigned long expires; | 89 | unsigned long expires; |
90 | }; | 90 | }; |
91 | 91 | ||
92 | static void deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq); | 92 | static void deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq); |
93 | 93 | ||
94 | static kmem_cache_t *drq_pool; | 94 | static kmem_cache_t *drq_pool; |
95 | 95 | ||
96 | #define RQ_DATA(rq) ((struct deadline_rq *) (rq)->elevator_private) | 96 | #define RQ_DATA(rq) ((struct deadline_rq *) (rq)->elevator_private) |
97 | 97 | ||
98 | /* | 98 | /* |
99 | * the back merge hash support functions | 99 | * the back merge hash support functions |
100 | */ | 100 | */ |
101 | static inline void __deadline_del_drq_hash(struct deadline_rq *drq) | 101 | static inline void __deadline_del_drq_hash(struct deadline_rq *drq) |
102 | { | 102 | { |
103 | drq->on_hash = 0; | 103 | drq->on_hash = 0; |
104 | list_del_init(&drq->hash); | 104 | list_del_init(&drq->hash); |
105 | } | 105 | } |
106 | 106 | ||
107 | static inline void deadline_del_drq_hash(struct deadline_rq *drq) | 107 | static inline void deadline_del_drq_hash(struct deadline_rq *drq) |
108 | { | 108 | { |
109 | if (ON_HASH(drq)) | 109 | if (ON_HASH(drq)) |
110 | __deadline_del_drq_hash(drq); | 110 | __deadline_del_drq_hash(drq); |
111 | } | 111 | } |
112 | 112 | ||
113 | static inline void | 113 | static inline void |
114 | deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq) | 114 | deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq) |
115 | { | 115 | { |
116 | struct request *rq = drq->request; | 116 | struct request *rq = drq->request; |
117 | 117 | ||
118 | BUG_ON(ON_HASH(drq)); | 118 | BUG_ON(ON_HASH(drq)); |
119 | 119 | ||
120 | drq->on_hash = 1; | 120 | drq->on_hash = 1; |
121 | list_add(&drq->hash, &dd->hash[DL_HASH_FN(rq_hash_key(rq))]); | 121 | list_add(&drq->hash, &dd->hash[DL_HASH_FN(rq_hash_key(rq))]); |
122 | } | 122 | } |
123 | 123 | ||
124 | /* | 124 | /* |
125 | * move hot entry to front of chain | 125 | * move hot entry to front of chain |
126 | */ | 126 | */ |
127 | static inline void | 127 | static inline void |
128 | deadline_hot_drq_hash(struct deadline_data *dd, struct deadline_rq *drq) | 128 | deadline_hot_drq_hash(struct deadline_data *dd, struct deadline_rq *drq) |
129 | { | 129 | { |
130 | struct request *rq = drq->request; | 130 | struct request *rq = drq->request; |
131 | struct list_head *head = &dd->hash[DL_HASH_FN(rq_hash_key(rq))]; | 131 | struct list_head *head = &dd->hash[DL_HASH_FN(rq_hash_key(rq))]; |
132 | 132 | ||
133 | if (ON_HASH(drq) && drq->hash.prev != head) { | 133 | if (ON_HASH(drq) && drq->hash.prev != head) { |
134 | list_del(&drq->hash); | 134 | list_del(&drq->hash); |
135 | list_add(&drq->hash, head); | 135 | list_add(&drq->hash, head); |
136 | } | 136 | } |
137 | } | 137 | } |
138 | 138 | ||
139 | static struct request * | 139 | static struct request * |
140 | deadline_find_drq_hash(struct deadline_data *dd, sector_t offset) | 140 | deadline_find_drq_hash(struct deadline_data *dd, sector_t offset) |
141 | { | 141 | { |
142 | struct list_head *hash_list = &dd->hash[DL_HASH_FN(offset)]; | 142 | struct list_head *hash_list = &dd->hash[DL_HASH_FN(offset)]; |
143 | struct list_head *entry, *next = hash_list->next; | 143 | struct list_head *entry, *next = hash_list->next; |
144 | 144 | ||
145 | while ((entry = next) != hash_list) { | 145 | while ((entry = next) != hash_list) { |
146 | struct deadline_rq *drq = list_entry_hash(entry); | 146 | struct deadline_rq *drq = list_entry_hash(entry); |
147 | struct request *__rq = drq->request; | 147 | struct request *__rq = drq->request; |
148 | 148 | ||
149 | next = entry->next; | 149 | next = entry->next; |
150 | 150 | ||
151 | BUG_ON(!ON_HASH(drq)); | 151 | BUG_ON(!ON_HASH(drq)); |
152 | 152 | ||
153 | if (!rq_mergeable(__rq)) { | 153 | if (!rq_mergeable(__rq)) { |
154 | __deadline_del_drq_hash(drq); | 154 | __deadline_del_drq_hash(drq); |
155 | continue; | 155 | continue; |
156 | } | 156 | } |
157 | 157 | ||
158 | if (rq_hash_key(__rq) == offset) | 158 | if (rq_hash_key(__rq) == offset) |
159 | return __rq; | 159 | return __rq; |
160 | } | 160 | } |
161 | 161 | ||
162 | return NULL; | 162 | return NULL; |
163 | } | 163 | } |
164 | 164 | ||
165 | /* | 165 | /* |
166 | * rb tree support functions | 166 | * rb tree support functions |
167 | */ | 167 | */ |
168 | #define RB_NONE (2) | 168 | #define RB_NONE (2) |
169 | #define RB_EMPTY(root) ((root)->rb_node == NULL) | 169 | #define RB_EMPTY(root) ((root)->rb_node == NULL) |
170 | #define ON_RB(node) ((node)->rb_color != RB_NONE) | 170 | #define ON_RB(node) ((node)->rb_color != RB_NONE) |
171 | #define RB_CLEAR(node) ((node)->rb_color = RB_NONE) | 171 | #define RB_CLEAR(node) ((node)->rb_color = RB_NONE) |
172 | #define rb_entry_drq(node) rb_entry((node), struct deadline_rq, rb_node) | 172 | #define rb_entry_drq(node) rb_entry((node), struct deadline_rq, rb_node) |
173 | #define DRQ_RB_ROOT(dd, drq) (&(dd)->sort_list[rq_data_dir((drq)->request)]) | 173 | #define DRQ_RB_ROOT(dd, drq) (&(dd)->sort_list[rq_data_dir((drq)->request)]) |
174 | #define rq_rb_key(rq) (rq)->sector | 174 | #define rq_rb_key(rq) (rq)->sector |
175 | 175 | ||
176 | static struct deadline_rq * | 176 | static struct deadline_rq * |
177 | __deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) | 177 | __deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) |
178 | { | 178 | { |
179 | struct rb_node **p = &DRQ_RB_ROOT(dd, drq)->rb_node; | 179 | struct rb_node **p = &DRQ_RB_ROOT(dd, drq)->rb_node; |
180 | struct rb_node *parent = NULL; | 180 | struct rb_node *parent = NULL; |
181 | struct deadline_rq *__drq; | 181 | struct deadline_rq *__drq; |
182 | 182 | ||
183 | while (*p) { | 183 | while (*p) { |
184 | parent = *p; | 184 | parent = *p; |
185 | __drq = rb_entry_drq(parent); | 185 | __drq = rb_entry_drq(parent); |
186 | 186 | ||
187 | if (drq->rb_key < __drq->rb_key) | 187 | if (drq->rb_key < __drq->rb_key) |
188 | p = &(*p)->rb_left; | 188 | p = &(*p)->rb_left; |
189 | else if (drq->rb_key > __drq->rb_key) | 189 | else if (drq->rb_key > __drq->rb_key) |
190 | p = &(*p)->rb_right; | 190 | p = &(*p)->rb_right; |
191 | else | 191 | else |
192 | return __drq; | 192 | return __drq; |
193 | } | 193 | } |
194 | 194 | ||
195 | rb_link_node(&drq->rb_node, parent, p); | 195 | rb_link_node(&drq->rb_node, parent, p); |
196 | return NULL; | 196 | return NULL; |
197 | } | 197 | } |
198 | 198 | ||
199 | static void | 199 | static void |
200 | deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) | 200 | deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) |
201 | { | 201 | { |
202 | struct deadline_rq *__alias; | 202 | struct deadline_rq *__alias; |
203 | 203 | ||
204 | drq->rb_key = rq_rb_key(drq->request); | 204 | drq->rb_key = rq_rb_key(drq->request); |
205 | 205 | ||
206 | retry: | 206 | retry: |
207 | __alias = __deadline_add_drq_rb(dd, drq); | 207 | __alias = __deadline_add_drq_rb(dd, drq); |
208 | if (!__alias) { | 208 | if (!__alias) { |
209 | rb_insert_color(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); | 209 | rb_insert_color(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); |
210 | return; | 210 | return; |
211 | } | 211 | } |
212 | 212 | ||
213 | deadline_move_request(dd, __alias); | 213 | deadline_move_request(dd, __alias); |
214 | goto retry; | 214 | goto retry; |
215 | } | 215 | } |
216 | 216 | ||
217 | static inline void | 217 | static inline void |
218 | deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) | 218 | deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) |
219 | { | 219 | { |
220 | const int data_dir = rq_data_dir(drq->request); | 220 | const int data_dir = rq_data_dir(drq->request); |
221 | 221 | ||
222 | if (dd->next_drq[data_dir] == drq) { | 222 | if (dd->next_drq[data_dir] == drq) { |
223 | struct rb_node *rbnext = rb_next(&drq->rb_node); | 223 | struct rb_node *rbnext = rb_next(&drq->rb_node); |
224 | 224 | ||
225 | dd->next_drq[data_dir] = NULL; | 225 | dd->next_drq[data_dir] = NULL; |
226 | if (rbnext) | 226 | if (rbnext) |
227 | dd->next_drq[data_dir] = rb_entry_drq(rbnext); | 227 | dd->next_drq[data_dir] = rb_entry_drq(rbnext); |
228 | } | 228 | } |
229 | 229 | ||
230 | BUG_ON(!ON_RB(&drq->rb_node)); | 230 | BUG_ON(!ON_RB(&drq->rb_node)); |
231 | rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); | 231 | rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); |
232 | RB_CLEAR(&drq->rb_node); | 232 | RB_CLEAR(&drq->rb_node); |
233 | } | 233 | } |
234 | 234 | ||
235 | static struct request * | 235 | static struct request * |
236 | deadline_find_drq_rb(struct deadline_data *dd, sector_t sector, int data_dir) | 236 | deadline_find_drq_rb(struct deadline_data *dd, sector_t sector, int data_dir) |
237 | { | 237 | { |
238 | struct rb_node *n = dd->sort_list[data_dir].rb_node; | 238 | struct rb_node *n = dd->sort_list[data_dir].rb_node; |
239 | struct deadline_rq *drq; | 239 | struct deadline_rq *drq; |
240 | 240 | ||
241 | while (n) { | 241 | while (n) { |
242 | drq = rb_entry_drq(n); | 242 | drq = rb_entry_drq(n); |
243 | 243 | ||
244 | if (sector < drq->rb_key) | 244 | if (sector < drq->rb_key) |
245 | n = n->rb_left; | 245 | n = n->rb_left; |
246 | else if (sector > drq->rb_key) | 246 | else if (sector > drq->rb_key) |
247 | n = n->rb_right; | 247 | n = n->rb_right; |
248 | else | 248 | else |
249 | return drq->request; | 249 | return drq->request; |
250 | } | 250 | } |
251 | 251 | ||
252 | return NULL; | 252 | return NULL; |
253 | } | 253 | } |
254 | 254 | ||
255 | /* | 255 | /* |
256 | * deadline_find_first_drq finds the first (lowest sector numbered) request | 256 | * deadline_find_first_drq finds the first (lowest sector numbered) request |
257 | * for the specified data_dir. Used to sweep back to the start of the disk | 257 | * for the specified data_dir. Used to sweep back to the start of the disk |
258 | * (1-way elevator) after we process the last (highest sector) request. | 258 | * (1-way elevator) after we process the last (highest sector) request. |
259 | */ | 259 | */ |
260 | static struct deadline_rq * | 260 | static struct deadline_rq * |
261 | deadline_find_first_drq(struct deadline_data *dd, int data_dir) | 261 | deadline_find_first_drq(struct deadline_data *dd, int data_dir) |
262 | { | 262 | { |
263 | struct rb_node *n = dd->sort_list[data_dir].rb_node; | 263 | struct rb_node *n = dd->sort_list[data_dir].rb_node; |
264 | 264 | ||
265 | for (;;) { | 265 | for (;;) { |
266 | if (n->rb_left == NULL) | 266 | if (n->rb_left == NULL) |
267 | return rb_entry_drq(n); | 267 | return rb_entry_drq(n); |
268 | 268 | ||
269 | n = n->rb_left; | 269 | n = n->rb_left; |
270 | } | 270 | } |
271 | } | 271 | } |
272 | 272 | ||
273 | /* | 273 | /* |
274 | * add drq to rbtree and fifo | 274 | * add drq to rbtree and fifo |
275 | */ | 275 | */ |
276 | static void | 276 | static void |
277 | deadline_add_request(struct request_queue *q, struct request *rq) | 277 | deadline_add_request(struct request_queue *q, struct request *rq) |
278 | { | 278 | { |
279 | struct deadline_data *dd = q->elevator->elevator_data; | 279 | struct deadline_data *dd = q->elevator->elevator_data; |
280 | struct deadline_rq *drq = RQ_DATA(rq); | 280 | struct deadline_rq *drq = RQ_DATA(rq); |
281 | 281 | ||
282 | const int data_dir = rq_data_dir(drq->request); | 282 | const int data_dir = rq_data_dir(drq->request); |
283 | 283 | ||
284 | deadline_add_drq_rb(dd, drq); | 284 | deadline_add_drq_rb(dd, drq); |
285 | /* | 285 | /* |
286 | * set expire time (only used for reads) and add to fifo list | 286 | * set expire time (only used for reads) and add to fifo list |
287 | */ | 287 | */ |
288 | drq->expires = jiffies + dd->fifo_expire[data_dir]; | 288 | drq->expires = jiffies + dd->fifo_expire[data_dir]; |
289 | list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]); | 289 | list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]); |
290 | 290 | ||
291 | if (rq_mergeable(rq)) | 291 | if (rq_mergeable(rq)) |
292 | deadline_add_drq_hash(dd, drq); | 292 | deadline_add_drq_hash(dd, drq); |
293 | } | 293 | } |
294 | 294 | ||
295 | /* | 295 | /* |
296 | * remove rq from rbtree, fifo, and hash | 296 | * remove rq from rbtree, fifo, and hash |
297 | */ | 297 | */ |
298 | static void deadline_remove_request(request_queue_t *q, struct request *rq) | 298 | static void deadline_remove_request(request_queue_t *q, struct request *rq) |
299 | { | 299 | { |
300 | struct deadline_rq *drq = RQ_DATA(rq); | 300 | struct deadline_rq *drq = RQ_DATA(rq); |
301 | struct deadline_data *dd = q->elevator->elevator_data; | 301 | struct deadline_data *dd = q->elevator->elevator_data; |
302 | 302 | ||
303 | list_del_init(&drq->fifo); | 303 | list_del_init(&drq->fifo); |
304 | deadline_del_drq_rb(dd, drq); | 304 | deadline_del_drq_rb(dd, drq); |
305 | deadline_del_drq_hash(drq); | 305 | deadline_del_drq_hash(drq); |
306 | } | 306 | } |
307 | 307 | ||
308 | static int | 308 | static int |
309 | deadline_merge(request_queue_t *q, struct request **req, struct bio *bio) | 309 | deadline_merge(request_queue_t *q, struct request **req, struct bio *bio) |
310 | { | 310 | { |
311 | struct deadline_data *dd = q->elevator->elevator_data; | 311 | struct deadline_data *dd = q->elevator->elevator_data; |
312 | struct request *__rq; | 312 | struct request *__rq; |
313 | int ret; | 313 | int ret; |
314 | 314 | ||
315 | /* | 315 | /* |
316 | * see if the merge hash can satisfy a back merge | 316 | * see if the merge hash can satisfy a back merge |
317 | */ | 317 | */ |
318 | __rq = deadline_find_drq_hash(dd, bio->bi_sector); | 318 | __rq = deadline_find_drq_hash(dd, bio->bi_sector); |
319 | if (__rq) { | 319 | if (__rq) { |
320 | BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector); | 320 | BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector); |
321 | 321 | ||
322 | if (elv_rq_merge_ok(__rq, bio)) { | 322 | if (elv_rq_merge_ok(__rq, bio)) { |
323 | ret = ELEVATOR_BACK_MERGE; | 323 | ret = ELEVATOR_BACK_MERGE; |
324 | goto out; | 324 | goto out; |
325 | } | 325 | } |
326 | } | 326 | } |
327 | 327 | ||
328 | /* | 328 | /* |
329 | * check for front merge | 329 | * check for front merge |
330 | */ | 330 | */ |
331 | if (dd->front_merges) { | 331 | if (dd->front_merges) { |
332 | sector_t rb_key = bio->bi_sector + bio_sectors(bio); | 332 | sector_t rb_key = bio->bi_sector + bio_sectors(bio); |
333 | 333 | ||
334 | __rq = deadline_find_drq_rb(dd, rb_key, bio_data_dir(bio)); | 334 | __rq = deadline_find_drq_rb(dd, rb_key, bio_data_dir(bio)); |
335 | if (__rq) { | 335 | if (__rq) { |
336 | BUG_ON(rb_key != rq_rb_key(__rq)); | 336 | BUG_ON(rb_key != rq_rb_key(__rq)); |
337 | 337 | ||
338 | if (elv_rq_merge_ok(__rq, bio)) { | 338 | if (elv_rq_merge_ok(__rq, bio)) { |
339 | ret = ELEVATOR_FRONT_MERGE; | 339 | ret = ELEVATOR_FRONT_MERGE; |
340 | goto out; | 340 | goto out; |
341 | } | 341 | } |
342 | } | 342 | } |
343 | } | 343 | } |
344 | 344 | ||
345 | return ELEVATOR_NO_MERGE; | 345 | return ELEVATOR_NO_MERGE; |
346 | out: | 346 | out: |
347 | if (ret) | 347 | if (ret) |
348 | deadline_hot_drq_hash(dd, RQ_DATA(__rq)); | 348 | deadline_hot_drq_hash(dd, RQ_DATA(__rq)); |
349 | *req = __rq; | 349 | *req = __rq; |
350 | return ret; | 350 | return ret; |
351 | } | 351 | } |
352 | 352 | ||
353 | static void deadline_merged_request(request_queue_t *q, struct request *req) | 353 | static void deadline_merged_request(request_queue_t *q, struct request *req) |
354 | { | 354 | { |
355 | struct deadline_data *dd = q->elevator->elevator_data; | 355 | struct deadline_data *dd = q->elevator->elevator_data; |
356 | struct deadline_rq *drq = RQ_DATA(req); | 356 | struct deadline_rq *drq = RQ_DATA(req); |
357 | 357 | ||
358 | /* | 358 | /* |
359 | * hash always needs to be repositioned, key is end sector | 359 | * hash always needs to be repositioned, key is end sector |
360 | */ | 360 | */ |
361 | deadline_del_drq_hash(drq); | 361 | deadline_del_drq_hash(drq); |
362 | deadline_add_drq_hash(dd, drq); | 362 | deadline_add_drq_hash(dd, drq); |
363 | 363 | ||
364 | /* | 364 | /* |
365 | * if the merge was a front merge, we need to reposition request | 365 | * if the merge was a front merge, we need to reposition request |
366 | */ | 366 | */ |
367 | if (rq_rb_key(req) != drq->rb_key) { | 367 | if (rq_rb_key(req) != drq->rb_key) { |
368 | deadline_del_drq_rb(dd, drq); | 368 | deadline_del_drq_rb(dd, drq); |
369 | deadline_add_drq_rb(dd, drq); | 369 | deadline_add_drq_rb(dd, drq); |
370 | } | 370 | } |
371 | } | 371 | } |
372 | 372 | ||
373 | static void | 373 | static void |
374 | deadline_merged_requests(request_queue_t *q, struct request *req, | 374 | deadline_merged_requests(request_queue_t *q, struct request *req, |
375 | struct request *next) | 375 | struct request *next) |
376 | { | 376 | { |
377 | struct deadline_data *dd = q->elevator->elevator_data; | 377 | struct deadline_data *dd = q->elevator->elevator_data; |
378 | struct deadline_rq *drq = RQ_DATA(req); | 378 | struct deadline_rq *drq = RQ_DATA(req); |
379 | struct deadline_rq *dnext = RQ_DATA(next); | 379 | struct deadline_rq *dnext = RQ_DATA(next); |
380 | 380 | ||
381 | BUG_ON(!drq); | 381 | BUG_ON(!drq); |
382 | BUG_ON(!dnext); | 382 | BUG_ON(!dnext); |
383 | 383 | ||
384 | /* | 384 | /* |
385 | * reposition drq (this is the merged request) in hash, and in rbtree | 385 | * reposition drq (this is the merged request) in hash, and in rbtree |
386 | * in case of a front merge | 386 | * in case of a front merge |
387 | */ | 387 | */ |
388 | deadline_del_drq_hash(drq); | 388 | deadline_del_drq_hash(drq); |
389 | deadline_add_drq_hash(dd, drq); | 389 | deadline_add_drq_hash(dd, drq); |
390 | 390 | ||
391 | if (rq_rb_key(req) != drq->rb_key) { | 391 | if (rq_rb_key(req) != drq->rb_key) { |
392 | deadline_del_drq_rb(dd, drq); | 392 | deadline_del_drq_rb(dd, drq); |
393 | deadline_add_drq_rb(dd, drq); | 393 | deadline_add_drq_rb(dd, drq); |
394 | } | 394 | } |
395 | 395 | ||
396 | /* | 396 | /* |
397 | * if dnext expires before drq, assign its expire time to drq | 397 | * if dnext expires before drq, assign its expire time to drq |
398 | * and move into dnext position (dnext will be deleted) in fifo | 398 | * and move into dnext position (dnext will be deleted) in fifo |
399 | */ | 399 | */ |
400 | if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) { | 400 | if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) { |
401 | if (time_before(dnext->expires, drq->expires)) { | 401 | if (time_before(dnext->expires, drq->expires)) { |
402 | list_move(&drq->fifo, &dnext->fifo); | 402 | list_move(&drq->fifo, &dnext->fifo); |
403 | drq->expires = dnext->expires; | 403 | drq->expires = dnext->expires; |
404 | } | 404 | } |
405 | } | 405 | } |
406 | 406 | ||
407 | /* | 407 | /* |
408 | * kill knowledge of next, this one is a goner | 408 | * kill knowledge of next, this one is a goner |
409 | */ | 409 | */ |
410 | deadline_remove_request(q, next); | 410 | deadline_remove_request(q, next); |
411 | } | 411 | } |
412 | 412 | ||
413 | /* | 413 | /* |
414 | * move request from sort list to dispatch queue. | 414 | * move request from sort list to dispatch queue. |
415 | */ | 415 | */ |
416 | static inline void | 416 | static inline void |
417 | deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq) | 417 | deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq) |
418 | { | 418 | { |
419 | request_queue_t *q = drq->request->q; | 419 | request_queue_t *q = drq->request->q; |
420 | 420 | ||
421 | deadline_remove_request(q, drq->request); | 421 | deadline_remove_request(q, drq->request); |
422 | elv_dispatch_add_tail(q, drq->request); | 422 | elv_dispatch_add_tail(q, drq->request); |
423 | } | 423 | } |
424 | 424 | ||
425 | /* | 425 | /* |
426 | * move an entry to dispatch queue | 426 | * move an entry to dispatch queue |
427 | */ | 427 | */ |
428 | static void | 428 | static void |
429 | deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq) | 429 | deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq) |
430 | { | 430 | { |
431 | const int data_dir = rq_data_dir(drq->request); | 431 | const int data_dir = rq_data_dir(drq->request); |
432 | struct rb_node *rbnext = rb_next(&drq->rb_node); | 432 | struct rb_node *rbnext = rb_next(&drq->rb_node); |
433 | 433 | ||
434 | dd->next_drq[READ] = NULL; | 434 | dd->next_drq[READ] = NULL; |
435 | dd->next_drq[WRITE] = NULL; | 435 | dd->next_drq[WRITE] = NULL; |
436 | 436 | ||
437 | if (rbnext) | 437 | if (rbnext) |
438 | dd->next_drq[data_dir] = rb_entry_drq(rbnext); | 438 | dd->next_drq[data_dir] = rb_entry_drq(rbnext); |
439 | 439 | ||
440 | dd->last_sector = drq->request->sector + drq->request->nr_sectors; | 440 | dd->last_sector = drq->request->sector + drq->request->nr_sectors; |
441 | 441 | ||
442 | /* | 442 | /* |
443 | * take it off the sort and fifo list, move | 443 | * take it off the sort and fifo list, move |
444 | * to dispatch queue | 444 | * to dispatch queue |
445 | */ | 445 | */ |
446 | deadline_move_to_dispatch(dd, drq); | 446 | deadline_move_to_dispatch(dd, drq); |
447 | } | 447 | } |
448 | 448 | ||
449 | #define list_entry_fifo(ptr) list_entry((ptr), struct deadline_rq, fifo) | 449 | #define list_entry_fifo(ptr) list_entry((ptr), struct deadline_rq, fifo) |
450 | 450 | ||
451 | /* | 451 | /* |
452 | * deadline_check_fifo returns 0 if there are no expired requests on the fifo, | 452 | * deadline_check_fifo returns 0 if there are no expired requests on the fifo, |
453 | * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) | 453 | * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) |
454 | */ | 454 | */ |
455 | static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) | 455 | static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) |
456 | { | 456 | { |
457 | struct deadline_rq *drq = list_entry_fifo(dd->fifo_list[ddir].next); | 457 | struct deadline_rq *drq = list_entry_fifo(dd->fifo_list[ddir].next); |
458 | 458 | ||
459 | /* | 459 | /* |
460 | * drq is expired! | 460 | * drq is expired! |
461 | */ | 461 | */ |
462 | if (time_after(jiffies, drq->expires)) | 462 | if (time_after(jiffies, drq->expires)) |
463 | return 1; | 463 | return 1; |
464 | 464 | ||
465 | return 0; | 465 | return 0; |
466 | } | 466 | } |
467 | 467 | ||
468 | /* | 468 | /* |
469 | * deadline_dispatch_requests selects the best request according to | 469 | * deadline_dispatch_requests selects the best request according to |
470 | * read/write expire, fifo_batch, etc | 470 | * read/write expire, fifo_batch, etc |
471 | */ | 471 | */ |
472 | static int deadline_dispatch_requests(request_queue_t *q, int force) | 472 | static int deadline_dispatch_requests(request_queue_t *q, int force) |
473 | { | 473 | { |
474 | struct deadline_data *dd = q->elevator->elevator_data; | 474 | struct deadline_data *dd = q->elevator->elevator_data; |
475 | const int reads = !list_empty(&dd->fifo_list[READ]); | 475 | const int reads = !list_empty(&dd->fifo_list[READ]); |
476 | const int writes = !list_empty(&dd->fifo_list[WRITE]); | 476 | const int writes = !list_empty(&dd->fifo_list[WRITE]); |
477 | struct deadline_rq *drq; | 477 | struct deadline_rq *drq; |
478 | int data_dir; | 478 | int data_dir; |
479 | 479 | ||
480 | /* | 480 | /* |
481 | * batches are currently reads XOR writes | 481 | * batches are currently reads XOR writes |
482 | */ | 482 | */ |
483 | if (dd->next_drq[WRITE]) | 483 | if (dd->next_drq[WRITE]) |
484 | drq = dd->next_drq[WRITE]; | 484 | drq = dd->next_drq[WRITE]; |
485 | else | 485 | else |
486 | drq = dd->next_drq[READ]; | 486 | drq = dd->next_drq[READ]; |
487 | 487 | ||
488 | if (drq) { | 488 | if (drq) { |
489 | /* we have a "next request" */ | 489 | /* we have a "next request" */ |
490 | 490 | ||
491 | if (dd->last_sector != drq->request->sector) | 491 | if (dd->last_sector != drq->request->sector) |
492 | /* end the batch on a non sequential request */ | 492 | /* end the batch on a non sequential request */ |
493 | dd->batching += dd->fifo_batch; | 493 | dd->batching += dd->fifo_batch; |
494 | 494 | ||
495 | if (dd->batching < dd->fifo_batch) | 495 | if (dd->batching < dd->fifo_batch) |
496 | /* we are still entitled to batch */ | 496 | /* we are still entitled to batch */ |
497 | goto dispatch_request; | 497 | goto dispatch_request; |
498 | } | 498 | } |
499 | 499 | ||
500 | /* | 500 | /* |
501 | * at this point we are not running a batch. select the appropriate | 501 | * at this point we are not running a batch. select the appropriate |
502 | * data direction (read / write) | 502 | * data direction (read / write) |
503 | */ | 503 | */ |
504 | 504 | ||
505 | if (reads) { | 505 | if (reads) { |
506 | BUG_ON(RB_EMPTY(&dd->sort_list[READ])); | 506 | BUG_ON(RB_EMPTY(&dd->sort_list[READ])); |
507 | 507 | ||
508 | if (writes && (dd->starved++ >= dd->writes_starved)) | 508 | if (writes && (dd->starved++ >= dd->writes_starved)) |
509 | goto dispatch_writes; | 509 | goto dispatch_writes; |
510 | 510 | ||
511 | data_dir = READ; | 511 | data_dir = READ; |
512 | 512 | ||
513 | goto dispatch_find_request; | 513 | goto dispatch_find_request; |
514 | } | 514 | } |
515 | 515 | ||
516 | /* | 516 | /* |
517 | * either there are no reads, or writes have been starved | 517 | * either there are no reads, or writes have been starved |
518 | */ | 518 | */ |
519 | 519 | ||
520 | if (writes) { | 520 | if (writes) { |
521 | dispatch_writes: | 521 | dispatch_writes: |
522 | BUG_ON(RB_EMPTY(&dd->sort_list[WRITE])); | 522 | BUG_ON(RB_EMPTY(&dd->sort_list[WRITE])); |
523 | 523 | ||
524 | dd->starved = 0; | 524 | dd->starved = 0; |
525 | 525 | ||
526 | data_dir = WRITE; | 526 | data_dir = WRITE; |
527 | 527 | ||
528 | goto dispatch_find_request; | 528 | goto dispatch_find_request; |
529 | } | 529 | } |
530 | 530 | ||
531 | return 0; | 531 | return 0; |
532 | 532 | ||
533 | dispatch_find_request: | 533 | dispatch_find_request: |
534 | /* | 534 | /* |
535 | * we are not running a batch, find best request for selected data_dir | 535 | * we are not running a batch, find best request for selected data_dir |
536 | */ | 536 | */ |
537 | if (deadline_check_fifo(dd, data_dir)) { | 537 | if (deadline_check_fifo(dd, data_dir)) { |
538 | /* An expired request exists - satisfy it */ | 538 | /* An expired request exists - satisfy it */ |
539 | dd->batching = 0; | 539 | dd->batching = 0; |
540 | drq = list_entry_fifo(dd->fifo_list[data_dir].next); | 540 | drq = list_entry_fifo(dd->fifo_list[data_dir].next); |
541 | 541 | ||
542 | } else if (dd->next_drq[data_dir]) { | 542 | } else if (dd->next_drq[data_dir]) { |
543 | /* | 543 | /* |
544 | * The last req was the same dir and we have a next request in | 544 | * The last req was the same dir and we have a next request in |
545 | * sort order. No expired requests so continue on from here. | 545 | * sort order. No expired requests so continue on from here. |
546 | */ | 546 | */ |
547 | drq = dd->next_drq[data_dir]; | 547 | drq = dd->next_drq[data_dir]; |
548 | } else { | 548 | } else { |
549 | /* | 549 | /* |
550 | * The last req was the other direction or we have run out of | 550 | * The last req was the other direction or we have run out of |
551 | * higher-sectored requests. Go back to the lowest sectored | 551 | * higher-sectored requests. Go back to the lowest sectored |
552 | * request (1 way elevator) and start a new batch. | 552 | * request (1 way elevator) and start a new batch. |
553 | */ | 553 | */ |
554 | dd->batching = 0; | 554 | dd->batching = 0; |
555 | drq = deadline_find_first_drq(dd, data_dir); | 555 | drq = deadline_find_first_drq(dd, data_dir); |
556 | } | 556 | } |
557 | 557 | ||
558 | dispatch_request: | 558 | dispatch_request: |
559 | /* | 559 | /* |
560 | * drq is the selected appropriate request. | 560 | * drq is the selected appropriate request. |
561 | */ | 561 | */ |
562 | dd->batching++; | 562 | dd->batching++; |
563 | deadline_move_request(dd, drq); | 563 | deadline_move_request(dd, drq); |
564 | 564 | ||
565 | return 1; | 565 | return 1; |
566 | } | 566 | } |
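/*
 * Editor's summary (not part of the original source or of this patch):
 * the dispatch policy implemented by deadline_dispatch_requests() above
 * boils down to three steps:
 *
 *   1. If a cached "next request" exists and the current sequential
 *      batch still has budget (dd->batching < dd->fifo_batch), keep
 *      batching.
 *   2. Otherwise choose a direction: reads are preferred, but once
 *      dd->starved exceeds dd->writes_starved, queued writes get a turn.
 *   3. Within that direction, an expired FIFO head is served first;
 *      failing that, the next request in sector order; failing that,
 *      the sweep restarts from the lowest-sector request (1-way elevator).
 */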
567 | 567 | ||
568 | static int deadline_queue_empty(request_queue_t *q) | 568 | static int deadline_queue_empty(request_queue_t *q) |
569 | { | 569 | { |
570 | struct deadline_data *dd = q->elevator->elevator_data; | 570 | struct deadline_data *dd = q->elevator->elevator_data; |
571 | 571 | ||
572 | return list_empty(&dd->fifo_list[WRITE]) | 572 | return list_empty(&dd->fifo_list[WRITE]) |
573 | && list_empty(&dd->fifo_list[READ]); | 573 | && list_empty(&dd->fifo_list[READ]); |
574 | } | 574 | } |
575 | 575 | ||
576 | static struct request * | 576 | static struct request * |
577 | deadline_former_request(request_queue_t *q, struct request *rq) | 577 | deadline_former_request(request_queue_t *q, struct request *rq) |
578 | { | 578 | { |
579 | struct deadline_rq *drq = RQ_DATA(rq); | 579 | struct deadline_rq *drq = RQ_DATA(rq); |
580 | struct rb_node *rbprev = rb_prev(&drq->rb_node); | 580 | struct rb_node *rbprev = rb_prev(&drq->rb_node); |
581 | 581 | ||
582 | if (rbprev) | 582 | if (rbprev) |
583 | return rb_entry_drq(rbprev)->request; | 583 | return rb_entry_drq(rbprev)->request; |
584 | 584 | ||
585 | return NULL; | 585 | return NULL; |
586 | } | 586 | } |
587 | 587 | ||
588 | static struct request * | 588 | static struct request * |
589 | deadline_latter_request(request_queue_t *q, struct request *rq) | 589 | deadline_latter_request(request_queue_t *q, struct request *rq) |
590 | { | 590 | { |
591 | struct deadline_rq *drq = RQ_DATA(rq); | 591 | struct deadline_rq *drq = RQ_DATA(rq); |
592 | struct rb_node *rbnext = rb_next(&drq->rb_node); | 592 | struct rb_node *rbnext = rb_next(&drq->rb_node); |
593 | 593 | ||
594 | if (rbnext) | 594 | if (rbnext) |
595 | return rb_entry_drq(rbnext)->request; | 595 | return rb_entry_drq(rbnext)->request; |
596 | 596 | ||
597 | return NULL; | 597 | return NULL; |
598 | } | 598 | } |
599 | 599 | ||
600 | static void deadline_exit_queue(elevator_t *e) | 600 | static void deadline_exit_queue(elevator_t *e) |
601 | { | 601 | { |
602 | struct deadline_data *dd = e->elevator_data; | 602 | struct deadline_data *dd = e->elevator_data; |
603 | 603 | ||
604 | BUG_ON(!list_empty(&dd->fifo_list[READ])); | 604 | BUG_ON(!list_empty(&dd->fifo_list[READ])); |
605 | BUG_ON(!list_empty(&dd->fifo_list[WRITE])); | 605 | BUG_ON(!list_empty(&dd->fifo_list[WRITE])); |
606 | 606 | ||
607 | mempool_destroy(dd->drq_pool); | 607 | mempool_destroy(dd->drq_pool); |
608 | kfree(dd->hash); | 608 | kfree(dd->hash); |
609 | kfree(dd); | 609 | kfree(dd); |
610 | } | 610 | } |
611 | 611 | ||
612 | /* | 612 | /* |
613 | * initialize elevator private data (deadline_data), and alloc a drq for | 613 | * initialize elevator private data (deadline_data), and alloc a drq for |
614 | * each request on the free lists | 614 | * each request on the free lists |
615 | */ | 615 | */ |
616 | static int deadline_init_queue(request_queue_t *q, elevator_t *e) | 616 | static int deadline_init_queue(request_queue_t *q, elevator_t *e) |
617 | { | 617 | { |
618 | struct deadline_data *dd; | 618 | struct deadline_data *dd; |
619 | int i; | 619 | int i; |
620 | 620 | ||
621 | if (!drq_pool) | 621 | if (!drq_pool) |
622 | return -ENOMEM; | 622 | return -ENOMEM; |
623 | 623 | ||
624 | dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node); | 624 | dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node); |
625 | if (!dd) | 625 | if (!dd) |
626 | return -ENOMEM; | 626 | return -ENOMEM; |
627 | memset(dd, 0, sizeof(*dd)); | 627 | memset(dd, 0, sizeof(*dd)); |
628 | 628 | ||
629 | dd->hash = kmalloc_node(sizeof(struct list_head)*DL_HASH_ENTRIES, | 629 | dd->hash = kmalloc_node(sizeof(struct list_head)*DL_HASH_ENTRIES, |
630 | GFP_KERNEL, q->node); | 630 | GFP_KERNEL, q->node); |
631 | if (!dd->hash) { | 631 | if (!dd->hash) { |
632 | kfree(dd); | 632 | kfree(dd); |
633 | return -ENOMEM; | 633 | return -ENOMEM; |
634 | } | 634 | } |
635 | 635 | ||
636 | dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, | 636 | dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, |
637 | mempool_free_slab, drq_pool, q->node); | 637 | mempool_free_slab, drq_pool, q->node); |
638 | if (!dd->drq_pool) { | 638 | if (!dd->drq_pool) { |
639 | kfree(dd->hash); | 639 | kfree(dd->hash); |
640 | kfree(dd); | 640 | kfree(dd); |
641 | return -ENOMEM; | 641 | return -ENOMEM; |
642 | } | 642 | } |
643 | 643 | ||
644 | for (i = 0; i < DL_HASH_ENTRIES; i++) | 644 | for (i = 0; i < DL_HASH_ENTRIES; i++) |
645 | INIT_LIST_HEAD(&dd->hash[i]); | 645 | INIT_LIST_HEAD(&dd->hash[i]); |
646 | 646 | ||
647 | INIT_LIST_HEAD(&dd->fifo_list[READ]); | 647 | INIT_LIST_HEAD(&dd->fifo_list[READ]); |
648 | INIT_LIST_HEAD(&dd->fifo_list[WRITE]); | 648 | INIT_LIST_HEAD(&dd->fifo_list[WRITE]); |
649 | dd->sort_list[READ] = RB_ROOT; | 649 | dd->sort_list[READ] = RB_ROOT; |
650 | dd->sort_list[WRITE] = RB_ROOT; | 650 | dd->sort_list[WRITE] = RB_ROOT; |
651 | dd->fifo_expire[READ] = read_expire; | 651 | dd->fifo_expire[READ] = read_expire; |
652 | dd->fifo_expire[WRITE] = write_expire; | 652 | dd->fifo_expire[WRITE] = write_expire; |
653 | dd->writes_starved = writes_starved; | 653 | dd->writes_starved = writes_starved; |
654 | dd->front_merges = 1; | 654 | dd->front_merges = 1; |
655 | dd->fifo_batch = fifo_batch; | 655 | dd->fifo_batch = fifo_batch; |
656 | e->elevator_data = dd; | 656 | e->elevator_data = dd; |
657 | return 0; | 657 | return 0; |
658 | } | 658 | } |
659 | 659 | ||
660 | static void deadline_put_request(request_queue_t *q, struct request *rq) | 660 | static void deadline_put_request(request_queue_t *q, struct request *rq) |
661 | { | 661 | { |
662 | struct deadline_data *dd = q->elevator->elevator_data; | 662 | struct deadline_data *dd = q->elevator->elevator_data; |
663 | struct deadline_rq *drq = RQ_DATA(rq); | 663 | struct deadline_rq *drq = RQ_DATA(rq); |
664 | 664 | ||
665 | mempool_free(drq, dd->drq_pool); | 665 | mempool_free(drq, dd->drq_pool); |
666 | rq->elevator_private = NULL; | 666 | rq->elevator_private = NULL; |
667 | } | 667 | } |
668 | 668 | ||
669 | static int | 669 | static int |
670 | deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio, | 670 | deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio, |
671 | gfp_t gfp_mask) | 671 | gfp_t gfp_mask) |
672 | { | 672 | { |
673 | struct deadline_data *dd = q->elevator->elevator_data; | 673 | struct deadline_data *dd = q->elevator->elevator_data; |
674 | struct deadline_rq *drq; | 674 | struct deadline_rq *drq; |
675 | 675 | ||
676 | drq = mempool_alloc(dd->drq_pool, gfp_mask); | 676 | drq = mempool_alloc(dd->drq_pool, gfp_mask); |
677 | if (drq) { | 677 | if (drq) { |
678 | memset(drq, 0, sizeof(*drq)); | 678 | memset(drq, 0, sizeof(*drq)); |
679 | RB_CLEAR(&drq->rb_node); | 679 | RB_CLEAR(&drq->rb_node); |
680 | drq->request = rq; | 680 | drq->request = rq; |
681 | 681 | ||
682 | INIT_LIST_HEAD(&drq->hash); | 682 | INIT_LIST_HEAD(&drq->hash); |
683 | drq->on_hash = 0; | 683 | drq->on_hash = 0; |
684 | 684 | ||
685 | INIT_LIST_HEAD(&drq->fifo); | 685 | INIT_LIST_HEAD(&drq->fifo); |
686 | 686 | ||
687 | rq->elevator_private = drq; | 687 | rq->elevator_private = drq; |
688 | return 0; | 688 | return 0; |
689 | } | 689 | } |
690 | 690 | ||
691 | return 1; | 691 | return 1; |
692 | } | 692 | } |
693 | 693 | ||
694 | /* | 694 | /* |
695 | * sysfs parts below | 695 | * sysfs parts below |
696 | */ | 696 | */ |
697 | struct deadline_fs_entry { | 697 | struct deadline_fs_entry { |
698 | struct attribute attr; | 698 | struct attribute attr; |
699 | ssize_t (*show)(struct deadline_data *, char *); | 699 | ssize_t (*show)(struct deadline_data *, char *); |
700 | ssize_t (*store)(struct deadline_data *, const char *, size_t); | 700 | ssize_t (*store)(struct deadline_data *, const char *, size_t); |
701 | }; | 701 | }; |
702 | 702 | ||
703 | static ssize_t | 703 | static ssize_t |
704 | deadline_var_show(int var, char *page) | 704 | deadline_var_show(int var, char *page) |
705 | { | 705 | { |
706 | return sprintf(page, "%d\n", var); | 706 | return sprintf(page, "%d\n", var); |
707 | } | 707 | } |
708 | 708 | ||
709 | static ssize_t | 709 | static ssize_t |
710 | deadline_var_store(int *var, const char *page, size_t count) | 710 | deadline_var_store(int *var, const char *page, size_t count) |
711 | { | 711 | { |
712 | char *p = (char *) page; | 712 | char *p = (char *) page; |
713 | 713 | ||
714 | *var = simple_strtol(p, &p, 10); | 714 | *var = simple_strtol(p, &p, 10); |
715 | return count; | 715 | return count; |
716 | } | 716 | } |
717 | 717 | ||
718 | #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ | 718 | #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ |
719 | static ssize_t __FUNC(struct deadline_data *dd, char *page) \ | 719 | static ssize_t __FUNC(struct deadline_data *dd, char *page) \ |
720 | { \ | 720 | { \ |
721 | int __data = __VAR; \ | 721 | int __data = __VAR; \ |
722 | if (__CONV) \ | 722 | if (__CONV) \ |
723 | __data = jiffies_to_msecs(__data); \ | 723 | __data = jiffies_to_msecs(__data); \ |
724 | return deadline_var_show(__data, (page)); \ | 724 | return deadline_var_show(__data, (page)); \ |
725 | } | 725 | } |
726 | SHOW_FUNCTION(deadline_readexpire_show, dd->fifo_expire[READ], 1); | 726 | SHOW_FUNCTION(deadline_readexpire_show, dd->fifo_expire[READ], 1); |
727 | SHOW_FUNCTION(deadline_writeexpire_show, dd->fifo_expire[WRITE], 1); | 727 | SHOW_FUNCTION(deadline_writeexpire_show, dd->fifo_expire[WRITE], 1); |
728 | SHOW_FUNCTION(deadline_writesstarved_show, dd->writes_starved, 0); | 728 | SHOW_FUNCTION(deadline_writesstarved_show, dd->writes_starved, 0); |
729 | SHOW_FUNCTION(deadline_frontmerges_show, dd->front_merges, 0); | 729 | SHOW_FUNCTION(deadline_frontmerges_show, dd->front_merges, 0); |
730 | SHOW_FUNCTION(deadline_fifobatch_show, dd->fifo_batch, 0); | 730 | SHOW_FUNCTION(deadline_fifobatch_show, dd->fifo_batch, 0); |
731 | #undef SHOW_FUNCTION | 731 | #undef SHOW_FUNCTION |
732 | 732 | ||
733 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ | 733 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ |
734 | static ssize_t __FUNC(struct deadline_data *dd, const char *page, size_t count) \ | 734 | static ssize_t __FUNC(struct deadline_data *dd, const char *page, size_t count) \ |
735 | { \ | 735 | { \ |
736 | int __data; \ | 736 | int __data; \ |
737 | int ret = deadline_var_store(&__data, (page), count); \ | 737 | int ret = deadline_var_store(&__data, (page), count); \ |
738 | if (__data < (MIN)) \ | 738 | if (__data < (MIN)) \ |
739 | __data = (MIN); \ | 739 | __data = (MIN); \ |
740 | else if (__data > (MAX)) \ | 740 | else if (__data > (MAX)) \ |
741 | __data = (MAX); \ | 741 | __data = (MAX); \ |
742 | if (__CONV) \ | 742 | if (__CONV) \ |
743 | *(__PTR) = msecs_to_jiffies(__data); \ | 743 | *(__PTR) = msecs_to_jiffies(__data); \ |
744 | else \ | 744 | else \ |
745 | *(__PTR) = __data; \ | 745 | *(__PTR) = __data; \ |
746 | return ret; \ | 746 | return ret; \ |
747 | } | 747 | } |
748 | STORE_FUNCTION(deadline_readexpire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1); | 748 | STORE_FUNCTION(deadline_readexpire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1); |
749 | STORE_FUNCTION(deadline_writeexpire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1); | 749 | STORE_FUNCTION(deadline_writeexpire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1); |
750 | STORE_FUNCTION(deadline_writesstarved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0); | 750 | STORE_FUNCTION(deadline_writesstarved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0); |
751 | STORE_FUNCTION(deadline_frontmerges_store, &dd->front_merges, 0, 1, 0); | 751 | STORE_FUNCTION(deadline_frontmerges_store, &dd->front_merges, 0, 1, 0); |
752 | STORE_FUNCTION(deadline_fifobatch_store, &dd->fifo_batch, 0, INT_MAX, 0); | 752 | STORE_FUNCTION(deadline_fifobatch_store, &dd->fifo_batch, 0, INT_MAX, 0); |
753 | #undef STORE_FUNCTION | 753 | #undef STORE_FUNCTION |
754 | 754 | ||
755 | static struct deadline_fs_entry deadline_readexpire_entry = { | 755 | static struct deadline_fs_entry deadline_readexpire_entry = { |
756 | .attr = {.name = "read_expire", .mode = S_IRUGO | S_IWUSR }, | 756 | .attr = {.name = "read_expire", .mode = S_IRUGO | S_IWUSR }, |
757 | .show = deadline_readexpire_show, | 757 | .show = deadline_readexpire_show, |
758 | .store = deadline_readexpire_store, | 758 | .store = deadline_readexpire_store, |
759 | }; | 759 | }; |
760 | static struct deadline_fs_entry deadline_writeexpire_entry = { | 760 | static struct deadline_fs_entry deadline_writeexpire_entry = { |
761 | .attr = {.name = "write_expire", .mode = S_IRUGO | S_IWUSR }, | 761 | .attr = {.name = "write_expire", .mode = S_IRUGO | S_IWUSR }, |
762 | .show = deadline_writeexpire_show, | 762 | .show = deadline_writeexpire_show, |
763 | .store = deadline_writeexpire_store, | 763 | .store = deadline_writeexpire_store, |
764 | }; | 764 | }; |
765 | static struct deadline_fs_entry deadline_writesstarved_entry = { | 765 | static struct deadline_fs_entry deadline_writesstarved_entry = { |
766 | .attr = {.name = "writes_starved", .mode = S_IRUGO | S_IWUSR }, | 766 | .attr = {.name = "writes_starved", .mode = S_IRUGO | S_IWUSR }, |
767 | .show = deadline_writesstarved_show, | 767 | .show = deadline_writesstarved_show, |
768 | .store = deadline_writesstarved_store, | 768 | .store = deadline_writesstarved_store, |
769 | }; | 769 | }; |
770 | static struct deadline_fs_entry deadline_frontmerges_entry = { | 770 | static struct deadline_fs_entry deadline_frontmerges_entry = { |
771 | .attr = {.name = "front_merges", .mode = S_IRUGO | S_IWUSR }, | 771 | .attr = {.name = "front_merges", .mode = S_IRUGO | S_IWUSR }, |
772 | .show = deadline_frontmerges_show, | 772 | .show = deadline_frontmerges_show, |
773 | .store = deadline_frontmerges_store, | 773 | .store = deadline_frontmerges_store, |
774 | }; | 774 | }; |
775 | static struct deadline_fs_entry deadline_fifobatch_entry = { | 775 | static struct deadline_fs_entry deadline_fifobatch_entry = { |
776 | .attr = {.name = "fifo_batch", .mode = S_IRUGO | S_IWUSR }, | 776 | .attr = {.name = "fifo_batch", .mode = S_IRUGO | S_IWUSR }, |
777 | .show = deadline_fifobatch_show, | 777 | .show = deadline_fifobatch_show, |
778 | .store = deadline_fifobatch_store, | 778 | .store = deadline_fifobatch_store, |
779 | }; | 779 | }; |
780 | 780 | ||
781 | static struct attribute *default_attrs[] = { | 781 | static struct attribute *default_attrs[] = { |
782 | &deadline_readexpire_entry.attr, | 782 | &deadline_readexpire_entry.attr, |
783 | &deadline_writeexpire_entry.attr, | 783 | &deadline_writeexpire_entry.attr, |
784 | &deadline_writesstarved_entry.attr, | 784 | &deadline_writesstarved_entry.attr, |
785 | &deadline_frontmerges_entry.attr, | 785 | &deadline_frontmerges_entry.attr, |
786 | &deadline_fifobatch_entry.attr, | 786 | &deadline_fifobatch_entry.attr, |
787 | NULL, | 787 | NULL, |
788 | }; | 788 | }; |
789 | 789 | ||
790 | #define to_deadline(atr) container_of((atr), struct deadline_fs_entry, attr) | 790 | #define to_deadline(atr) container_of((atr), struct deadline_fs_entry, attr) |
791 | 791 | ||
792 | static ssize_t | 792 | static ssize_t |
793 | deadline_attr_show(struct kobject *kobj, struct attribute *attr, char *page) | 793 | deadline_attr_show(struct kobject *kobj, struct attribute *attr, char *page) |
794 | { | 794 | { |
795 | elevator_t *e = container_of(kobj, elevator_t, kobj); | 795 | elevator_t *e = container_of(kobj, elevator_t, kobj); |
796 | struct deadline_fs_entry *entry = to_deadline(attr); | 796 | struct deadline_fs_entry *entry = to_deadline(attr); |
797 | 797 | ||
798 | if (!entry->show) | 798 | if (!entry->show) |
799 | return -EIO; | 799 | return -EIO; |
800 | 800 | ||
801 | return entry->show(e->elevator_data, page); | 801 | return entry->show(e->elevator_data, page); |
802 | } | 802 | } |
803 | 803 | ||
804 | static ssize_t | 804 | static ssize_t |
805 | deadline_attr_store(struct kobject *kobj, struct attribute *attr, | 805 | deadline_attr_store(struct kobject *kobj, struct attribute *attr, |
806 | const char *page, size_t length) | 806 | const char *page, size_t length) |
807 | { | 807 | { |
808 | elevator_t *e = container_of(kobj, elevator_t, kobj); | 808 | elevator_t *e = container_of(kobj, elevator_t, kobj); |
809 | struct deadline_fs_entry *entry = to_deadline(attr); | 809 | struct deadline_fs_entry *entry = to_deadline(attr); |
810 | 810 | ||
811 | if (!entry->store) | 811 | if (!entry->store) |
812 | return -EIO; | 812 | return -EIO; |
813 | 813 | ||
814 | return entry->store(e->elevator_data, page, length); | 814 | return entry->store(e->elevator_data, page, length); |
815 | } | 815 | } |
816 | 816 | ||
817 | static struct sysfs_ops deadline_sysfs_ops = { | 817 | static struct sysfs_ops deadline_sysfs_ops = { |
818 | .show = deadline_attr_show, | 818 | .show = deadline_attr_show, |
819 | .store = deadline_attr_store, | 819 | .store = deadline_attr_store, |
820 | }; | 820 | }; |
821 | 821 | ||
822 | static struct kobj_type deadline_ktype = { | 822 | static struct kobj_type deadline_ktype = { |
823 | .sysfs_ops = &deadline_sysfs_ops, | 823 | .sysfs_ops = &deadline_sysfs_ops, |
824 | .default_attrs = default_attrs, | 824 | .default_attrs = default_attrs, |
825 | }; | 825 | }; |
826 | 826 | ||
827 | static struct elevator_type iosched_deadline = { | 827 | static struct elevator_type iosched_deadline = { |
828 | .ops = { | 828 | .ops = { |
829 | .elevator_merge_fn = deadline_merge, | 829 | .elevator_merge_fn = deadline_merge, |
830 | .elevator_merged_fn = deadline_merged_request, | 830 | .elevator_merged_fn = deadline_merged_request, |
831 | .elevator_merge_req_fn = deadline_merged_requests, | 831 | .elevator_merge_req_fn = deadline_merged_requests, |
832 | .elevator_dispatch_fn = deadline_dispatch_requests, | 832 | .elevator_dispatch_fn = deadline_dispatch_requests, |
833 | .elevator_add_req_fn = deadline_add_request, | 833 | .elevator_add_req_fn = deadline_add_request, |
834 | .elevator_queue_empty_fn = deadline_queue_empty, | 834 | .elevator_queue_empty_fn = deadline_queue_empty, |
835 | .elevator_former_req_fn = deadline_former_request, | 835 | .elevator_former_req_fn = deadline_former_request, |
836 | .elevator_latter_req_fn = deadline_latter_request, | 836 | .elevator_latter_req_fn = deadline_latter_request, |
837 | .elevator_set_req_fn = deadline_set_request, | 837 | .elevator_set_req_fn = deadline_set_request, |
838 | .elevator_put_req_fn = deadline_put_request, | 838 | .elevator_put_req_fn = deadline_put_request, |
839 | .elevator_init_fn = deadline_init_queue, | 839 | .elevator_init_fn = deadline_init_queue, |
840 | .elevator_exit_fn = deadline_exit_queue, | 840 | .elevator_exit_fn = deadline_exit_queue, |
841 | }, | 841 | }, |
842 | 842 | ||
843 | .elevator_ktype = &deadline_ktype, | 843 | .elevator_ktype = &deadline_ktype, |
844 | .elevator_name = "deadline", | 844 | .elevator_name = "deadline", |
845 | .elevator_owner = THIS_MODULE, | 845 | .elevator_owner = THIS_MODULE, |
846 | }; | 846 | }; |
847 | 847 | ||
848 | static int __init deadline_init(void) | 848 | static int __init deadline_init(void) |
849 | { | 849 | { |
850 | int ret; | 850 | int ret; |
851 | 851 | ||
852 | drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq), | 852 | drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq), |
853 | 0, 0, NULL, NULL); | 853 | 0, 0, NULL, NULL); |
854 | 854 | ||
855 | if (!drq_pool) | 855 | if (!drq_pool) |
856 | return -ENOMEM; | 856 | return -ENOMEM; |
857 | 857 | ||
858 | ret = elv_register(&iosched_deadline); | 858 | ret = elv_register(&iosched_deadline); |
859 | if (ret) | 859 | if (ret) |
860 | kmem_cache_destroy(drq_pool); | 860 | kmem_cache_destroy(drq_pool); |
861 | 861 | ||
862 | return ret; | 862 | return ret; |
863 | } | 863 | } |
864 | 864 | ||
865 | static void __exit deadline_exit(void) | 865 | static void __exit deadline_exit(void) |
866 | { | 866 | { |
867 | kmem_cache_destroy(drq_pool); | 867 | kmem_cache_destroy(drq_pool); |
868 | elv_unregister(&iosched_deadline); | 868 | elv_unregister(&iosched_deadline); |
869 | } | 869 | } |
870 | 870 | ||
871 | module_init(deadline_init); | 871 | module_init(deadline_init); |
872 | module_exit(deadline_exit); | 872 | module_exit(deadline_exit); |
873 | 873 | ||
874 | MODULE_AUTHOR("Jens Axboe"); | 874 | MODULE_AUTHOR("Jens Axboe"); |
875 | MODULE_LICENSE("GPL"); | 875 | MODULE_LICENSE("GPL"); |
876 | MODULE_DESCRIPTION("deadline IO scheduler"); | 876 | MODULE_DESCRIPTION("deadline IO scheduler"); |
877 | 877 |
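A note for readers of the sysfs section above: the SHOW_FUNCTION/STORE_FUNCTION macros in block/deadline-iosched.c generate one show/store pair per tunable. As an illustration (an editor's sketch, not part of the patch), the expansion for the read_expire tunable, typically exposed as /sys/block/<disk>/queue/iosched/read_expire, comes out to roughly:

static ssize_t deadline_readexpire_show(struct deadline_data *dd, char *page)
{
	int __data = dd->fifo_expire[READ];
	if (1)					/* __CONV == 1: report jiffies as milliseconds */
		__data = jiffies_to_msecs(__data);
	return deadline_var_show(__data, (page));
}

static ssize_t deadline_readexpire_store(struct deadline_data *dd, const char *page, size_t count)
{
	int __data;
	int ret = deadline_var_store(&__data, (page), count);
	if (__data < (0))			/* clamp to [MIN, MAX] = [0, INT_MAX] */
		__data = (0);
	else if (__data > (INT_MAX))
		__data = (INT_MAX);
	if (1)					/* __CONV == 1: store back in jiffies */
		*(&dd->fifo_expire[READ]) = msecs_to_jiffies(__data);
	else
		*(&dd->fifo_expire[READ]) = __data;
	return ret;
}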
block/ll_rw_blk.c
1 | /* | 1 | /* |
2 | * Copyright (C) 1991, 1992 Linus Torvalds | 2 | * Copyright (C) 1991, 1992 Linus Torvalds |
3 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics | 3 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics |
4 | * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE | 4 | * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE |
5 | * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> | 5 | * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> |
6 | * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July 2000 | 6 | * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July 2000 |
7 | * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 | 7 | * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 |
8 | */ | 8 | */ |
9 | 9 | ||
10 | /* | 10 | /* |
11 | * This handles all read/write requests to block devices | 11 | * This handles all read/write requests to block devices |
12 | */ | 12 | */ |
13 | #include <linux/config.h> | 13 | #include <linux/config.h> |
14 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/backing-dev.h> | 16 | #include <linux/backing-dev.h> |
17 | #include <linux/bio.h> | 17 | #include <linux/bio.h> |
18 | #include <linux/blkdev.h> | 18 | #include <linux/blkdev.h> |
19 | #include <linux/highmem.h> | 19 | #include <linux/highmem.h> |
20 | #include <linux/mm.h> | 20 | #include <linux/mm.h> |
21 | #include <linux/kernel_stat.h> | 21 | #include <linux/kernel_stat.h> |
22 | #include <linux/string.h> | 22 | #include <linux/string.h> |
23 | #include <linux/init.h> | 23 | #include <linux/init.h> |
24 | #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ | 24 | #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ |
25 | #include <linux/completion.h> | 25 | #include <linux/completion.h> |
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/swap.h> | 27 | #include <linux/swap.h> |
28 | #include <linux/writeback.h> | 28 | #include <linux/writeback.h> |
29 | #include <linux/blkdev.h> | 29 | #include <linux/blkdev.h> |
30 | 30 | ||
31 | /* | 31 | /* |
32 | * for max sense size | 32 | * for max sense size |
33 | */ | 33 | */ |
34 | #include <scsi/scsi_cmnd.h> | 34 | #include <scsi/scsi_cmnd.h> |
35 | 35 | ||
36 | static void blk_unplug_work(void *data); | 36 | static void blk_unplug_work(void *data); |
37 | static void blk_unplug_timeout(unsigned long data); | 37 | static void blk_unplug_timeout(unsigned long data); |
38 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); | 38 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); |
39 | 39 | ||
40 | /* | 40 | /* |
41 | * For the allocated request tables | 41 | * For the allocated request tables |
42 | */ | 42 | */ |
43 | static kmem_cache_t *request_cachep; | 43 | static kmem_cache_t *request_cachep; |
44 | 44 | ||
45 | /* | 45 | /* |
46 | * For queue allocation | 46 | * For queue allocation |
47 | */ | 47 | */ |
48 | static kmem_cache_t *requestq_cachep; | 48 | static kmem_cache_t *requestq_cachep; |
49 | 49 | ||
50 | /* | 50 | /* |
51 | * For io context allocations | 51 | * For io context allocations |
52 | */ | 52 | */ |
53 | static kmem_cache_t *iocontext_cachep; | 53 | static kmem_cache_t *iocontext_cachep; |
54 | 54 | ||
55 | static wait_queue_head_t congestion_wqh[2] = { | 55 | static wait_queue_head_t congestion_wqh[2] = { |
56 | __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), | 56 | __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), |
57 | __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) | 57 | __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) |
58 | }; | 58 | }; |
59 | 59 | ||
60 | /* | 60 | /* |
61 | * Controlling structure to kblockd | 61 | * Controlling structure to kblockd |
62 | */ | 62 | */ |
63 | static struct workqueue_struct *kblockd_workqueue; | 63 | static struct workqueue_struct *kblockd_workqueue; |
64 | 64 | ||
65 | unsigned long blk_max_low_pfn, blk_max_pfn; | 65 | unsigned long blk_max_low_pfn, blk_max_pfn; |
66 | 66 | ||
67 | EXPORT_SYMBOL(blk_max_low_pfn); | 67 | EXPORT_SYMBOL(blk_max_low_pfn); |
68 | EXPORT_SYMBOL(blk_max_pfn); | 68 | EXPORT_SYMBOL(blk_max_pfn); |
69 | 69 | ||
70 | /* Amount of time in which a process may batch requests */ | 70 | /* Amount of time in which a process may batch requests */ |
71 | #define BLK_BATCH_TIME (HZ/50UL) | 71 | #define BLK_BATCH_TIME (HZ/50UL) |
72 | 72 | ||
73 | /* Number of requests a "batching" process may submit */ | 73 | /* Number of requests a "batching" process may submit */ |
74 | #define BLK_BATCH_REQ 32 | 74 | #define BLK_BATCH_REQ 32 |
75 | 75 | ||
76 | /* | 76 | /* |
77 | * Return the threshold (number of used requests) at which the queue is | 77 | * Return the threshold (number of used requests) at which the queue is |
78 | * considered to be congested. It includes a little hysteresis to keep the | 78 | * considered to be congested. It includes a little hysteresis to keep the |
79 | * context switch rate down. | 79 | * context switch rate down. |
80 | */ | 80 | */ |
81 | static inline int queue_congestion_on_threshold(struct request_queue *q) | 81 | static inline int queue_congestion_on_threshold(struct request_queue *q) |
82 | { | 82 | { |
83 | return q->nr_congestion_on; | 83 | return q->nr_congestion_on; |
84 | } | 84 | } |
85 | 85 | ||
86 | /* | 86 | /* |
87 | * The threshold at which a queue is considered to be uncongested | 87 | * The threshold at which a queue is considered to be uncongested |
88 | */ | 88 | */ |
89 | static inline int queue_congestion_off_threshold(struct request_queue *q) | 89 | static inline int queue_congestion_off_threshold(struct request_queue *q) |
90 | { | 90 | { |
91 | return q->nr_congestion_off; | 91 | return q->nr_congestion_off; |
92 | } | 92 | } |
93 | 93 | ||
94 | static void blk_queue_congestion_threshold(struct request_queue *q) | 94 | static void blk_queue_congestion_threshold(struct request_queue *q) |
95 | { | 95 | { |
96 | int nr; | 96 | int nr; |
97 | 97 | ||
98 | nr = q->nr_requests - (q->nr_requests / 8) + 1; | 98 | nr = q->nr_requests - (q->nr_requests / 8) + 1; |
99 | if (nr > q->nr_requests) | 99 | if (nr > q->nr_requests) |
100 | nr = q->nr_requests; | 100 | nr = q->nr_requests; |
101 | q->nr_congestion_on = nr; | 101 | q->nr_congestion_on = nr; |
102 | 102 | ||
103 | nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; | 103 | nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; |
104 | if (nr < 1) | 104 | if (nr < 1) |
105 | nr = 1; | 105 | nr = 1; |
106 | q->nr_congestion_off = nr; | 106 | q->nr_congestion_off = nr; |
107 | } | 107 | } |
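/*
 * Editorial aside (not part of the patch): a worked example of the
 * hysteresis computed above.  Assuming the default q->nr_requests of
 * 128 (BLKDEV_MAX_RQ in this kernel), the thresholds work out to:
 *
 *   nr_congestion_on  = 128 - 128/8           + 1 = 113
 *   nr_congestion_off = 128 - 128/8 - 128/16  - 1 = 103
 *
 * so the queue is marked congested once 113 requests are in use and is
 * only marked uncongested again after it drains below 103, which keeps
 * the congestion bit from flapping around a single threshold.
 */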
108 | 108 | ||
109 | /* | 109 | /* |
110 | * A queue has just exited congestion. Note this in the global counter of | 110 | * A queue has just exited congestion. Note this in the global counter of |
111 | * congested queues, and wake up anyone who was waiting for requests to be | 111 | * congested queues, and wake up anyone who was waiting for requests to be |
112 | * put back. | 112 | * put back. |
113 | */ | 113 | */ |
114 | static void clear_queue_congested(request_queue_t *q, int rw) | 114 | static void clear_queue_congested(request_queue_t *q, int rw) |
115 | { | 115 | { |
116 | enum bdi_state bit; | 116 | enum bdi_state bit; |
117 | wait_queue_head_t *wqh = &congestion_wqh[rw]; | 117 | wait_queue_head_t *wqh = &congestion_wqh[rw]; |
118 | 118 | ||
119 | bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; | 119 | bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; |
120 | clear_bit(bit, &q->backing_dev_info.state); | 120 | clear_bit(bit, &q->backing_dev_info.state); |
121 | smp_mb__after_clear_bit(); | 121 | smp_mb__after_clear_bit(); |
122 | if (waitqueue_active(wqh)) | 122 | if (waitqueue_active(wqh)) |
123 | wake_up(wqh); | 123 | wake_up(wqh); |
124 | } | 124 | } |
125 | 125 | ||
126 | /* | 126 | /* |
127 | * A queue has just entered congestion. Flag that in the queue's VM-visible | 127 | * A queue has just entered congestion. Flag that in the queue's VM-visible |
128 | * state flags and increment the global counter of congested queues. | 128 | * state flags and increment the global counter of congested queues. |
129 | */ | 129 | */ |
130 | static void set_queue_congested(request_queue_t *q, int rw) | 130 | static void set_queue_congested(request_queue_t *q, int rw) |
131 | { | 131 | { |
132 | enum bdi_state bit; | 132 | enum bdi_state bit; |
133 | 133 | ||
134 | bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; | 134 | bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; |
135 | set_bit(bit, &q->backing_dev_info.state); | 135 | set_bit(bit, &q->backing_dev_info.state); |
136 | } | 136 | } |
137 | 137 | ||
138 | /** | 138 | /** |
139 | * blk_get_backing_dev_info - get the address of a queue's backing_dev_info | 139 | * blk_get_backing_dev_info - get the address of a queue's backing_dev_info |
140 | * @bdev: device | 140 | * @bdev: device |
141 | * | 141 | * |
142 | * Locates the passed device's request queue and returns the address of its | 142 | * Locates the passed device's request queue and returns the address of its |
143 | * backing_dev_info | 143 | * backing_dev_info |
144 | * | 144 | * |
145 | * Will return NULL if the request queue cannot be located. | 145 | * Will return NULL if the request queue cannot be located. |
146 | */ | 146 | */ |
147 | struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) | 147 | struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) |
148 | { | 148 | { |
149 | struct backing_dev_info *ret = NULL; | 149 | struct backing_dev_info *ret = NULL; |
150 | request_queue_t *q = bdev_get_queue(bdev); | 150 | request_queue_t *q = bdev_get_queue(bdev); |
151 | 151 | ||
152 | if (q) | 152 | if (q) |
153 | ret = &q->backing_dev_info; | 153 | ret = &q->backing_dev_info; |
154 | return ret; | 154 | return ret; |
155 | } | 155 | } |
156 | 156 | ||
157 | EXPORT_SYMBOL(blk_get_backing_dev_info); | 157 | EXPORT_SYMBOL(blk_get_backing_dev_info); |
158 | 158 | ||
159 | void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data) | 159 | void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data) |
160 | { | 160 | { |
161 | q->activity_fn = fn; | 161 | q->activity_fn = fn; |
162 | q->activity_data = data; | 162 | q->activity_data = data; |
163 | } | 163 | } |
164 | 164 | ||
165 | EXPORT_SYMBOL(blk_queue_activity_fn); | 165 | EXPORT_SYMBOL(blk_queue_activity_fn); |
166 | 166 | ||
167 | /** | 167 | /** |
168 | * blk_queue_prep_rq - set a prepare_request function for queue | 168 | * blk_queue_prep_rq - set a prepare_request function for queue |
169 | * @q: queue | 169 | * @q: queue |
170 | * @pfn: prepare_request function | 170 | * @pfn: prepare_request function |
171 | * | 171 | * |
172 | * It's possible for a queue to register a prepare_request callback which | 172 | * It's possible for a queue to register a prepare_request callback which |
173 | * is invoked before the request is handed to the request_fn. The goal of | 173 | * is invoked before the request is handed to the request_fn. The goal of |
174 | * the function is to prepare a request for I/O, it can be used to build a | 174 | * the function is to prepare a request for I/O, it can be used to build a |
175 | * cdb from the request data for instance. | 175 | * cdb from the request data for instance. |
176 | * | 176 | * |
177 | */ | 177 | */ |
178 | void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn) | 178 | void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn) |
179 | { | 179 | { |
180 | q->prep_rq_fn = pfn; | 180 | q->prep_rq_fn = pfn; |
181 | } | 181 | } |
182 | 182 | ||
183 | EXPORT_SYMBOL(blk_queue_prep_rq); | 183 | EXPORT_SYMBOL(blk_queue_prep_rq); |
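/*
 * Illustrative sketch only (editor's addition, not part of the patch):
 * a prepare_request hook of this era returns one of BLKPREP_OK,
 * BLKPREP_DEFER or BLKPREP_KILL; the names below are invented.
 */
static int example_prep_rq(request_queue_t *q, struct request *rq)
{
	if (!rq->nr_sectors)
		return BLKPREP_KILL;	/* nothing to transfer, fail the request */

	/* a real driver would build its command block (cdb) from rq here */
	return BLKPREP_OK;
}

/* installed once at queue setup time: blk_queue_prep_rq(q, example_prep_rq); */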
184 | 184 | ||
185 | /** | 185 | /** |
186 | * blk_queue_merge_bvec - set a merge_bvec function for queue | 186 | * blk_queue_merge_bvec - set a merge_bvec function for queue |
187 | * @q: queue | 187 | * @q: queue |
188 | * @mbfn: merge_bvec_fn | 188 | * @mbfn: merge_bvec_fn |
189 | * | 189 | * |
190 | * Usually queues have static limitations on the max sectors or segments that | 190 | * Usually queues have static limitations on the max sectors or segments that |
191 | * we can put in a request. Stacking drivers may have some settings that | 191 | * we can put in a request. Stacking drivers may have some settings that |
192 | * are dynamic, and thus we have to query the queue whether it is ok to | 192 | * are dynamic, and thus we have to query the queue whether it is ok to |
193 | * add a new bio_vec to a bio at a given offset or not. If the block device | 193 | * add a new bio_vec to a bio at a given offset or not. If the block device |
194 | * has such limitations, it needs to register a merge_bvec_fn to control | 194 | * has such limitations, it needs to register a merge_bvec_fn to control |
195 | * the size of bio's sent to it. Note that a block device *must* allow a | 195 | * the size of bio's sent to it. Note that a block device *must* allow a |
196 | * single page to be added to an empty bio. The block device driver may want | 196 | * single page to be added to an empty bio. The block device driver may want |
197 | * to use the bio_split() function to deal with these bio's. By default | 197 | * to use the bio_split() function to deal with these bio's. By default |
198 | * no merge_bvec_fn is defined for a queue, and only the fixed limits are | 198 | * no merge_bvec_fn is defined for a queue, and only the fixed limits are |
199 | * honored. | 199 | * honored. |
200 | */ | 200 | */ |
201 | void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn) | 201 | void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn) |
202 | { | 202 | { |
203 | q->merge_bvec_fn = mbfn; | 203 | q->merge_bvec_fn = mbfn; |
204 | } | 204 | } |
205 | 205 | ||
206 | EXPORT_SYMBOL(blk_queue_merge_bvec); | 206 | EXPORT_SYMBOL(blk_queue_merge_bvec); |
207 | 207 | ||
208 | /** | 208 | /** |
209 | * blk_queue_make_request - define an alternate make_request function for a device | 209 | * blk_queue_make_request - define an alternate make_request function for a device |
210 | * @q: the request queue for the device to be affected | 210 | * @q: the request queue for the device to be affected |
211 | * @mfn: the alternate make_request function | 211 | * @mfn: the alternate make_request function |
212 | * | 212 | * |
213 | * Description: | 213 | * Description: |
214 | * The normal way for &struct bios to be passed to a device | 214 | * The normal way for &struct bios to be passed to a device |
215 | * driver is for them to be collected into requests on a request | 215 | * driver is for them to be collected into requests on a request |
216 | * queue, and then to allow the device driver to select requests | 216 | * queue, and then to allow the device driver to select requests |
217 | * off that queue when it is ready. This works well for many block | 217 | * off that queue when it is ready. This works well for many block |
218 | * devices. However some block devices (typically virtual devices | 218 | * devices. However some block devices (typically virtual devices |
219 | * such as md or lvm) do not benefit from the processing on the | 219 | * such as md or lvm) do not benefit from the processing on the |
220 | * request queue, and are served best by having the requests passed | 220 | * request queue, and are served best by having the requests passed |
221 | * directly to them. This can be achieved by providing a function | 221 | * directly to them. This can be achieved by providing a function |
222 | * to blk_queue_make_request(). | 222 | * to blk_queue_make_request(). |
223 | * | 223 | * |
224 | * Caveat: | 224 | * Caveat: |
225 | * The driver that does this *must* be able to deal appropriately | 225 | * The driver that does this *must* be able to deal appropriately |
226 | * with buffers in "highmemory". This can be accomplished by either calling | 226 | * with buffers in "highmemory". This can be accomplished by either calling |
227 | * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling | 227 | * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling |
228 | * blk_queue_bounce() to create a buffer in normal memory. | 228 | * blk_queue_bounce() to create a buffer in normal memory. |
229 | **/ | 229 | **/ |
230 | void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) | 230 | void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) |
231 | { | 231 | { |
232 | /* | 232 | /* |
233 | * set defaults | 233 | * set defaults |
234 | */ | 234 | */ |
235 | q->nr_requests = BLKDEV_MAX_RQ; | 235 | q->nr_requests = BLKDEV_MAX_RQ; |
236 | blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); | 236 | blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); |
237 | blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); | 237 | blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); |
238 | q->make_request_fn = mfn; | 238 | q->make_request_fn = mfn; |
239 | q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 239 | q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
240 | q->backing_dev_info.state = 0; | 240 | q->backing_dev_info.state = 0; |
241 | q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; | 241 | q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; |
242 | blk_queue_max_sectors(q, SAFE_MAX_SECTORS); | 242 | blk_queue_max_sectors(q, SAFE_MAX_SECTORS); |
243 | blk_queue_hardsect_size(q, 512); | 243 | blk_queue_hardsect_size(q, 512); |
244 | blk_queue_dma_alignment(q, 511); | 244 | blk_queue_dma_alignment(q, 511); |
245 | blk_queue_congestion_threshold(q); | 245 | blk_queue_congestion_threshold(q); |
246 | q->nr_batching = BLK_BATCH_REQ; | 246 | q->nr_batching = BLK_BATCH_REQ; |
247 | 247 | ||
248 | q->unplug_thresh = 4; /* hmm */ | 248 | q->unplug_thresh = 4; /* hmm */ |
249 | q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */ | 249 | q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */ |
250 | if (q->unplug_delay == 0) | 250 | if (q->unplug_delay == 0) |
251 | q->unplug_delay = 1; | 251 | q->unplug_delay = 1; |
252 | 252 | ||
253 | INIT_WORK(&q->unplug_work, blk_unplug_work, q); | 253 | INIT_WORK(&q->unplug_work, blk_unplug_work, q); |
254 | 254 | ||
255 | q->unplug_timer.function = blk_unplug_timeout; | 255 | q->unplug_timer.function = blk_unplug_timeout; |
256 | q->unplug_timer.data = (unsigned long)q; | 256 | q->unplug_timer.data = (unsigned long)q; |
257 | 257 | ||
258 | /* | 258 | /* |
259 | * by default assume old behaviour and bounce for any highmem page | 259 | * by default assume old behaviour and bounce for any highmem page |
260 | */ | 260 | */ |
261 | blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); | 261 | blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); |
262 | 262 | ||
263 | blk_queue_activity_fn(q, NULL, NULL); | 263 | blk_queue_activity_fn(q, NULL, NULL); |
264 | } | 264 | } |
265 | 265 | ||
266 | EXPORT_SYMBOL(blk_queue_make_request); | 266 | EXPORT_SYMBOL(blk_queue_make_request); |
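As a minimal sketch of the API documented above (assuming the 2.6-era interfaces shown in this diff; example_make_request, example_queue and example_init are hypothetical names), a virtual device could take bios directly like this, completing each bio immediately purely for illustration:

#include <linux/init.h>
#include <linux/errno.h>
#include <linux/blkdev.h>
#include <linux/bio.h>

/* hypothetical bio handler: service each bio as it arrives */
static int example_make_request(request_queue_t *q, struct bio *bio)
{
	/* a real driver would remap the bio or perform the I/O here */
	bio_endio(bio, bio->bi_size, 0);
	return 0;
}

static request_queue_t *example_queue;

static int __init example_init(void)
{
	example_queue = blk_alloc_queue(GFP_KERNEL);
	if (!example_queue)
		return -ENOMEM;

	/* bypass request-queue processing, take bios directly */
	blk_queue_make_request(example_queue, example_make_request);
	return 0;
}

Per the caveat above, such a driver must still cope with highmem buffers, e.g. via __bio_kmap_atomic() or blk_queue_bounce().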
267 | 267 | ||
268 | static inline void rq_init(request_queue_t *q, struct request *rq) | 268 | static inline void rq_init(request_queue_t *q, struct request *rq) |
269 | { | 269 | { |
270 | INIT_LIST_HEAD(&rq->queuelist); | 270 | INIT_LIST_HEAD(&rq->queuelist); |
271 | 271 | ||
272 | rq->errors = 0; | 272 | rq->errors = 0; |
273 | rq->rq_status = RQ_ACTIVE; | 273 | rq->rq_status = RQ_ACTIVE; |
274 | rq->bio = rq->biotail = NULL; | 274 | rq->bio = rq->biotail = NULL; |
275 | rq->ioprio = 0; | 275 | rq->ioprio = 0; |
276 | rq->buffer = NULL; | 276 | rq->buffer = NULL; |
277 | rq->ref_count = 1; | 277 | rq->ref_count = 1; |
278 | rq->q = q; | 278 | rq->q = q; |
279 | rq->waiting = NULL; | 279 | rq->waiting = NULL; |
280 | rq->special = NULL; | 280 | rq->special = NULL; |
281 | rq->data_len = 0; | 281 | rq->data_len = 0; |
282 | rq->data = NULL; | 282 | rq->data = NULL; |
283 | rq->nr_phys_segments = 0; | 283 | rq->nr_phys_segments = 0; |
284 | rq->sense = NULL; | 284 | rq->sense = NULL; |
285 | rq->end_io = NULL; | 285 | rq->end_io = NULL; |
286 | rq->end_io_data = NULL; | 286 | rq->end_io_data = NULL; |
287 | } | 287 | } |
288 | 288 | ||
289 | /** | 289 | /** |
290 | * blk_queue_ordered - does this queue support ordered writes | 290 | * blk_queue_ordered - does this queue support ordered writes |
291 | * @q: the request queue | 291 | * @q: the request queue |
292 | * @flag: see below | 292 | * @flag: see below |
293 | * | 293 | * |
294 | * Description: | 294 | * Description: |
295 | * For journalled file systems, doing ordered writes on a commit | 295 | * For journalled file systems, doing ordered writes on a commit |
296 | * block instead of explicitly doing wait_on_buffer (which is bad | 296 | * block instead of explicitly doing wait_on_buffer (which is bad |
297 | * for performance) can be a big win. Block drivers supporting this | 297 | * for performance) can be a big win. Block drivers supporting this |
298 | * feature should call this function and indicate so. | 298 | * feature should call this function and indicate so. |
299 | * | 299 | * |
300 | **/ | 300 | **/ |
301 | void blk_queue_ordered(request_queue_t *q, int flag) | 301 | void blk_queue_ordered(request_queue_t *q, int flag) |
302 | { | 302 | { |
303 | switch (flag) { | 303 | switch (flag) { |
304 | case QUEUE_ORDERED_NONE: | 304 | case QUEUE_ORDERED_NONE: |
305 | if (q->flush_rq) | 305 | if (q->flush_rq) |
306 | kmem_cache_free(request_cachep, q->flush_rq); | 306 | kmem_cache_free(request_cachep, q->flush_rq); |
307 | q->flush_rq = NULL; | 307 | q->flush_rq = NULL; |
308 | q->ordered = flag; | 308 | q->ordered = flag; |
309 | break; | 309 | break; |
310 | case QUEUE_ORDERED_TAG: | 310 | case QUEUE_ORDERED_TAG: |
311 | q->ordered = flag; | 311 | q->ordered = flag; |
312 | break; | 312 | break; |
313 | case QUEUE_ORDERED_FLUSH: | 313 | case QUEUE_ORDERED_FLUSH: |
314 | q->ordered = flag; | 314 | q->ordered = flag; |
315 | if (!q->flush_rq) | 315 | if (!q->flush_rq) |
316 | q->flush_rq = kmem_cache_alloc(request_cachep, | 316 | q->flush_rq = kmem_cache_alloc(request_cachep, |
317 | GFP_KERNEL); | 317 | GFP_KERNEL); |
318 | break; | 318 | break; |
319 | default: | 319 | default: |
320 | printk("blk_queue_ordered: bad value %d\n", flag); | 320 | printk("blk_queue_ordered: bad value %d\n", flag); |
321 | break; | 321 | break; |
322 | } | 322 | } |
323 | } | 323 | } |
324 | 324 | ||
325 | EXPORT_SYMBOL(blk_queue_ordered); | 325 | EXPORT_SYMBOL(blk_queue_ordered); |
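A rough illustration of how a driver might advertise its ordering capability at init time (example_setup_ordering is a hypothetical helper; the flush hooks are only mentioned in a comment because how they get registered is driver specific):

#include <linux/blkdev.h>

static void example_setup_ordering(request_queue_t *q, int has_ordered_tags)
{
	if (has_ordered_tags) {
		/* device preserves ordering through tagged commands */
		blk_queue_ordered(q, QUEUE_ORDERED_TAG);
	} else {
		/*
		 * fall back to explicit cache flushes; the driver is then
		 * also expected to supply the queue's prepare_flush_fn and
		 * end_flush_fn hooks used by the flush machinery below.
		 */
		blk_queue_ordered(q, QUEUE_ORDERED_FLUSH);
	}
}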
326 | 326 | ||
327 | /** | 327 | /** |
328 | * blk_queue_issue_flush_fn - set function for issuing a flush | 328 | * blk_queue_issue_flush_fn - set function for issuing a flush |
329 | * @q: the request queue | 329 | * @q: the request queue |
330 | * @iff: the function to be called issuing the flush | 330 | * @iff: the function to be called issuing the flush |
331 | * | 331 | * |
332 | * Description: | 332 | * Description: |
333 | * If a driver supports issuing a flush command, the support is notified | 333 | * If a driver supports issuing a flush command, the support is notified |
334 | * to the block layer by defining it through this call. | 334 | * to the block layer by defining it through this call. |
335 | * | 335 | * |
336 | **/ | 336 | **/ |
337 | void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff) | 337 | void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff) |
338 | { | 338 | { |
339 | q->issue_flush_fn = iff; | 339 | q->issue_flush_fn = iff; |
340 | } | 340 | } |
341 | 341 | ||
342 | EXPORT_SYMBOL(blk_queue_issue_flush_fn); | 342 | EXPORT_SYMBOL(blk_queue_issue_flush_fn); |
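A sketch of registering a flush hook, assuming the issue_flush_fn prototype of this kernel generation (queue, gendisk, error-sector pointer); example_issue_flush and example_setup_flush are hypothetical names:

#include <linux/blkdev.h>
#include <linux/genhd.h>

/* hypothetical hook: send a cache-flush command and wait for it */
static int example_issue_flush(request_queue_t *q, struct gendisk *disk,
			       sector_t *error_sector)
{
	/* return 0 on success, a negative errno on failure */
	return 0;
}

static void example_setup_flush(request_queue_t *q)
{
	blk_queue_issue_flush_fn(q, example_issue_flush);
}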
343 | 343 | ||
344 | /* | 344 | /* |
345 | * Cache flushing for ordered writes handling | 345 | * Cache flushing for ordered writes handling |
346 | */ | 346 | */ |
347 | static void blk_pre_flush_end_io(struct request *flush_rq) | 347 | static void blk_pre_flush_end_io(struct request *flush_rq) |
348 | { | 348 | { |
349 | struct request *rq = flush_rq->end_io_data; | 349 | struct request *rq = flush_rq->end_io_data; |
350 | request_queue_t *q = rq->q; | 350 | request_queue_t *q = rq->q; |
351 | 351 | ||
352 | elv_completed_request(q, flush_rq); | 352 | elv_completed_request(q, flush_rq); |
353 | 353 | ||
354 | rq->flags |= REQ_BAR_PREFLUSH; | 354 | rq->flags |= REQ_BAR_PREFLUSH; |
355 | 355 | ||
356 | if (!flush_rq->errors) | 356 | if (!flush_rq->errors) |
357 | elv_requeue_request(q, rq); | 357 | elv_requeue_request(q, rq); |
358 | else { | 358 | else { |
359 | q->end_flush_fn(q, flush_rq); | 359 | q->end_flush_fn(q, flush_rq); |
360 | clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); | 360 | clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); |
361 | q->request_fn(q); | 361 | q->request_fn(q); |
362 | } | 362 | } |
363 | } | 363 | } |
364 | 364 | ||
365 | static void blk_post_flush_end_io(struct request *flush_rq) | 365 | static void blk_post_flush_end_io(struct request *flush_rq) |
366 | { | 366 | { |
367 | struct request *rq = flush_rq->end_io_data; | 367 | struct request *rq = flush_rq->end_io_data; |
368 | request_queue_t *q = rq->q; | 368 | request_queue_t *q = rq->q; |
369 | 369 | ||
370 | elv_completed_request(q, flush_rq); | 370 | elv_completed_request(q, flush_rq); |
371 | 371 | ||
372 | rq->flags |= REQ_BAR_POSTFLUSH; | 372 | rq->flags |= REQ_BAR_POSTFLUSH; |
373 | 373 | ||
374 | q->end_flush_fn(q, flush_rq); | 374 | q->end_flush_fn(q, flush_rq); |
375 | clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); | 375 | clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); |
376 | q->request_fn(q); | 376 | q->request_fn(q); |
377 | } | 377 | } |
378 | 378 | ||
379 | struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq) | 379 | struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq) |
380 | { | 380 | { |
381 | struct request *flush_rq = q->flush_rq; | 381 | struct request *flush_rq = q->flush_rq; |
382 | 382 | ||
383 | BUG_ON(!blk_barrier_rq(rq)); | 383 | BUG_ON(!blk_barrier_rq(rq)); |
384 | 384 | ||
385 | if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags)) | 385 | if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags)) |
386 | return NULL; | 386 | return NULL; |
387 | 387 | ||
388 | rq_init(q, flush_rq); | 388 | rq_init(q, flush_rq); |
389 | flush_rq->elevator_private = NULL; | 389 | flush_rq->elevator_private = NULL; |
390 | flush_rq->flags = REQ_BAR_FLUSH; | 390 | flush_rq->flags = REQ_BAR_FLUSH; |
391 | flush_rq->rq_disk = rq->rq_disk; | 391 | flush_rq->rq_disk = rq->rq_disk; |
392 | flush_rq->rl = NULL; | 392 | flush_rq->rl = NULL; |
393 | 393 | ||
394 | /* | 394 | /* |
395 | * prepare_flush returns 0 if no flush is needed, just mark both | 395 | * prepare_flush returns 0 if no flush is needed, just mark both |
396 | * pre and post flush as done in that case | 396 | * pre and post flush as done in that case |
397 | */ | 397 | */ |
398 | if (!q->prepare_flush_fn(q, flush_rq)) { | 398 | if (!q->prepare_flush_fn(q, flush_rq)) { |
399 | rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH; | 399 | rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH; |
400 | clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); | 400 | clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); |
401 | return rq; | 401 | return rq; |
402 | } | 402 | } |
403 | 403 | ||
404 | /* | 404 | /* |
405 | * some drivers dequeue requests right away, some only after io | 405 | * some drivers dequeue requests right away, some only after io |
406 | * completion. make sure the request is dequeued. | 406 | * completion. make sure the request is dequeued. |
407 | */ | 407 | */ |
408 | if (!list_empty(&rq->queuelist)) | 408 | if (!list_empty(&rq->queuelist)) |
409 | blkdev_dequeue_request(rq); | 409 | blkdev_dequeue_request(rq); |
410 | 410 | ||
411 | flush_rq->end_io_data = rq; | 411 | flush_rq->end_io_data = rq; |
412 | flush_rq->end_io = blk_pre_flush_end_io; | 412 | flush_rq->end_io = blk_pre_flush_end_io; |
413 | 413 | ||
414 | __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); | 414 | __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); |
415 | return flush_rq; | 415 | return flush_rq; |
416 | } | 416 | } |
417 | 417 | ||
418 | static void blk_start_post_flush(request_queue_t *q, struct request *rq) | 418 | static void blk_start_post_flush(request_queue_t *q, struct request *rq) |
419 | { | 419 | { |
420 | struct request *flush_rq = q->flush_rq; | 420 | struct request *flush_rq = q->flush_rq; |
421 | 421 | ||
422 | BUG_ON(!blk_barrier_rq(rq)); | 422 | BUG_ON(!blk_barrier_rq(rq)); |
423 | 423 | ||
424 | rq_init(q, flush_rq); | 424 | rq_init(q, flush_rq); |
425 | flush_rq->elevator_private = NULL; | 425 | flush_rq->elevator_private = NULL; |
426 | flush_rq->flags = REQ_BAR_FLUSH; | 426 | flush_rq->flags = REQ_BAR_FLUSH; |
427 | flush_rq->rq_disk = rq->rq_disk; | 427 | flush_rq->rq_disk = rq->rq_disk; |
428 | flush_rq->rl = NULL; | 428 | flush_rq->rl = NULL; |
429 | 429 | ||
430 | if (q->prepare_flush_fn(q, flush_rq)) { | 430 | if (q->prepare_flush_fn(q, flush_rq)) { |
431 | flush_rq->end_io_data = rq; | 431 | flush_rq->end_io_data = rq; |
432 | flush_rq->end_io = blk_post_flush_end_io; | 432 | flush_rq->end_io = blk_post_flush_end_io; |
433 | 433 | ||
434 | __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); | 434 | __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); |
435 | q->request_fn(q); | 435 | q->request_fn(q); |
436 | } | 436 | } |
437 | } | 437 | } |
438 | 438 | ||
439 | static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq, | 439 | static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq, |
440 | int sectors) | 440 | int sectors) |
441 | { | 441 | { |
442 | if (sectors > rq->nr_sectors) | 442 | if (sectors > rq->nr_sectors) |
443 | sectors = rq->nr_sectors; | 443 | sectors = rq->nr_sectors; |
444 | 444 | ||
445 | rq->nr_sectors -= sectors; | 445 | rq->nr_sectors -= sectors; |
446 | return rq->nr_sectors; | 446 | return rq->nr_sectors; |
447 | } | 447 | } |
448 | 448 | ||
449 | static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq, | 449 | static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq, |
450 | int sectors, int queue_locked) | 450 | int sectors, int queue_locked) |
451 | { | 451 | { |
452 | if (q->ordered != QUEUE_ORDERED_FLUSH) | 452 | if (q->ordered != QUEUE_ORDERED_FLUSH) |
453 | return 0; | 453 | return 0; |
454 | if (!blk_fs_request(rq) || !blk_barrier_rq(rq)) | 454 | if (!blk_fs_request(rq) || !blk_barrier_rq(rq)) |
455 | return 0; | 455 | return 0; |
456 | if (blk_barrier_postflush(rq)) | 456 | if (blk_barrier_postflush(rq)) |
457 | return 0; | 457 | return 0; |
458 | 458 | ||
459 | if (!blk_check_end_barrier(q, rq, sectors)) { | 459 | if (!blk_check_end_barrier(q, rq, sectors)) { |
460 | unsigned long flags = 0; | 460 | unsigned long flags = 0; |
461 | 461 | ||
462 | if (!queue_locked) | 462 | if (!queue_locked) |
463 | spin_lock_irqsave(q->queue_lock, flags); | 463 | spin_lock_irqsave(q->queue_lock, flags); |
464 | 464 | ||
465 | blk_start_post_flush(q, rq); | 465 | blk_start_post_flush(q, rq); |
466 | 466 | ||
467 | if (!queue_locked) | 467 | if (!queue_locked) |
468 | spin_unlock_irqrestore(q->queue_lock, flags); | 468 | spin_unlock_irqrestore(q->queue_lock, flags); |
469 | } | 469 | } |
470 | 470 | ||
471 | return 1; | 471 | return 1; |
472 | } | 472 | } |
473 | 473 | ||
474 | /** | 474 | /** |
475 | * blk_complete_barrier_rq - complete possible barrier request | 475 | * blk_complete_barrier_rq - complete possible barrier request |
476 | * @q: the request queue for the device | 476 | * @q: the request queue for the device |
477 | * @rq: the request | 477 | * @rq: the request |
478 | * @sectors: number of sectors to complete | 478 | * @sectors: number of sectors to complete |
479 | * | 479 | * |
480 | * Description: | 480 | * Description: |
481 | * Used in driver end_io handling to determine whether to postpone | 481 | * Used in driver end_io handling to determine whether to postpone |
482 | * completion of a barrier request until a post flush has been done. This | 482 | * completion of a barrier request until a post flush has been done. This |
483 | * is the unlocked variant, used if the caller doesn't already hold the | 483 | * is the unlocked variant, used if the caller doesn't already hold the |
484 | * queue lock. | 484 | * queue lock. |
485 | **/ | 485 | **/ |
486 | int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors) | 486 | int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors) |
487 | { | 487 | { |
488 | return __blk_complete_barrier_rq(q, rq, sectors, 0); | 488 | return __blk_complete_barrier_rq(q, rq, sectors, 0); |
489 | } | 489 | } |
490 | EXPORT_SYMBOL(blk_complete_barrier_rq); | 490 | EXPORT_SYMBOL(blk_complete_barrier_rq); |
491 | 491 | ||
492 | /** | 492 | /** |
493 | * blk_complete_barrier_rq_locked - complete possible barrier request | 493 | * blk_complete_barrier_rq_locked - complete possible barrier request |
494 | * @q: the request queue for the device | 494 | * @q: the request queue for the device |
495 | * @rq: the request | 495 | * @rq: the request |
496 | * @sectors: number of sectors to complete | 496 | * @sectors: number of sectors to complete |
497 | * | 497 | * |
498 | * Description: | 498 | * Description: |
499 | * See blk_complete_barrier_rq(). This variant must be used if the caller | 499 | * See blk_complete_barrier_rq(). This variant must be used if the caller |
500 | * holds the queue lock. | 500 | * holds the queue lock. |
501 | **/ | 501 | **/ |
502 | int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq, | 502 | int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq, |
503 | int sectors) | 503 | int sectors) |
504 | { | 504 | { |
505 | return __blk_complete_barrier_rq(q, rq, sectors, 1); | 505 | return __blk_complete_barrier_rq(q, rq, sectors, 1); |
506 | } | 506 | } |
507 | EXPORT_SYMBOL(blk_complete_barrier_rq_locked); | 507 | EXPORT_SYMBOL(blk_complete_barrier_rq_locked); |
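To show where these two helpers sit, here is a hedged sketch of a driver's per-request completion path (example_end_io is hypothetical; the queue lock is assumed not held, otherwise the _locked variant would be used):

#include <linux/blkdev.h>

static void example_end_io(request_queue_t *q, struct request *rq,
			   int uptodate, int nr_sectors)
{
	/*
	 * If this was a barrier request whose post flush still has to run,
	 * the helper queues the flush and returns non-zero; completion of
	 * rq must then be deferred until the flush finishes.
	 */
	if (blk_complete_barrier_rq(q, rq, nr_sectors))
		return;

	/* normal completion path */
	if (!end_that_request_first(rq, uptodate, nr_sectors)) {
		/* all segments transferred; the driver finishes rq here */
	}
}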
508 | 508 | ||
509 | /** | 509 | /** |
510 | * blk_queue_bounce_limit - set bounce buffer limit for queue | 510 | * blk_queue_bounce_limit - set bounce buffer limit for queue |
511 | * @q: the request queue for the device | 511 | * @q: the request queue for the device |
512 | * @dma_addr: bus address limit | 512 | * @dma_addr: bus address limit |
513 | * | 513 | * |
514 | * Description: | 514 | * Description: |
515 | * Different hardware can have different requirements as to what pages | 515 | * Different hardware can have different requirements as to what pages |
516 | * it can do I/O directly to. A low level driver can call | 516 | * it can do I/O directly to. A low level driver can call |
517 | * blk_queue_bounce_limit to have lower memory pages allocated as bounce | 517 | * blk_queue_bounce_limit to have lower memory pages allocated as bounce |
518 | * buffers for doing I/O to pages residing above @dma_addr. By default | 518 | * buffers for doing I/O to pages residing above @dma_addr. By default |

519 | * the block layer sets this to the highest numbered "low" memory page. | 519 | * the block layer sets this to the highest numbered "low" memory page. |
520 | **/ | 520 | **/ |
521 | void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) | 521 | void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) |
522 | { | 522 | { |
523 | unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; | 523 | unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; |
524 | 524 | ||
525 | /* | 525 | /* |
526 | * set appropriate bounce gfp mask -- unfortunately we don't have a | 526 | * set appropriate bounce gfp mask -- unfortunately we don't have a |
527 | * full 4GB zone, so we have to resort to low memory for any bounces. | 527 | * full 4GB zone, so we have to resort to low memory for any bounces. |
528 | * ISA has its own < 16MB zone. | 528 | * ISA has its own < 16MB zone. |
529 | */ | 529 | */ |
530 | if (bounce_pfn < blk_max_low_pfn) { | 530 | if (bounce_pfn < blk_max_low_pfn) { |
531 | BUG_ON(dma_addr < BLK_BOUNCE_ISA); | 531 | BUG_ON(dma_addr < BLK_BOUNCE_ISA); |
532 | init_emergency_isa_pool(); | 532 | init_emergency_isa_pool(); |
533 | q->bounce_gfp = GFP_NOIO | GFP_DMA; | 533 | q->bounce_gfp = GFP_NOIO | GFP_DMA; |
534 | } else | 534 | } else |
535 | q->bounce_gfp = GFP_NOIO; | 535 | q->bounce_gfp = GFP_NOIO; |
536 | 536 | ||
537 | q->bounce_pfn = bounce_pfn; | 537 | q->bounce_pfn = bounce_pfn; |
538 | } | 538 | } |
539 | 539 | ||
540 | EXPORT_SYMBOL(blk_queue_bounce_limit); | 540 | EXPORT_SYMBOL(blk_queue_bounce_limit); |
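For instance (example_set_dma_limits is a hypothetical helper), a driver might pick its bounce limit according to what its DMA engine can address:

#include <linux/blkdev.h>

static void example_set_dma_limits(request_queue_t *q, int isa_only)
{
	if (isa_only)
		/* legacy ISA engine: bounce anything above 16MB */
		blk_queue_bounce_limit(q, BLK_BOUNCE_ISA);
	else
		/* 32-bit DMA engine: bounce only pages above 4GB */
		blk_queue_bounce_limit(q, 0xffffffffULL);
}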
541 | 541 | ||
542 | /** | 542 | /** |
543 | * blk_queue_max_sectors - set max sectors for a request for this queue | 543 | * blk_queue_max_sectors - set max sectors for a request for this queue |
544 | * @q: the request queue for the device | 544 | * @q: the request queue for the device |
545 | * @max_sectors: max sectors in the usual 512b unit | 545 | * @max_sectors: max sectors in the usual 512b unit |
546 | * | 546 | * |
547 | * Description: | 547 | * Description: |
548 | * Enables a low level driver to set an upper limit on the size of | 548 | * Enables a low level driver to set an upper limit on the size of |
549 | * received requests. | 549 | * received requests. |
550 | **/ | 550 | **/ |
551 | void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors) | 551 | void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors) |
552 | { | 552 | { |
553 | if ((max_sectors << 9) < PAGE_CACHE_SIZE) { | 553 | if ((max_sectors << 9) < PAGE_CACHE_SIZE) { |
554 | max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); | 554 | max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); |
555 | printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); | 555 | printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); |
556 | } | 556 | } |
557 | 557 | ||
558 | if (BLK_DEF_MAX_SECTORS > max_sectors) | 558 | if (BLK_DEF_MAX_SECTORS > max_sectors) |
559 | q->max_hw_sectors = q->max_sectors = max_sectors; | 559 | q->max_hw_sectors = q->max_sectors = max_sectors; |
560 | else { | 560 | else { |
561 | q->max_sectors = BLK_DEF_MAX_SECTORS; | 561 | q->max_sectors = BLK_DEF_MAX_SECTORS; |
562 | q->max_hw_sectors = max_sectors; | 562 | q->max_hw_sectors = max_sectors; |
563 | } | 563 | } |
564 | } | 564 | } |
565 | 565 | ||
566 | EXPORT_SYMBOL(blk_queue_max_sectors); | 566 | EXPORT_SYMBOL(blk_queue_max_sectors); |
567 | 567 | ||
568 | /** | 568 | /** |
569 | * blk_queue_max_phys_segments - set max phys segments for a request for this queue | 569 | * blk_queue_max_phys_segments - set max phys segments for a request for this queue |
570 | * @q: the request queue for the device | 570 | * @q: the request queue for the device |
571 | * @max_segments: max number of segments | 571 | * @max_segments: max number of segments |
572 | * | 572 | * |
573 | * Description: | 573 | * Description: |
574 | * Enables a low level driver to set an upper limit on the number of | 574 | * Enables a low level driver to set an upper limit on the number of |
575 | * physical data segments in a request. This would be the largest sized | 575 | * physical data segments in a request. This would be the largest sized |
576 | * scatter list the driver could handle. | 576 | * scatter list the driver could handle. |
577 | **/ | 577 | **/ |
578 | void blk_queue_max_phys_segments(request_queue_t *q, unsigned short max_segments) | 578 | void blk_queue_max_phys_segments(request_queue_t *q, unsigned short max_segments) |
579 | { | 579 | { |
580 | if (!max_segments) { | 580 | if (!max_segments) { |
581 | max_segments = 1; | 581 | max_segments = 1; |
582 | printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); | 582 | printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); |
583 | } | 583 | } |
584 | 584 | ||
585 | q->max_phys_segments = max_segments; | 585 | q->max_phys_segments = max_segments; |
586 | } | 586 | } |
587 | 587 | ||
588 | EXPORT_SYMBOL(blk_queue_max_phys_segments); | 588 | EXPORT_SYMBOL(blk_queue_max_phys_segments); |
589 | 589 | ||
590 | /** | 590 | /** |
591 | * blk_queue_max_hw_segments - set max hw segments for a request for this queue | 591 | * blk_queue_max_hw_segments - set max hw segments for a request for this queue |
592 | * @q: the request queue for the device | 592 | * @q: the request queue for the device |
593 | * @max_segments: max number of segments | 593 | * @max_segments: max number of segments |
594 | * | 594 | * |
595 | * Description: | 595 | * Description: |
596 | * Enables a low level driver to set an upper limit on the number of | 596 | * Enables a low level driver to set an upper limit on the number of |
597 | * hw data segments in a request. This would be the largest number of | 597 | * hw data segments in a request. This would be the largest number of |
598 | * address/length pairs the host adapter can actually give at once | 598 | * address/length pairs the host adapter can actually give at once |
599 | * to the device. | 599 | * to the device. |
600 | **/ | 600 | **/ |
601 | void blk_queue_max_hw_segments(request_queue_t *q, unsigned short max_segments) | 601 | void blk_queue_max_hw_segments(request_queue_t *q, unsigned short max_segments) |
602 | { | 602 | { |
603 | if (!max_segments) { | 603 | if (!max_segments) { |
604 | max_segments = 1; | 604 | max_segments = 1; |
605 | printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); | 605 | printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); |
606 | } | 606 | } |
607 | 607 | ||
608 | q->max_hw_segments = max_segments; | 608 | q->max_hw_segments = max_segments; |
609 | } | 609 | } |
610 | 610 | ||
611 | EXPORT_SYMBOL(blk_queue_max_hw_segments); | 611 | EXPORT_SYMBOL(blk_queue_max_hw_segments); |
612 | 612 | ||
613 | /** | 613 | /** |
614 | * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg | 614 | * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg |
615 | * @q: the request queue for the device | 615 | * @q: the request queue for the device |
616 | * @max_size: max size of segment in bytes | 616 | * @max_size: max size of segment in bytes |
617 | * | 617 | * |
618 | * Description: | 618 | * Description: |
619 | * Enables a low level driver to set an upper limit on the size of a | 619 | * Enables a low level driver to set an upper limit on the size of a |
620 | * coalesced segment | 620 | * coalesced segment |
621 | **/ | 621 | **/ |
622 | void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size) | 622 | void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size) |
623 | { | 623 | { |
624 | if (max_size < PAGE_CACHE_SIZE) { | 624 | if (max_size < PAGE_CACHE_SIZE) { |
625 | max_size = PAGE_CACHE_SIZE; | 625 | max_size = PAGE_CACHE_SIZE; |
626 | printk("%s: set to minimum %d\n", __FUNCTION__, max_size); | 626 | printk("%s: set to minimum %d\n", __FUNCTION__, max_size); |
627 | } | 627 | } |
628 | 628 | ||
629 | q->max_segment_size = max_size; | 629 | q->max_segment_size = max_size; |
630 | } | 630 | } |
631 | 631 | ||
632 | EXPORT_SYMBOL(blk_queue_max_segment_size); | 632 | EXPORT_SYMBOL(blk_queue_max_segment_size); |
633 | 633 | ||
634 | /** | 634 | /** |
635 | * blk_queue_hardsect_size - set hardware sector size for the queue | 635 | * blk_queue_hardsect_size - set hardware sector size for the queue |
636 | * @q: the request queue for the device | 636 | * @q: the request queue for the device |
637 | * @size: the hardware sector size, in bytes | 637 | * @size: the hardware sector size, in bytes |
638 | * | 638 | * |
639 | * Description: | 639 | * Description: |
640 | * This should typically be set to the lowest possible sector size | 640 | * This should typically be set to the lowest possible sector size |
641 | * that the hardware can operate on (possibly without resorting to | 641 | * that the hardware can operate on (possibly without resorting to |
642 | * even internal read-modify-write operations). Usually the default | 642 | * even internal read-modify-write operations). Usually the default |
643 | * of 512 covers most hardware. | 643 | * of 512 covers most hardware. |
644 | **/ | 644 | **/ |
645 | void blk_queue_hardsect_size(request_queue_t *q, unsigned short size) | 645 | void blk_queue_hardsect_size(request_queue_t *q, unsigned short size) |
646 | { | 646 | { |
647 | q->hardsect_size = size; | 647 | q->hardsect_size = size; |
648 | } | 648 | } |
649 | 649 | ||
650 | EXPORT_SYMBOL(blk_queue_hardsect_size); | 650 | EXPORT_SYMBOL(blk_queue_hardsect_size); |
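Pulling the size and segment setters above together, a typical probe path might look roughly like this (example_set_limits and the particular numbers are illustrative, not taken from any real driver):

#include <linux/blkdev.h>

static void example_set_limits(request_queue_t *q)
{
	blk_queue_max_sectors(q, 256);		/* 128 KiB per request */
	blk_queue_max_phys_segments(q, 64);	/* scatterlist capacity */
	blk_queue_max_hw_segments(q, 64);	/* adapter S/G capacity */
	blk_queue_max_segment_size(q, 65536);	/* 64 KiB per element */
	blk_queue_hardsect_size(q, 512);	/* ordinary 512-byte sectors */
}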
651 | 651 | ||
652 | /* | 652 | /* |
653 | * Returns the minimum that is _not_ zero, unless both are zero. | 653 | * Returns the minimum that is _not_ zero, unless both are zero. |
654 | */ | 654 | */ |
655 | #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) | 655 | #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) |
656 | 656 | ||
657 | /** | 657 | /** |
658 | * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers | 658 | * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers |
659 | * @t: the stacking driver (top) | 659 | * @t: the stacking driver (top) |
660 | * @b: the underlying device (bottom) | 660 | * @b: the underlying device (bottom) |
661 | **/ | 661 | **/ |
662 | void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b) | 662 | void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b) |
663 | { | 663 | { |
664 | /* zero is "infinity" */ | 664 | /* zero is "infinity" */ |
665 | t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors); | 665 | t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors); |
666 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors); | 666 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors); |
667 | 667 | ||
668 | t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); | 668 | t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); |
669 | t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); | 669 | t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); |
670 | t->max_segment_size = min(t->max_segment_size,b->max_segment_size); | 670 | t->max_segment_size = min(t->max_segment_size,b->max_segment_size); |
671 | t->hardsect_size = max(t->hardsect_size,b->hardsect_size); | 671 | t->hardsect_size = max(t->hardsect_size,b->hardsect_size); |
672 | } | 672 | } |
673 | 673 | ||
674 | EXPORT_SYMBOL(blk_queue_stack_limits); | 674 | EXPORT_SYMBOL(blk_queue_stack_limits); |
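A sketch of how a stacking driver (md/dm style) could fold the limits of its component devices into its own queue; example_stack_limits and its arguments are hypothetical:

#include <linux/fs.h>
#include <linux/blkdev.h>

static void example_stack_limits(request_queue_t *top,
				 struct block_device **parts, int nr)
{
	int i;

	/* afterwards, top is no more permissive than any bottom device */
	for (i = 0; i < nr; i++)
		blk_queue_stack_limits(top, bdev_get_queue(parts[i]));
}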
675 | 675 | ||
676 | /** | 676 | /** |
677 | * blk_queue_segment_boundary - set boundary rules for segment merging | 677 | * blk_queue_segment_boundary - set boundary rules for segment merging |
678 | * @q: the request queue for the device | 678 | * @q: the request queue for the device |
679 | * @mask: the memory boundary mask | 679 | * @mask: the memory boundary mask |
680 | **/ | 680 | **/ |
681 | void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask) | 681 | void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask) |
682 | { | 682 | { |
683 | if (mask < PAGE_CACHE_SIZE - 1) { | 683 | if (mask < PAGE_CACHE_SIZE - 1) { |
684 | mask = PAGE_CACHE_SIZE - 1; | 684 | mask = PAGE_CACHE_SIZE - 1; |
685 | printk("%s: set to minimum %lx\n", __FUNCTION__, mask); | 685 | printk("%s: set to minimum %lx\n", __FUNCTION__, mask); |
686 | } | 686 | } |
687 | 687 | ||
688 | q->seg_boundary_mask = mask; | 688 | q->seg_boundary_mask = mask; |
689 | } | 689 | } |
690 | 690 | ||
691 | EXPORT_SYMBOL(blk_queue_segment_boundary); | 691 | EXPORT_SYMBOL(blk_queue_segment_boundary); |
692 | 692 | ||
693 | /** | 693 | /** |
694 | * blk_queue_dma_alignment - set dma length and memory alignment | 694 | * blk_queue_dma_alignment - set dma length and memory alignment |
695 | * @q: the request queue for the device | 695 | * @q: the request queue for the device |
696 | * @mask: alignment mask | 696 | * @mask: alignment mask |
697 | * | 697 | * |
698 | * description: | 698 | * description: |
699 | * set required memory and length alignment for direct dma transactions. | 699 | * set required memory and length alignment for direct dma transactions. |
700 | * this is used when building direct io requests for the queue. | 700 | * this is used when building direct io requests for the queue. |
701 | * | 701 | * |
702 | **/ | 702 | **/ |
703 | void blk_queue_dma_alignment(request_queue_t *q, int mask) | 703 | void blk_queue_dma_alignment(request_queue_t *q, int mask) |
704 | { | 704 | { |
705 | q->dma_alignment = mask; | 705 | q->dma_alignment = mask; |
706 | } | 706 | } |
707 | 707 | ||
708 | EXPORT_SYMBOL(blk_queue_dma_alignment); | 708 | EXPORT_SYMBOL(blk_queue_dma_alignment); |
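Covering both this setter and blk_queue_segment_boundary() above, a controller with 64 KiB segment boundaries and 4-byte DMA alignment might be described as follows (example_set_alignment is hypothetical):

#include <linux/blkdev.h>

static void example_set_alignment(request_queue_t *q)
{
	/* no segment may cross a 64 KiB boundary */
	blk_queue_segment_boundary(q, 0xffff);
	/* buffers and lengths for direct I/O must be 4-byte aligned */
	blk_queue_dma_alignment(q, 0x3);
}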
709 | 709 | ||
710 | /** | 710 | /** |
711 | * blk_queue_find_tag - find a request by its tag and queue | 711 | * blk_queue_find_tag - find a request by its tag and queue |
712 | * @q: The request queue for the device | 712 | * @q: The request queue for the device |
713 | * @tag: The tag of the request | 713 | * @tag: The tag of the request |
714 | * | 714 | * |
715 | * Notes: | 715 | * Notes: |
716 | * Should be used when a device returns a tag and you want to match | 716 | * Should be used when a device returns a tag and you want to match |
717 | * it with a request. | 717 | * it with a request. |
718 | * | 718 | * |
719 | * no locks need be held. | 719 | * no locks need be held. |
720 | **/ | 720 | **/ |
721 | struct request *blk_queue_find_tag(request_queue_t *q, int tag) | 721 | struct request *blk_queue_find_tag(request_queue_t *q, int tag) |
722 | { | 722 | { |
723 | struct blk_queue_tag *bqt = q->queue_tags; | 723 | struct blk_queue_tag *bqt = q->queue_tags; |
724 | 724 | ||
725 | if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) | 725 | if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) |
726 | return NULL; | 726 | return NULL; |
727 | 727 | ||
728 | return bqt->tag_index[tag]; | 728 | return bqt->tag_index[tag]; |
729 | } | 729 | } |
730 | 730 | ||
731 | EXPORT_SYMBOL(blk_queue_find_tag); | 731 | EXPORT_SYMBOL(blk_queue_find_tag); |
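For example (example_irq_complete is hypothetical), an interrupt handler that only gets a hardware tag back could recover the matching request like this:

#include <linux/kernel.h>
#include <linux/blkdev.h>

static void example_irq_complete(request_queue_t *q, int hw_tag)
{
	struct request *rq = blk_queue_find_tag(q, hw_tag);

	if (!rq) {
		printk(KERN_ERR "example: spurious completion, tag %d\n",
		       hw_tag);
		return;
	}
	/* ... end the request found for this tag ... */
}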
732 | 732 | ||
733 | /** | 733 | /** |
734 | * __blk_queue_free_tags - release tag maintenance info | 734 | * __blk_queue_free_tags - release tag maintenance info |
735 | * @q: the request queue for the device | 735 | * @q: the request queue for the device |
736 | * | 736 | * |
737 | * Notes: | 737 | * Notes: |
738 | * blk_cleanup_queue() will take care of calling this function, if tagging | 738 | * blk_cleanup_queue() will take care of calling this function, if tagging |
739 | * has been used. So there's no need to call this directly. | 739 | * has been used. So there's no need to call this directly. |
740 | **/ | 740 | **/ |
741 | static void __blk_queue_free_tags(request_queue_t *q) | 741 | static void __blk_queue_free_tags(request_queue_t *q) |
742 | { | 742 | { |
743 | struct blk_queue_tag *bqt = q->queue_tags; | 743 | struct blk_queue_tag *bqt = q->queue_tags; |
744 | 744 | ||
745 | if (!bqt) | 745 | if (!bqt) |
746 | return; | 746 | return; |
747 | 747 | ||
748 | if (atomic_dec_and_test(&bqt->refcnt)) { | 748 | if (atomic_dec_and_test(&bqt->refcnt)) { |
749 | BUG_ON(bqt->busy); | 749 | BUG_ON(bqt->busy); |
750 | BUG_ON(!list_empty(&bqt->busy_list)); | 750 | BUG_ON(!list_empty(&bqt->busy_list)); |
751 | 751 | ||
752 | kfree(bqt->tag_index); | 752 | kfree(bqt->tag_index); |
753 | bqt->tag_index = NULL; | 753 | bqt->tag_index = NULL; |
754 | 754 | ||
755 | kfree(bqt->tag_map); | 755 | kfree(bqt->tag_map); |
756 | bqt->tag_map = NULL; | 756 | bqt->tag_map = NULL; |
757 | 757 | ||
758 | kfree(bqt); | 758 | kfree(bqt); |
759 | } | 759 | } |
760 | 760 | ||
761 | q->queue_tags = NULL; | 761 | q->queue_tags = NULL; |
762 | q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); | 762 | q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); |
763 | } | 763 | } |
764 | 764 | ||
765 | /** | 765 | /** |
766 | * blk_queue_free_tags - release tag maintenance info | 766 | * blk_queue_free_tags - release tag maintenance info |
767 | * @q: the request queue for the device | 767 | * @q: the request queue for the device |
768 | * | 768 | * |
769 | * Notes: | 769 | * Notes: |
770 | * This is used to disable tagged queuing on a device, yet leave the | 770 | * This is used to disable tagged queuing on a device, yet leave the |
771 | * queue in function. | 771 | * queue in function. |
772 | **/ | 772 | **/ |
773 | void blk_queue_free_tags(request_queue_t *q) | 773 | void blk_queue_free_tags(request_queue_t *q) |
774 | { | 774 | { |
775 | clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); | 775 | clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); |
776 | } | 776 | } |
777 | 777 | ||
778 | EXPORT_SYMBOL(blk_queue_free_tags); | 778 | EXPORT_SYMBOL(blk_queue_free_tags); |
779 | 779 | ||
780 | static int | 780 | static int |
781 | init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth) | 781 | init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth) |
782 | { | 782 | { |
783 | struct request **tag_index; | 783 | struct request **tag_index; |
784 | unsigned long *tag_map; | 784 | unsigned long *tag_map; |
785 | int nr_ulongs; | 785 | int nr_ulongs; |
786 | 786 | ||
787 | if (depth > q->nr_requests * 2) { | 787 | if (depth > q->nr_requests * 2) { |
788 | depth = q->nr_requests * 2; | 788 | depth = q->nr_requests * 2; |
789 | printk(KERN_ERR "%s: adjusted depth to %d\n", | 789 | printk(KERN_ERR "%s: adjusted depth to %d\n", |
790 | __FUNCTION__, depth); | 790 | __FUNCTION__, depth); |
791 | } | 791 | } |
792 | 792 | ||
793 | tag_index = kmalloc(depth * sizeof(struct request *), GFP_ATOMIC); | 793 | tag_index = kmalloc(depth * sizeof(struct request *), GFP_ATOMIC); |
794 | if (!tag_index) | 794 | if (!tag_index) |
795 | goto fail; | 795 | goto fail; |
796 | 796 | ||
797 | nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG; | 797 | nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG; |
798 | tag_map = kmalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC); | 798 | tag_map = kmalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC); |
799 | if (!tag_map) | 799 | if (!tag_map) |
800 | goto fail; | 800 | goto fail; |
801 | 801 | ||
802 | memset(tag_index, 0, depth * sizeof(struct request *)); | 802 | memset(tag_index, 0, depth * sizeof(struct request *)); |
803 | memset(tag_map, 0, nr_ulongs * sizeof(unsigned long)); | 803 | memset(tag_map, 0, nr_ulongs * sizeof(unsigned long)); |
804 | tags->real_max_depth = depth; | 804 | tags->real_max_depth = depth; |
805 | tags->max_depth = depth; | 805 | tags->max_depth = depth; |
806 | tags->tag_index = tag_index; | 806 | tags->tag_index = tag_index; |
807 | tags->tag_map = tag_map; | 807 | tags->tag_map = tag_map; |
808 | 808 | ||
809 | return 0; | 809 | return 0; |
810 | fail: | 810 | fail: |
811 | kfree(tag_index); | 811 | kfree(tag_index); |
812 | return -ENOMEM; | 812 | return -ENOMEM; |
813 | } | 813 | } |
814 | 814 | ||
815 | /** | 815 | /** |
816 | * blk_queue_init_tags - initialize the queue tag info | 816 | * blk_queue_init_tags - initialize the queue tag info |
817 | * @q: the request queue for the device | 817 | * @q: the request queue for the device |
818 | * @depth: the maximum queue depth supported | 818 | * @depth: the maximum queue depth supported |
819 | * @tags: the tag to use | 819 | * @tags: the tag to use |
820 | **/ | 820 | **/ |
821 | int blk_queue_init_tags(request_queue_t *q, int depth, | 821 | int blk_queue_init_tags(request_queue_t *q, int depth, |
822 | struct blk_queue_tag *tags) | 822 | struct blk_queue_tag *tags) |
823 | { | 823 | { |
824 | int rc; | 824 | int rc; |
825 | 825 | ||
826 | BUG_ON(tags && q->queue_tags && tags != q->queue_tags); | 826 | BUG_ON(tags && q->queue_tags && tags != q->queue_tags); |
827 | 827 | ||
828 | if (!tags && !q->queue_tags) { | 828 | if (!tags && !q->queue_tags) { |
829 | tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); | 829 | tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); |
830 | if (!tags) | 830 | if (!tags) |
831 | goto fail; | 831 | goto fail; |
832 | 832 | ||
833 | if (init_tag_map(q, tags, depth)) | 833 | if (init_tag_map(q, tags, depth)) |
834 | goto fail; | 834 | goto fail; |
835 | 835 | ||
836 | INIT_LIST_HEAD(&tags->busy_list); | 836 | INIT_LIST_HEAD(&tags->busy_list); |
837 | tags->busy = 0; | 837 | tags->busy = 0; |
838 | atomic_set(&tags->refcnt, 1); | 838 | atomic_set(&tags->refcnt, 1); |
839 | } else if (q->queue_tags) { | 839 | } else if (q->queue_tags) { |
840 | if ((rc = blk_queue_resize_tags(q, depth))) | 840 | if ((rc = blk_queue_resize_tags(q, depth))) |
841 | return rc; | 841 | return rc; |
842 | set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); | 842 | set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); |
843 | return 0; | 843 | return 0; |
844 | } else | 844 | } else |
845 | atomic_inc(&tags->refcnt); | 845 | atomic_inc(&tags->refcnt); |
846 | 846 | ||
847 | /* | 847 | /* |
848 | * assign it, all done | 848 | * assign it, all done |
849 | */ | 849 | */ |
850 | q->queue_tags = tags; | 850 | q->queue_tags = tags; |
851 | q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); | 851 | q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); |
852 | return 0; | 852 | return 0; |
853 | fail: | 853 | fail: |
854 | kfree(tags); | 854 | kfree(tags); |
855 | return -ENOMEM; | 855 | return -ENOMEM; |
856 | } | 856 | } |
857 | 857 | ||
858 | EXPORT_SYMBOL(blk_queue_init_tags); | 858 | EXPORT_SYMBOL(blk_queue_init_tags); |
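A hedged sketch of enabling tagged queueing at probe time (example_enable_tcq and the depth of 64 are illustrative); passing an existing struct blk_queue_tag instead of NULL would share one tag map between several queues, as the refcounting above allows:

#include <linux/blkdev.h>

static int example_enable_tcq(request_queue_t *q)
{
	/* 64 outstanding commands, block layer allocates the tag map */
	return blk_queue_init_tags(q, 64, NULL);
}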
859 | 859 | ||
860 | /** | 860 | /** |
861 | * blk_queue_resize_tags - change the queueing depth | 861 | * blk_queue_resize_tags - change the queueing depth |
862 | * @q: the request queue for the device | 862 | * @q: the request queue for the device |
863 | * @new_depth: the new max command queueing depth | 863 | * @new_depth: the new max command queueing depth |
864 | * | 864 | * |
865 | * Notes: | 865 | * Notes: |
866 | * Must be called with the queue lock held. | 866 | * Must be called with the queue lock held. |
867 | **/ | 867 | **/ |
868 | int blk_queue_resize_tags(request_queue_t *q, int new_depth) | 868 | int blk_queue_resize_tags(request_queue_t *q, int new_depth) |
869 | { | 869 | { |
870 | struct blk_queue_tag *bqt = q->queue_tags; | 870 | struct blk_queue_tag *bqt = q->queue_tags; |
871 | struct request **tag_index; | 871 | struct request **tag_index; |
872 | unsigned long *tag_map; | 872 | unsigned long *tag_map; |
873 | int max_depth, nr_ulongs; | 873 | int max_depth, nr_ulongs; |
874 | 874 | ||
875 | if (!bqt) | 875 | if (!bqt) |
876 | return -ENXIO; | 876 | return -ENXIO; |
877 | 877 | ||
878 | /* | 878 | /* |
879 | * if we already have a large enough real_max_depth, just | 879 | * if we already have a large enough real_max_depth, just |
880 | * adjust max_depth. *NOTE* as requests with tag value | 880 | * adjust max_depth. *NOTE* as requests with tag value |
881 | * between new_depth and real_max_depth can be in-flight, tag | 881 | * between new_depth and real_max_depth can be in-flight, tag |
882 | * map can not be shrunk blindly here. | 882 | * map can not be shrunk blindly here. |
883 | */ | 883 | */ |
884 | if (new_depth <= bqt->real_max_depth) { | 884 | if (new_depth <= bqt->real_max_depth) { |
885 | bqt->max_depth = new_depth; | 885 | bqt->max_depth = new_depth; |
886 | return 0; | 886 | return 0; |
887 | } | 887 | } |
888 | 888 | ||
889 | /* | 889 | /* |
890 | * save the old state info, so we can copy it back | 890 | * save the old state info, so we can copy it back |
891 | */ | 891 | */ |
892 | tag_index = bqt->tag_index; | 892 | tag_index = bqt->tag_index; |
893 | tag_map = bqt->tag_map; | 893 | tag_map = bqt->tag_map; |
894 | max_depth = bqt->real_max_depth; | 894 | max_depth = bqt->real_max_depth; |
895 | 895 | ||
896 | if (init_tag_map(q, bqt, new_depth)) | 896 | if (init_tag_map(q, bqt, new_depth)) |
897 | return -ENOMEM; | 897 | return -ENOMEM; |
898 | 898 | ||
899 | memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); | 899 | memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); |
900 | nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG; | 900 | nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG; |
901 | memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long)); | 901 | memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long)); |
902 | 902 | ||
903 | kfree(tag_index); | 903 | kfree(tag_index); |
904 | kfree(tag_map); | 904 | kfree(tag_map); |
905 | return 0; | 905 | return 0; |
906 | } | 906 | } |
907 | 907 | ||
908 | EXPORT_SYMBOL(blk_queue_resize_tags); | 908 | EXPORT_SYMBOL(blk_queue_resize_tags); |
909 | 909 | ||
910 | /** | 910 | /** |
911 | * blk_queue_end_tag - end tag operations for a request | 911 | * blk_queue_end_tag - end tag operations for a request |
912 | * @q: the request queue for the device | 912 | * @q: the request queue for the device |
913 | * @rq: the request that has completed | 913 | * @rq: the request that has completed |
914 | * | 914 | * |
915 | * Description: | 915 | * Description: |
916 | * Typically called when end_that_request_first() returns 0, meaning | 916 | * Typically called when end_that_request_first() returns 0, meaning |
917 | * all transfers have been done for a request. It's important to call | 917 | * all transfers have been done for a request. It's important to call |
918 | * this function before end_that_request_last(), as that will put the | 918 | * this function before end_that_request_last(), as that will put the |
919 | * request back on the free list thus corrupting the internal tag list. | 919 | * request back on the free list thus corrupting the internal tag list. |
920 | * | 920 | * |
921 | * Notes: | 921 | * Notes: |
922 | * queue lock must be held. | 922 | * queue lock must be held. |
923 | **/ | 923 | **/ |
924 | void blk_queue_end_tag(request_queue_t *q, struct request *rq) | 924 | void blk_queue_end_tag(request_queue_t *q, struct request *rq) |
925 | { | 925 | { |
926 | struct blk_queue_tag *bqt = q->queue_tags; | 926 | struct blk_queue_tag *bqt = q->queue_tags; |
927 | int tag = rq->tag; | 927 | int tag = rq->tag; |
928 | 928 | ||
929 | BUG_ON(tag == -1); | 929 | BUG_ON(tag == -1); |
930 | 930 | ||
931 | if (unlikely(tag >= bqt->real_max_depth)) | 931 | if (unlikely(tag >= bqt->real_max_depth)) |
932 | /* | 932 | /* |
933 | * This can happen after tag depth has been reduced. | 933 | * This can happen after tag depth has been reduced. |
934 | * FIXME: how about a warning or info message here? | 934 | * FIXME: how about a warning or info message here? |
935 | */ | 935 | */ |
936 | return; | 936 | return; |
937 | 937 | ||
938 | if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) { | 938 | if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) { |
939 | printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", | 939 | printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", |
940 | __FUNCTION__, tag); | 940 | __FUNCTION__, tag); |
941 | return; | 941 | return; |
942 | } | 942 | } |
943 | 943 | ||
944 | list_del_init(&rq->queuelist); | 944 | list_del_init(&rq->queuelist); |
945 | rq->flags &= ~REQ_QUEUED; | 945 | rq->flags &= ~REQ_QUEUED; |
946 | rq->tag = -1; | 946 | rq->tag = -1; |
947 | 947 | ||
948 | if (unlikely(bqt->tag_index[tag] == NULL)) | 948 | if (unlikely(bqt->tag_index[tag] == NULL)) |
949 | printk(KERN_ERR "%s: tag %d is missing\n", | 949 | printk(KERN_ERR "%s: tag %d is missing\n", |
950 | __FUNCTION__, tag); | 950 | __FUNCTION__, tag); |
951 | 951 | ||
952 | bqt->tag_index[tag] = NULL; | 952 | bqt->tag_index[tag] = NULL; |
953 | bqt->busy--; | 953 | bqt->busy--; |
954 | } | 954 | } |
955 | 955 | ||
956 | EXPORT_SYMBOL(blk_queue_end_tag); | 956 | EXPORT_SYMBOL(blk_queue_end_tag); |
957 | 957 | ||
958 | /** | 958 | /** |
959 | * blk_queue_start_tag - find a free tag and assign it | 959 | * blk_queue_start_tag - find a free tag and assign it |
960 | * @q: the request queue for the device | 960 | * @q: the request queue for the device |
961 | * @rq: the block request that needs tagging | 961 | * @rq: the block request that needs tagging |
962 | * | 962 | * |
963 | * Description: | 963 | * Description: |
964 | * This can either be used as a stand-alone helper, or possibly be | 964 | * This can either be used as a stand-alone helper, or possibly be |
965 | * assigned as the queue &prep_rq_fn (in which case &struct request | 965 | * assigned as the queue &prep_rq_fn (in which case &struct request |
966 | * automagically gets a tag assigned). Note that this function | 966 | * automagically gets a tag assigned). Note that this function |
967 | * assumes that any type of request can be queued! if this is not | 967 | * assumes that any type of request can be queued! if this is not |
968 | * true for your device, you must check the request type before | 968 | * true for your device, you must check the request type before |
969 | * calling this function. The request will also be removed from | 969 | * calling this function. The request will also be removed from |
970 | * the request queue, so it's the driver's responsibility to re-add | 970 | * the request queue, so it's the driver's responsibility to re-add |
971 | * it if it should need to be restarted for some reason. | 971 | * it if it should need to be restarted for some reason. |
972 | * | 972 | * |
973 | * Notes: | 973 | * Notes: |
974 | * queue lock must be held. | 974 | * queue lock must be held. |
975 | **/ | 975 | **/ |
976 | int blk_queue_start_tag(request_queue_t *q, struct request *rq) | 976 | int blk_queue_start_tag(request_queue_t *q, struct request *rq) |
977 | { | 977 | { |
978 | struct blk_queue_tag *bqt = q->queue_tags; | 978 | struct blk_queue_tag *bqt = q->queue_tags; |
979 | int tag; | 979 | int tag; |
980 | 980 | ||
981 | if (unlikely((rq->flags & REQ_QUEUED))) { | 981 | if (unlikely((rq->flags & REQ_QUEUED))) { |
982 | printk(KERN_ERR | 982 | printk(KERN_ERR |
983 | "%s: request %p for device [%s] already tagged %d", | 983 | "%s: request %p for device [%s] already tagged %d", |
984 | __FUNCTION__, rq, | 984 | __FUNCTION__, rq, |
985 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag); | 985 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag); |
986 | BUG(); | 986 | BUG(); |
987 | } | 987 | } |
988 | 988 | ||
989 | tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); | 989 | tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); |
990 | if (tag >= bqt->max_depth) | 990 | if (tag >= bqt->max_depth) |
991 | return 1; | 991 | return 1; |
992 | 992 | ||
993 | __set_bit(tag, bqt->tag_map); | 993 | __set_bit(tag, bqt->tag_map); |
994 | 994 | ||
995 | rq->flags |= REQ_QUEUED; | 995 | rq->flags |= REQ_QUEUED; |
996 | rq->tag = tag; | 996 | rq->tag = tag; |
997 | bqt->tag_index[tag] = rq; | 997 | bqt->tag_index[tag] = rq; |
998 | blkdev_dequeue_request(rq); | 998 | blkdev_dequeue_request(rq); |
999 | list_add(&rq->queuelist, &bqt->busy_list); | 999 | list_add(&rq->queuelist, &bqt->busy_list); |
1000 | bqt->busy++; | 1000 | bqt->busy++; |
1001 | return 0; | 1001 | return 0; |
1002 | } | 1002 | } |
1003 | 1003 | ||
1004 | EXPORT_SYMBOL(blk_queue_start_tag); | 1004 | EXPORT_SYMBOL(blk_queue_start_tag); |
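To place the two tag helpers in context, a driver's request function and completion might look roughly like this (example_request_fn and example_finish are hypothetical; the queue lock is assumed held in both paths, as the notes require):

#include <linux/blkdev.h>

static void example_request_fn(request_queue_t *q)
{
	struct request *rq;

	while ((rq = elv_next_request(q)) != NULL) {
		/* non-zero return means no free tag; retry on completion */
		if (blk_queue_start_tag(q, rq))
			break;

		/* rq->tag is valid and rq has been dequeued for us */
		/* ... hand rq to the hardware here ... */
	}
}

static void example_finish(request_queue_t *q, struct request *rq)
{
	/* release the tag before the request goes back to the free list */
	blk_queue_end_tag(q, rq);
	/* ... complete the request ... */
}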
1005 | 1005 | ||
1006 | /** | 1006 | /** |
1007 | * blk_queue_invalidate_tags - invalidate all pending tags | 1007 | * blk_queue_invalidate_tags - invalidate all pending tags |
1008 | * @q: the request queue for the device | 1008 | * @q: the request queue for the device |
1009 | * | 1009 | * |
1010 | * Description: | 1010 | * Description: |
1011 | * Hardware conditions may dictate a need to stop all pending requests. | 1011 | * Hardware conditions may dictate a need to stop all pending requests. |
1012 | * In this case, we will safely clear the block side of the tag queue and | 1012 | * In this case, we will safely clear the block side of the tag queue and |
1013 | * re-add all requests to the request queue in the right order. | 1013 | * re-add all requests to the request queue in the right order. |
1014 | * | 1014 | * |
1015 | * Notes: | 1015 | * Notes: |
1016 | * queue lock must be held. | 1016 | * queue lock must be held. |
1017 | **/ | 1017 | **/ |
1018 | void blk_queue_invalidate_tags(request_queue_t *q) | 1018 | void blk_queue_invalidate_tags(request_queue_t *q) |
1019 | { | 1019 | { |
1020 | struct blk_queue_tag *bqt = q->queue_tags; | 1020 | struct blk_queue_tag *bqt = q->queue_tags; |
1021 | struct list_head *tmp, *n; | 1021 | struct list_head *tmp, *n; |
1022 | struct request *rq; | 1022 | struct request *rq; |
1023 | 1023 | ||
1024 | list_for_each_safe(tmp, n, &bqt->busy_list) { | 1024 | list_for_each_safe(tmp, n, &bqt->busy_list) { |
1025 | rq = list_entry_rq(tmp); | 1025 | rq = list_entry_rq(tmp); |
1026 | 1026 | ||
1027 | if (rq->tag == -1) { | 1027 | if (rq->tag == -1) { |
1028 | printk(KERN_ERR | 1028 | printk(KERN_ERR |
1029 | "%s: bad tag found on list\n", __FUNCTION__); | 1029 | "%s: bad tag found on list\n", __FUNCTION__); |
1030 | list_del_init(&rq->queuelist); | 1030 | list_del_init(&rq->queuelist); |
1031 | rq->flags &= ~REQ_QUEUED; | 1031 | rq->flags &= ~REQ_QUEUED; |
1032 | } else | 1032 | } else |
1033 | blk_queue_end_tag(q, rq); | 1033 | blk_queue_end_tag(q, rq); |
1034 | 1034 | ||
1035 | rq->flags &= ~REQ_STARTED; | 1035 | rq->flags &= ~REQ_STARTED; |
1036 | __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); | 1036 | __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); |
1037 | } | 1037 | } |
1038 | } | 1038 | } |
1039 | 1039 | ||
1040 | EXPORT_SYMBOL(blk_queue_invalidate_tags); | 1040 | EXPORT_SYMBOL(blk_queue_invalidate_tags); |
1041 | 1041 | ||
1042 | static char *rq_flags[] = { | 1042 | static const char * const rq_flags[] = { |
1043 | "REQ_RW", | 1043 | "REQ_RW", |
1044 | "REQ_FAILFAST", | 1044 | "REQ_FAILFAST", |
1045 | "REQ_SORTED", | 1045 | "REQ_SORTED", |
1046 | "REQ_SOFTBARRIER", | 1046 | "REQ_SOFTBARRIER", |
1047 | "REQ_HARDBARRIER", | 1047 | "REQ_HARDBARRIER", |
1048 | "REQ_CMD", | 1048 | "REQ_CMD", |
1049 | "REQ_NOMERGE", | 1049 | "REQ_NOMERGE", |
1050 | "REQ_STARTED", | 1050 | "REQ_STARTED", |
1051 | "REQ_DONTPREP", | 1051 | "REQ_DONTPREP", |
1052 | "REQ_QUEUED", | 1052 | "REQ_QUEUED", |
1053 | "REQ_ELVPRIV", | 1053 | "REQ_ELVPRIV", |
1054 | "REQ_PC", | 1054 | "REQ_PC", |
1055 | "REQ_BLOCK_PC", | 1055 | "REQ_BLOCK_PC", |
1056 | "REQ_SENSE", | 1056 | "REQ_SENSE", |
1057 | "REQ_FAILED", | 1057 | "REQ_FAILED", |
1058 | "REQ_QUIET", | 1058 | "REQ_QUIET", |
1059 | "REQ_SPECIAL", | 1059 | "REQ_SPECIAL", |
1060 | "REQ_DRIVE_CMD", | 1060 | "REQ_DRIVE_CMD", |
1061 | "REQ_DRIVE_TASK", | 1061 | "REQ_DRIVE_TASK", |
1062 | "REQ_DRIVE_TASKFILE", | 1062 | "REQ_DRIVE_TASKFILE", |
1063 | "REQ_PREEMPT", | 1063 | "REQ_PREEMPT", |
1064 | "REQ_PM_SUSPEND", | 1064 | "REQ_PM_SUSPEND", |
1065 | "REQ_PM_RESUME", | 1065 | "REQ_PM_RESUME", |
1066 | "REQ_PM_SHUTDOWN", | 1066 | "REQ_PM_SHUTDOWN", |
1067 | }; | 1067 | }; |
1068 | 1068 | ||
1069 | void blk_dump_rq_flags(struct request *rq, char *msg) | 1069 | void blk_dump_rq_flags(struct request *rq, char *msg) |
1070 | { | 1070 | { |
1071 | int bit; | 1071 | int bit; |
1072 | 1072 | ||
1073 | printk("%s: dev %s: flags = ", msg, | 1073 | printk("%s: dev %s: flags = ", msg, |
1074 | rq->rq_disk ? rq->rq_disk->disk_name : "?"); | 1074 | rq->rq_disk ? rq->rq_disk->disk_name : "?"); |
1075 | bit = 0; | 1075 | bit = 0; |
1076 | do { | 1076 | do { |
1077 | if (rq->flags & (1 << bit)) | 1077 | if (rq->flags & (1 << bit)) |
1078 | printk("%s ", rq_flags[bit]); | 1078 | printk("%s ", rq_flags[bit]); |
1079 | bit++; | 1079 | bit++; |
1080 | } while (bit < __REQ_NR_BITS); | 1080 | } while (bit < __REQ_NR_BITS); |
1081 | 1081 | ||
1082 | printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, | 1082 | printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, |
1083 | rq->nr_sectors, | 1083 | rq->nr_sectors, |
1084 | rq->current_nr_sectors); | 1084 | rq->current_nr_sectors); |
1085 | printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); | 1085 | printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); |
1086 | 1086 | ||
1087 | if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) { | 1087 | if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) { |
1088 | printk("cdb: "); | 1088 | printk("cdb: "); |
1089 | for (bit = 0; bit < sizeof(rq->cmd); bit++) | 1089 | for (bit = 0; bit < sizeof(rq->cmd); bit++) |
1090 | printk("%02x ", rq->cmd[bit]); | 1090 | printk("%02x ", rq->cmd[bit]); |
1091 | printk("\n"); | 1091 | printk("\n"); |
1092 | } | 1092 | } |
1093 | } | 1093 | } |
1094 | 1094 | ||
1095 | EXPORT_SYMBOL(blk_dump_rq_flags); | 1095 | EXPORT_SYMBOL(blk_dump_rq_flags); |
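As a small usage note (example_reject is hypothetical), the dump helper is handy when refusing a request a driver cannot handle:

#include <linux/blkdev.h>

static void example_reject(struct request *rq)
{
	/* logs the flags, sector range and, for PC requests, the cdb */
	blk_dump_rq_flags(rq, "example: rejecting request");
}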
1096 | 1096 | ||
1097 | void blk_recount_segments(request_queue_t *q, struct bio *bio) | 1097 | void blk_recount_segments(request_queue_t *q, struct bio *bio) |
1098 | { | 1098 | { |
1099 | struct bio_vec *bv, *bvprv = NULL; | 1099 | struct bio_vec *bv, *bvprv = NULL; |
1100 | int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster; | 1100 | int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster; |
1101 | int high, highprv = 1; | 1101 | int high, highprv = 1; |
1102 | 1102 | ||
1103 | if (unlikely(!bio->bi_io_vec)) | 1103 | if (unlikely(!bio->bi_io_vec)) |
1104 | return; | 1104 | return; |
1105 | 1105 | ||
1106 | cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); | 1106 | cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); |
1107 | hw_seg_size = seg_size = nr_phys_segs = nr_hw_segs = 0; | 1107 | hw_seg_size = seg_size = nr_phys_segs = nr_hw_segs = 0; |
1108 | bio_for_each_segment(bv, bio, i) { | 1108 | bio_for_each_segment(bv, bio, i) { |
1109 | /* | 1109 | /* |
1110 | * the trick here is making sure that a high page is never | 1110 | * the trick here is making sure that a high page is never |
1111 | * considered part of another segment, since that might | 1111 | * considered part of another segment, since that might |
1112 | * change with the bounce page. | 1112 | * change with the bounce page. |
1113 | */ | 1113 | */ |
1114 | high = page_to_pfn(bv->bv_page) >= q->bounce_pfn; | 1114 | high = page_to_pfn(bv->bv_page) >= q->bounce_pfn; |
1115 | if (high || highprv) | 1115 | if (high || highprv) |
1116 | goto new_hw_segment; | 1116 | goto new_hw_segment; |
1117 | if (cluster) { | 1117 | if (cluster) { |
1118 | if (seg_size + bv->bv_len > q->max_segment_size) | 1118 | if (seg_size + bv->bv_len > q->max_segment_size) |
1119 | goto new_segment; | 1119 | goto new_segment; |
1120 | if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) | 1120 | if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) |
1121 | goto new_segment; | 1121 | goto new_segment; |
1122 | if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) | 1122 | if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) |
1123 | goto new_segment; | 1123 | goto new_segment; |
1124 | if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) | 1124 | if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) |
1125 | goto new_hw_segment; | 1125 | goto new_hw_segment; |
1126 | 1126 | ||
1127 | seg_size += bv->bv_len; | 1127 | seg_size += bv->bv_len; |
1128 | hw_seg_size += bv->bv_len; | 1128 | hw_seg_size += bv->bv_len; |
1129 | bvprv = bv; | 1129 | bvprv = bv; |
1130 | continue; | 1130 | continue; |
1131 | } | 1131 | } |
1132 | new_segment: | 1132 | new_segment: |
1133 | if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) && | 1133 | if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) && |
1134 | !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) { | 1134 | !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) { |
1135 | hw_seg_size += bv->bv_len; | 1135 | hw_seg_size += bv->bv_len; |
1136 | } else { | 1136 | } else { |
1137 | new_hw_segment: | 1137 | new_hw_segment: |
1138 | if (hw_seg_size > bio->bi_hw_front_size) | 1138 | if (hw_seg_size > bio->bi_hw_front_size) |
1139 | bio->bi_hw_front_size = hw_seg_size; | 1139 | bio->bi_hw_front_size = hw_seg_size; |
1140 | hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len; | 1140 | hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len; |
1141 | nr_hw_segs++; | 1141 | nr_hw_segs++; |
1142 | } | 1142 | } |
1143 | 1143 | ||
1144 | nr_phys_segs++; | 1144 | nr_phys_segs++; |
1145 | bvprv = bv; | 1145 | bvprv = bv; |
1146 | seg_size = bv->bv_len; | 1146 | seg_size = bv->bv_len; |
1147 | highprv = high; | 1147 | highprv = high; |
1148 | } | 1148 | } |
1149 | if (hw_seg_size > bio->bi_hw_back_size) | 1149 | if (hw_seg_size > bio->bi_hw_back_size) |
1150 | bio->bi_hw_back_size = hw_seg_size; | 1150 | bio->bi_hw_back_size = hw_seg_size; |
1151 | if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size) | 1151 | if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size) |
1152 | bio->bi_hw_front_size = hw_seg_size; | 1152 | bio->bi_hw_front_size = hw_seg_size; |
1153 | bio->bi_phys_segments = nr_phys_segs; | 1153 | bio->bi_phys_segments = nr_phys_segs; |
1154 | bio->bi_hw_segments = nr_hw_segs; | 1154 | bio->bi_hw_segments = nr_hw_segs; |
1155 | bio->bi_flags |= (1 << BIO_SEG_VALID); | 1155 | bio->bi_flags |= (1 << BIO_SEG_VALID); |
1156 | } | 1156 | } |
1157 | 1157 | ||
1158 | 1158 | ||
1159 | static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio, | 1159 | static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio, |
1160 | struct bio *nxt) | 1160 | struct bio *nxt) |
1161 | { | 1161 | { |
1162 | if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER))) | 1162 | if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER))) |
1163 | return 0; | 1163 | return 0; |
1164 | 1164 | ||
1165 | if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) | 1165 | if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) |
1166 | return 0; | 1166 | return 0; |
1167 | if (bio->bi_size + nxt->bi_size > q->max_segment_size) | 1167 | if (bio->bi_size + nxt->bi_size > q->max_segment_size) |
1168 | return 0; | 1168 | return 0; |
1169 | 1169 | ||
1170 | /* | 1170 | /* |
1171 | * bio and nxt are contiguous in memory, check if the queue allows | 1171 | * bio and nxt are contiguous in memory, check if the queue allows |
1172 | * these two to be merged into one | 1172 | * these two to be merged into one |
1173 | */ | 1173 | */ |
1174 | if (BIO_SEG_BOUNDARY(q, bio, nxt)) | 1174 | if (BIO_SEG_BOUNDARY(q, bio, nxt)) |
1175 | return 1; | 1175 | return 1; |
1176 | 1176 | ||
1177 | return 0; | 1177 | return 0; |
1178 | } | 1178 | } |
1179 | 1179 | ||
1180 | static int blk_hw_contig_segment(request_queue_t *q, struct bio *bio, | 1180 | static int blk_hw_contig_segment(request_queue_t *q, struct bio *bio, |
1181 | struct bio *nxt) | 1181 | struct bio *nxt) |
1182 | { | 1182 | { |
1183 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) | 1183 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) |
1184 | blk_recount_segments(q, bio); | 1184 | blk_recount_segments(q, bio); |
1185 | if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID))) | 1185 | if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID))) |
1186 | blk_recount_segments(q, nxt); | 1186 | blk_recount_segments(q, nxt); |
1187 | if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || | 1187 | if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || |
1188 | BIOVEC_VIRT_OVERSIZE(bio->bi_hw_front_size + bio->bi_hw_back_size)) | 1188 | BIOVEC_VIRT_OVERSIZE(bio->bi_hw_front_size + bio->bi_hw_back_size)) |
1189 | return 0; | 1189 | return 0; |
1190 | if (bio->bi_size + nxt->bi_size > q->max_segment_size) | 1190 | if (bio->bi_size + nxt->bi_size > q->max_segment_size) |
1191 | return 0; | 1191 | return 0; |
1192 | 1192 | ||
1193 | return 1; | 1193 | return 1; |
1194 | } | 1194 | } |
1195 | 1195 | ||
1196 | /* | 1196 | /* |
1197 | * map a request to scatterlist, return number of sg entries setup. Caller | 1197 | * map a request to scatterlist, return number of sg entries setup. Caller |
1198 | * must make sure sg can hold rq->nr_phys_segments entries | 1198 | * must make sure sg can hold rq->nr_phys_segments entries |
1199 | */ | 1199 | */ |
1200 | int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg) | 1200 | int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg) |
1201 | { | 1201 | { |
1202 | struct bio_vec *bvec, *bvprv; | 1202 | struct bio_vec *bvec, *bvprv; |
1203 | struct bio *bio; | 1203 | struct bio *bio; |
1204 | int nsegs, i, cluster; | 1204 | int nsegs, i, cluster; |
1205 | 1205 | ||
1206 | nsegs = 0; | 1206 | nsegs = 0; |
1207 | cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); | 1207 | cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); |
1208 | 1208 | ||
1209 | /* | 1209 | /* |
1210 | * for each bio in rq | 1210 | * for each bio in rq |
1211 | */ | 1211 | */ |
1212 | bvprv = NULL; | 1212 | bvprv = NULL; |
1213 | rq_for_each_bio(bio, rq) { | 1213 | rq_for_each_bio(bio, rq) { |
1214 | /* | 1214 | /* |
1215 | * for each segment in bio | 1215 | * for each segment in bio |
1216 | */ | 1216 | */ |
1217 | bio_for_each_segment(bvec, bio, i) { | 1217 | bio_for_each_segment(bvec, bio, i) { |
1218 | int nbytes = bvec->bv_len; | 1218 | int nbytes = bvec->bv_len; |
1219 | 1219 | ||
1220 | if (bvprv && cluster) { | 1220 | if (bvprv && cluster) { |
1221 | if (sg[nsegs - 1].length + nbytes > q->max_segment_size) | 1221 | if (sg[nsegs - 1].length + nbytes > q->max_segment_size) |
1222 | goto new_segment; | 1222 | goto new_segment; |
1223 | 1223 | ||
1224 | if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) | 1224 | if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) |
1225 | goto new_segment; | 1225 | goto new_segment; |
1226 | if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) | 1226 | if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) |
1227 | goto new_segment; | 1227 | goto new_segment; |
1228 | 1228 | ||
1229 | sg[nsegs - 1].length += nbytes; | 1229 | sg[nsegs - 1].length += nbytes; |
1230 | } else { | 1230 | } else { |
1231 | new_segment: | 1231 | new_segment: |
1232 | memset(&sg[nsegs],0,sizeof(struct scatterlist)); | 1232 | memset(&sg[nsegs],0,sizeof(struct scatterlist)); |
1233 | sg[nsegs].page = bvec->bv_page; | 1233 | sg[nsegs].page = bvec->bv_page; |
1234 | sg[nsegs].length = nbytes; | 1234 | sg[nsegs].length = nbytes; |
1235 | sg[nsegs].offset = bvec->bv_offset; | 1235 | sg[nsegs].offset = bvec->bv_offset; |
1236 | 1236 | ||
1237 | nsegs++; | 1237 | nsegs++; |
1238 | } | 1238 | } |
1239 | bvprv = bvec; | 1239 | bvprv = bvec; |
1240 | } /* segments in bio */ | 1240 | } /* segments in bio */ |
1241 | } /* bios in rq */ | 1241 | } /* bios in rq */ |
1242 | 1242 | ||
1243 | return nsegs; | 1243 | return nsegs; |
1244 | } | 1244 | } |
1245 | 1245 | ||
1246 | EXPORT_SYMBOL(blk_rq_map_sg); | 1246 | EXPORT_SYMBOL(blk_rq_map_sg); |
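For illustration only (not part of this commit), the comment above states the caller's contract: the scatterlist must have room for rq->nr_phys_segments entries, which the driver guaranteed when it set its segment limits at init time. A hypothetical dispatch-path sketch, with my_prepare_dma and struct my_dev (and its preallocated sg array) as invented names:

	/* hedged sketch: map a dequeued request for DMA; dev->sg was sized for the
	 * max_phys_segments limit the driver configured at queue setup time */
	static int my_prepare_dma(request_queue_t *q, struct request *rq, struct my_dev *dev)
	{
		int nsegs = blk_rq_map_sg(q, rq, dev->sg);

		/* ... program the controller with nsegs scatter/gather entries ... */
		return nsegs;
	}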
1247 | 1247 | ||
1248 | /* | 1248 | /* |
1249 | * the standard queue merge functions, can be overridden with device | 1249 | * the standard queue merge functions, can be overridden with device |
1250 | * specific ones if so desired | 1250 | * specific ones if so desired |
1251 | */ | 1251 | */ |
1252 | 1252 | ||
1253 | static inline int ll_new_mergeable(request_queue_t *q, | 1253 | static inline int ll_new_mergeable(request_queue_t *q, |
1254 | struct request *req, | 1254 | struct request *req, |
1255 | struct bio *bio) | 1255 | struct bio *bio) |
1256 | { | 1256 | { |
1257 | int nr_phys_segs = bio_phys_segments(q, bio); | 1257 | int nr_phys_segs = bio_phys_segments(q, bio); |
1258 | 1258 | ||
1259 | if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { | 1259 | if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { |
1260 | req->flags |= REQ_NOMERGE; | 1260 | req->flags |= REQ_NOMERGE; |
1261 | if (req == q->last_merge) | 1261 | if (req == q->last_merge) |
1262 | q->last_merge = NULL; | 1262 | q->last_merge = NULL; |
1263 | return 0; | 1263 | return 0; |
1264 | } | 1264 | } |
1265 | 1265 | ||
1266 | /* | 1266 | /* |
1267 | * A hw segment is just getting larger, bump just the phys | 1267 | * A hw segment is just getting larger, bump just the phys |
1268 | * counter. | 1268 | * counter. |
1269 | */ | 1269 | */ |
1270 | req->nr_phys_segments += nr_phys_segs; | 1270 | req->nr_phys_segments += nr_phys_segs; |
1271 | return 1; | 1271 | return 1; |
1272 | } | 1272 | } |
1273 | 1273 | ||
1274 | static inline int ll_new_hw_segment(request_queue_t *q, | 1274 | static inline int ll_new_hw_segment(request_queue_t *q, |
1275 | struct request *req, | 1275 | struct request *req, |
1276 | struct bio *bio) | 1276 | struct bio *bio) |
1277 | { | 1277 | { |
1278 | int nr_hw_segs = bio_hw_segments(q, bio); | 1278 | int nr_hw_segs = bio_hw_segments(q, bio); |
1279 | int nr_phys_segs = bio_phys_segments(q, bio); | 1279 | int nr_phys_segs = bio_phys_segments(q, bio); |
1280 | 1280 | ||
1281 | if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments | 1281 | if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments |
1282 | || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { | 1282 | || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { |
1283 | req->flags |= REQ_NOMERGE; | 1283 | req->flags |= REQ_NOMERGE; |
1284 | if (req == q->last_merge) | 1284 | if (req == q->last_merge) |
1285 | q->last_merge = NULL; | 1285 | q->last_merge = NULL; |
1286 | return 0; | 1286 | return 0; |
1287 | } | 1287 | } |
1288 | 1288 | ||
1289 | /* | 1289 | /* |
1290 | * This will form the start of a new hw segment. Bump both | 1290 | * This will form the start of a new hw segment. Bump both |
1291 | * counters. | 1291 | * counters. |
1292 | */ | 1292 | */ |
1293 | req->nr_hw_segments += nr_hw_segs; | 1293 | req->nr_hw_segments += nr_hw_segs; |
1294 | req->nr_phys_segments += nr_phys_segs; | 1294 | req->nr_phys_segments += nr_phys_segs; |
1295 | return 1; | 1295 | return 1; |
1296 | } | 1296 | } |
1297 | 1297 | ||
1298 | static int ll_back_merge_fn(request_queue_t *q, struct request *req, | 1298 | static int ll_back_merge_fn(request_queue_t *q, struct request *req, |
1299 | struct bio *bio) | 1299 | struct bio *bio) |
1300 | { | 1300 | { |
1301 | unsigned short max_sectors; | 1301 | unsigned short max_sectors; |
1302 | int len; | 1302 | int len; |
1303 | 1303 | ||
1304 | if (unlikely(blk_pc_request(req))) | 1304 | if (unlikely(blk_pc_request(req))) |
1305 | max_sectors = q->max_hw_sectors; | 1305 | max_sectors = q->max_hw_sectors; |
1306 | else | 1306 | else |
1307 | max_sectors = q->max_sectors; | 1307 | max_sectors = q->max_sectors; |
1308 | 1308 | ||
1309 | if (req->nr_sectors + bio_sectors(bio) > max_sectors) { | 1309 | if (req->nr_sectors + bio_sectors(bio) > max_sectors) { |
1310 | req->flags |= REQ_NOMERGE; | 1310 | req->flags |= REQ_NOMERGE; |
1311 | if (req == q->last_merge) | 1311 | if (req == q->last_merge) |
1312 | q->last_merge = NULL; | 1312 | q->last_merge = NULL; |
1313 | return 0; | 1313 | return 0; |
1314 | } | 1314 | } |
1315 | if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID))) | 1315 | if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID))) |
1316 | blk_recount_segments(q, req->biotail); | 1316 | blk_recount_segments(q, req->biotail); |
1317 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) | 1317 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) |
1318 | blk_recount_segments(q, bio); | 1318 | blk_recount_segments(q, bio); |
1319 | len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size; | 1319 | len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size; |
1320 | if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) && | 1320 | if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) && |
1321 | !BIOVEC_VIRT_OVERSIZE(len)) { | 1321 | !BIOVEC_VIRT_OVERSIZE(len)) { |
1322 | int mergeable = ll_new_mergeable(q, req, bio); | 1322 | int mergeable = ll_new_mergeable(q, req, bio); |
1323 | 1323 | ||
1324 | if (mergeable) { | 1324 | if (mergeable) { |
1325 | if (req->nr_hw_segments == 1) | 1325 | if (req->nr_hw_segments == 1) |
1326 | req->bio->bi_hw_front_size = len; | 1326 | req->bio->bi_hw_front_size = len; |
1327 | if (bio->bi_hw_segments == 1) | 1327 | if (bio->bi_hw_segments == 1) |
1328 | bio->bi_hw_back_size = len; | 1328 | bio->bi_hw_back_size = len; |
1329 | } | 1329 | } |
1330 | return mergeable; | 1330 | return mergeable; |
1331 | } | 1331 | } |
1332 | 1332 | ||
1333 | return ll_new_hw_segment(q, req, bio); | 1333 | return ll_new_hw_segment(q, req, bio); |
1334 | } | 1334 | } |
1335 | 1335 | ||
1336 | static int ll_front_merge_fn(request_queue_t *q, struct request *req, | 1336 | static int ll_front_merge_fn(request_queue_t *q, struct request *req, |
1337 | struct bio *bio) | 1337 | struct bio *bio) |
1338 | { | 1338 | { |
1339 | unsigned short max_sectors; | 1339 | unsigned short max_sectors; |
1340 | int len; | 1340 | int len; |
1341 | 1341 | ||
1342 | if (unlikely(blk_pc_request(req))) | 1342 | if (unlikely(blk_pc_request(req))) |
1343 | max_sectors = q->max_hw_sectors; | 1343 | max_sectors = q->max_hw_sectors; |
1344 | else | 1344 | else |
1345 | max_sectors = q->max_sectors; | 1345 | max_sectors = q->max_sectors; |
1346 | 1346 | ||
1347 | 1347 | ||
1348 | if (req->nr_sectors + bio_sectors(bio) > max_sectors) { | 1348 | if (req->nr_sectors + bio_sectors(bio) > max_sectors) { |
1349 | req->flags |= REQ_NOMERGE; | 1349 | req->flags |= REQ_NOMERGE; |
1350 | if (req == q->last_merge) | 1350 | if (req == q->last_merge) |
1351 | q->last_merge = NULL; | 1351 | q->last_merge = NULL; |
1352 | return 0; | 1352 | return 0; |
1353 | } | 1353 | } |
1354 | len = bio->bi_hw_back_size + req->bio->bi_hw_front_size; | 1354 | len = bio->bi_hw_back_size + req->bio->bi_hw_front_size; |
1355 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) | 1355 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) |
1356 | blk_recount_segments(q, bio); | 1356 | blk_recount_segments(q, bio); |
1357 | if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID))) | 1357 | if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID))) |
1358 | blk_recount_segments(q, req->bio); | 1358 | blk_recount_segments(q, req->bio); |
1359 | if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && | 1359 | if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && |
1360 | !BIOVEC_VIRT_OVERSIZE(len)) { | 1360 | !BIOVEC_VIRT_OVERSIZE(len)) { |
1361 | int mergeable = ll_new_mergeable(q, req, bio); | 1361 | int mergeable = ll_new_mergeable(q, req, bio); |
1362 | 1362 | ||
1363 | if (mergeable) { | 1363 | if (mergeable) { |
1364 | if (bio->bi_hw_segments == 1) | 1364 | if (bio->bi_hw_segments == 1) |
1365 | bio->bi_hw_front_size = len; | 1365 | bio->bi_hw_front_size = len; |
1366 | if (req->nr_hw_segments == 1) | 1366 | if (req->nr_hw_segments == 1) |
1367 | req->biotail->bi_hw_back_size = len; | 1367 | req->biotail->bi_hw_back_size = len; |
1368 | } | 1368 | } |
1369 | return mergeable; | 1369 | return mergeable; |
1370 | } | 1370 | } |
1371 | 1371 | ||
1372 | return ll_new_hw_segment(q, req, bio); | 1372 | return ll_new_hw_segment(q, req, bio); |
1373 | } | 1373 | } |
1374 | 1374 | ||
1375 | static int ll_merge_requests_fn(request_queue_t *q, struct request *req, | 1375 | static int ll_merge_requests_fn(request_queue_t *q, struct request *req, |
1376 | struct request *next) | 1376 | struct request *next) |
1377 | { | 1377 | { |
1378 | int total_phys_segments; | 1378 | int total_phys_segments; |
1379 | int total_hw_segments; | 1379 | int total_hw_segments; |
1380 | 1380 | ||
1381 | /* | 1381 | /* |
1382 | * First check if either of the requests are re-queued | 1382 | * First check if either of the requests are re-queued |
1383 | * requests. Can't merge them if they are. | 1383 | * requests. Can't merge them if they are. |
1384 | */ | 1384 | */ |
1385 | if (req->special || next->special) | 1385 | if (req->special || next->special) |
1386 | return 0; | 1386 | return 0; |
1387 | 1387 | ||
1388 | /* | 1388 | /* |
1389 | * Will it become too large? | 1389 | * Will it become too large? |
1390 | */ | 1390 | */ |
1391 | if ((req->nr_sectors + next->nr_sectors) > q->max_sectors) | 1391 | if ((req->nr_sectors + next->nr_sectors) > q->max_sectors) |
1392 | return 0; | 1392 | return 0; |
1393 | 1393 | ||
1394 | total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; | 1394 | total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; |
1395 | if (blk_phys_contig_segment(q, req->biotail, next->bio)) | 1395 | if (blk_phys_contig_segment(q, req->biotail, next->bio)) |
1396 | total_phys_segments--; | 1396 | total_phys_segments--; |
1397 | 1397 | ||
1398 | if (total_phys_segments > q->max_phys_segments) | 1398 | if (total_phys_segments > q->max_phys_segments) |
1399 | return 0; | 1399 | return 0; |
1400 | 1400 | ||
1401 | total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; | 1401 | total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; |
1402 | if (blk_hw_contig_segment(q, req->biotail, next->bio)) { | 1402 | if (blk_hw_contig_segment(q, req->biotail, next->bio)) { |
1403 | int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size; | 1403 | int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size; |
1404 | /* | 1404 | /* |
1405 | * propagate the combined length to the end of the requests | 1405 | * propagate the combined length to the end of the requests |
1406 | */ | 1406 | */ |
1407 | if (req->nr_hw_segments == 1) | 1407 | if (req->nr_hw_segments == 1) |
1408 | req->bio->bi_hw_front_size = len; | 1408 | req->bio->bi_hw_front_size = len; |
1409 | if (next->nr_hw_segments == 1) | 1409 | if (next->nr_hw_segments == 1) |
1410 | next->biotail->bi_hw_back_size = len; | 1410 | next->biotail->bi_hw_back_size = len; |
1411 | total_hw_segments--; | 1411 | total_hw_segments--; |
1412 | } | 1412 | } |
1413 | 1413 | ||
1414 | if (total_hw_segments > q->max_hw_segments) | 1414 | if (total_hw_segments > q->max_hw_segments) |
1415 | return 0; | 1415 | return 0; |
1416 | 1416 | ||
1417 | /* Merge is OK... */ | 1417 | /* Merge is OK... */ |
1418 | req->nr_phys_segments = total_phys_segments; | 1418 | req->nr_phys_segments = total_phys_segments; |
1419 | req->nr_hw_segments = total_hw_segments; | 1419 | req->nr_hw_segments = total_hw_segments; |
1420 | return 1; | 1420 | return 1; |
1421 | } | 1421 | } |
1422 | 1422 | ||
1423 | /* | 1423 | /* |
1424 | * "plug" the device if there are no outstanding requests: this will | 1424 | * "plug" the device if there are no outstanding requests: this will |
1425 | * force the transfer to start only after we have put all the requests | 1425 | * force the transfer to start only after we have put all the requests |
1426 | * on the list. | 1426 | * on the list. |
1427 | * | 1427 | * |
1428 | * This is called with interrupts off and no requests on the queue and | 1428 | * This is called with interrupts off and no requests on the queue and |
1429 | * with the queue lock held. | 1429 | * with the queue lock held. |
1430 | */ | 1430 | */ |
1431 | void blk_plug_device(request_queue_t *q) | 1431 | void blk_plug_device(request_queue_t *q) |
1432 | { | 1432 | { |
1433 | WARN_ON(!irqs_disabled()); | 1433 | WARN_ON(!irqs_disabled()); |
1434 | 1434 | ||
1435 | /* | 1435 | /* |
1436 | * don't plug a stopped queue, it must be paired with blk_start_queue() | 1436 | * don't plug a stopped queue, it must be paired with blk_start_queue() |
1437 | * which will restart the queueing | 1437 | * which will restart the queueing |
1438 | */ | 1438 | */ |
1439 | if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) | 1439 | if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) |
1440 | return; | 1440 | return; |
1441 | 1441 | ||
1442 | if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) | 1442 | if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) |
1443 | mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); | 1443 | mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); |
1444 | } | 1444 | } |
1445 | 1445 | ||
1446 | EXPORT_SYMBOL(blk_plug_device); | 1446 | EXPORT_SYMBOL(blk_plug_device); |
1447 | 1447 | ||
1448 | /* | 1448 | /* |
1449 | * remove the queue from the plugged list, if present. called with | 1449 | * remove the queue from the plugged list, if present. called with |
1450 | * queue lock held and interrupts disabled. | 1450 | * queue lock held and interrupts disabled. |
1451 | */ | 1451 | */ |
1452 | int blk_remove_plug(request_queue_t *q) | 1452 | int blk_remove_plug(request_queue_t *q) |
1453 | { | 1453 | { |
1454 | WARN_ON(!irqs_disabled()); | 1454 | WARN_ON(!irqs_disabled()); |
1455 | 1455 | ||
1456 | if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) | 1456 | if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) |
1457 | return 0; | 1457 | return 0; |
1458 | 1458 | ||
1459 | del_timer(&q->unplug_timer); | 1459 | del_timer(&q->unplug_timer); |
1460 | return 1; | 1460 | return 1; |
1461 | } | 1461 | } |
1462 | 1462 | ||
1463 | EXPORT_SYMBOL(blk_remove_plug); | 1463 | EXPORT_SYMBOL(blk_remove_plug); |
1464 | 1464 | ||
1465 | /* | 1465 | /* |
1466 | * remove the plug and let it rip.. | 1466 | * remove the plug and let it rip.. |
1467 | */ | 1467 | */ |
1468 | void __generic_unplug_device(request_queue_t *q) | 1468 | void __generic_unplug_device(request_queue_t *q) |
1469 | { | 1469 | { |
1470 | if (unlikely(test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))) | 1470 | if (unlikely(test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))) |
1471 | return; | 1471 | return; |
1472 | 1472 | ||
1473 | if (!blk_remove_plug(q)) | 1473 | if (!blk_remove_plug(q)) |
1474 | return; | 1474 | return; |
1475 | 1475 | ||
1476 | q->request_fn(q); | 1476 | q->request_fn(q); |
1477 | } | 1477 | } |
1478 | EXPORT_SYMBOL(__generic_unplug_device); | 1478 | EXPORT_SYMBOL(__generic_unplug_device); |
1479 | 1479 | ||
1480 | /** | 1480 | /** |
1481 | * generic_unplug_device - fire a request queue | 1481 | * generic_unplug_device - fire a request queue |
1482 | * @q: The &request_queue_t in question | 1482 | * @q: The &request_queue_t in question |
1483 | * | 1483 | * |
1484 | * Description: | 1484 | * Description: |
1485 | * Linux uses plugging to build bigger request queues before letting | 1485 | * Linux uses plugging to build bigger request queues before letting |
1486 | * the device have at them. If a queue is plugged, the I/O scheduler | 1486 | * the device have at them. If a queue is plugged, the I/O scheduler |
1487 | * is still adding and merging requests on the queue. Once the queue | 1487 | * is still adding and merging requests on the queue. Once the queue |
1488 | * gets unplugged, the request_fn defined for the queue is invoked and | 1488 | * gets unplugged, the request_fn defined for the queue is invoked and |
1489 | * transfers started. | 1489 | * transfers started. |
1490 | **/ | 1490 | **/ |
1491 | void generic_unplug_device(request_queue_t *q) | 1491 | void generic_unplug_device(request_queue_t *q) |
1492 | { | 1492 | { |
1493 | spin_lock_irq(q->queue_lock); | 1493 | spin_lock_irq(q->queue_lock); |
1494 | __generic_unplug_device(q); | 1494 | __generic_unplug_device(q); |
1495 | spin_unlock_irq(q->queue_lock); | 1495 | spin_unlock_irq(q->queue_lock); |
1496 | } | 1496 | } |
1497 | EXPORT_SYMBOL(generic_unplug_device); | 1497 | EXPORT_SYMBOL(generic_unplug_device); |
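As an aside to the kerneldoc above (a sketch, not code from this commit): a caller that has queued I/O and is about to sleep on its completion can release the plug explicitly so the request_fn runs now rather than when the unplug timer fires. Hypothetical fragment, my_* names invented:

	/* hedged sketch: push queued I/O out before waiting on it */
	static void my_submit_and_wait(request_queue_t *q, struct bio *bio)
	{
		submit_bio(bio->bi_rw, bio);	/* may leave the queue plugged */
		generic_unplug_device(q);	/* takes the queue lock, removes the plug, runs request_fn */
		/* ... wait for the bio's end_io completion ... */
	}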
1498 | 1498 | ||
1499 | static void blk_backing_dev_unplug(struct backing_dev_info *bdi, | 1499 | static void blk_backing_dev_unplug(struct backing_dev_info *bdi, |
1500 | struct page *page) | 1500 | struct page *page) |
1501 | { | 1501 | { |
1502 | request_queue_t *q = bdi->unplug_io_data; | 1502 | request_queue_t *q = bdi->unplug_io_data; |
1503 | 1503 | ||
1504 | /* | 1504 | /* |
1505 | * devices don't necessarily have an ->unplug_fn defined | 1505 | * devices don't necessarily have an ->unplug_fn defined |
1506 | */ | 1506 | */ |
1507 | if (q->unplug_fn) | 1507 | if (q->unplug_fn) |
1508 | q->unplug_fn(q); | 1508 | q->unplug_fn(q); |
1509 | } | 1509 | } |
1510 | 1510 | ||
1511 | static void blk_unplug_work(void *data) | 1511 | static void blk_unplug_work(void *data) |
1512 | { | 1512 | { |
1513 | request_queue_t *q = data; | 1513 | request_queue_t *q = data; |
1514 | 1514 | ||
1515 | q->unplug_fn(q); | 1515 | q->unplug_fn(q); |
1516 | } | 1516 | } |
1517 | 1517 | ||
1518 | static void blk_unplug_timeout(unsigned long data) | 1518 | static void blk_unplug_timeout(unsigned long data) |
1519 | { | 1519 | { |
1520 | request_queue_t *q = (request_queue_t *)data; | 1520 | request_queue_t *q = (request_queue_t *)data; |
1521 | 1521 | ||
1522 | kblockd_schedule_work(&q->unplug_work); | 1522 | kblockd_schedule_work(&q->unplug_work); |
1523 | } | 1523 | } |
1524 | 1524 | ||
1525 | /** | 1525 | /** |
1526 | * blk_start_queue - restart a previously stopped queue | 1526 | * blk_start_queue - restart a previously stopped queue |
1527 | * @q: The &request_queue_t in question | 1527 | * @q: The &request_queue_t in question |
1528 | * | 1528 | * |
1529 | * Description: | 1529 | * Description: |
1530 | * blk_start_queue() will clear the stop flag on the queue, and call | 1530 | * blk_start_queue() will clear the stop flag on the queue, and call |
1531 | * the request_fn for the queue if it was in a stopped state when | 1531 | * the request_fn for the queue if it was in a stopped state when |
1532 | * entered. Also see blk_stop_queue(). Queue lock must be held. | 1532 | * entered. Also see blk_stop_queue(). Queue lock must be held. |
1533 | **/ | 1533 | **/ |
1534 | void blk_start_queue(request_queue_t *q) | 1534 | void blk_start_queue(request_queue_t *q) |
1535 | { | 1535 | { |
1536 | clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); | 1536 | clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); |
1537 | 1537 | ||
1538 | /* | 1538 | /* |
1539 | * one level of recursion is ok and is much faster than kicking | 1539 | * one level of recursion is ok and is much faster than kicking |
1540 | * the unplug handling | 1540 | * the unplug handling |
1541 | */ | 1541 | */ |
1542 | if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { | 1542 | if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { |
1543 | q->request_fn(q); | 1543 | q->request_fn(q); |
1544 | clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); | 1544 | clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); |
1545 | } else { | 1545 | } else { |
1546 | blk_plug_device(q); | 1546 | blk_plug_device(q); |
1547 | kblockd_schedule_work(&q->unplug_work); | 1547 | kblockd_schedule_work(&q->unplug_work); |
1548 | } | 1548 | } |
1549 | } | 1549 | } |
1550 | 1550 | ||
1551 | EXPORT_SYMBOL(blk_start_queue); | 1551 | EXPORT_SYMBOL(blk_start_queue); |
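To illustrate the restart half of the stop/start pairing described above (a sketch, not part of this commit), a driver would typically restart the queue from its interrupt or completion handler once the hardware can accept more work. Hypothetical fragment using the 2.6-era handler signature; all my_* names are invented:

	/* hedged sketch: restart a queue that blk_stop_queue() stopped earlier */
	static irqreturn_t my_interrupt(int irq, void *dev_id, struct pt_regs *regs)
	{
		struct my_dev *dev = dev_id;
		unsigned long flags;

		spin_lock_irqsave(dev->queue->queue_lock, flags);
		if (my_hw_can_accept_commands(dev))	/* hypothetical hardware check */
			blk_start_queue(dev->queue);	/* queue lock must be held, as noted above */
		spin_unlock_irqrestore(dev->queue->queue_lock, flags);

		return IRQ_HANDLED;
	}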
1552 | 1552 | ||
1553 | /** | 1553 | /** |
1554 | * blk_stop_queue - stop a queue | 1554 | * blk_stop_queue - stop a queue |
1555 | * @q: The &request_queue_t in question | 1555 | * @q: The &request_queue_t in question |
1556 | * | 1556 | * |
1557 | * Description: | 1557 | * Description: |
1558 | * The Linux block layer assumes that a block driver will consume all | 1558 | * The Linux block layer assumes that a block driver will consume all |
1559 | * entries on the request queue when the request_fn strategy is called. | 1559 | * entries on the request queue when the request_fn strategy is called. |
1560 | * Often this will not happen, because of hardware limitations (queue | 1560 | * Often this will not happen, because of hardware limitations (queue |
1561 | * depth settings). If a device driver gets a 'queue full' response, | 1561 | * depth settings). If a device driver gets a 'queue full' response, |
1562 | * or if it simply chooses not to queue more I/O at one point, it can | 1562 | * or if it simply chooses not to queue more I/O at one point, it can |
1563 | * call this function to prevent the request_fn from being called until | 1563 | * call this function to prevent the request_fn from being called until |
1564 | * the driver has signalled it's ready to go again. This happens by calling | 1564 | * the driver has signalled it's ready to go again. This happens by calling |
1565 | * blk_start_queue() to restart queue operations. Queue lock must be held. | 1565 | * blk_start_queue() to restart queue operations. Queue lock must be held. |
1566 | **/ | 1566 | **/ |
1567 | void blk_stop_queue(request_queue_t *q) | 1567 | void blk_stop_queue(request_queue_t *q) |
1568 | { | 1568 | { |
1569 | blk_remove_plug(q); | 1569 | blk_remove_plug(q); |
1570 | set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); | 1570 | set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); |
1571 | } | 1571 | } |
1572 | EXPORT_SYMBOL(blk_stop_queue); | 1572 | EXPORT_SYMBOL(blk_stop_queue); |
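The matching stop half usually lives in the driver's request_fn, exactly as the description suggests: on a 'queue full' condition, stop the queue and let a completion path like the one sketched above restart it. Hypothetical fragment, my_* names invented:

	/* hedged sketch: throttle the request_fn on a queue-full condition
	 * (the queue lock is already held when request_fn is invoked) */
	static void my_request_fn(request_queue_t *q)
	{
		struct request *rq;

		while ((rq = elv_next_request(q)) != NULL) {
			if (my_hw_queue_full(q->queuedata)) {	/* hypothetical check */
				blk_stop_queue(q);		/* no request_fn calls until blk_start_queue() */
				break;
			}
			blkdev_dequeue_request(rq);
			my_hw_issue(q->queuedata, rq);		/* hypothetical dispatch */
		}
	}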
1573 | 1573 | ||
1574 | /** | 1574 | /** |
1575 | * blk_sync_queue - cancel any pending callbacks on a queue | 1575 | * blk_sync_queue - cancel any pending callbacks on a queue |
1576 | * @q: the queue | 1576 | * @q: the queue |
1577 | * | 1577 | * |
1578 | * Description: | 1578 | * Description: |
1579 | * The block layer may perform asynchronous callback activity | 1579 | * The block layer may perform asynchronous callback activity |
1580 | * on a queue, such as calling the unplug function after a timeout. | 1580 | * on a queue, such as calling the unplug function after a timeout. |
1581 | * A block device may call blk_sync_queue to ensure that any | 1581 | * A block device may call blk_sync_queue to ensure that any |
1582 | * such activity is cancelled, thus allowing it to release resources | 1582 | * such activity is cancelled, thus allowing it to release resources |
1583 | * the callbacks might use. The caller must already have made sure | 1583 | * the callbacks might use. The caller must already have made sure |
1584 | * that its ->make_request_fn will not re-add plugging prior to calling | 1584 | * that its ->make_request_fn will not re-add plugging prior to calling |
1585 | * this function. | 1585 | * this function. |
1586 | * | 1586 | * |
1587 | */ | 1587 | */ |
1588 | void blk_sync_queue(struct request_queue *q) | 1588 | void blk_sync_queue(struct request_queue *q) |
1589 | { | 1589 | { |
1590 | del_timer_sync(&q->unplug_timer); | 1590 | del_timer_sync(&q->unplug_timer); |
1591 | kblockd_flush(); | 1591 | kblockd_flush(); |
1592 | } | 1592 | } |
1593 | EXPORT_SYMBOL(blk_sync_queue); | 1593 | EXPORT_SYMBOL(blk_sync_queue); |
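In practice this sits in a driver's quiesce or teardown path; a hypothetical sketch of the ordering the description asks for (not code from this commit):

	/* hedged sketch: make sure no unplug timer or kblockd work touches q afterwards */
	static void my_quiesce(request_queue_t *q)
	{
		unsigned long flags;

		spin_lock_irqsave(q->queue_lock, flags);
		blk_stop_queue(q);		/* stop feeding the hardware; lock must be held */
		spin_unlock_irqrestore(q->queue_lock, flags);

		blk_sync_queue(q);		/* del_timer_sync() the unplug timer, flush kblockd */
	}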
1594 | 1594 | ||
1595 | /** | 1595 | /** |
1596 | * blk_run_queue - run a single device queue | 1596 | * blk_run_queue - run a single device queue |
1597 | * @q: The queue to run | 1597 | * @q: The queue to run |
1598 | */ | 1598 | */ |
1599 | void blk_run_queue(struct request_queue *q) | 1599 | void blk_run_queue(struct request_queue *q) |
1600 | { | 1600 | { |
1601 | unsigned long flags; | 1601 | unsigned long flags; |
1602 | 1602 | ||
1603 | spin_lock_irqsave(q->queue_lock, flags); | 1603 | spin_lock_irqsave(q->queue_lock, flags); |
1604 | blk_remove_plug(q); | 1604 | blk_remove_plug(q); |
1605 | if (!elv_queue_empty(q)) | 1605 | if (!elv_queue_empty(q)) |
1606 | q->request_fn(q); | 1606 | q->request_fn(q); |
1607 | spin_unlock_irqrestore(q->queue_lock, flags); | 1607 | spin_unlock_irqrestore(q->queue_lock, flags); |
1608 | } | 1608 | } |
1609 | EXPORT_SYMBOL(blk_run_queue); | 1609 | EXPORT_SYMBOL(blk_run_queue); |
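A typical caller is a completion path that has just freed a hardware slot and wants pending requests dispatched without waiting for an unplug; a hypothetical sketch (my_* names invented, not part of this commit):

	/* hedged sketch: kick the queue once resources are available again */
	static void my_command_done(struct my_dev *dev)
	{
		/* ... complete the finished request, mark the hardware slot free ... */
		blk_run_queue(dev->queue);	/* takes the queue lock itself, removes the plug, runs request_fn */
	}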
1610 | 1610 | ||
1611 | /** | 1611 | /** |
1612 | * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed | 1612 | * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed |
1613 | * @q: the request queue to be released | 1613 | * @q: the request queue to be released |
1614 | * | 1614 | * |
1615 | * Description: | 1615 | * Description: |
1616 | * blk_cleanup_queue is the pair to blk_init_queue() or | 1616 | * blk_cleanup_queue is the pair to blk_init_queue() or |
1617 | * blk_queue_make_request(). It should be called when a request queue is | 1617 | * blk_queue_make_request(). It should be called when a request queue is |
1618 | * being released; typically when a block device is being de-registered. | 1618 | * being released; typically when a block device is being de-registered. |
1619 | * Currently, its primary task is to free all the &struct request | 1619 | * Currently, its primary task is to free all the &struct request |
1620 | * structures that were allocated to the queue and the queue itself. | 1620 | * structures that were allocated to the queue and the queue itself. |
1621 | * | 1621 | * |
1622 | * Caveat: | 1622 | * Caveat: |
1623 | * Hopefully the low level driver will have finished any | 1623 | * Hopefully the low level driver will have finished any |
1624 | * outstanding requests first... | 1624 | * outstanding requests first... |
1625 | **/ | 1625 | **/ |
1626 | void blk_cleanup_queue(request_queue_t * q) | 1626 | void blk_cleanup_queue(request_queue_t * q) |
1627 | { | 1627 | { |
1628 | struct request_list *rl = &q->rq; | 1628 | struct request_list *rl = &q->rq; |
1629 | 1629 | ||
1630 | if (!atomic_dec_and_test(&q->refcnt)) | 1630 | if (!atomic_dec_and_test(&q->refcnt)) |
1631 | return; | 1631 | return; |
1632 | 1632 | ||
1633 | if (q->elevator) | 1633 | if (q->elevator) |
1634 | elevator_exit(q->elevator); | 1634 | elevator_exit(q->elevator); |
1635 | 1635 | ||
1636 | blk_sync_queue(q); | 1636 | blk_sync_queue(q); |
1637 | 1637 | ||
1638 | if (rl->rq_pool) | 1638 | if (rl->rq_pool) |
1639 | mempool_destroy(rl->rq_pool); | 1639 | mempool_destroy(rl->rq_pool); |
1640 | 1640 | ||
1641 | if (q->queue_tags) | 1641 | if (q->queue_tags) |
1642 | __blk_queue_free_tags(q); | 1642 | __blk_queue_free_tags(q); |
1643 | 1643 | ||
1644 | blk_queue_ordered(q, QUEUE_ORDERED_NONE); | 1644 | blk_queue_ordered(q, QUEUE_ORDERED_NONE); |
1645 | 1645 | ||
1646 | kmem_cache_free(requestq_cachep, q); | 1646 | kmem_cache_free(requestq_cachep, q); |
1647 | } | 1647 | } |
1648 | 1648 | ||
1649 | EXPORT_SYMBOL(blk_cleanup_queue); | 1649 | EXPORT_SYMBOL(blk_cleanup_queue); |
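The pairing described in the kerneldoc typically shows up in a driver's exit path; a hypothetical sketch where my_disk, my_queue and MY_MAJOR stand in for whatever the init path set up (not code from this commit):

	/* hedged sketch: tear down in the reverse order of initialisation */
	static void __exit my_driver_exit(void)
	{
		del_gendisk(my_disk);			/* stop new I/O from reaching the queue */
		put_disk(my_disk);
		blk_cleanup_queue(my_queue);		/* drops the reference; frees the request mempool */
		unregister_blkdev(MY_MAJOR, "my_driver");
	}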
1650 | 1650 | ||
1651 | static int blk_init_free_list(request_queue_t *q) | 1651 | static int blk_init_free_list(request_queue_t *q) |
1652 | { | 1652 | { |
1653 | struct request_list *rl = &q->rq; | 1653 | struct request_list *rl = &q->rq; |
1654 | 1654 | ||
1655 | rl->count[READ] = rl->count[WRITE] = 0; | 1655 | rl->count[READ] = rl->count[WRITE] = 0; |
1656 | rl->starved[READ] = rl->starved[WRITE] = 0; | 1656 | rl->starved[READ] = rl->starved[WRITE] = 0; |
1657 | rl->elvpriv = 0; | 1657 | rl->elvpriv = 0; |
1658 | init_waitqueue_head(&rl->wait[READ]); | 1658 | init_waitqueue_head(&rl->wait[READ]); |
1659 | init_waitqueue_head(&rl->wait[WRITE]); | 1659 | init_waitqueue_head(&rl->wait[WRITE]); |
1660 | 1660 | ||
1661 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, | 1661 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, |
1662 | mempool_free_slab, request_cachep, q->node); | 1662 | mempool_free_slab, request_cachep, q->node); |
1663 | 1663 | ||
1664 | if (!rl->rq_pool) | 1664 | if (!rl->rq_pool) |
1665 | return -ENOMEM; | 1665 | return -ENOMEM; |
1666 | 1666 | ||
1667 | return 0; | 1667 | return 0; |
1668 | } | 1668 | } |
1669 | 1669 | ||
1670 | static int __make_request(request_queue_t *, struct bio *); | 1670 | static int __make_request(request_queue_t *, struct bio *); |
1671 | 1671 | ||
1672 | request_queue_t *blk_alloc_queue(gfp_t gfp_mask) | 1672 | request_queue_t *blk_alloc_queue(gfp_t gfp_mask) |
1673 | { | 1673 | { |
1674 | return blk_alloc_queue_node(gfp_mask, -1); | 1674 | return blk_alloc_queue_node(gfp_mask, -1); |
1675 | } | 1675 | } |
1676 | EXPORT_SYMBOL(blk_alloc_queue); | 1676 | EXPORT_SYMBOL(blk_alloc_queue); |
1677 | 1677 | ||
1678 | request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | 1678 | request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) |
1679 | { | 1679 | { |
1680 | request_queue_t *q; | 1680 | request_queue_t *q; |
1681 | 1681 | ||
1682 | q = kmem_cache_alloc_node(requestq_cachep, gfp_mask, node_id); | 1682 | q = kmem_cache_alloc_node(requestq_cachep, gfp_mask, node_id); |
1683 | if (!q) | 1683 | if (!q) |
1684 | return NULL; | 1684 | return NULL; |
1685 | 1685 | ||
1686 | memset(q, 0, sizeof(*q)); | 1686 | memset(q, 0, sizeof(*q)); |
1687 | init_timer(&q->unplug_timer); | 1687 | init_timer(&q->unplug_timer); |
1688 | atomic_set(&q->refcnt, 1); | 1688 | atomic_set(&q->refcnt, 1); |
1689 | 1689 | ||
1690 | q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; | 1690 | q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; |
1691 | q->backing_dev_info.unplug_io_data = q; | 1691 | q->backing_dev_info.unplug_io_data = q; |
1692 | 1692 | ||
1693 | return q; | 1693 | return q; |
1694 | } | 1694 | } |
1695 | EXPORT_SYMBOL(blk_alloc_queue_node); | 1695 | EXPORT_SYMBOL(blk_alloc_queue_node); |
1696 | 1696 | ||
1697 | /** | 1697 | /** |
1698 | * blk_init_queue - prepare a request queue for use with a block device | 1698 | * blk_init_queue - prepare a request queue for use with a block device |
1699 | * @rfn: The function to be called to process requests that have been | 1699 | * @rfn: The function to be called to process requests that have been |
1700 | * placed on the queue. | 1700 | * placed on the queue. |
1701 | * @lock: Request queue spin lock | 1701 | * @lock: Request queue spin lock |
1702 | * | 1702 | * |
1703 | * Description: | 1703 | * Description: |
1704 | * If a block device wishes to use the standard request handling procedures, | 1704 | * If a block device wishes to use the standard request handling procedures, |
1705 | * which sorts requests and coalesces adjacent requests, then it must | 1705 | * which sorts requests and coalesces adjacent requests, then it must |
1706 | * call blk_init_queue(). The function @rfn will be called when there | 1706 | * call blk_init_queue(). The function @rfn will be called when there |
1707 | * are requests on the queue that need to be processed. If the device | 1707 | * are requests on the queue that need to be processed. If the device |
1708 | * supports plugging, then @rfn may not be called immediately when requests | 1708 | * supports plugging, then @rfn may not be called immediately when requests |
1709 | * are available on the queue, but may be called at some time later instead. | 1709 | * are available on the queue, but may be called at some time later instead. |
1710 | * Plugged queues are generally unplugged when a buffer belonging to one | 1710 | * Plugged queues are generally unplugged when a buffer belonging to one |
1711 | * of the requests on the queue is needed, or due to memory pressure. | 1711 | * of the requests on the queue is needed, or due to memory pressure. |
1712 | * | 1712 | * |
1713 | * @rfn is not required, or even expected, to remove all requests off the | 1713 | * @rfn is not required, or even expected, to remove all requests off the |
1714 | * queue, but only as many as it can handle at a time. If it does leave | 1714 | * queue, but only as many as it can handle at a time. If it does leave |
1715 | * requests on the queue, it is responsible for arranging that the requests | 1715 | * requests on the queue, it is responsible for arranging that the requests |
1716 | * get dealt with eventually. | 1716 | * get dealt with eventually. |
1717 | * | 1717 | * |
1718 | * The queue spin lock must be held while manipulating the requests on the | 1718 | * The queue spin lock must be held while manipulating the requests on the |
1719 | * request queue. | 1719 | * request queue. |
1720 | * | 1720 | * |
1721 | * Function returns a pointer to the initialized request queue, or NULL if | 1721 | * Function returns a pointer to the initialized request queue, or NULL if |
1722 | * it didn't succeed. | 1722 | * it didn't succeed. |
1723 | * | 1723 | * |
1724 | * Note: | 1724 | * Note: |
1725 | * blk_init_queue() must be paired with a blk_cleanup_queue() call | 1725 | * blk_init_queue() must be paired with a blk_cleanup_queue() call |
1726 | * when the block device is deactivated (such as at module unload). | 1726 | * when the block device is deactivated (such as at module unload). |
1727 | **/ | 1727 | **/ |
1728 | 1728 | ||
1729 | request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) | 1729 | request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) |
1730 | { | 1730 | { |
1731 | return blk_init_queue_node(rfn, lock, -1); | 1731 | return blk_init_queue_node(rfn, lock, -1); |
1732 | } | 1732 | } |
1733 | EXPORT_SYMBOL(blk_init_queue); | 1733 | EXPORT_SYMBOL(blk_init_queue); |
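A minimal init path matching this description might look like the following; it is only a sketch paired with the blk_cleanup_queue() example above, all my_* names and the limits are hypothetical, and error handling is trimmed to the essentials:

	/* hedged sketch: standard request-based queue setup */
	static spinlock_t my_lock;
	static request_queue_t *my_queue;

	static int __init my_driver_init(void)
	{
		spin_lock_init(&my_lock);

		my_queue = blk_init_queue(my_request_fn, &my_lock);
		if (!my_queue)
			return -ENOMEM;

		blk_queue_max_sectors(my_queue, 128);			/* hypothetical hardware limits */
		blk_queue_max_phys_segments(my_queue, MY_MAX_SEGMENTS);

		/* ... alloc_disk(), set disk->queue = my_queue, add_disk() ... */
		return 0;
	}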
1734 | 1734 | ||
1735 | request_queue_t * | 1735 | request_queue_t * |
1736 | blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) | 1736 | blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) |
1737 | { | 1737 | { |
1738 | request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id); | 1738 | request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id); |
1739 | 1739 | ||
1740 | if (!q) | 1740 | if (!q) |
1741 | return NULL; | 1741 | return NULL; |
1742 | 1742 | ||
1743 | q->node = node_id; | 1743 | q->node = node_id; |
1744 | if (blk_init_free_list(q)) | 1744 | if (blk_init_free_list(q)) |
1745 | goto out_init; | 1745 | goto out_init; |
1746 | 1746 | ||
1747 | /* | 1747 | /* |
1748 | * if caller didn't supply a lock, they get per-queue locking with | 1748 | * if caller didn't supply a lock, they get per-queue locking with |
1749 | * our embedded lock | 1749 | * our embedded lock |
1750 | */ | 1750 | */ |
1751 | if (!lock) { | 1751 | if (!lock) { |
1752 | spin_lock_init(&q->__queue_lock); | 1752 | spin_lock_init(&q->__queue_lock); |
1753 | lock = &q->__queue_lock; | 1753 | lock = &q->__queue_lock; |
1754 | } | 1754 | } |
1755 | 1755 | ||
1756 | q->request_fn = rfn; | 1756 | q->request_fn = rfn; |
1757 | q->back_merge_fn = ll_back_merge_fn; | 1757 | q->back_merge_fn = ll_back_merge_fn; |
1758 | q->front_merge_fn = ll_front_merge_fn; | 1758 | q->front_merge_fn = ll_front_merge_fn; |
1759 | q->merge_requests_fn = ll_merge_requests_fn; | 1759 | q->merge_requests_fn = ll_merge_requests_fn; |
1760 | q->prep_rq_fn = NULL; | 1760 | q->prep_rq_fn = NULL; |
1761 | q->unplug_fn = generic_unplug_device; | 1761 | q->unplug_fn = generic_unplug_device; |
1762 | q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); | 1762 | q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); |
1763 | q->queue_lock = lock; | 1763 | q->queue_lock = lock; |
1764 | 1764 | ||
1765 | blk_queue_segment_boundary(q, 0xffffffff); | 1765 | blk_queue_segment_boundary(q, 0xffffffff); |
1766 | 1766 | ||
1767 | blk_queue_make_request(q, __make_request); | 1767 | blk_queue_make_request(q, __make_request); |
1768 | blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); | 1768 | blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); |
1769 | 1769 | ||
1770 | blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); | 1770 | blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); |
1771 | blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); | 1771 | blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); |
1772 | 1772 | ||
1773 | /* | 1773 | /* |
1774 | * all done | 1774 | * all done |
1775 | */ | 1775 | */ |
1776 | if (!elevator_init(q, NULL)) { | 1776 | if (!elevator_init(q, NULL)) { |
1777 | blk_queue_congestion_threshold(q); | 1777 | blk_queue_congestion_threshold(q); |
1778 | return q; | 1778 | return q; |
1779 | } | 1779 | } |
1780 | 1780 | ||
1781 | blk_cleanup_queue(q); | 1781 | blk_cleanup_queue(q); |
1782 | out_init: | 1782 | out_init: |
1783 | kmem_cache_free(requestq_cachep, q); | 1783 | kmem_cache_free(requestq_cachep, q); |
1784 | return NULL; | 1784 | return NULL; |
1785 | } | 1785 | } |
1786 | EXPORT_SYMBOL(blk_init_queue_node); | 1786 | EXPORT_SYMBOL(blk_init_queue_node); |
1787 | 1787 | ||
1788 | int blk_get_queue(request_queue_t *q) | 1788 | int blk_get_queue(request_queue_t *q) |
1789 | { | 1789 | { |
1790 | if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { | 1790 | if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { |
1791 | atomic_inc(&q->refcnt); | 1791 | atomic_inc(&q->refcnt); |
1792 | return 0; | 1792 | return 0; |
1793 | } | 1793 | } |
1794 | 1794 | ||
1795 | return 1; | 1795 | return 1; |
1796 | } | 1796 | } |
1797 | 1797 | ||
1798 | EXPORT_SYMBOL(blk_get_queue); | 1798 | EXPORT_SYMBOL(blk_get_queue); |
1799 | 1799 | ||
1800 | static inline void blk_free_request(request_queue_t *q, struct request *rq) | 1800 | static inline void blk_free_request(request_queue_t *q, struct request *rq) |
1801 | { | 1801 | { |
1802 | if (rq->flags & REQ_ELVPRIV) | 1802 | if (rq->flags & REQ_ELVPRIV) |
1803 | elv_put_request(q, rq); | 1803 | elv_put_request(q, rq); |
1804 | mempool_free(rq, q->rq.rq_pool); | 1804 | mempool_free(rq, q->rq.rq_pool); |
1805 | } | 1805 | } |
1806 | 1806 | ||
1807 | static inline struct request * | 1807 | static inline struct request * |
1808 | blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, | 1808 | blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, |
1809 | int priv, gfp_t gfp_mask) | 1809 | int priv, gfp_t gfp_mask) |
1810 | { | 1810 | { |
1811 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); | 1811 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); |
1812 | 1812 | ||
1813 | if (!rq) | 1813 | if (!rq) |
1814 | return NULL; | 1814 | return NULL; |
1815 | 1815 | ||
1816 | /* | 1816 | /* |
1817 | * first three bits are identical in rq->flags and bio->bi_rw, | 1817 | * first three bits are identical in rq->flags and bio->bi_rw, |
1818 | * see bio.h and blkdev.h | 1818 | * see bio.h and blkdev.h |
1819 | */ | 1819 | */ |
1820 | rq->flags = rw; | 1820 | rq->flags = rw; |
1821 | 1821 | ||
1822 | if (priv) { | 1822 | if (priv) { |
1823 | if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) { | 1823 | if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) { |
1824 | mempool_free(rq, q->rq.rq_pool); | 1824 | mempool_free(rq, q->rq.rq_pool); |
1825 | return NULL; | 1825 | return NULL; |
1826 | } | 1826 | } |
1827 | rq->flags |= REQ_ELVPRIV; | 1827 | rq->flags |= REQ_ELVPRIV; |
1828 | } | 1828 | } |
1829 | 1829 | ||
1830 | return rq; | 1830 | return rq; |
1831 | } | 1831 | } |
1832 | 1832 | ||
1833 | /* | 1833 | /* |
1834 | * ioc_batching returns true if the ioc is a valid batching request and | 1834 | * ioc_batching returns true if the ioc is a valid batching request and |
1835 | * should be given priority access to a request. | 1835 | * should be given priority access to a request. |
1836 | */ | 1836 | */ |
1837 | static inline int ioc_batching(request_queue_t *q, struct io_context *ioc) | 1837 | static inline int ioc_batching(request_queue_t *q, struct io_context *ioc) |
1838 | { | 1838 | { |
1839 | if (!ioc) | 1839 | if (!ioc) |
1840 | return 0; | 1840 | return 0; |
1841 | 1841 | ||
1842 | /* | 1842 | /* |
1843 | * Make sure the process is able to allocate at least 1 request | 1843 | * Make sure the process is able to allocate at least 1 request |
1844 | * even if the batch times out, otherwise we could theoretically | 1844 | * even if the batch times out, otherwise we could theoretically |
1845 | * lose wakeups. | 1845 | * lose wakeups. |
1846 | */ | 1846 | */ |
1847 | return ioc->nr_batch_requests == q->nr_batching || | 1847 | return ioc->nr_batch_requests == q->nr_batching || |
1848 | (ioc->nr_batch_requests > 0 | 1848 | (ioc->nr_batch_requests > 0 |
1849 | && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); | 1849 | && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); |
1850 | } | 1850 | } |
1851 | 1851 | ||
1852 | /* | 1852 | /* |
1853 | * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This | 1853 | * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This |
1854 | * will cause the process to be a "batcher" on all queues in the system. This | 1854 | * will cause the process to be a "batcher" on all queues in the system. This |
1855 | * is the behaviour we want though - once it gets a wakeup it should be given | 1855 | * is the behaviour we want though - once it gets a wakeup it should be given |
1856 | * a nice run. | 1856 | * a nice run. |
1857 | */ | 1857 | */ |
1858 | static void ioc_set_batching(request_queue_t *q, struct io_context *ioc) | 1858 | static void ioc_set_batching(request_queue_t *q, struct io_context *ioc) |
1859 | { | 1859 | { |
1860 | if (!ioc || ioc_batching(q, ioc)) | 1860 | if (!ioc || ioc_batching(q, ioc)) |
1861 | return; | 1861 | return; |
1862 | 1862 | ||
1863 | ioc->nr_batch_requests = q->nr_batching; | 1863 | ioc->nr_batch_requests = q->nr_batching; |
1864 | ioc->last_waited = jiffies; | 1864 | ioc->last_waited = jiffies; |
1865 | } | 1865 | } |
1866 | 1866 | ||
1867 | static void __freed_request(request_queue_t *q, int rw) | 1867 | static void __freed_request(request_queue_t *q, int rw) |
1868 | { | 1868 | { |
1869 | struct request_list *rl = &q->rq; | 1869 | struct request_list *rl = &q->rq; |
1870 | 1870 | ||
1871 | if (rl->count[rw] < queue_congestion_off_threshold(q)) | 1871 | if (rl->count[rw] < queue_congestion_off_threshold(q)) |
1872 | clear_queue_congested(q, rw); | 1872 | clear_queue_congested(q, rw); |
1873 | 1873 | ||
1874 | if (rl->count[rw] + 1 <= q->nr_requests) { | 1874 | if (rl->count[rw] + 1 <= q->nr_requests) { |
1875 | if (waitqueue_active(&rl->wait[rw])) | 1875 | if (waitqueue_active(&rl->wait[rw])) |
1876 | wake_up(&rl->wait[rw]); | 1876 | wake_up(&rl->wait[rw]); |
1877 | 1877 | ||
1878 | blk_clear_queue_full(q, rw); | 1878 | blk_clear_queue_full(q, rw); |
1879 | } | 1879 | } |
1880 | } | 1880 | } |
1881 | 1881 | ||
1882 | /* | 1882 | /* |
1883 | * A request has just been released. Account for it, update the full and | 1883 | * A request has just been released. Account for it, update the full and |
1884 | * congestion status, wake up any waiters. Called under q->queue_lock. | 1884 | * congestion status, wake up any waiters. Called under q->queue_lock. |
1885 | */ | 1885 | */ |
1886 | static void freed_request(request_queue_t *q, int rw, int priv) | 1886 | static void freed_request(request_queue_t *q, int rw, int priv) |
1887 | { | 1887 | { |
1888 | struct request_list *rl = &q->rq; | 1888 | struct request_list *rl = &q->rq; |
1889 | 1889 | ||
1890 | rl->count[rw]--; | 1890 | rl->count[rw]--; |
1891 | if (priv) | 1891 | if (priv) |
1892 | rl->elvpriv--; | 1892 | rl->elvpriv--; |
1893 | 1893 | ||
1894 | __freed_request(q, rw); | 1894 | __freed_request(q, rw); |
1895 | 1895 | ||
1896 | if (unlikely(rl->starved[rw ^ 1])) | 1896 | if (unlikely(rl->starved[rw ^ 1])) |
1897 | __freed_request(q, rw ^ 1); | 1897 | __freed_request(q, rw ^ 1); |
1898 | } | 1898 | } |
1899 | 1899 | ||
1900 | #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) | 1900 | #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) |
1901 | /* | 1901 | /* |
1902 | * Get a free request, queue_lock must be held. | 1902 | * Get a free request, queue_lock must be held. |
1903 | * Returns NULL on failure, with queue_lock held. | 1903 | * Returns NULL on failure, with queue_lock held. |
1904 | * Returns !NULL on success, with queue_lock *not held*. | 1904 | * Returns !NULL on success, with queue_lock *not held*. |
1905 | */ | 1905 | */ |
1906 | static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, | 1906 | static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, |
1907 | gfp_t gfp_mask) | 1907 | gfp_t gfp_mask) |
1908 | { | 1908 | { |
1909 | struct request *rq = NULL; | 1909 | struct request *rq = NULL; |
1910 | struct request_list *rl = &q->rq; | 1910 | struct request_list *rl = &q->rq; |
1911 | struct io_context *ioc = NULL; | 1911 | struct io_context *ioc = NULL; |
1912 | int may_queue, priv; | 1912 | int may_queue, priv; |
1913 | 1913 | ||
1914 | may_queue = elv_may_queue(q, rw, bio); | 1914 | may_queue = elv_may_queue(q, rw, bio); |
1915 | if (may_queue == ELV_MQUEUE_NO) | 1915 | if (may_queue == ELV_MQUEUE_NO) |
1916 | goto rq_starved; | 1916 | goto rq_starved; |
1917 | 1917 | ||
1918 | if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { | 1918 | if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { |
1919 | if (rl->count[rw]+1 >= q->nr_requests) { | 1919 | if (rl->count[rw]+1 >= q->nr_requests) { |
1920 | ioc = current_io_context(GFP_ATOMIC); | 1920 | ioc = current_io_context(GFP_ATOMIC); |
1921 | /* | 1921 | /* |
1922 | * The queue will fill after this allocation, so set | 1922 | * The queue will fill after this allocation, so set |
1923 | * it as full, and mark this process as "batching". | 1923 | * it as full, and mark this process as "batching". |
1924 | * This process will be allowed to complete a batch of | 1924 | * This process will be allowed to complete a batch of |
1925 | * requests, others will be blocked. | 1925 | * requests, others will be blocked. |
1926 | */ | 1926 | */ |
1927 | if (!blk_queue_full(q, rw)) { | 1927 | if (!blk_queue_full(q, rw)) { |
1928 | ioc_set_batching(q, ioc); | 1928 | ioc_set_batching(q, ioc); |
1929 | blk_set_queue_full(q, rw); | 1929 | blk_set_queue_full(q, rw); |
1930 | } else { | 1930 | } else { |
1931 | if (may_queue != ELV_MQUEUE_MUST | 1931 | if (may_queue != ELV_MQUEUE_MUST |
1932 | && !ioc_batching(q, ioc)) { | 1932 | && !ioc_batching(q, ioc)) { |
1933 | /* | 1933 | /* |
1934 | * The queue is full and the allocating | 1934 | * The queue is full and the allocating |
1935 | * process is not a "batcher", and not | 1935 | * process is not a "batcher", and not |
1936 | * exempted by the IO scheduler | 1936 | * exempted by the IO scheduler |
1937 | */ | 1937 | */ |
1938 | goto out; | 1938 | goto out; |
1939 | } | 1939 | } |
1940 | } | 1940 | } |
1941 | } | 1941 | } |
1942 | set_queue_congested(q, rw); | 1942 | set_queue_congested(q, rw); |
1943 | } | 1943 | } |
1944 | 1944 | ||
1945 | /* | 1945 | /* |
1946 | * Only allow batching queuers to allocate up to 50% over the defined | 1946 | * Only allow batching queuers to allocate up to 50% over the defined |
1947 | * limit of requests, otherwise we could have thousands of requests | 1947 | * limit of requests, otherwise we could have thousands of requests |
1948 | * allocated with any setting of ->nr_requests | 1948 | * allocated with any setting of ->nr_requests |
1949 | */ | 1949 | */ |
1950 | if (rl->count[rw] >= (3 * q->nr_requests / 2)) | 1950 | if (rl->count[rw] >= (3 * q->nr_requests / 2)) |
1951 | goto out; | 1951 | goto out; |
1952 | 1952 | ||
1953 | rl->count[rw]++; | 1953 | rl->count[rw]++; |
1954 | rl->starved[rw] = 0; | 1954 | rl->starved[rw] = 0; |
1955 | 1955 | ||
1956 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 1956 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); |
1957 | if (priv) | 1957 | if (priv) |
1958 | rl->elvpriv++; | 1958 | rl->elvpriv++; |
1959 | 1959 | ||
1960 | spin_unlock_irq(q->queue_lock); | 1960 | spin_unlock_irq(q->queue_lock); |
1961 | 1961 | ||
1962 | rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); | 1962 | rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); |
1963 | if (unlikely(!rq)) { | 1963 | if (unlikely(!rq)) { |
1964 | /* | 1964 | /* |
1965 | * Allocation failed presumably due to memory. Undo anything | 1965 | * Allocation failed presumably due to memory. Undo anything |
1966 | * we might have messed up. | 1966 | * we might have messed up. |
1967 | * | 1967 | * |
1968 | * Allocating task should really be put onto the front of the | 1968 | * Allocating task should really be put onto the front of the |
1969 | * wait queue, but this is pretty rare. | 1969 | * wait queue, but this is pretty rare. |
1970 | */ | 1970 | */ |
1971 | spin_lock_irq(q->queue_lock); | 1971 | spin_lock_irq(q->queue_lock); |
1972 | freed_request(q, rw, priv); | 1972 | freed_request(q, rw, priv); |
1973 | 1973 | ||
1974 | /* | 1974 | /* |
1975 | * in the very unlikely event that allocation failed and no | 1975 | * in the very unlikely event that allocation failed and no |
1976 | * requests for this direction were pending, mark us starved | 1976 | * requests for this direction were pending, mark us starved |
1977 | * so that freeing of a request in the other direction will | 1977 | * so that freeing of a request in the other direction will |
1978 | * notice us. another possible fix would be to split the | 1978 | * notice us. another possible fix would be to split the |
1979 | * rq mempool into READ and WRITE | 1979 | * rq mempool into READ and WRITE |
1980 | */ | 1980 | */ |
1981 | rq_starved: | 1981 | rq_starved: |
1982 | if (unlikely(rl->count[rw] == 0)) | 1982 | if (unlikely(rl->count[rw] == 0)) |
1983 | rl->starved[rw] = 1; | 1983 | rl->starved[rw] = 1; |
1984 | 1984 | ||
1985 | goto out; | 1985 | goto out; |
1986 | } | 1986 | } |
1987 | 1987 | ||
1988 | /* | 1988 | /* |
1989 | * ioc may be NULL here, and ioc_batching will be false. That's | 1989 | * ioc may be NULL here, and ioc_batching will be false. That's |
1990 | * OK, if the queue is under the request limit then requests need | 1990 | * OK, if the queue is under the request limit then requests need |
1991 | * not count toward the nr_batch_requests limit. There will always | 1991 | * not count toward the nr_batch_requests limit. There will always |
1992 | * be some limit enforced by BLK_BATCH_TIME. | 1992 | * be some limit enforced by BLK_BATCH_TIME. |
1993 | */ | 1993 | */ |
1994 | if (ioc_batching(q, ioc)) | 1994 | if (ioc_batching(q, ioc)) |
1995 | ioc->nr_batch_requests--; | 1995 | ioc->nr_batch_requests--; |
1996 | 1996 | ||
1997 | rq_init(q, rq); | 1997 | rq_init(q, rq); |
1998 | rq->rl = rl; | 1998 | rq->rl = rl; |
1999 | out: | 1999 | out: |
2000 | return rq; | 2000 | return rq; |
2001 | } | 2001 | } |
2002 | 2002 | ||
2003 | /* | 2003 | /* |
2004 | * No available requests for this queue, unplug the device and wait for some | 2004 | * No available requests for this queue, unplug the device and wait for some |
2005 | * requests to become available. | 2005 | * requests to become available. |
2006 | * | 2006 | * |
2007 | * Called with q->queue_lock held, and returns with it unlocked. | 2007 | * Called with q->queue_lock held, and returns with it unlocked. |
2008 | */ | 2008 | */ |
2009 | static struct request *get_request_wait(request_queue_t *q, int rw, | 2009 | static struct request *get_request_wait(request_queue_t *q, int rw, |
2010 | struct bio *bio) | 2010 | struct bio *bio) |
2011 | { | 2011 | { |
2012 | struct request *rq; | 2012 | struct request *rq; |
2013 | 2013 | ||
2014 | rq = get_request(q, rw, bio, GFP_NOIO); | 2014 | rq = get_request(q, rw, bio, GFP_NOIO); |
2015 | while (!rq) { | 2015 | while (!rq) { |
2016 | DEFINE_WAIT(wait); | 2016 | DEFINE_WAIT(wait); |
2017 | struct request_list *rl = &q->rq; | 2017 | struct request_list *rl = &q->rq; |
2018 | 2018 | ||
2019 | prepare_to_wait_exclusive(&rl->wait[rw], &wait, | 2019 | prepare_to_wait_exclusive(&rl->wait[rw], &wait, |
2020 | TASK_UNINTERRUPTIBLE); | 2020 | TASK_UNINTERRUPTIBLE); |
2021 | 2021 | ||
2022 | rq = get_request(q, rw, bio, GFP_NOIO); | 2022 | rq = get_request(q, rw, bio, GFP_NOIO); |
2023 | 2023 | ||
2024 | if (!rq) { | 2024 | if (!rq) { |
2025 | struct io_context *ioc; | 2025 | struct io_context *ioc; |
2026 | 2026 | ||
2027 | __generic_unplug_device(q); | 2027 | __generic_unplug_device(q); |
2028 | spin_unlock_irq(q->queue_lock); | 2028 | spin_unlock_irq(q->queue_lock); |
2029 | io_schedule(); | 2029 | io_schedule(); |
2030 | 2030 | ||
2031 | /* | 2031 | /* |
2032 | * After sleeping, we become a "batching" process and | 2032 | * After sleeping, we become a "batching" process and |
2033 | * will be able to allocate at least one request, and | 2033 | * will be able to allocate at least one request, and |
2034 | * up to a big batch of them for a small period of time. | 2034 | * up to a big batch of them for a small period of time. |
2035 | * See ioc_batching, ioc_set_batching | 2035 | * See ioc_batching, ioc_set_batching |
2036 | */ | 2036 | */ |
2037 | ioc = current_io_context(GFP_NOIO); | 2037 | ioc = current_io_context(GFP_NOIO); |
2038 | ioc_set_batching(q, ioc); | 2038 | ioc_set_batching(q, ioc); |
2039 | 2039 | ||
2040 | spin_lock_irq(q->queue_lock); | 2040 | spin_lock_irq(q->queue_lock); |
2041 | } | 2041 | } |
2042 | finish_wait(&rl->wait[rw], &wait); | 2042 | finish_wait(&rl->wait[rw], &wait); |
2043 | } | 2043 | } |
2044 | 2044 | ||
2045 | return rq; | 2045 | return rq; |
2046 | } | 2046 | } |
2047 | 2047 | ||
2048 | struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask) | 2048 | struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask) |
2049 | { | 2049 | { |
2050 | struct request *rq; | 2050 | struct request *rq; |
2051 | 2051 | ||
2052 | BUG_ON(rw != READ && rw != WRITE); | 2052 | BUG_ON(rw != READ && rw != WRITE); |
2053 | 2053 | ||
2054 | spin_lock_irq(q->queue_lock); | 2054 | spin_lock_irq(q->queue_lock); |
2055 | if (gfp_mask & __GFP_WAIT) { | 2055 | if (gfp_mask & __GFP_WAIT) { |
2056 | rq = get_request_wait(q, rw, NULL); | 2056 | rq = get_request_wait(q, rw, NULL); |
2057 | } else { | 2057 | } else { |
2058 | rq = get_request(q, rw, NULL, gfp_mask); | 2058 | rq = get_request(q, rw, NULL, gfp_mask); |
2059 | if (!rq) | 2059 | if (!rq) |
2060 | spin_unlock_irq(q->queue_lock); | 2060 | spin_unlock_irq(q->queue_lock); |
2061 | } | 2061 | } |
2062 | /* q->queue_lock is unlocked at this point */ | 2062 | /* q->queue_lock is unlocked at this point */ |
2063 | 2063 | ||
2064 | return rq; | 2064 | return rq; |
2065 | } | 2065 | } |
2066 | EXPORT_SYMBOL(blk_get_request); | 2066 | EXPORT_SYMBOL(blk_get_request); |
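For context, and not part of this commit: a minimal usage sketch of the allocation API above, assuming process context in a driver; the example_ name is made up.

static int example_alloc_request(request_queue_t *q)
{
        struct request *rq;

        /*
         * GFP_KERNEL includes __GFP_WAIT, so this takes the
         * get_request_wait() path above and blocks until a request is
         * available instead of returning NULL; the check is defensive.
         */
        rq = blk_get_request(q, READ, GFP_KERNEL);
        if (!rq)
                return -ENOMEM;

        /* ... fill in the request here ... */

        blk_put_request(rq);
        return 0;
}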
2067 | 2067 | ||
2068 | /** | 2068 | /** |
2069 | * blk_requeue_request - put a request back on queue | 2069 | * blk_requeue_request - put a request back on queue |
2070 | * @q: request queue where request should be inserted | 2070 | * @q: request queue where request should be inserted |
2071 | * @rq: request to be inserted | 2071 | * @rq: request to be inserted |
2072 | * | 2072 | * |
2073 | * Description: | 2073 | * Description: |
2074 | * Drivers often keep queueing requests until the hardware cannot accept | 2074 | * Drivers often keep queueing requests until the hardware cannot accept |
2075 | * more. When that condition happens, we need to put the request back | 2075 | * more. When that condition happens, we need to put the request back |
2076 | * on the queue. Must be called with queue lock held. | 2076 | * on the queue. Must be called with queue lock held. |
2077 | */ | 2077 | */ |
2078 | void blk_requeue_request(request_queue_t *q, struct request *rq) | 2078 | void blk_requeue_request(request_queue_t *q, struct request *rq) |
2079 | { | 2079 | { |
2080 | if (blk_rq_tagged(rq)) | 2080 | if (blk_rq_tagged(rq)) |
2081 | blk_queue_end_tag(q, rq); | 2081 | blk_queue_end_tag(q, rq); |
2082 | 2082 | ||
2083 | elv_requeue_request(q, rq); | 2083 | elv_requeue_request(q, rq); |
2084 | } | 2084 | } |
2085 | 2085 | ||
2086 | EXPORT_SYMBOL(blk_requeue_request); | 2086 | EXPORT_SYMBOL(blk_requeue_request); |
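Illustration only, assuming the elv_next_request()-style request_fn of this era; example_hw_send() is a hypothetical driver helper.

static void example_request_fn(request_queue_t *q)
{
        struct request *rq;

        /* request_fn is entered with q->queue_lock held */
        while ((rq = elv_next_request(q)) != NULL) {
                blkdev_dequeue_request(rq);

                if (example_hw_send(rq) != 0) {
                        /*
                         * Controller refused the command: put the request
                         * back on the queue and retry later.
                         */
                        blk_requeue_request(q, rq);
                        break;
                }
        }
}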
2087 | 2087 | ||
2088 | /** | 2088 | /** |
2089 | * blk_insert_request - insert a special request into a request queue | 2089 | * blk_insert_request - insert a special request into a request queue |
2090 | * @q: request queue where request should be inserted | 2090 | * @q: request queue where request should be inserted |
2091 | * @rq: request to be inserted | 2091 | * @rq: request to be inserted |
2092 | * @at_head: insert request at head or tail of queue | 2092 | * @at_head: insert request at head or tail of queue |
2093 | * @data: private data | 2093 | * @data: private data |
2094 | * | 2094 | * |
2095 | * Description: | 2095 | * Description: |
2096 | * Many block devices need to execute commands asynchronously, so they don't | 2096 | * Many block devices need to execute commands asynchronously, so they don't |
2097 | * block the whole kernel from preemption during request execution. This is | 2097 | * block the whole kernel from preemption during request execution. This is |
2098 | * accomplished normally by inserting artificial requests tagged as | 2098 | * accomplished normally by inserting artificial requests tagged as |
2099 | * REQ_SPECIAL into the corresponding request queue, and letting them be | 2099 | * REQ_SPECIAL into the corresponding request queue, and letting them be |
2100 | * scheduled for actual execution by the request queue. | 2100 | * scheduled for actual execution by the request queue. |
2101 | * | 2101 | * |
2102 | * We have the option of inserting at the head or the tail of the queue. | 2102 | * We have the option of inserting at the head or the tail of the queue. |
2103 | * Typically we use the tail for new ioctls and so forth. We use the head | 2103 | * Typically we use the tail for new ioctls and so forth. We use the head |
2104 | * of the queue for things like a QUEUE_FULL message from a device, or a | 2104 | * of the queue for things like a QUEUE_FULL message from a device, or a |
2105 | * host that is unable to accept a particular command. | 2105 | * host that is unable to accept a particular command. |
2106 | */ | 2106 | */ |
2107 | void blk_insert_request(request_queue_t *q, struct request *rq, | 2107 | void blk_insert_request(request_queue_t *q, struct request *rq, |
2108 | int at_head, void *data) | 2108 | int at_head, void *data) |
2109 | { | 2109 | { |
2110 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | 2110 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; |
2111 | unsigned long flags; | 2111 | unsigned long flags; |
2112 | 2112 | ||
2113 | /* | 2113 | /* |
2114 | * tell I/O scheduler that this isn't a regular read/write (ie it | 2114 | * tell I/O scheduler that this isn't a regular read/write (ie it |
2115 | * must not attempt merges on this) and that it acts as a soft | 2115 | * must not attempt merges on this) and that it acts as a soft |
2116 | * barrier | 2116 | * barrier |
2117 | */ | 2117 | */ |
2118 | rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER; | 2118 | rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER; |
2119 | 2119 | ||
2120 | rq->special = data; | 2120 | rq->special = data; |
2121 | 2121 | ||
2122 | spin_lock_irqsave(q->queue_lock, flags); | 2122 | spin_lock_irqsave(q->queue_lock, flags); |
2123 | 2123 | ||
2124 | /* | 2124 | /* |
2125 | * If command is tagged, release the tag | 2125 | * If command is tagged, release the tag |
2126 | */ | 2126 | */ |
2127 | if (blk_rq_tagged(rq)) | 2127 | if (blk_rq_tagged(rq)) |
2128 | blk_queue_end_tag(q, rq); | 2128 | blk_queue_end_tag(q, rq); |
2129 | 2129 | ||
2130 | drive_stat_acct(rq, rq->nr_sectors, 1); | 2130 | drive_stat_acct(rq, rq->nr_sectors, 1); |
2131 | __elv_add_request(q, rq, where, 0); | 2131 | __elv_add_request(q, rq, where, 0); |
2132 | 2132 | ||
2133 | if (blk_queue_plugged(q)) | 2133 | if (blk_queue_plugged(q)) |
2134 | __generic_unplug_device(q); | 2134 | __generic_unplug_device(q); |
2135 | else | 2135 | else |
2136 | q->request_fn(q); | 2136 | q->request_fn(q); |
2137 | spin_unlock_irqrestore(q->queue_lock, flags); | 2137 | spin_unlock_irqrestore(q->queue_lock, flags); |
2138 | } | 2138 | } |
2139 | 2139 | ||
2140 | EXPORT_SYMBOL(blk_insert_request); | 2140 | EXPORT_SYMBOL(blk_insert_request); |
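A rough sketch of how a driver might use this, assuming process context; struct example_cmd and the surrounding names are hypothetical.

static int example_send_special(request_queue_t *q, struct example_cmd *cmd)
{
        struct request *rq;

        rq = blk_get_request(q, WRITE, GFP_KERNEL);
        if (!rq)
                return -ENOMEM;

        /*
         * blk_insert_request() marks the request REQ_SPECIAL, stores cmd
         * in rq->special and kicks the queue; the driver's request_fn
         * later recognises the flag and picks cmd back up.
         */
        blk_insert_request(q, rq, 1, cmd);
        return 0;
}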
2141 | 2141 | ||
2142 | /** | 2142 | /** |
2143 | * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage | 2143 | * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage |
2144 | * @q: request queue where request should be inserted | 2144 | * @q: request queue where request should be inserted |
2145 | * @rq: request structure to fill | 2145 | * @rq: request structure to fill |
2146 | * @ubuf: the user buffer | 2146 | * @ubuf: the user buffer |
2147 | * @len: length of user data | 2147 | * @len: length of user data |
2148 | * | 2148 | * |
2149 | * Description: | 2149 | * Description: |
2150 | * Data will be mapped directly for zero copy io, if possible. Otherwise | 2150 | * Data will be mapped directly for zero copy io, if possible. Otherwise |
2151 | * a kernel bounce buffer is used. | 2151 | * a kernel bounce buffer is used. |
2152 | * | 2152 | * |
2153 | * A matching blk_rq_unmap_user() must be issued at the end of io, while | 2153 | * A matching blk_rq_unmap_user() must be issued at the end of io, while |
2154 | * still in process context. | 2154 | * still in process context. |
2155 | * | 2155 | * |
2156 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() | 2156 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() |
2157 | * before being submitted to the device, as pages mapped may be out of | 2157 | * before being submitted to the device, as pages mapped may be out of |
2158 | * reach. It's the caller's responsibility to make sure this happens. The | 2158 | * reach. It's the caller's responsibility to make sure this happens. The |
2159 | * original bio must be passed back in to blk_rq_unmap_user() for proper | 2159 | * original bio must be passed back in to blk_rq_unmap_user() for proper |
2160 | * unmapping. | 2160 | * unmapping. |
2161 | */ | 2161 | */ |
2162 | int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf, | 2162 | int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf, |
2163 | unsigned int len) | 2163 | unsigned int len) |
2164 | { | 2164 | { |
2165 | unsigned long uaddr; | 2165 | unsigned long uaddr; |
2166 | struct bio *bio; | 2166 | struct bio *bio; |
2167 | int reading; | 2167 | int reading; |
2168 | 2168 | ||
2169 | if (len > (q->max_hw_sectors << 9)) | 2169 | if (len > (q->max_hw_sectors << 9)) |
2170 | return -EINVAL; | 2170 | return -EINVAL; |
2171 | if (!len || !ubuf) | 2171 | if (!len || !ubuf) |
2172 | return -EINVAL; | 2172 | return -EINVAL; |
2173 | 2173 | ||
2174 | reading = rq_data_dir(rq) == READ; | 2174 | reading = rq_data_dir(rq) == READ; |
2175 | 2175 | ||
2176 | /* | 2176 | /* |
2177 | * if alignment requirement is satisfied, map in user pages for | 2177 | * if alignment requirement is satisfied, map in user pages for |
2178 | * direct dma. else, set up kernel bounce buffers | 2178 | * direct dma. else, set up kernel bounce buffers |
2179 | */ | 2179 | */ |
2180 | uaddr = (unsigned long) ubuf; | 2180 | uaddr = (unsigned long) ubuf; |
2181 | if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q))) | 2181 | if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q))) |
2182 | bio = bio_map_user(q, NULL, uaddr, len, reading); | 2182 | bio = bio_map_user(q, NULL, uaddr, len, reading); |
2183 | else | 2183 | else |
2184 | bio = bio_copy_user(q, uaddr, len, reading); | 2184 | bio = bio_copy_user(q, uaddr, len, reading); |
2185 | 2185 | ||
2186 | if (!IS_ERR(bio)) { | 2186 | if (!IS_ERR(bio)) { |
2187 | rq->bio = rq->biotail = bio; | 2187 | rq->bio = rq->biotail = bio; |
2188 | blk_rq_bio_prep(q, rq, bio); | 2188 | blk_rq_bio_prep(q, rq, bio); |
2189 | 2189 | ||
2190 | rq->buffer = rq->data = NULL; | 2190 | rq->buffer = rq->data = NULL; |
2191 | rq->data_len = len; | 2191 | rq->data_len = len; |
2192 | return 0; | 2192 | return 0; |
2193 | } | 2193 | } |
2194 | 2194 | ||
2195 | /* | 2195 | /* |
2196 | * bio is the err-ptr | 2196 | * bio is the err-ptr |
2197 | */ | 2197 | */ |
2198 | return PTR_ERR(bio); | 2198 | return PTR_ERR(bio); |
2199 | } | 2199 | } |
2200 | 2200 | ||
2201 | EXPORT_SYMBOL(blk_rq_map_user); | 2201 | EXPORT_SYMBOL(blk_rq_map_user); |
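A hedged sketch of the usual map/execute/unmap cycle (roughly what an SG_IO-style ioctl does); the passthrough command setup is elided and example_user_io() is made up.

static int example_user_io(request_queue_t *q, struct gendisk *disk,
                           void __user *ubuf, unsigned int len)
{
        struct request *rq;
        struct bio *bio;
        int err;

        rq = blk_get_request(q, READ, GFP_KERNEL);
        if (!rq)
                return -ENOMEM;

        /* ... set up the passthrough command (REQ_BLOCK_PC etc.) here ... */

        err = blk_rq_map_user(q, rq, ubuf, len);
        if (err)
                goto out;

        /* remember the mapped bio; the unmap must see the original bio */
        bio = rq->bio;

        err = blk_execute_rq(q, disk, rq, 0);

        blk_rq_unmap_user(bio, len);
out:
        blk_put_request(rq);
        return err;
}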
2202 | 2202 | ||
2203 | /** | 2203 | /** |
2204 | * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage | 2204 | * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage |
2205 | * @q: request queue where request should be inserted | 2205 | * @q: request queue where request should be inserted |
2206 | * @rq: request to map data to | 2206 | * @rq: request to map data to |
2207 | * @iov: pointer to the iovec | 2207 | * @iov: pointer to the iovec |
2208 | * @iov_count: number of elements in the iovec | 2208 | * @iov_count: number of elements in the iovec |
2209 | * | 2209 | * |
2210 | * Description: | 2210 | * Description: |
2211 | * Data will be mapped directly for zero copy io, if possible. Otherwise | 2211 | * Data will be mapped directly for zero copy io, if possible. Otherwise |
2212 | * a kernel bounce buffer is used. | 2212 | * a kernel bounce buffer is used. |
2213 | * | 2213 | * |
2214 | * A matching blk_rq_unmap_user() must be issued at the end of io, while | 2214 | * A matching blk_rq_unmap_user() must be issued at the end of io, while |
2215 | * still in process context. | 2215 | * still in process context. |
2216 | * | 2216 | * |
2217 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() | 2217 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() |
2218 | * before being submitted to the device, as pages mapped may be out of | 2218 | * before being submitted to the device, as pages mapped may be out of |
2219 | * reach. It's the callers responsibility to make sure this happens. The | 2219 | * reach. It's the callers responsibility to make sure this happens. The |
2220 | * original bio must be passed back in to blk_rq_unmap_user() for proper | 2220 | * original bio must be passed back in to blk_rq_unmap_user() for proper |
2221 | * unmapping. | 2221 | * unmapping. |
2222 | */ | 2222 | */ |
2223 | int blk_rq_map_user_iov(request_queue_t *q, struct request *rq, | 2223 | int blk_rq_map_user_iov(request_queue_t *q, struct request *rq, |
2224 | struct sg_iovec *iov, int iov_count) | 2224 | struct sg_iovec *iov, int iov_count) |
2225 | { | 2225 | { |
2226 | struct bio *bio; | 2226 | struct bio *bio; |
2227 | 2227 | ||
2228 | if (!iov || iov_count <= 0) | 2228 | if (!iov || iov_count <= 0) |
2229 | return -EINVAL; | 2229 | return -EINVAL; |
2230 | 2230 | ||
2231 | /* we don't allow misaligned data like bio_map_user() does. If the | 2231 | /* we don't allow misaligned data like bio_map_user() does. If the |
2232 | * user is using sg, they're expected to know the alignment constraints | 2232 | * user is using sg, they're expected to know the alignment constraints |
2233 | * and respect them accordingly */ | 2233 | * and respect them accordingly */ |
2234 | bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ); | 2234 | bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ); |
2235 | if (IS_ERR(bio)) | 2235 | if (IS_ERR(bio)) |
2236 | return PTR_ERR(bio); | 2236 | return PTR_ERR(bio); |
2237 | 2237 | ||
2238 | rq->bio = rq->biotail = bio; | 2238 | rq->bio = rq->biotail = bio; |
2239 | blk_rq_bio_prep(q, rq, bio); | 2239 | blk_rq_bio_prep(q, rq, bio); |
2240 | rq->buffer = rq->data = NULL; | 2240 | rq->buffer = rq->data = NULL; |
2241 | rq->data_len = bio->bi_size; | 2241 | rq->data_len = bio->bi_size; |
2242 | return 0; | 2242 | return 0; |
2243 | } | 2243 | } |
2244 | 2244 | ||
2245 | EXPORT_SYMBOL(blk_rq_map_user_iov); | 2245 | EXPORT_SYMBOL(blk_rq_map_user_iov); |
2246 | 2246 | ||
2247 | /** | 2247 | /** |
2248 | * blk_rq_unmap_user - unmap a request with user data | 2248 | * blk_rq_unmap_user - unmap a request with user data |
2249 | * @bio: bio to be unmapped | 2249 | * @bio: bio to be unmapped |
2250 | * @ulen: length of user buffer | 2250 | * @ulen: length of user buffer |
2251 | * | 2251 | * |
2252 | * Description: | 2252 | * Description: |
2253 | * Unmap a bio previously mapped by blk_rq_map_user(). | 2253 | * Unmap a bio previously mapped by blk_rq_map_user(). |
2254 | */ | 2254 | */ |
2255 | int blk_rq_unmap_user(struct bio *bio, unsigned int ulen) | 2255 | int blk_rq_unmap_user(struct bio *bio, unsigned int ulen) |
2256 | { | 2256 | { |
2257 | int ret = 0; | 2257 | int ret = 0; |
2258 | 2258 | ||
2259 | if (bio) { | 2259 | if (bio) { |
2260 | if (bio_flagged(bio, BIO_USER_MAPPED)) | 2260 | if (bio_flagged(bio, BIO_USER_MAPPED)) |
2261 | bio_unmap_user(bio); | 2261 | bio_unmap_user(bio); |
2262 | else | 2262 | else |
2263 | ret = bio_uncopy_user(bio); | 2263 | ret = bio_uncopy_user(bio); |
2264 | } | 2264 | } |
2265 | 2265 | ||
2266 | return 0; | 2266 | return 0; |
2267 | } | 2267 | } |
2268 | 2268 | ||
2269 | EXPORT_SYMBOL(blk_rq_unmap_user); | 2269 | EXPORT_SYMBOL(blk_rq_unmap_user); |
2270 | 2270 | ||
2271 | /** | 2271 | /** |
2272 | * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage | 2272 | * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage |
2273 | * @q: request queue where request should be inserted | 2273 | * @q: request queue where request should be inserted |
2274 | * @rq: request to fill | 2274 | * @rq: request to fill |
2275 | * @kbuf: the kernel buffer | 2275 | * @kbuf: the kernel buffer |
2276 | * @len: length of kernel data | 2276 | * @len: length of kernel data |
2277 | * @gfp_mask: memory allocation flags | 2277 | * @gfp_mask: memory allocation flags |
2278 | */ | 2278 | */ |
2279 | int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, | 2279 | int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, |
2280 | unsigned int len, gfp_t gfp_mask) | 2280 | unsigned int len, gfp_t gfp_mask) |
2281 | { | 2281 | { |
2282 | struct bio *bio; | 2282 | struct bio *bio; |
2283 | 2283 | ||
2284 | if (len > (q->max_hw_sectors << 9)) | 2284 | if (len > (q->max_hw_sectors << 9)) |
2285 | return -EINVAL; | 2285 | return -EINVAL; |
2286 | if (!len || !kbuf) | 2286 | if (!len || !kbuf) |
2287 | return -EINVAL; | 2287 | return -EINVAL; |
2288 | 2288 | ||
2289 | bio = bio_map_kern(q, kbuf, len, gfp_mask); | 2289 | bio = bio_map_kern(q, kbuf, len, gfp_mask); |
2290 | if (IS_ERR(bio)) | 2290 | if (IS_ERR(bio)) |
2291 | return PTR_ERR(bio); | 2291 | return PTR_ERR(bio); |
2292 | 2292 | ||
2293 | if (rq_data_dir(rq) == WRITE) | 2293 | if (rq_data_dir(rq) == WRITE) |
2294 | bio->bi_rw |= (1 << BIO_RW); | 2294 | bio->bi_rw |= (1 << BIO_RW); |
2295 | 2295 | ||
2296 | rq->bio = rq->biotail = bio; | 2296 | rq->bio = rq->biotail = bio; |
2297 | blk_rq_bio_prep(q, rq, bio); | 2297 | blk_rq_bio_prep(q, rq, bio); |
2298 | 2298 | ||
2299 | rq->buffer = rq->data = NULL; | 2299 | rq->buffer = rq->data = NULL; |
2300 | rq->data_len = len; | 2300 | rq->data_len = len; |
2301 | return 0; | 2301 | return 0; |
2302 | } | 2302 | } |
2303 | 2303 | ||
2304 | EXPORT_SYMBOL(blk_rq_map_kern); | 2304 | EXPORT_SYMBOL(blk_rq_map_kern); |
2305 | 2305 | ||
2306 | /** | 2306 | /** |
2307 | * blk_execute_rq_nowait - insert a request into queue for execution | 2307 | * blk_execute_rq_nowait - insert a request into queue for execution |
2308 | * @q: queue to insert the request in | 2308 | * @q: queue to insert the request in |
2309 | * @bd_disk: matching gendisk | 2309 | * @bd_disk: matching gendisk |
2310 | * @rq: request to insert | 2310 | * @rq: request to insert |
2311 | * @at_head: insert request at head or tail of queue | 2311 | * @at_head: insert request at head or tail of queue |
2312 | * @done: I/O completion handler | 2312 | * @done: I/O completion handler |
2313 | * | 2313 | * |
2314 | * Description: | 2314 | * Description: |
2315 | * Insert a fully prepared request at the back of the io scheduler queue | 2315 | * Insert a fully prepared request at the back of the io scheduler queue |
2316 | * for execution. Don't wait for completion. | 2316 | * for execution. Don't wait for completion. |
2317 | */ | 2317 | */ |
2318 | void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, | 2318 | void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, |
2319 | struct request *rq, int at_head, | 2319 | struct request *rq, int at_head, |
2320 | void (*done)(struct request *)) | 2320 | void (*done)(struct request *)) |
2321 | { | 2321 | { |
2322 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | 2322 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; |
2323 | 2323 | ||
2324 | rq->rq_disk = bd_disk; | 2324 | rq->rq_disk = bd_disk; |
2325 | rq->flags |= REQ_NOMERGE; | 2325 | rq->flags |= REQ_NOMERGE; |
2326 | rq->end_io = done; | 2326 | rq->end_io = done; |
2327 | elv_add_request(q, rq, where, 1); | 2327 | elv_add_request(q, rq, where, 1); |
2328 | generic_unplug_device(q); | 2328 | generic_unplug_device(q); |
2329 | } | 2329 | } |
2330 | 2330 | ||
2331 | EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); | 2331 | EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); |
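For the asynchronous variant, the done() callback runs from the completion path with the queue lock held (as blk_end_sync_rq below relies on), so it must not sleep. A hedged sketch with made-up example_ names:

static void example_done(struct request *rq)
{
        /* inspect rq->errors here if needed, then drop the request */
        __blk_put_request(rq->q, rq);
}

static void example_submit_async(request_queue_t *q, struct gendisk *disk,
                                 struct request *rq)
{
        /* at_head == 1 jumps the queue, e.g. for error-handling commands */
        blk_execute_rq_nowait(q, disk, rq, 1, example_done);
}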
2332 | 2332 | ||
2333 | /** | 2333 | /** |
2334 | * blk_execute_rq - insert a request into queue for execution | 2334 | * blk_execute_rq - insert a request into queue for execution |
2335 | * @q: queue to insert the request in | 2335 | * @q: queue to insert the request in |
2336 | * @bd_disk: matching gendisk | 2336 | * @bd_disk: matching gendisk |
2337 | * @rq: request to insert | 2337 | * @rq: request to insert |
2338 | * @at_head: insert request at head or tail of queue | 2338 | * @at_head: insert request at head or tail of queue |
2339 | * | 2339 | * |
2340 | * Description: | 2340 | * Description: |
2341 | * Insert a fully prepared request at the back of the io scheduler queue | 2341 | * Insert a fully prepared request at the back of the io scheduler queue |
2342 | * for execution and wait for completion. | 2342 | * for execution and wait for completion. |
2343 | */ | 2343 | */ |
2344 | int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, | 2344 | int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, |
2345 | struct request *rq, int at_head) | 2345 | struct request *rq, int at_head) |
2346 | { | 2346 | { |
2347 | DECLARE_COMPLETION(wait); | 2347 | DECLARE_COMPLETION(wait); |
2348 | char sense[SCSI_SENSE_BUFFERSIZE]; | 2348 | char sense[SCSI_SENSE_BUFFERSIZE]; |
2349 | int err = 0; | 2349 | int err = 0; |
2350 | 2350 | ||
2351 | /* | 2351 | /* |
2352 | * we need an extra reference to the request, so we can look at | 2352 | * we need an extra reference to the request, so we can look at |
2353 | * it after io completion | 2353 | * it after io completion |
2354 | */ | 2354 | */ |
2355 | rq->ref_count++; | 2355 | rq->ref_count++; |
2356 | 2356 | ||
2357 | if (!rq->sense) { | 2357 | if (!rq->sense) { |
2358 | memset(sense, 0, sizeof(sense)); | 2358 | memset(sense, 0, sizeof(sense)); |
2359 | rq->sense = sense; | 2359 | rq->sense = sense; |
2360 | rq->sense_len = 0; | 2360 | rq->sense_len = 0; |
2361 | } | 2361 | } |
2362 | 2362 | ||
2363 | rq->waiting = &wait; | 2363 | rq->waiting = &wait; |
2364 | blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); | 2364 | blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); |
2365 | wait_for_completion(&wait); | 2365 | wait_for_completion(&wait); |
2366 | rq->waiting = NULL; | 2366 | rq->waiting = NULL; |
2367 | 2367 | ||
2368 | if (rq->errors) | 2368 | if (rq->errors) |
2369 | err = -EIO; | 2369 | err = -EIO; |
2370 | 2370 | ||
2371 | return err; | 2371 | return err; |
2372 | } | 2372 | } |
2373 | 2373 | ||
2374 | EXPORT_SYMBOL(blk_execute_rq); | 2374 | EXPORT_SYMBOL(blk_execute_rq); |
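Tying the pieces together for a kernel buffer, a hedged end-to-end sketch: allocate, map with blk_rq_map_kern(), execute synchronously, release. The command setup a real device would need is again elided, and the example_ name is hypothetical.

static int example_kernel_io(request_queue_t *q, struct gendisk *disk,
                             void *buffer, unsigned int len)
{
        struct request *rq;
        int err;

        rq = blk_get_request(q, READ, GFP_KERNEL);
        if (!rq)
                return -ENOMEM;

        /* ... set up the passthrough command here ... */

        err = blk_rq_map_kern(q, rq, buffer, len, GFP_KERNEL);
        if (!err)
                err = blk_execute_rq(q, disk, rq, 0);   /* -EIO on rq->errors */

        blk_put_request(rq);
        return err;
}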
2375 | 2375 | ||
2376 | /** | 2376 | /** |
2377 | * blkdev_issue_flush - queue a flush | 2377 | * blkdev_issue_flush - queue a flush |
2378 | * @bdev: blockdev to issue flush for | 2378 | * @bdev: blockdev to issue flush for |
2379 | * @error_sector: error sector | 2379 | * @error_sector: error sector |
2380 | * | 2380 | * |
2381 | * Description: | 2381 | * Description: |
2382 | * Issue a flush for the block device in question. Caller can supply | 2382 | * Issue a flush for the block device in question. Caller can supply |
2383 | * room for storing the error offset in case of a flush error, if they | 2383 | * room for storing the error offset in case of a flush error, if they |
2384 | * wish to. Caller must run wait_for_completion() on its own. | 2384 | * wish to. Caller must run wait_for_completion() on its own. |
2385 | */ | 2385 | */ |
2386 | int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) | 2386 | int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) |
2387 | { | 2387 | { |
2388 | request_queue_t *q; | 2388 | request_queue_t *q; |
2389 | 2389 | ||
2390 | if (bdev->bd_disk == NULL) | 2390 | if (bdev->bd_disk == NULL) |
2391 | return -ENXIO; | 2391 | return -ENXIO; |
2392 | 2392 | ||
2393 | q = bdev_get_queue(bdev); | 2393 | q = bdev_get_queue(bdev); |
2394 | if (!q) | 2394 | if (!q) |
2395 | return -ENXIO; | 2395 | return -ENXIO; |
2396 | if (!q->issue_flush_fn) | 2396 | if (!q->issue_flush_fn) |
2397 | return -EOPNOTSUPP; | 2397 | return -EOPNOTSUPP; |
2398 | 2398 | ||
2399 | return q->issue_flush_fn(q, bdev->bd_disk, error_sector); | 2399 | return q->issue_flush_fn(q, bdev->bd_disk, error_sector); |
2400 | } | 2400 | } |
2401 | 2401 | ||
2402 | EXPORT_SYMBOL(blkdev_issue_flush); | 2402 | EXPORT_SYMBOL(blkdev_issue_flush); |
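Usage is a single call; a short sketch, assuming bdev is an already-opened block device:

static int example_flush_cache(struct block_device *bdev)
{
        sector_t error_sector;
        int err;

        err = blkdev_issue_flush(bdev, &error_sector);
        if (err == -EOPNOTSUPP)
                /* no ->issue_flush_fn: nothing to flush on this device */
                err = 0;
        return err;
}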
2403 | 2403 | ||
2404 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io) | 2404 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io) |
2405 | { | 2405 | { |
2406 | int rw = rq_data_dir(rq); | 2406 | int rw = rq_data_dir(rq); |
2407 | 2407 | ||
2408 | if (!blk_fs_request(rq) || !rq->rq_disk) | 2408 | if (!blk_fs_request(rq) || !rq->rq_disk) |
2409 | return; | 2409 | return; |
2410 | 2410 | ||
2411 | if (!new_io) { | 2411 | if (!new_io) { |
2412 | __disk_stat_inc(rq->rq_disk, merges[rw]); | 2412 | __disk_stat_inc(rq->rq_disk, merges[rw]); |
2413 | } else { | 2413 | } else { |
2414 | disk_round_stats(rq->rq_disk); | 2414 | disk_round_stats(rq->rq_disk); |
2415 | rq->rq_disk->in_flight++; | 2415 | rq->rq_disk->in_flight++; |
2416 | } | 2416 | } |
2417 | } | 2417 | } |
2418 | 2418 | ||
2419 | /* | 2419 | /* |
2420 | * add-request adds a request to the linked list. | 2420 | * add-request adds a request to the linked list. |
2421 | * queue lock is held and interrupts disabled, as we muck with the | 2421 | * queue lock is held and interrupts disabled, as we muck with the |
2422 | * request queue list. | 2422 | * request queue list. |
2423 | */ | 2423 | */ |
2424 | static inline void add_request(request_queue_t * q, struct request * req) | 2424 | static inline void add_request(request_queue_t * q, struct request * req) |
2425 | { | 2425 | { |
2426 | drive_stat_acct(req, req->nr_sectors, 1); | 2426 | drive_stat_acct(req, req->nr_sectors, 1); |
2427 | 2427 | ||
2428 | if (q->activity_fn) | 2428 | if (q->activity_fn) |
2429 | q->activity_fn(q->activity_data, rq_data_dir(req)); | 2429 | q->activity_fn(q->activity_data, rq_data_dir(req)); |
2430 | 2430 | ||
2431 | /* | 2431 | /* |
2432 | * elevator indicated where it wants this request to be | 2432 | * elevator indicated where it wants this request to be |
2433 | * inserted at elevator_merge time | 2433 | * inserted at elevator_merge time |
2434 | */ | 2434 | */ |
2435 | __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); | 2435 | __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); |
2436 | } | 2436 | } |
2437 | 2437 | ||
2438 | /* | 2438 | /* |
2439 | * disk_round_stats() - Round off the performance stats on a struct | 2439 | * disk_round_stats() - Round off the performance stats on a struct |
2440 | * disk_stats. | 2440 | * disk_stats. |
2441 | * | 2441 | * |
2442 | * The average IO queue length and utilisation statistics are maintained | 2442 | * The average IO queue length and utilisation statistics are maintained |
2443 | * by observing the current state of the queue length and the amount of | 2443 | * by observing the current state of the queue length and the amount of |
2444 | * time it has been in this state for. | 2444 | * time it has been in this state for. |
2445 | * | 2445 | * |
2446 | * Normally, that accounting is done on IO completion, but that can result | 2446 | * Normally, that accounting is done on IO completion, but that can result |
2447 | * in more than a second's worth of IO being accounted for within any one | 2447 | * in more than a second's worth of IO being accounted for within any one |
2448 | * second, leading to >100% utilisation. To deal with that, we call this | 2448 | * second, leading to >100% utilisation. To deal with that, we call this |
2449 | * function to do a round-off before returning the results when reading | 2449 | * function to do a round-off before returning the results when reading |
2450 | * /proc/diskstats. This accounts immediately for all queue usage up to | 2450 | * /proc/diskstats. This accounts immediately for all queue usage up to |
2451 | * the current jiffies and restarts the counters again. | 2451 | * the current jiffies and restarts the counters again. |
2452 | */ | 2452 | */ |
2453 | void disk_round_stats(struct gendisk *disk) | 2453 | void disk_round_stats(struct gendisk *disk) |
2454 | { | 2454 | { |
2455 | unsigned long now = jiffies; | 2455 | unsigned long now = jiffies; |
2456 | 2456 | ||
2457 | if (now == disk->stamp) | 2457 | if (now == disk->stamp) |
2458 | return; | 2458 | return; |
2459 | 2459 | ||
2460 | if (disk->in_flight) { | 2460 | if (disk->in_flight) { |
2461 | __disk_stat_add(disk, time_in_queue, | 2461 | __disk_stat_add(disk, time_in_queue, |
2462 | disk->in_flight * (now - disk->stamp)); | 2462 | disk->in_flight * (now - disk->stamp)); |
2463 | __disk_stat_add(disk, io_ticks, (now - disk->stamp)); | 2463 | __disk_stat_add(disk, io_ticks, (now - disk->stamp)); |
2464 | } | 2464 | } |
2465 | disk->stamp = now; | 2465 | disk->stamp = now; |
2466 | } | 2466 | } |
2467 | 2467 | ||
2468 | /* | 2468 | /* |
2469 | * queue lock must be held | 2469 | * queue lock must be held |
2470 | */ | 2470 | */ |
2471 | void __blk_put_request(request_queue_t *q, struct request *req) | 2471 | void __blk_put_request(request_queue_t *q, struct request *req) |
2472 | { | 2472 | { |
2473 | struct request_list *rl = req->rl; | 2473 | struct request_list *rl = req->rl; |
2474 | 2474 | ||
2475 | if (unlikely(!q)) | 2475 | if (unlikely(!q)) |
2476 | return; | 2476 | return; |
2477 | if (unlikely(--req->ref_count)) | 2477 | if (unlikely(--req->ref_count)) |
2478 | return; | 2478 | return; |
2479 | 2479 | ||
2480 | elv_completed_request(q, req); | 2480 | elv_completed_request(q, req); |
2481 | 2481 | ||
2482 | req->rq_status = RQ_INACTIVE; | 2482 | req->rq_status = RQ_INACTIVE; |
2483 | req->rl = NULL; | 2483 | req->rl = NULL; |
2484 | 2484 | ||
2485 | /* | 2485 | /* |
2486 | * Request may not have originated from ll_rw_blk. If not, | 2486 | * Request may not have originated from ll_rw_blk. If not, |
2487 | * it didn't come out of our reserved rq pools | 2487 | * it didn't come out of our reserved rq pools |
2488 | */ | 2488 | */ |
2489 | if (rl) { | 2489 | if (rl) { |
2490 | int rw = rq_data_dir(req); | 2490 | int rw = rq_data_dir(req); |
2491 | int priv = req->flags & REQ_ELVPRIV; | 2491 | int priv = req->flags & REQ_ELVPRIV; |
2492 | 2492 | ||
2493 | BUG_ON(!list_empty(&req->queuelist)); | 2493 | BUG_ON(!list_empty(&req->queuelist)); |
2494 | 2494 | ||
2495 | blk_free_request(q, req); | 2495 | blk_free_request(q, req); |
2496 | freed_request(q, rw, priv); | 2496 | freed_request(q, rw, priv); |
2497 | } | 2497 | } |
2498 | } | 2498 | } |
2499 | 2499 | ||
2500 | EXPORT_SYMBOL_GPL(__blk_put_request); | 2500 | EXPORT_SYMBOL_GPL(__blk_put_request); |
2501 | 2501 | ||
2502 | void blk_put_request(struct request *req) | 2502 | void blk_put_request(struct request *req) |
2503 | { | 2503 | { |
2504 | unsigned long flags; | 2504 | unsigned long flags; |
2505 | request_queue_t *q = req->q; | 2505 | request_queue_t *q = req->q; |
2506 | 2506 | ||
2507 | /* | 2507 | /* |
2508 | * Gee, IDE calls in w/ NULL q. Fix IDE and remove the | 2508 | * Gee, IDE calls in w/ NULL q. Fix IDE and remove the |
2509 | * following if (q) test. | 2509 | * following if (q) test. |
2510 | */ | 2510 | */ |
2511 | if (q) { | 2511 | if (q) { |
2512 | spin_lock_irqsave(q->queue_lock, flags); | 2512 | spin_lock_irqsave(q->queue_lock, flags); |
2513 | __blk_put_request(q, req); | 2513 | __blk_put_request(q, req); |
2514 | spin_unlock_irqrestore(q->queue_lock, flags); | 2514 | spin_unlock_irqrestore(q->queue_lock, flags); |
2515 | } | 2515 | } |
2516 | } | 2516 | } |
2517 | 2517 | ||
2518 | EXPORT_SYMBOL(blk_put_request); | 2518 | EXPORT_SYMBOL(blk_put_request); |
2519 | 2519 | ||
2520 | /** | 2520 | /** |
2521 | * blk_end_sync_rq - executes a completion event on a request | 2521 | * blk_end_sync_rq - executes a completion event on a request |
2522 | * @rq: request to complete | 2522 | * @rq: request to complete |
2523 | */ | 2523 | */ |
2524 | void blk_end_sync_rq(struct request *rq) | 2524 | void blk_end_sync_rq(struct request *rq) |
2525 | { | 2525 | { |
2526 | struct completion *waiting = rq->waiting; | 2526 | struct completion *waiting = rq->waiting; |
2527 | 2527 | ||
2528 | rq->waiting = NULL; | 2528 | rq->waiting = NULL; |
2529 | __blk_put_request(rq->q, rq); | 2529 | __blk_put_request(rq->q, rq); |
2530 | 2530 | ||
2531 | /* | 2531 | /* |
2532 | * complete last, if this is a stack request the process (and thus | 2532 | * complete last, if this is a stack request the process (and thus |
2533 | * the rq pointer) could be invalid right after this complete() | 2533 | * the rq pointer) could be invalid right after this complete() |
2534 | */ | 2534 | */ |
2535 | complete(waiting); | 2535 | complete(waiting); |
2536 | } | 2536 | } |
2537 | EXPORT_SYMBOL(blk_end_sync_rq); | 2537 | EXPORT_SYMBOL(blk_end_sync_rq); |
2538 | 2538 | ||
2539 | /** | 2539 | /** |
2540 | * blk_congestion_wait - wait for a queue to become uncongested | 2540 | * blk_congestion_wait - wait for a queue to become uncongested |
2541 | * @rw: READ or WRITE | 2541 | * @rw: READ or WRITE |
2542 | * @timeout: timeout in jiffies | 2542 | * @timeout: timeout in jiffies |
2543 | * | 2543 | * |
2544 | * Waits for up to @timeout jiffies for a queue (any queue) to exit congestion. | 2544 | * Waits for up to @timeout jiffies for a queue (any queue) to exit congestion. |
2545 | * If no queues are congested then just wait for the next request to be | 2545 | * If no queues are congested then just wait for the next request to be |
2546 | * returned. | 2546 | * returned. |
2547 | */ | 2547 | */ |
2548 | long blk_congestion_wait(int rw, long timeout) | 2548 | long blk_congestion_wait(int rw, long timeout) |
2549 | { | 2549 | { |
2550 | long ret; | 2550 | long ret; |
2551 | DEFINE_WAIT(wait); | 2551 | DEFINE_WAIT(wait); |
2552 | wait_queue_head_t *wqh = &congestion_wqh[rw]; | 2552 | wait_queue_head_t *wqh = &congestion_wqh[rw]; |
2553 | 2553 | ||
2554 | prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); | 2554 | prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); |
2555 | ret = io_schedule_timeout(timeout); | 2555 | ret = io_schedule_timeout(timeout); |
2556 | finish_wait(wqh, &wait); | 2556 | finish_wait(wqh, &wait); |
2557 | return ret; | 2557 | return ret; |
2558 | } | 2558 | } |
2559 | 2559 | ||
2560 | EXPORT_SYMBOL(blk_congestion_wait); | 2560 | EXPORT_SYMBOL(blk_congestion_wait); |
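This is roughly how the VM writeback path throttles itself; a one-line sketch (example_ name made up):

static void example_throttle_writes(void)
{
        /* back off for up to 100ms, or until a write queue uncongests */
        blk_congestion_wait(WRITE, HZ / 10);
}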
2561 | 2561 | ||
2562 | /* | 2562 | /* |
2563 | * Has to be called with the request spinlock acquired | 2563 | * Has to be called with the request spinlock acquired |
2564 | */ | 2564 | */ |
2565 | static int attempt_merge(request_queue_t *q, struct request *req, | 2565 | static int attempt_merge(request_queue_t *q, struct request *req, |
2566 | struct request *next) | 2566 | struct request *next) |
2567 | { | 2567 | { |
2568 | if (!rq_mergeable(req) || !rq_mergeable(next)) | 2568 | if (!rq_mergeable(req) || !rq_mergeable(next)) |
2569 | return 0; | 2569 | return 0; |
2570 | 2570 | ||
2571 | /* | 2571 | /* |
2572 | * not contiguous | 2572 | * not contiguous |
2573 | */ | 2573 | */ |
2574 | if (req->sector + req->nr_sectors != next->sector) | 2574 | if (req->sector + req->nr_sectors != next->sector) |
2575 | return 0; | 2575 | return 0; |
2576 | 2576 | ||
2577 | if (rq_data_dir(req) != rq_data_dir(next) | 2577 | if (rq_data_dir(req) != rq_data_dir(next) |
2578 | || req->rq_disk != next->rq_disk | 2578 | || req->rq_disk != next->rq_disk |
2579 | || next->waiting || next->special) | 2579 | || next->waiting || next->special) |
2580 | return 0; | 2580 | return 0; |
2581 | 2581 | ||
2582 | /* | 2582 | /* |
2583 | * If we are allowed to merge, then append bio list | 2583 | * If we are allowed to merge, then append bio list |
2584 | * from next to rq and release next. merge_requests_fn | 2584 | * from next to rq and release next. merge_requests_fn |
2585 | * will have updated segment counts, update sector | 2585 | * will have updated segment counts, update sector |
2586 | * counts here. | 2586 | * counts here. |
2587 | */ | 2587 | */ |
2588 | if (!q->merge_requests_fn(q, req, next)) | 2588 | if (!q->merge_requests_fn(q, req, next)) |
2589 | return 0; | 2589 | return 0; |
2590 | 2590 | ||
2591 | /* | 2591 | /* |
2592 | * At this point we have either done a back merge | 2592 | * At this point we have either done a back merge |
2593 | * or front merge. We need the smaller start_time of | 2593 | * or front merge. We need the smaller start_time of |
2594 | * the merged requests to be the current request | 2594 | * the merged requests to be the current request |
2595 | * for accounting purposes. | 2595 | * for accounting purposes. |
2596 | */ | 2596 | */ |
2597 | if (time_after(req->start_time, next->start_time)) | 2597 | if (time_after(req->start_time, next->start_time)) |
2598 | req->start_time = next->start_time; | 2598 | req->start_time = next->start_time; |
2599 | 2599 | ||
2600 | req->biotail->bi_next = next->bio; | 2600 | req->biotail->bi_next = next->bio; |
2601 | req->biotail = next->biotail; | 2601 | req->biotail = next->biotail; |
2602 | 2602 | ||
2603 | req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; | 2603 | req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; |
2604 | 2604 | ||
2605 | elv_merge_requests(q, req, next); | 2605 | elv_merge_requests(q, req, next); |
2606 | 2606 | ||
2607 | if (req->rq_disk) { | 2607 | if (req->rq_disk) { |
2608 | disk_round_stats(req->rq_disk); | 2608 | disk_round_stats(req->rq_disk); |
2609 | req->rq_disk->in_flight--; | 2609 | req->rq_disk->in_flight--; |
2610 | } | 2610 | } |
2611 | 2611 | ||
2612 | req->ioprio = ioprio_best(req->ioprio, next->ioprio); | 2612 | req->ioprio = ioprio_best(req->ioprio, next->ioprio); |
2613 | 2613 | ||
2614 | __blk_put_request(q, next); | 2614 | __blk_put_request(q, next); |
2615 | return 1; | 2615 | return 1; |
2616 | } | 2616 | } |
2617 | 2617 | ||
2618 | static inline int attempt_back_merge(request_queue_t *q, struct request *rq) | 2618 | static inline int attempt_back_merge(request_queue_t *q, struct request *rq) |
2619 | { | 2619 | { |
2620 | struct request *next = elv_latter_request(q, rq); | 2620 | struct request *next = elv_latter_request(q, rq); |
2621 | 2621 | ||
2622 | if (next) | 2622 | if (next) |
2623 | return attempt_merge(q, rq, next); | 2623 | return attempt_merge(q, rq, next); |
2624 | 2624 | ||
2625 | return 0; | 2625 | return 0; |
2626 | } | 2626 | } |
2627 | 2627 | ||
2628 | static inline int attempt_front_merge(request_queue_t *q, struct request *rq) | 2628 | static inline int attempt_front_merge(request_queue_t *q, struct request *rq) |
2629 | { | 2629 | { |
2630 | struct request *prev = elv_former_request(q, rq); | 2630 | struct request *prev = elv_former_request(q, rq); |
2631 | 2631 | ||
2632 | if (prev) | 2632 | if (prev) |
2633 | return attempt_merge(q, prev, rq); | 2633 | return attempt_merge(q, prev, rq); |
2634 | 2634 | ||
2635 | return 0; | 2635 | return 0; |
2636 | } | 2636 | } |
2637 | 2637 | ||
2638 | /** | 2638 | /** |
2639 | * blk_attempt_remerge - attempt to remerge active head with next request | 2639 | * blk_attempt_remerge - attempt to remerge active head with next request |
2640 | * @q: The &request_queue_t belonging to the device | 2640 | * @q: The &request_queue_t belonging to the device |
2641 | * @rq: The head request (usually) | 2641 | * @rq: The head request (usually) |
2642 | * | 2642 | * |
2643 | * Description: | 2643 | * Description: |
2644 | * For head-active devices, the queue can easily be unplugged so quickly | 2644 | * For head-active devices, the queue can easily be unplugged so quickly |
2645 | * that proper merging is not done on the front request. This may hurt | 2645 | * that proper merging is not done on the front request. This may hurt |
2646 | * performance greatly for some devices. The block layer cannot safely | 2646 | * performance greatly for some devices. The block layer cannot safely |
2647 | * do merging on that first request for these queues, but the driver can | 2647 | * do merging on that first request for these queues, but the driver can |
2648 | * call this function and make it happen anyway. Only the driver knows | 2648 | * call this function and make it happen anyway. Only the driver knows |
2649 | * when it is safe to do so. | 2649 | * when it is safe to do so. |
2650 | **/ | 2650 | **/ |
2651 | void blk_attempt_remerge(request_queue_t *q, struct request *rq) | 2651 | void blk_attempt_remerge(request_queue_t *q, struct request *rq) |
2652 | { | 2652 | { |
2653 | unsigned long flags; | 2653 | unsigned long flags; |
2654 | 2654 | ||
2655 | spin_lock_irqsave(q->queue_lock, flags); | 2655 | spin_lock_irqsave(q->queue_lock, flags); |
2656 | attempt_back_merge(q, rq); | 2656 | attempt_back_merge(q, rq); |
2657 | spin_unlock_irqrestore(q->queue_lock, flags); | 2657 | spin_unlock_irqrestore(q->queue_lock, flags); |
2658 | } | 2658 | } |
2659 | 2659 | ||
2660 | EXPORT_SYMBOL(blk_attempt_remerge); | 2660 | EXPORT_SYMBOL(blk_attempt_remerge); |
2661 | 2661 | ||
2662 | static int __make_request(request_queue_t *q, struct bio *bio) | 2662 | static int __make_request(request_queue_t *q, struct bio *bio) |
2663 | { | 2663 | { |
2664 | struct request *req; | 2664 | struct request *req; |
2665 | int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync; | 2665 | int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync; |
2666 | unsigned short prio; | 2666 | unsigned short prio; |
2667 | sector_t sector; | 2667 | sector_t sector; |
2668 | 2668 | ||
2669 | sector = bio->bi_sector; | 2669 | sector = bio->bi_sector; |
2670 | nr_sectors = bio_sectors(bio); | 2670 | nr_sectors = bio_sectors(bio); |
2671 | cur_nr_sectors = bio_cur_sectors(bio); | 2671 | cur_nr_sectors = bio_cur_sectors(bio); |
2672 | prio = bio_prio(bio); | 2672 | prio = bio_prio(bio); |
2673 | 2673 | ||
2674 | rw = bio_data_dir(bio); | 2674 | rw = bio_data_dir(bio); |
2675 | sync = bio_sync(bio); | 2675 | sync = bio_sync(bio); |
2676 | 2676 | ||
2677 | /* | 2677 | /* |
2678 | * low level driver can indicate that it wants pages above a | 2678 | * low level driver can indicate that it wants pages above a |
2679 | * certain limit bounced to low memory (ie for highmem, or even | 2679 | * certain limit bounced to low memory (ie for highmem, or even |
2680 | * ISA dma in theory) | 2680 | * ISA dma in theory) |
2681 | */ | 2681 | */ |
2682 | blk_queue_bounce(q, &bio); | 2682 | blk_queue_bounce(q, &bio); |
2683 | 2683 | ||
2684 | spin_lock_prefetch(q->queue_lock); | 2684 | spin_lock_prefetch(q->queue_lock); |
2685 | 2685 | ||
2686 | barrier = bio_barrier(bio); | 2686 | barrier = bio_barrier(bio); |
2687 | if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) { | 2687 | if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) { |
2688 | err = -EOPNOTSUPP; | 2688 | err = -EOPNOTSUPP; |
2689 | goto end_io; | 2689 | goto end_io; |
2690 | } | 2690 | } |
2691 | 2691 | ||
2692 | spin_lock_irq(q->queue_lock); | 2692 | spin_lock_irq(q->queue_lock); |
2693 | 2693 | ||
2694 | if (unlikely(barrier) || elv_queue_empty(q)) | 2694 | if (unlikely(barrier) || elv_queue_empty(q)) |
2695 | goto get_rq; | 2695 | goto get_rq; |
2696 | 2696 | ||
2697 | el_ret = elv_merge(q, &req, bio); | 2697 | el_ret = elv_merge(q, &req, bio); |
2698 | switch (el_ret) { | 2698 | switch (el_ret) { |
2699 | case ELEVATOR_BACK_MERGE: | 2699 | case ELEVATOR_BACK_MERGE: |
2700 | BUG_ON(!rq_mergeable(req)); | 2700 | BUG_ON(!rq_mergeable(req)); |
2701 | 2701 | ||
2702 | if (!q->back_merge_fn(q, req, bio)) | 2702 | if (!q->back_merge_fn(q, req, bio)) |
2703 | break; | 2703 | break; |
2704 | 2704 | ||
2705 | req->biotail->bi_next = bio; | 2705 | req->biotail->bi_next = bio; |
2706 | req->biotail = bio; | 2706 | req->biotail = bio; |
2707 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; | 2707 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; |
2708 | req->ioprio = ioprio_best(req->ioprio, prio); | 2708 | req->ioprio = ioprio_best(req->ioprio, prio); |
2709 | drive_stat_acct(req, nr_sectors, 0); | 2709 | drive_stat_acct(req, nr_sectors, 0); |
2710 | if (!attempt_back_merge(q, req)) | 2710 | if (!attempt_back_merge(q, req)) |
2711 | elv_merged_request(q, req); | 2711 | elv_merged_request(q, req); |
2712 | goto out; | 2712 | goto out; |
2713 | 2713 | ||
2714 | case ELEVATOR_FRONT_MERGE: | 2714 | case ELEVATOR_FRONT_MERGE: |
2715 | BUG_ON(!rq_mergeable(req)); | 2715 | BUG_ON(!rq_mergeable(req)); |
2716 | 2716 | ||
2717 | if (!q->front_merge_fn(q, req, bio)) | 2717 | if (!q->front_merge_fn(q, req, bio)) |
2718 | break; | 2718 | break; |
2719 | 2719 | ||
2720 | bio->bi_next = req->bio; | 2720 | bio->bi_next = req->bio; |
2721 | req->bio = bio; | 2721 | req->bio = bio; |
2722 | 2722 | ||
2723 | /* | 2723 | /* |
2724 | * may not be valid. if the low level driver said | 2724 | * may not be valid. if the low level driver said |
2725 | * it didn't need a bounce buffer then it better | 2725 | * it didn't need a bounce buffer then it better |
2726 | * not touch req->buffer either... | 2726 | * not touch req->buffer either... |
2727 | */ | 2727 | */ |
2728 | req->buffer = bio_data(bio); | 2728 | req->buffer = bio_data(bio); |
2729 | req->current_nr_sectors = cur_nr_sectors; | 2729 | req->current_nr_sectors = cur_nr_sectors; |
2730 | req->hard_cur_sectors = cur_nr_sectors; | 2730 | req->hard_cur_sectors = cur_nr_sectors; |
2731 | req->sector = req->hard_sector = sector; | 2731 | req->sector = req->hard_sector = sector; |
2732 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; | 2732 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; |
2733 | req->ioprio = ioprio_best(req->ioprio, prio); | 2733 | req->ioprio = ioprio_best(req->ioprio, prio); |
2734 | drive_stat_acct(req, nr_sectors, 0); | 2734 | drive_stat_acct(req, nr_sectors, 0); |
2735 | if (!attempt_front_merge(q, req)) | 2735 | if (!attempt_front_merge(q, req)) |
2736 | elv_merged_request(q, req); | 2736 | elv_merged_request(q, req); |
2737 | goto out; | 2737 | goto out; |
2738 | 2738 | ||
2739 | /* ELV_NO_MERGE: elevator says don't/can't merge. */ | 2739 | /* ELV_NO_MERGE: elevator says don't/can't merge. */ |
2740 | default: | 2740 | default: |
2741 | ; | 2741 | ; |
2742 | } | 2742 | } |
2743 | 2743 | ||
2744 | get_rq: | 2744 | get_rq: |
2745 | /* | 2745 | /* |
2746 | * Grab a free request. This might sleep but cannot fail. | 2746 | * Grab a free request. This might sleep but cannot fail. |
2747 | * Returns with the queue unlocked. | 2747 | * Returns with the queue unlocked. |
2748 | */ | 2748 | */ |
2749 | req = get_request_wait(q, rw, bio); | 2749 | req = get_request_wait(q, rw, bio); |
2750 | 2750 | ||
2751 | /* | 2751 | /* |
2752 | * After dropping the lock and possibly sleeping here, our request | 2752 | * After dropping the lock and possibly sleeping here, our request |
2753 | * may now be mergeable after it had proven unmergeable (above). | 2753 | * may now be mergeable after it had proven unmergeable (above). |
2754 | * We don't worry about that case for efficiency. It won't happen | 2754 | * We don't worry about that case for efficiency. It won't happen |
2755 | * often, and the elevators are able to handle it. | 2755 | * often, and the elevators are able to handle it. |
2756 | */ | 2756 | */ |
2757 | 2757 | ||
2758 | req->flags |= REQ_CMD; | 2758 | req->flags |= REQ_CMD; |
2759 | 2759 | ||
2760 | /* | 2760 | /* |
2761 | * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) | 2761 | * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) |
2762 | */ | 2762 | */ |
2763 | if (bio_rw_ahead(bio) || bio_failfast(bio)) | 2763 | if (bio_rw_ahead(bio) || bio_failfast(bio)) |
2764 | req->flags |= REQ_FAILFAST; | 2764 | req->flags |= REQ_FAILFAST; |
2765 | 2765 | ||
2766 | /* | 2766 | /* |
2767 | * REQ_BARRIER implies no merging, but let's make it explicit | 2767 | * REQ_BARRIER implies no merging, but let's make it explicit |
2768 | */ | 2768 | */ |
2769 | if (unlikely(barrier)) | 2769 | if (unlikely(barrier)) |
2770 | req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); | 2770 | req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); |
2771 | 2771 | ||
2772 | req->errors = 0; | 2772 | req->errors = 0; |
2773 | req->hard_sector = req->sector = sector; | 2773 | req->hard_sector = req->sector = sector; |
2774 | req->hard_nr_sectors = req->nr_sectors = nr_sectors; | 2774 | req->hard_nr_sectors = req->nr_sectors = nr_sectors; |
2775 | req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors; | 2775 | req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors; |
2776 | req->nr_phys_segments = bio_phys_segments(q, bio); | 2776 | req->nr_phys_segments = bio_phys_segments(q, bio); |
2777 | req->nr_hw_segments = bio_hw_segments(q, bio); | 2777 | req->nr_hw_segments = bio_hw_segments(q, bio); |
2778 | req->buffer = bio_data(bio); /* see ->buffer comment above */ | 2778 | req->buffer = bio_data(bio); /* see ->buffer comment above */ |
2779 | req->waiting = NULL; | 2779 | req->waiting = NULL; |
2780 | req->bio = req->biotail = bio; | 2780 | req->bio = req->biotail = bio; |
2781 | req->ioprio = prio; | 2781 | req->ioprio = prio; |
2782 | req->rq_disk = bio->bi_bdev->bd_disk; | 2782 | req->rq_disk = bio->bi_bdev->bd_disk; |
2783 | req->start_time = jiffies; | 2783 | req->start_time = jiffies; |
2784 | 2784 | ||
2785 | spin_lock_irq(q->queue_lock); | 2785 | spin_lock_irq(q->queue_lock); |
2786 | if (elv_queue_empty(q)) | 2786 | if (elv_queue_empty(q)) |
2787 | blk_plug_device(q); | 2787 | blk_plug_device(q); |
2788 | add_request(q, req); | 2788 | add_request(q, req); |
2789 | out: | 2789 | out: |
2790 | if (sync) | 2790 | if (sync) |
2791 | __generic_unplug_device(q); | 2791 | __generic_unplug_device(q); |
2792 | 2792 | ||
2793 | spin_unlock_irq(q->queue_lock); | 2793 | spin_unlock_irq(q->queue_lock); |
2794 | return 0; | 2794 | return 0; |
2795 | 2795 | ||
2796 | end_io: | 2796 | end_io: |
2797 | bio_endio(bio, nr_sectors << 9, err); | 2797 | bio_endio(bio, nr_sectors << 9, err); |
2798 | return 0; | 2798 | return 0; |
2799 | } | 2799 | } |
2800 | 2800 | ||
2801 | /* | 2801 | /* |
2802 | * If bio->bi_dev is a partition, remap the location | 2802 | * If bio->bi_dev is a partition, remap the location |
2803 | */ | 2803 | */ |
2804 | static inline void blk_partition_remap(struct bio *bio) | 2804 | static inline void blk_partition_remap(struct bio *bio) |
2805 | { | 2805 | { |
2806 | struct block_device *bdev = bio->bi_bdev; | 2806 | struct block_device *bdev = bio->bi_bdev; |
2807 | 2807 | ||
2808 | if (bdev != bdev->bd_contains) { | 2808 | if (bdev != bdev->bd_contains) { |
2809 | struct hd_struct *p = bdev->bd_part; | 2809 | struct hd_struct *p = bdev->bd_part; |
2810 | const int rw = bio_data_dir(bio); | 2810 | const int rw = bio_data_dir(bio); |
2811 | 2811 | ||
2812 | p->sectors[rw] += bio_sectors(bio); | 2812 | p->sectors[rw] += bio_sectors(bio); |
2813 | p->ios[rw]++; | 2813 | p->ios[rw]++; |
2814 | 2814 | ||
2815 | bio->bi_sector += p->start_sect; | 2815 | bio->bi_sector += p->start_sect; |
2816 | bio->bi_bdev = bdev->bd_contains; | 2816 | bio->bi_bdev = bdev->bd_contains; |
2817 | } | 2817 | } |
2818 | } | 2818 | } |
2819 | 2819 | ||
2820 | static void handle_bad_sector(struct bio *bio) | 2820 | static void handle_bad_sector(struct bio *bio) |
2821 | { | 2821 | { |
2822 | char b[BDEVNAME_SIZE]; | 2822 | char b[BDEVNAME_SIZE]; |
2823 | 2823 | ||
2824 | printk(KERN_INFO "attempt to access beyond end of device\n"); | 2824 | printk(KERN_INFO "attempt to access beyond end of device\n"); |
2825 | printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", | 2825 | printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", |
2826 | bdevname(bio->bi_bdev, b), | 2826 | bdevname(bio->bi_bdev, b), |
2827 | bio->bi_rw, | 2827 | bio->bi_rw, |
2828 | (unsigned long long)bio->bi_sector + bio_sectors(bio), | 2828 | (unsigned long long)bio->bi_sector + bio_sectors(bio), |
2829 | (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); | 2829 | (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); |
2830 | 2830 | ||
2831 | set_bit(BIO_EOF, &bio->bi_flags); | 2831 | set_bit(BIO_EOF, &bio->bi_flags); |
2832 | } | 2832 | } |
2833 | 2833 | ||
2834 | /** | 2834 | /** |
2835 | * generic_make_request: hand a buffer to its device driver for I/O | 2835 | * generic_make_request: hand a buffer to its device driver for I/O |
2836 | * @bio: The bio describing the location in memory and on the device. | 2836 | * @bio: The bio describing the location in memory and on the device. |
2837 | * | 2837 | * |
2838 | * generic_make_request() is used to make I/O requests of block | 2838 | * generic_make_request() is used to make I/O requests of block |
2839 | * devices. It is passed a &struct bio, which describes the I/O that needs | 2839 | * devices. It is passed a &struct bio, which describes the I/O that needs |
2840 | * to be done. | 2840 | * to be done. |
2841 | * | 2841 | * |
2842 | * generic_make_request() does not return any status. The | 2842 | * generic_make_request() does not return any status. The |
2843 | * success/failure status of the request, along with notification of | 2843 | * success/failure status of the request, along with notification of |
2844 | * completion, is delivered asynchronously through the bio->bi_end_io | 2844 | * completion, is delivered asynchronously through the bio->bi_end_io |
2845 | * function described (one day) elsewhere. | 2845 | * function described (one day) elsewhere. |
2846 | * | 2846 | * |
2847 | * The caller of generic_make_request must make sure that bi_io_vec | 2847 | * The caller of generic_make_request must make sure that bi_io_vec |
2848 | * are set to describe the memory buffer, and that bi_dev and bi_sector are | 2848 | * are set to describe the memory buffer, and that bi_dev and bi_sector are |
2849 | * set to describe the device address, and the | 2849 | * set to describe the device address, and the |
2850 | * bi_end_io and optionally bi_private are set to describe how | 2850 | * bi_end_io and optionally bi_private are set to describe how |
2851 | * completion notification should be signaled. | 2851 | * completion notification should be signaled. |
2852 | * | 2852 | * |
2853 | * generic_make_request and the drivers it calls may use bi_next if this | 2853 | * generic_make_request and the drivers it calls may use bi_next if this |
2854 | * bio happens to be merged with someone else, and may change bi_dev and | 2854 | * bio happens to be merged with someone else, and may change bi_dev and |
2855 | * bi_sector for remaps as it sees fit. So the values of these fields | 2855 | * bi_sector for remaps as it sees fit. So the values of these fields |
2856 | * should NOT be depended on after the call to generic_make_request. | 2856 | * should NOT be depended on after the call to generic_make_request. |
2857 | */ | 2857 | */ |
2858 | void generic_make_request(struct bio *bio) | 2858 | void generic_make_request(struct bio *bio) |
2859 | { | 2859 | { |
2860 | request_queue_t *q; | 2860 | request_queue_t *q; |
2861 | sector_t maxsector; | 2861 | sector_t maxsector; |
2862 | int ret, nr_sectors = bio_sectors(bio); | 2862 | int ret, nr_sectors = bio_sectors(bio); |
2863 | 2863 | ||
2864 | might_sleep(); | 2864 | might_sleep(); |
2865 | /* Test device or partition size, when known. */ | 2865 | /* Test device or partition size, when known. */ |
2866 | maxsector = bio->bi_bdev->bd_inode->i_size >> 9; | 2866 | maxsector = bio->bi_bdev->bd_inode->i_size >> 9; |
2867 | if (maxsector) { | 2867 | if (maxsector) { |
2868 | sector_t sector = bio->bi_sector; | 2868 | sector_t sector = bio->bi_sector; |
2869 | 2869 | ||
2870 | if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { | 2870 | if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { |
2871 | /* | 2871 | /* |
2872 | * This may well happen - the kernel calls bread() | 2872 | * This may well happen - the kernel calls bread() |
2873 | * without checking the size of the device, e.g., when | 2873 | * without checking the size of the device, e.g., when |
2874 | * mounting a device. | 2874 | * mounting a device. |
2875 | */ | 2875 | */ |
2876 | handle_bad_sector(bio); | 2876 | handle_bad_sector(bio); |
2877 | goto end_io; | 2877 | goto end_io; |
2878 | } | 2878 | } |
2879 | } | 2879 | } |
2880 | 2880 | ||
2881 | /* | 2881 | /* |
2882 | * Resolve the mapping until finished. (drivers are | 2882 | * Resolve the mapping until finished. (drivers are |
2883 | * still free to implement/resolve their own stacking | 2883 | * still free to implement/resolve their own stacking |
2884 | * by explicitly returning 0) | 2884 | * by explicitly returning 0) |
2885 | * | 2885 | * |
2886 | * NOTE: we don't repeat the blk_size check for each new device. | 2886 | * NOTE: we don't repeat the blk_size check for each new device. |
2887 | * Stacking drivers are expected to know what they are doing. | 2887 | * Stacking drivers are expected to know what they are doing. |
2888 | */ | 2888 | */ |
2889 | do { | 2889 | do { |
2890 | char b[BDEVNAME_SIZE]; | 2890 | char b[BDEVNAME_SIZE]; |
2891 | 2891 | ||
2892 | q = bdev_get_queue(bio->bi_bdev); | 2892 | q = bdev_get_queue(bio->bi_bdev); |
2893 | if (!q) { | 2893 | if (!q) { |
2894 | printk(KERN_ERR | 2894 | printk(KERN_ERR |
2895 | "generic_make_request: Trying to access " | 2895 | "generic_make_request: Trying to access " |
2896 | "nonexistent block-device %s (%Lu)\n", | 2896 | "nonexistent block-device %s (%Lu)\n", |
2897 | bdevname(bio->bi_bdev, b), | 2897 | bdevname(bio->bi_bdev, b), |
2898 | (long long) bio->bi_sector); | 2898 | (long long) bio->bi_sector); |
2899 | end_io: | 2899 | end_io: |
2900 | bio_endio(bio, bio->bi_size, -EIO); | 2900 | bio_endio(bio, bio->bi_size, -EIO); |
2901 | break; | 2901 | break; |
2902 | } | 2902 | } |
2903 | 2903 | ||
2904 | if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) { | 2904 | if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) { |
2905 | printk("bio too big device %s (%u > %u)\n", | 2905 | printk("bio too big device %s (%u > %u)\n", |
2906 | bdevname(bio->bi_bdev, b), | 2906 | bdevname(bio->bi_bdev, b), |
2907 | bio_sectors(bio), | 2907 | bio_sectors(bio), |
2908 | q->max_hw_sectors); | 2908 | q->max_hw_sectors); |
2909 | goto end_io; | 2909 | goto end_io; |
2910 | } | 2910 | } |
2911 | 2911 | ||
2912 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | 2912 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) |
2913 | goto end_io; | 2913 | goto end_io; |
2914 | 2914 | ||
2915 | /* | 2915 | /* |
2916 | * If this device has partitions, remap block n | 2916 | * If this device has partitions, remap block n |
2917 | * of partition p to block n+start(p) of the disk. | 2917 | * of partition p to block n+start(p) of the disk. |
2918 | */ | 2918 | */ |
2919 | blk_partition_remap(bio); | 2919 | blk_partition_remap(bio); |
2920 | 2920 | ||
2921 | ret = q->make_request_fn(q, bio); | 2921 | ret = q->make_request_fn(q, bio); |
2922 | } while (ret); | 2922 | } while (ret); |
2923 | } | 2923 | } |
2924 | 2924 | ||
2925 | EXPORT_SYMBOL(generic_make_request); | 2925 | EXPORT_SYMBOL(generic_make_request); |
2926 | 2926 | ||
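The do/while loop above keeps re-resolving the target queue for as long as make_request_fn() returns non-zero, which is how a stacking driver can redirect a bio to a lower device without recursing. A minimal standalone sketch of that control flow, with invented toy_* types and a two-level stack where "dm-0" forwards to "sda":

#include <stdio.h>

struct toy_queue;

/* Returns non-zero if the bio was redirected and must be resubmitted. */
typedef int (*toy_make_request_fn)(struct toy_queue *q, struct toy_queue **target);

struct toy_queue {
	const char *name;
	struct toy_queue *lower;	/* device this queue stacks on, if any */
	toy_make_request_fn make_request;
};

/* A stacking driver: remap to the lower queue and ask for a resubmit. */
static int stacked_make_request(struct toy_queue *q, struct toy_queue **target)
{
	*target = q->lower;
	return 1;
}

/* A bottom-level driver: accept the I/O, no further resolution needed. */
static int bottom_make_request(struct toy_queue *q, struct toy_queue **target)
{
	printf("I/O accepted by %s\n", q->name);
	return 0;
}

static void toy_generic_make_request(struct toy_queue *q)
{
	int ret;

	/* Same shape as the kernel loop: resolve until a driver returns 0. */
	do {
		struct toy_queue *next = q;

		ret = q->make_request(q, &next);
		q = next;
	} while (ret);
}

int main(void)
{
	struct toy_queue disk = { "sda", NULL, bottom_make_request };
	struct toy_queue dm = { "dm-0", &disk, stacked_make_request };

	toy_generic_make_request(&dm);
	return 0;
}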
2927 | /** | 2927 | /** |
2928 | * submit_bio: submit a bio to the block device layer for I/O | 2928 | * submit_bio: submit a bio to the block device layer for I/O |
2929 | * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) | 2929 | * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) |
2930 | * @bio: The &struct bio which describes the I/O | 2930 | * @bio: The &struct bio which describes the I/O |
2931 | * | 2931 | * |
2932 | * submit_bio() is very similar in purpose to generic_make_request(), and | 2932 | * submit_bio() is very similar in purpose to generic_make_request(), and |
2933 | * uses that function to do most of the work. Both are fairly rough | 2933 | * uses that function to do most of the work. Both are fairly rough |
2934 | * interfaces, @bio must be set up in advance and ready for I/O. | 2934 | * interfaces, @bio must be set up in advance and ready for I/O. |
2935 | * | 2935 | * |
2936 | */ | 2936 | */ |
2937 | void submit_bio(int rw, struct bio *bio) | 2937 | void submit_bio(int rw, struct bio *bio) |
2938 | { | 2938 | { |
2939 | int count = bio_sectors(bio); | 2939 | int count = bio_sectors(bio); |
2940 | 2940 | ||
2941 | BIO_BUG_ON(!bio->bi_size); | 2941 | BIO_BUG_ON(!bio->bi_size); |
2942 | BIO_BUG_ON(!bio->bi_io_vec); | 2942 | BIO_BUG_ON(!bio->bi_io_vec); |
2943 | bio->bi_rw |= rw; | 2943 | bio->bi_rw |= rw; |
2944 | if (rw & WRITE) | 2944 | if (rw & WRITE) |
2945 | mod_page_state(pgpgout, count); | 2945 | mod_page_state(pgpgout, count); |
2946 | else | 2946 | else |
2947 | mod_page_state(pgpgin, count); | 2947 | mod_page_state(pgpgin, count); |
2948 | 2948 | ||
2949 | if (unlikely(block_dump)) { | 2949 | if (unlikely(block_dump)) { |
2950 | char b[BDEVNAME_SIZE]; | 2950 | char b[BDEVNAME_SIZE]; |
2951 | printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", | 2951 | printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", |
2952 | current->comm, current->pid, | 2952 | current->comm, current->pid, |
2953 | (rw & WRITE) ? "WRITE" : "READ", | 2953 | (rw & WRITE) ? "WRITE" : "READ", |
2954 | (unsigned long long)bio->bi_sector, | 2954 | (unsigned long long)bio->bi_sector, |
2955 | bdevname(bio->bi_bdev,b)); | 2955 | bdevname(bio->bi_bdev,b)); |
2956 | } | 2956 | } |
2957 | 2957 | ||
2958 | generic_make_request(bio); | 2958 | generic_make_request(bio); |
2959 | } | 2959 | } |
2960 | 2960 | ||
2961 | EXPORT_SYMBOL(submit_bio); | 2961 | EXPORT_SYMBOL(submit_bio); |
2962 | 2962 | ||
2963 | static void blk_recalc_rq_segments(struct request *rq) | 2963 | static void blk_recalc_rq_segments(struct request *rq) |
2964 | { | 2964 | { |
2965 | struct bio *bio, *prevbio = NULL; | 2965 | struct bio *bio, *prevbio = NULL; |
2966 | int nr_phys_segs, nr_hw_segs; | 2966 | int nr_phys_segs, nr_hw_segs; |
2967 | unsigned int phys_size, hw_size; | 2967 | unsigned int phys_size, hw_size; |
2968 | request_queue_t *q = rq->q; | 2968 | request_queue_t *q = rq->q; |
2969 | 2969 | ||
2970 | if (!rq->bio) | 2970 | if (!rq->bio) |
2971 | return; | 2971 | return; |
2972 | 2972 | ||
2973 | phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; | 2973 | phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; |
2974 | rq_for_each_bio(bio, rq) { | 2974 | rq_for_each_bio(bio, rq) { |
2975 | /* Force bio hw/phys segs to be recalculated. */ | 2975 | /* Force bio hw/phys segs to be recalculated. */ |
2976 | bio->bi_flags &= ~(1 << BIO_SEG_VALID); | 2976 | bio->bi_flags &= ~(1 << BIO_SEG_VALID); |
2977 | 2977 | ||
2978 | nr_phys_segs += bio_phys_segments(q, bio); | 2978 | nr_phys_segs += bio_phys_segments(q, bio); |
2979 | nr_hw_segs += bio_hw_segments(q, bio); | 2979 | nr_hw_segs += bio_hw_segments(q, bio); |
2980 | if (prevbio) { | 2980 | if (prevbio) { |
2981 | int pseg = phys_size + prevbio->bi_size + bio->bi_size; | 2981 | int pseg = phys_size + prevbio->bi_size + bio->bi_size; |
2982 | int hseg = hw_size + prevbio->bi_size + bio->bi_size; | 2982 | int hseg = hw_size + prevbio->bi_size + bio->bi_size; |
2983 | 2983 | ||
2984 | if (blk_phys_contig_segment(q, prevbio, bio) && | 2984 | if (blk_phys_contig_segment(q, prevbio, bio) && |
2985 | pseg <= q->max_segment_size) { | 2985 | pseg <= q->max_segment_size) { |
2986 | nr_phys_segs--; | 2986 | nr_phys_segs--; |
2987 | phys_size += prevbio->bi_size + bio->bi_size; | 2987 | phys_size += prevbio->bi_size + bio->bi_size; |
2988 | } else | 2988 | } else |
2989 | phys_size = 0; | 2989 | phys_size = 0; |
2990 | 2990 | ||
2991 | if (blk_hw_contig_segment(q, prevbio, bio) && | 2991 | if (blk_hw_contig_segment(q, prevbio, bio) && |
2992 | hseg <= q->max_segment_size) { | 2992 | hseg <= q->max_segment_size) { |
2993 | nr_hw_segs--; | 2993 | nr_hw_segs--; |
2994 | hw_size += prevbio->bi_size + bio->bi_size; | 2994 | hw_size += prevbio->bi_size + bio->bi_size; |
2995 | } else | 2995 | } else |
2996 | hw_size = 0; | 2996 | hw_size = 0; |
2997 | } | 2997 | } |
2998 | prevbio = bio; | 2998 | prevbio = bio; |
2999 | } | 2999 | } |
3000 | 3000 | ||
3001 | rq->nr_phys_segments = nr_phys_segs; | 3001 | rq->nr_phys_segments = nr_phys_segs; |
3002 | rq->nr_hw_segments = nr_hw_segs; | 3002 | rq->nr_hw_segments = nr_hw_segs; |
3003 | } | 3003 | } |
3004 | 3004 | ||
3005 | static void blk_recalc_rq_sectors(struct request *rq, int nsect) | 3005 | static void blk_recalc_rq_sectors(struct request *rq, int nsect) |
3006 | { | 3006 | { |
3007 | if (blk_fs_request(rq)) { | 3007 | if (blk_fs_request(rq)) { |
3008 | rq->hard_sector += nsect; | 3008 | rq->hard_sector += nsect; |
3009 | rq->hard_nr_sectors -= nsect; | 3009 | rq->hard_nr_sectors -= nsect; |
3010 | 3010 | ||
3011 | /* | 3011 | /* |
3012 | * Move the I/O submission pointers ahead if required. | 3012 | * Move the I/O submission pointers ahead if required. |
3013 | */ | 3013 | */ |
3014 | if ((rq->nr_sectors >= rq->hard_nr_sectors) && | 3014 | if ((rq->nr_sectors >= rq->hard_nr_sectors) && |
3015 | (rq->sector <= rq->hard_sector)) { | 3015 | (rq->sector <= rq->hard_sector)) { |
3016 | rq->sector = rq->hard_sector; | 3016 | rq->sector = rq->hard_sector; |
3017 | rq->nr_sectors = rq->hard_nr_sectors; | 3017 | rq->nr_sectors = rq->hard_nr_sectors; |
3018 | rq->hard_cur_sectors = bio_cur_sectors(rq->bio); | 3018 | rq->hard_cur_sectors = bio_cur_sectors(rq->bio); |
3019 | rq->current_nr_sectors = rq->hard_cur_sectors; | 3019 | rq->current_nr_sectors = rq->hard_cur_sectors; |
3020 | rq->buffer = bio_data(rq->bio); | 3020 | rq->buffer = bio_data(rq->bio); |
3021 | } | 3021 | } |
3022 | 3022 | ||
3023 | /* | 3023 | /* |
3024 | * if total number of sectors is less than the first segment | 3024 | * if total number of sectors is less than the first segment |
3025 | * size, something has gone terribly wrong | 3025 | * size, something has gone terribly wrong |
3026 | */ | 3026 | */ |
3027 | if (rq->nr_sectors < rq->current_nr_sectors) { | 3027 | if (rq->nr_sectors < rq->current_nr_sectors) { |
3028 | printk("blk: request botched\n"); | 3028 | printk("blk: request botched\n"); |
3029 | rq->nr_sectors = rq->current_nr_sectors; | 3029 | rq->nr_sectors = rq->current_nr_sectors; |
3030 | } | 3030 | } |
3031 | } | 3031 | } |
3032 | } | 3032 | } |
3033 | 3033 | ||
3034 | static int __end_that_request_first(struct request *req, int uptodate, | 3034 | static int __end_that_request_first(struct request *req, int uptodate, |
3035 | int nr_bytes) | 3035 | int nr_bytes) |
3036 | { | 3036 | { |
3037 | int total_bytes, bio_nbytes, error, next_idx = 0; | 3037 | int total_bytes, bio_nbytes, error, next_idx = 0; |
3038 | struct bio *bio; | 3038 | struct bio *bio; |
3039 | 3039 | ||
3040 | /* | 3040 | /* |
3041 | * extend uptodate bool to allow < 0 value to be direct io error | 3041 | * extend uptodate bool to allow < 0 value to be direct io error |
3042 | */ | 3042 | */ |
3043 | error = 0; | 3043 | error = 0; |
3044 | if (end_io_error(uptodate)) | 3044 | if (end_io_error(uptodate)) |
3045 | error = !uptodate ? -EIO : uptodate; | 3045 | error = !uptodate ? -EIO : uptodate; |
3046 | 3046 | ||
3047 | /* | 3047 | /* |
3048 | * for a REQ_BLOCK_PC request, we want to carry any eventual | 3048 | * for a REQ_BLOCK_PC request, we want to carry any eventual |
3049 | * sense key with us all the way through | 3049 | * sense key with us all the way through |
3050 | */ | 3050 | */ |
3051 | if (!blk_pc_request(req)) | 3051 | if (!blk_pc_request(req)) |
3052 | req->errors = 0; | 3052 | req->errors = 0; |
3053 | 3053 | ||
3054 | if (!uptodate) { | 3054 | if (!uptodate) { |
3055 | if (blk_fs_request(req) && !(req->flags & REQ_QUIET)) | 3055 | if (blk_fs_request(req) && !(req->flags & REQ_QUIET)) |
3056 | printk("end_request: I/O error, dev %s, sector %llu\n", | 3056 | printk("end_request: I/O error, dev %s, sector %llu\n", |
3057 | req->rq_disk ? req->rq_disk->disk_name : "?", | 3057 | req->rq_disk ? req->rq_disk->disk_name : "?", |
3058 | (unsigned long long)req->sector); | 3058 | (unsigned long long)req->sector); |
3059 | } | 3059 | } |
3060 | 3060 | ||
3061 | if (blk_fs_request(req) && req->rq_disk) { | 3061 | if (blk_fs_request(req) && req->rq_disk) { |
3062 | const int rw = rq_data_dir(req); | 3062 | const int rw = rq_data_dir(req); |
3063 | 3063 | ||
3064 | __disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9); | 3064 | __disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9); |
3065 | } | 3065 | } |
3066 | 3066 | ||
3067 | total_bytes = bio_nbytes = 0; | 3067 | total_bytes = bio_nbytes = 0; |
3068 | while ((bio = req->bio) != NULL) { | 3068 | while ((bio = req->bio) != NULL) { |
3069 | int nbytes; | 3069 | int nbytes; |
3070 | 3070 | ||
3071 | if (nr_bytes >= bio->bi_size) { | 3071 | if (nr_bytes >= bio->bi_size) { |
3072 | req->bio = bio->bi_next; | 3072 | req->bio = bio->bi_next; |
3073 | nbytes = bio->bi_size; | 3073 | nbytes = bio->bi_size; |
3074 | bio_endio(bio, nbytes, error); | 3074 | bio_endio(bio, nbytes, error); |
3075 | next_idx = 0; | 3075 | next_idx = 0; |
3076 | bio_nbytes = 0; | 3076 | bio_nbytes = 0; |
3077 | } else { | 3077 | } else { |
3078 | int idx = bio->bi_idx + next_idx; | 3078 | int idx = bio->bi_idx + next_idx; |
3079 | 3079 | ||
3080 | if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { | 3080 | if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { |
3081 | blk_dump_rq_flags(req, "__end_that"); | 3081 | blk_dump_rq_flags(req, "__end_that"); |
3082 | printk("%s: bio idx %d >= vcnt %d\n", | 3082 | printk("%s: bio idx %d >= vcnt %d\n", |
3083 | __FUNCTION__, | 3083 | __FUNCTION__, |
3084 | bio->bi_idx, bio->bi_vcnt); | 3084 | bio->bi_idx, bio->bi_vcnt); |
3085 | break; | 3085 | break; |
3086 | } | 3086 | } |
3087 | 3087 | ||
3088 | nbytes = bio_iovec_idx(bio, idx)->bv_len; | 3088 | nbytes = bio_iovec_idx(bio, idx)->bv_len; |
3089 | BIO_BUG_ON(nbytes > bio->bi_size); | 3089 | BIO_BUG_ON(nbytes > bio->bi_size); |
3090 | 3090 | ||
3091 | /* | 3091 | /* |
3092 | * not a complete bvec done | 3092 | * not a complete bvec done |
3093 | */ | 3093 | */ |
3094 | if (unlikely(nbytes > nr_bytes)) { | 3094 | if (unlikely(nbytes > nr_bytes)) { |
3095 | bio_nbytes += nr_bytes; | 3095 | bio_nbytes += nr_bytes; |
3096 | total_bytes += nr_bytes; | 3096 | total_bytes += nr_bytes; |
3097 | break; | 3097 | break; |
3098 | } | 3098 | } |
3099 | 3099 | ||
3100 | /* | 3100 | /* |
3101 | * advance to the next vector | 3101 | * advance to the next vector |
3102 | */ | 3102 | */ |
3103 | next_idx++; | 3103 | next_idx++; |
3104 | bio_nbytes += nbytes; | 3104 | bio_nbytes += nbytes; |
3105 | } | 3105 | } |
3106 | 3106 | ||
3107 | total_bytes += nbytes; | 3107 | total_bytes += nbytes; |
3108 | nr_bytes -= nbytes; | 3108 | nr_bytes -= nbytes; |
3109 | 3109 | ||
3110 | if ((bio = req->bio)) { | 3110 | if ((bio = req->bio)) { |
3111 | /* | 3111 | /* |
3112 | * end more in this run, or just return 'not-done' | 3112 | * end more in this run, or just return 'not-done' |
3113 | */ | 3113 | */ |
3114 | if (unlikely(nr_bytes <= 0)) | 3114 | if (unlikely(nr_bytes <= 0)) |
3115 | break; | 3115 | break; |
3116 | } | 3116 | } |
3117 | } | 3117 | } |
3118 | 3118 | ||
3119 | /* | 3119 | /* |
3120 | * completely done | 3120 | * completely done |
3121 | */ | 3121 | */ |
3122 | if (!req->bio) | 3122 | if (!req->bio) |
3123 | return 0; | 3123 | return 0; |
3124 | 3124 | ||
3125 | /* | 3125 | /* |
3126 | * if the request wasn't completed, update state | 3126 | * if the request wasn't completed, update state |
3127 | */ | 3127 | */ |
3128 | if (bio_nbytes) { | 3128 | if (bio_nbytes) { |
3129 | bio_endio(bio, bio_nbytes, error); | 3129 | bio_endio(bio, bio_nbytes, error); |
3130 | bio->bi_idx += next_idx; | 3130 | bio->bi_idx += next_idx; |
3131 | bio_iovec(bio)->bv_offset += nr_bytes; | 3131 | bio_iovec(bio)->bv_offset += nr_bytes; |
3132 | bio_iovec(bio)->bv_len -= nr_bytes; | 3132 | bio_iovec(bio)->bv_len -= nr_bytes; |
3133 | } | 3133 | } |
3134 | 3134 | ||
3135 | blk_recalc_rq_sectors(req, total_bytes >> 9); | 3135 | blk_recalc_rq_sectors(req, total_bytes >> 9); |
3136 | blk_recalc_rq_segments(req); | 3136 | blk_recalc_rq_segments(req); |
3137 | return 1; | 3137 | return 1; |
3138 | } | 3138 | } |
3139 | 3139 | ||
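__end_that_request_first() consumes nr_bytes from the front of the request's bio chain, fully completing the bios it covers, leaving at most one partially completed bio, and reporting whether anything is still pending. The same byte-accounting idea reduced to a standalone model (hypothetical toy_* names; a linked list of buffers stands in for the bio chain):

#include <stdio.h>
#include <stdlib.h>

/* A buffer stands in for one bio on the request. */
struct toy_buf {
	unsigned int size;		/* bytes left in this buffer */
	struct toy_buf *next;
};

/*
 * Consume nr_bytes from the head of the list.  Returns 0 when the whole
 * chain is finished (the caller would then run its "last" completion step),
 * or 1 when buffers are still pending, like end_that_request_first().
 */
static int toy_end_first(struct toy_buf **head, unsigned int nr_bytes)
{
	while (*head && nr_bytes) {
		struct toy_buf *buf = *head;

		if (nr_bytes >= buf->size) {
			/* This buffer is completely done: drop it. */
			nr_bytes -= buf->size;
			*head = buf->next;
			free(buf);
		} else {
			/* Partially done: shrink it and stop. */
			buf->size -= nr_bytes;
			nr_bytes = 0;
		}
	}
	return *head != NULL;
}

static struct toy_buf *toy_buf_new(unsigned int size, struct toy_buf *next)
{
	struct toy_buf *buf = malloc(sizeof(*buf));

	if (!buf)
		exit(1);
	buf->size = size;
	buf->next = next;
	return buf;
}

int main(void)
{
	/* Two 4096-byte buffers, completed in two 4096-byte chunks. */
	struct toy_buf *chain = toy_buf_new(4096, toy_buf_new(4096, NULL));

	printf("pending after first 4096: %d\n", toy_end_first(&chain, 4096));	/* 1 */
	printf("pending after second 4096: %d\n", toy_end_first(&chain, 4096));	/* 0 */
	return 0;
}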
3140 | /** | 3140 | /** |
3141 | * end_that_request_first - end I/O on a request | 3141 | * end_that_request_first - end I/O on a request |
3142 | * @req: the request being processed | 3142 | * @req: the request being processed |
3143 | * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error | 3143 | * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error |
3144 | * @nr_sectors: number of sectors to end I/O on | 3144 | * @nr_sectors: number of sectors to end I/O on |
3145 | * | 3145 | * |
3146 | * Description: | 3146 | * Description: |
3147 | * Ends I/O on a number of sectors attached to @req, and sets it up | 3147 | * Ends I/O on a number of sectors attached to @req, and sets it up |
3148 | * for the next range of segments (if any) in the cluster. | 3148 | * for the next range of segments (if any) in the cluster. |
3149 | * | 3149 | * |
3150 | * Return: | 3150 | * Return: |
3151 | * 0 - we are done with this request, call end_that_request_last() | 3151 | * 0 - we are done with this request, call end_that_request_last() |
3152 | * 1 - still buffers pending for this request | 3152 | * 1 - still buffers pending for this request |
3153 | **/ | 3153 | **/ |
3154 | int end_that_request_first(struct request *req, int uptodate, int nr_sectors) | 3154 | int end_that_request_first(struct request *req, int uptodate, int nr_sectors) |
3155 | { | 3155 | { |
3156 | return __end_that_request_first(req, uptodate, nr_sectors << 9); | 3156 | return __end_that_request_first(req, uptodate, nr_sectors << 9); |
3157 | } | 3157 | } |
3158 | 3158 | ||
3159 | EXPORT_SYMBOL(end_that_request_first); | 3159 | EXPORT_SYMBOL(end_that_request_first); |
3160 | 3160 | ||
3161 | /** | 3161 | /** |
3162 | * end_that_request_chunk - end I/O on a request | 3162 | * end_that_request_chunk - end I/O on a request |
3163 | * @req: the request being processed | 3163 | * @req: the request being processed |
3164 | * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error | 3164 | * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error |
3165 | * @nr_bytes: number of bytes to complete | 3165 | * @nr_bytes: number of bytes to complete |
3166 | * | 3166 | * |
3167 | * Description: | 3167 | * Description: |
3168 | * Ends I/O on a number of bytes attached to @req, and sets it up | 3168 | * Ends I/O on a number of bytes attached to @req, and sets it up |
3169 | * for the next range of segments (if any). Like end_that_request_first(), | 3169 | * for the next range of segments (if any). Like end_that_request_first(), |
3170 | * but deals with bytes instead of sectors. | 3170 | * but deals with bytes instead of sectors. |
3171 | * | 3171 | * |
3172 | * Return: | 3172 | * Return: |
3173 | * 0 - we are done with this request, call end_that_request_last() | 3173 | * 0 - we are done with this request, call end_that_request_last() |
3174 | * 1 - still buffers pending for this request | 3174 | * 1 - still buffers pending for this request |
3175 | **/ | 3175 | **/ |
3176 | int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes) | 3176 | int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes) |
3177 | { | 3177 | { |
3178 | return __end_that_request_first(req, uptodate, nr_bytes); | 3178 | return __end_that_request_first(req, uptodate, nr_bytes); |
3179 | } | 3179 | } |
3180 | 3180 | ||
3181 | EXPORT_SYMBOL(end_that_request_chunk); | 3181 | EXPORT_SYMBOL(end_that_request_chunk); |
3182 | 3182 | ||
3183 | /* | 3183 | /* |
3184 | * queue lock must be held | 3184 | * queue lock must be held |
3185 | */ | 3185 | */ |
3186 | void end_that_request_last(struct request *req) | 3186 | void end_that_request_last(struct request *req) |
3187 | { | 3187 | { |
3188 | struct gendisk *disk = req->rq_disk; | 3188 | struct gendisk *disk = req->rq_disk; |
3189 | 3189 | ||
3190 | if (unlikely(laptop_mode) && blk_fs_request(req)) | 3190 | if (unlikely(laptop_mode) && blk_fs_request(req)) |
3191 | laptop_io_completion(); | 3191 | laptop_io_completion(); |
3192 | 3192 | ||
3193 | if (disk && blk_fs_request(req)) { | 3193 | if (disk && blk_fs_request(req)) { |
3194 | unsigned long duration = jiffies - req->start_time; | 3194 | unsigned long duration = jiffies - req->start_time; |
3195 | const int rw = rq_data_dir(req); | 3195 | const int rw = rq_data_dir(req); |
3196 | 3196 | ||
3197 | __disk_stat_inc(disk, ios[rw]); | 3197 | __disk_stat_inc(disk, ios[rw]); |
3198 | __disk_stat_add(disk, ticks[rw], duration); | 3198 | __disk_stat_add(disk, ticks[rw], duration); |
3199 | disk_round_stats(disk); | 3199 | disk_round_stats(disk); |
3200 | disk->in_flight--; | 3200 | disk->in_flight--; |
3201 | } | 3201 | } |
3202 | if (req->end_io) | 3202 | if (req->end_io) |
3203 | req->end_io(req); | 3203 | req->end_io(req); |
3204 | else | 3204 | else |
3205 | __blk_put_request(req->q, req); | 3205 | __blk_put_request(req->q, req); |
3206 | } | 3206 | } |
3207 | 3207 | ||
3208 | EXPORT_SYMBOL(end_that_request_last); | 3208 | EXPORT_SYMBOL(end_that_request_last); |
3209 | 3209 | ||
3210 | void end_request(struct request *req, int uptodate) | 3210 | void end_request(struct request *req, int uptodate) |
3211 | { | 3211 | { |
3212 | if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { | 3212 | if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { |
3213 | add_disk_randomness(req->rq_disk); | 3213 | add_disk_randomness(req->rq_disk); |
3214 | blkdev_dequeue_request(req); | 3214 | blkdev_dequeue_request(req); |
3215 | end_that_request_last(req); | 3215 | end_that_request_last(req); |
3216 | } | 3216 | } |
3217 | } | 3217 | } |
3218 | 3218 | ||
3219 | EXPORT_SYMBOL(end_request); | 3219 | EXPORT_SYMBOL(end_request); |
3220 | 3220 | ||
3221 | void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio) | 3221 | void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio) |
3222 | { | 3222 | { |
3223 | /* first three bits are identical in rq->flags and bio->bi_rw */ | 3223 | /* first three bits are identical in rq->flags and bio->bi_rw */ |
3224 | rq->flags |= (bio->bi_rw & 7); | 3224 | rq->flags |= (bio->bi_rw & 7); |
3225 | 3225 | ||
3226 | rq->nr_phys_segments = bio_phys_segments(q, bio); | 3226 | rq->nr_phys_segments = bio_phys_segments(q, bio); |
3227 | rq->nr_hw_segments = bio_hw_segments(q, bio); | 3227 | rq->nr_hw_segments = bio_hw_segments(q, bio); |
3228 | rq->current_nr_sectors = bio_cur_sectors(bio); | 3228 | rq->current_nr_sectors = bio_cur_sectors(bio); |
3229 | rq->hard_cur_sectors = rq->current_nr_sectors; | 3229 | rq->hard_cur_sectors = rq->current_nr_sectors; |
3230 | rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); | 3230 | rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); |
3231 | rq->buffer = bio_data(bio); | 3231 | rq->buffer = bio_data(bio); |
3232 | 3232 | ||
3233 | rq->bio = rq->biotail = bio; | 3233 | rq->bio = rq->biotail = bio; |
3234 | } | 3234 | } |
3235 | 3235 | ||
3236 | EXPORT_SYMBOL(blk_rq_bio_prep); | 3236 | EXPORT_SYMBOL(blk_rq_bio_prep); |
3237 | 3237 | ||
3238 | int kblockd_schedule_work(struct work_struct *work) | 3238 | int kblockd_schedule_work(struct work_struct *work) |
3239 | { | 3239 | { |
3240 | return queue_work(kblockd_workqueue, work); | 3240 | return queue_work(kblockd_workqueue, work); |
3241 | } | 3241 | } |
3242 | 3242 | ||
3243 | EXPORT_SYMBOL(kblockd_schedule_work); | 3243 | EXPORT_SYMBOL(kblockd_schedule_work); |
3244 | 3244 | ||
3245 | void kblockd_flush(void) | 3245 | void kblockd_flush(void) |
3246 | { | 3246 | { |
3247 | flush_workqueue(kblockd_workqueue); | 3247 | flush_workqueue(kblockd_workqueue); |
3248 | } | 3248 | } |
3249 | EXPORT_SYMBOL(kblockd_flush); | 3249 | EXPORT_SYMBOL(kblockd_flush); |
3250 | 3250 | ||
3251 | int __init blk_dev_init(void) | 3251 | int __init blk_dev_init(void) |
3252 | { | 3252 | { |
3253 | kblockd_workqueue = create_workqueue("kblockd"); | 3253 | kblockd_workqueue = create_workqueue("kblockd"); |
3254 | if (!kblockd_workqueue) | 3254 | if (!kblockd_workqueue) |
3255 | panic("Failed to create kblockd\n"); | 3255 | panic("Failed to create kblockd\n"); |
3256 | 3256 | ||
3257 | request_cachep = kmem_cache_create("blkdev_requests", | 3257 | request_cachep = kmem_cache_create("blkdev_requests", |
3258 | sizeof(struct request), 0, SLAB_PANIC, NULL, NULL); | 3258 | sizeof(struct request), 0, SLAB_PANIC, NULL, NULL); |
3259 | 3259 | ||
3260 | requestq_cachep = kmem_cache_create("blkdev_queue", | 3260 | requestq_cachep = kmem_cache_create("blkdev_queue", |
3261 | sizeof(request_queue_t), 0, SLAB_PANIC, NULL, NULL); | 3261 | sizeof(request_queue_t), 0, SLAB_PANIC, NULL, NULL); |
3262 | 3262 | ||
3263 | iocontext_cachep = kmem_cache_create("blkdev_ioc", | 3263 | iocontext_cachep = kmem_cache_create("blkdev_ioc", |
3264 | sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); | 3264 | sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); |
3265 | 3265 | ||
3266 | blk_max_low_pfn = max_low_pfn; | 3266 | blk_max_low_pfn = max_low_pfn; |
3267 | blk_max_pfn = max_pfn; | 3267 | blk_max_pfn = max_pfn; |
3268 | 3268 | ||
3269 | return 0; | 3269 | return 0; |
3270 | } | 3270 | } |
3271 | 3271 | ||
3272 | /* | 3272 | /* |
3273 | * IO Context helper functions | 3273 | * IO Context helper functions |
3274 | */ | 3274 | */ |
3275 | void put_io_context(struct io_context *ioc) | 3275 | void put_io_context(struct io_context *ioc) |
3276 | { | 3276 | { |
3277 | if (ioc == NULL) | 3277 | if (ioc == NULL) |
3278 | return; | 3278 | return; |
3279 | 3279 | ||
3280 | BUG_ON(atomic_read(&ioc->refcount) == 0); | 3280 | BUG_ON(atomic_read(&ioc->refcount) == 0); |
3281 | 3281 | ||
3282 | if (atomic_dec_and_test(&ioc->refcount)) { | 3282 | if (atomic_dec_and_test(&ioc->refcount)) { |
3283 | if (ioc->aic && ioc->aic->dtor) | 3283 | if (ioc->aic && ioc->aic->dtor) |
3284 | ioc->aic->dtor(ioc->aic); | 3284 | ioc->aic->dtor(ioc->aic); |
3285 | if (ioc->cic && ioc->cic->dtor) | 3285 | if (ioc->cic && ioc->cic->dtor) |
3286 | ioc->cic->dtor(ioc->cic); | 3286 | ioc->cic->dtor(ioc->cic); |
3287 | 3287 | ||
3288 | kmem_cache_free(iocontext_cachep, ioc); | 3288 | kmem_cache_free(iocontext_cachep, ioc); |
3289 | } | 3289 | } |
3290 | } | 3290 | } |
3291 | EXPORT_SYMBOL(put_io_context); | 3291 | EXPORT_SYMBOL(put_io_context); |
3292 | 3292 | ||
3293 | /* Called by the exiting task */ | 3293 | /* Called by the exiting task */ |
3294 | void exit_io_context(void) | 3294 | void exit_io_context(void) |
3295 | { | 3295 | { |
3296 | unsigned long flags; | 3296 | unsigned long flags; |
3297 | struct io_context *ioc; | 3297 | struct io_context *ioc; |
3298 | 3298 | ||
3299 | local_irq_save(flags); | 3299 | local_irq_save(flags); |
3300 | task_lock(current); | 3300 | task_lock(current); |
3301 | ioc = current->io_context; | 3301 | ioc = current->io_context; |
3302 | current->io_context = NULL; | 3302 | current->io_context = NULL; |
3303 | ioc->task = NULL; | 3303 | ioc->task = NULL; |
3304 | task_unlock(current); | 3304 | task_unlock(current); |
3305 | local_irq_restore(flags); | 3305 | local_irq_restore(flags); |
3306 | 3306 | ||
3307 | if (ioc->aic && ioc->aic->exit) | 3307 | if (ioc->aic && ioc->aic->exit) |
3308 | ioc->aic->exit(ioc->aic); | 3308 | ioc->aic->exit(ioc->aic); |
3309 | if (ioc->cic && ioc->cic->exit) | 3309 | if (ioc->cic && ioc->cic->exit) |
3310 | ioc->cic->exit(ioc->cic); | 3310 | ioc->cic->exit(ioc->cic); |
3311 | 3311 | ||
3312 | put_io_context(ioc); | 3312 | put_io_context(ioc); |
3313 | } | 3313 | } |
3314 | 3314 | ||
3315 | /* | 3315 | /* |
3316 | * If the current task has no IO context then create one and initialise it. | 3316 | * If the current task has no IO context then create one and initialise it. |
3317 | * Otherwise, return its existing IO context. | 3317 | * Otherwise, return its existing IO context. |
3318 | * | 3318 | * |
3319 | * This returned IO context doesn't have a specifically elevated refcount, | 3319 | * This returned IO context doesn't have a specifically elevated refcount, |
3320 | * but since the current task itself holds a reference, the context can be | 3320 | * but since the current task itself holds a reference, the context can be |
3321 | * used in general code, so long as it stays within `current` context. | 3321 | * used in general code, so long as it stays within `current` context. |
3322 | */ | 3322 | */ |
3323 | struct io_context *current_io_context(gfp_t gfp_flags) | 3323 | struct io_context *current_io_context(gfp_t gfp_flags) |
3324 | { | 3324 | { |
3325 | struct task_struct *tsk = current; | 3325 | struct task_struct *tsk = current; |
3326 | struct io_context *ret; | 3326 | struct io_context *ret; |
3327 | 3327 | ||
3328 | ret = tsk->io_context; | 3328 | ret = tsk->io_context; |
3329 | if (likely(ret)) | 3329 | if (likely(ret)) |
3330 | return ret; | 3330 | return ret; |
3331 | 3331 | ||
3332 | ret = kmem_cache_alloc(iocontext_cachep, gfp_flags); | 3332 | ret = kmem_cache_alloc(iocontext_cachep, gfp_flags); |
3333 | if (ret) { | 3333 | if (ret) { |
3334 | atomic_set(&ret->refcount, 1); | 3334 | atomic_set(&ret->refcount, 1); |
3335 | ret->task = current; | 3335 | ret->task = current; |
3336 | ret->set_ioprio = NULL; | 3336 | ret->set_ioprio = NULL; |
3337 | ret->last_waited = jiffies; /* doesn't matter... */ | 3337 | ret->last_waited = jiffies; /* doesn't matter... */ |
3338 | ret->nr_batch_requests = 0; /* because this is 0 */ | 3338 | ret->nr_batch_requests = 0; /* because this is 0 */ |
3339 | ret->aic = NULL; | 3339 | ret->aic = NULL; |
3340 | ret->cic = NULL; | 3340 | ret->cic = NULL; |
3341 | tsk->io_context = ret; | 3341 | tsk->io_context = ret; |
3342 | } | 3342 | } |
3343 | 3343 | ||
3344 | return ret; | 3344 | return ret; |
3345 | } | 3345 | } |
3346 | EXPORT_SYMBOL(current_io_context); | 3346 | EXPORT_SYMBOL(current_io_context); |
3347 | 3347 | ||
3348 | /* | 3348 | /* |
3349 | * If the current task has no IO context then create one and initialise it. | 3349 | * If the current task has no IO context then create one and initialise it. |
3350 | * If it does have a context, take a ref on it. | 3350 | * If it does have a context, take a ref on it. |
3351 | * | 3351 | * |
3352 | * This is always called in the context of the task which submitted the I/O. | 3352 | * This is always called in the context of the task which submitted the I/O. |
3353 | */ | 3353 | */ |
3354 | struct io_context *get_io_context(gfp_t gfp_flags) | 3354 | struct io_context *get_io_context(gfp_t gfp_flags) |
3355 | { | 3355 | { |
3356 | struct io_context *ret; | 3356 | struct io_context *ret; |
3357 | ret = current_io_context(gfp_flags); | 3357 | ret = current_io_context(gfp_flags); |
3358 | if (likely(ret)) | 3358 | if (likely(ret)) |
3359 | atomic_inc(&ret->refcount); | 3359 | atomic_inc(&ret->refcount); |
3360 | return ret; | 3360 | return ret; |
3361 | } | 3361 | } |
3362 | EXPORT_SYMBOL(get_io_context); | 3362 | EXPORT_SYMBOL(get_io_context); |
3363 | 3363 | ||
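get_io_context() and put_io_context() form an ordinary reference-count pair around the per-task io_context. A compact userspace analogue using C11 atomics (illustrative only; the kernel uses its own atomic_t helpers and runs the aic/cic destructor hooks on the final put):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_ioc {
	atomic_int refcount;
	/* per-task I/O scheduling state would live here */
};

static struct toy_ioc *toy_get_ioc(struct toy_ioc *ioc)
{
	/* Like get_io_context(): take an extra reference for the caller. */
	atomic_fetch_add(&ioc->refcount, 1);
	return ioc;
}

static void toy_put_ioc(struct toy_ioc *ioc)
{
	if (!ioc)
		return;
	/* Like put_io_context(): free when the last reference is dropped. */
	if (atomic_fetch_sub(&ioc->refcount, 1) == 1) {
		/* destructor hooks would run here */
		free(ioc);
	}
}

int main(void)
{
	struct toy_ioc *ioc = calloc(1, sizeof(*ioc));

	if (!ioc)
		return 1;
	atomic_init(&ioc->refcount, 1);	/* creator's reference */
	toy_get_ioc(ioc);		/* e.g. a request takes a reference */
	toy_put_ioc(ioc);		/* the request completes */
	toy_put_ioc(ioc);		/* creator drops its reference: freed */
	printf("done\n");
	return 0;
}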
3364 | void copy_io_context(struct io_context **pdst, struct io_context **psrc) | 3364 | void copy_io_context(struct io_context **pdst, struct io_context **psrc) |
3365 | { | 3365 | { |
3366 | struct io_context *src = *psrc; | 3366 | struct io_context *src = *psrc; |
3367 | struct io_context *dst = *pdst; | 3367 | struct io_context *dst = *pdst; |
3368 | 3368 | ||
3369 | if (src) { | 3369 | if (src) { |
3370 | BUG_ON(atomic_read(&src->refcount) == 0); | 3370 | BUG_ON(atomic_read(&src->refcount) == 0); |
3371 | atomic_inc(&src->refcount); | 3371 | atomic_inc(&src->refcount); |
3372 | put_io_context(dst); | 3372 | put_io_context(dst); |
3373 | *pdst = src; | 3373 | *pdst = src; |
3374 | } | 3374 | } |
3375 | } | 3375 | } |
3376 | EXPORT_SYMBOL(copy_io_context); | 3376 | EXPORT_SYMBOL(copy_io_context); |
3377 | 3377 | ||
3378 | void swap_io_context(struct io_context **ioc1, struct io_context **ioc2) | 3378 | void swap_io_context(struct io_context **ioc1, struct io_context **ioc2) |
3379 | { | 3379 | { |
3380 | struct io_context *temp; | 3380 | struct io_context *temp; |
3381 | temp = *ioc1; | 3381 | temp = *ioc1; |
3382 | *ioc1 = *ioc2; | 3382 | *ioc1 = *ioc2; |
3383 | *ioc2 = temp; | 3383 | *ioc2 = temp; |
3384 | } | 3384 | } |
3385 | EXPORT_SYMBOL(swap_io_context); | 3385 | EXPORT_SYMBOL(swap_io_context); |
3386 | 3386 | ||
3387 | /* | 3387 | /* |
3388 | * sysfs parts below | 3388 | * sysfs parts below |
3389 | */ | 3389 | */ |
3390 | struct queue_sysfs_entry { | 3390 | struct queue_sysfs_entry { |
3391 | struct attribute attr; | 3391 | struct attribute attr; |
3392 | ssize_t (*show)(struct request_queue *, char *); | 3392 | ssize_t (*show)(struct request_queue *, char *); |
3393 | ssize_t (*store)(struct request_queue *, const char *, size_t); | 3393 | ssize_t (*store)(struct request_queue *, const char *, size_t); |
3394 | }; | 3394 | }; |
3395 | 3395 | ||
3396 | static ssize_t | 3396 | static ssize_t |
3397 | queue_var_show(unsigned int var, char *page) | 3397 | queue_var_show(unsigned int var, char *page) |
3398 | { | 3398 | { |
3399 | return sprintf(page, "%d\n", var); | 3399 | return sprintf(page, "%d\n", var); |
3400 | } | 3400 | } |
3401 | 3401 | ||
3402 | static ssize_t | 3402 | static ssize_t |
3403 | queue_var_store(unsigned long *var, const char *page, size_t count) | 3403 | queue_var_store(unsigned long *var, const char *page, size_t count) |
3404 | { | 3404 | { |
3405 | char *p = (char *) page; | 3405 | char *p = (char *) page; |
3406 | 3406 | ||
3407 | *var = simple_strtoul(p, &p, 10); | 3407 | *var = simple_strtoul(p, &p, 10); |
3408 | return count; | 3408 | return count; |
3409 | } | 3409 | } |
3410 | 3410 | ||
3411 | static ssize_t queue_requests_show(struct request_queue *q, char *page) | 3411 | static ssize_t queue_requests_show(struct request_queue *q, char *page) |
3412 | { | 3412 | { |
3413 | return queue_var_show(q->nr_requests, (page)); | 3413 | return queue_var_show(q->nr_requests, (page)); |
3414 | } | 3414 | } |
3415 | 3415 | ||
3416 | static ssize_t | 3416 | static ssize_t |
3417 | queue_requests_store(struct request_queue *q, const char *page, size_t count) | 3417 | queue_requests_store(struct request_queue *q, const char *page, size_t count) |
3418 | { | 3418 | { |
3419 | struct request_list *rl = &q->rq; | 3419 | struct request_list *rl = &q->rq; |
3420 | 3420 | ||
3421 | int ret = queue_var_store(&q->nr_requests, page, count); | 3421 | int ret = queue_var_store(&q->nr_requests, page, count); |
3422 | if (q->nr_requests < BLKDEV_MIN_RQ) | 3422 | if (q->nr_requests < BLKDEV_MIN_RQ) |
3423 | q->nr_requests = BLKDEV_MIN_RQ; | 3423 | q->nr_requests = BLKDEV_MIN_RQ; |
3424 | blk_queue_congestion_threshold(q); | 3424 | blk_queue_congestion_threshold(q); |
3425 | 3425 | ||
3426 | if (rl->count[READ] >= queue_congestion_on_threshold(q)) | 3426 | if (rl->count[READ] >= queue_congestion_on_threshold(q)) |
3427 | set_queue_congested(q, READ); | 3427 | set_queue_congested(q, READ); |
3428 | else if (rl->count[READ] < queue_congestion_off_threshold(q)) | 3428 | else if (rl->count[READ] < queue_congestion_off_threshold(q)) |
3429 | clear_queue_congested(q, READ); | 3429 | clear_queue_congested(q, READ); |
3430 | 3430 | ||
3431 | if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) | 3431 | if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) |
3432 | set_queue_congested(q, WRITE); | 3432 | set_queue_congested(q, WRITE); |
3433 | else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) | 3433 | else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) |
3434 | clear_queue_congested(q, WRITE); | 3434 | clear_queue_congested(q, WRITE); |
3435 | 3435 | ||
3436 | if (rl->count[READ] >= q->nr_requests) { | 3436 | if (rl->count[READ] >= q->nr_requests) { |
3437 | blk_set_queue_full(q, READ); | 3437 | blk_set_queue_full(q, READ); |
3438 | } else if (rl->count[READ]+1 <= q->nr_requests) { | 3438 | } else if (rl->count[READ]+1 <= q->nr_requests) { |
3439 | blk_clear_queue_full(q, READ); | 3439 | blk_clear_queue_full(q, READ); |
3440 | wake_up(&rl->wait[READ]); | 3440 | wake_up(&rl->wait[READ]); |
3441 | } | 3441 | } |
3442 | 3442 | ||
3443 | if (rl->count[WRITE] >= q->nr_requests) { | 3443 | if (rl->count[WRITE] >= q->nr_requests) { |
3444 | blk_set_queue_full(q, WRITE); | 3444 | blk_set_queue_full(q, WRITE); |
3445 | } else if (rl->count[WRITE]+1 <= q->nr_requests) { | 3445 | } else if (rl->count[WRITE]+1 <= q->nr_requests) { |
3446 | blk_clear_queue_full(q, WRITE); | 3446 | blk_clear_queue_full(q, WRITE); |
3447 | wake_up(&rl->wait[WRITE]); | 3447 | wake_up(&rl->wait[WRITE]); |
3448 | } | 3448 | } |
3449 | return ret; | 3449 | return ret; |
3450 | } | 3450 | } |
3451 | 3451 | ||
3452 | static ssize_t queue_ra_show(struct request_queue *q, char *page) | 3452 | static ssize_t queue_ra_show(struct request_queue *q, char *page) |
3453 | { | 3453 | { |
3454 | int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); | 3454 | int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); |
3455 | 3455 | ||
3456 | return queue_var_show(ra_kb, (page)); | 3456 | return queue_var_show(ra_kb, (page)); |
3457 | } | 3457 | } |
3458 | 3458 | ||
3459 | static ssize_t | 3459 | static ssize_t |
3460 | queue_ra_store(struct request_queue *q, const char *page, size_t count) | 3460 | queue_ra_store(struct request_queue *q, const char *page, size_t count) |
3461 | { | 3461 | { |
3462 | unsigned long ra_kb; | 3462 | unsigned long ra_kb; |
3463 | ssize_t ret = queue_var_store(&ra_kb, page, count); | 3463 | ssize_t ret = queue_var_store(&ra_kb, page, count); |
3464 | 3464 | ||
3465 | spin_lock_irq(q->queue_lock); | 3465 | spin_lock_irq(q->queue_lock); |
3466 | if (ra_kb > (q->max_sectors >> 1)) | 3466 | if (ra_kb > (q->max_sectors >> 1)) |
3467 | ra_kb = (q->max_sectors >> 1); | 3467 | ra_kb = (q->max_sectors >> 1); |
3468 | 3468 | ||
3469 | q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); | 3469 | q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); |
3470 | spin_unlock_irq(q->queue_lock); | 3470 | spin_unlock_irq(q->queue_lock); |
3471 | 3471 | ||
3472 | return ret; | 3472 | return ret; |
3473 | } | 3473 | } |
3474 | 3474 | ||
3475 | static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) | 3475 | static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) |
3476 | { | 3476 | { |
3477 | int max_sectors_kb = q->max_sectors >> 1; | 3477 | int max_sectors_kb = q->max_sectors >> 1; |
3478 | 3478 | ||
3479 | return queue_var_show(max_sectors_kb, (page)); | 3479 | return queue_var_show(max_sectors_kb, (page)); |
3480 | } | 3480 | } |
3481 | 3481 | ||
3482 | static ssize_t | 3482 | static ssize_t |
3483 | queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) | 3483 | queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) |
3484 | { | 3484 | { |
3485 | unsigned long max_sectors_kb, | 3485 | unsigned long max_sectors_kb, |
3486 | max_hw_sectors_kb = q->max_hw_sectors >> 1, | 3486 | max_hw_sectors_kb = q->max_hw_sectors >> 1, |
3487 | page_kb = 1 << (PAGE_CACHE_SHIFT - 10); | 3487 | page_kb = 1 << (PAGE_CACHE_SHIFT - 10); |
3488 | ssize_t ret = queue_var_store(&max_sectors_kb, page, count); | 3488 | ssize_t ret = queue_var_store(&max_sectors_kb, page, count); |
3489 | int ra_kb; | 3489 | int ra_kb; |
3490 | 3490 | ||
3491 | if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) | 3491 | if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) |
3492 | return -EINVAL; | 3492 | return -EINVAL; |
3493 | /* | 3493 | /* |
3494 | * Take the queue lock to update the readahead and max_sectors | 3494 | * Take the queue lock to update the readahead and max_sectors |
3495 | * values synchronously: | 3495 | * values synchronously: |
3496 | */ | 3496 | */ |
3497 | spin_lock_irq(q->queue_lock); | 3497 | spin_lock_irq(q->queue_lock); |
3498 | /* | 3498 | /* |
3499 | * Trim readahead window as well, if necessary: | 3499 | * Trim readahead window as well, if necessary: |
3500 | */ | 3500 | */ |
3501 | ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); | 3501 | ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); |
3502 | if (ra_kb > max_sectors_kb) | 3502 | if (ra_kb > max_sectors_kb) |
3503 | q->backing_dev_info.ra_pages = | 3503 | q->backing_dev_info.ra_pages = |
3504 | max_sectors_kb >> (PAGE_CACHE_SHIFT - 10); | 3504 | max_sectors_kb >> (PAGE_CACHE_SHIFT - 10); |
3505 | 3505 | ||
3506 | q->max_sectors = max_sectors_kb << 1; | 3506 | q->max_sectors = max_sectors_kb << 1; |
3507 | spin_unlock_irq(q->queue_lock); | 3507 | spin_unlock_irq(q->queue_lock); |
3508 | 3508 | ||
3509 | return ret; | 3509 | return ret; |
3510 | } | 3510 | } |
3511 | 3511 | ||
3512 | static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) | 3512 | static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) |
3513 | { | 3513 | { |
3514 | int max_hw_sectors_kb = q->max_hw_sectors >> 1; | 3514 | int max_hw_sectors_kb = q->max_hw_sectors >> 1; |
3515 | 3515 | ||
3516 | return queue_var_show(max_hw_sectors_kb, (page)); | 3516 | return queue_var_show(max_hw_sectors_kb, (page)); |
3517 | } | 3517 | } |
3518 | 3518 | ||
3519 | 3519 | ||
3520 | static struct queue_sysfs_entry queue_requests_entry = { | 3520 | static struct queue_sysfs_entry queue_requests_entry = { |
3521 | .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, | 3521 | .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, |
3522 | .show = queue_requests_show, | 3522 | .show = queue_requests_show, |
3523 | .store = queue_requests_store, | 3523 | .store = queue_requests_store, |
3524 | }; | 3524 | }; |
3525 | 3525 | ||
3526 | static struct queue_sysfs_entry queue_ra_entry = { | 3526 | static struct queue_sysfs_entry queue_ra_entry = { |
3527 | .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR }, | 3527 | .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR }, |
3528 | .show = queue_ra_show, | 3528 | .show = queue_ra_show, |
3529 | .store = queue_ra_store, | 3529 | .store = queue_ra_store, |
3530 | }; | 3530 | }; |
3531 | 3531 | ||
3532 | static struct queue_sysfs_entry queue_max_sectors_entry = { | 3532 | static struct queue_sysfs_entry queue_max_sectors_entry = { |
3533 | .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR }, | 3533 | .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR }, |
3534 | .show = queue_max_sectors_show, | 3534 | .show = queue_max_sectors_show, |
3535 | .store = queue_max_sectors_store, | 3535 | .store = queue_max_sectors_store, |
3536 | }; | 3536 | }; |
3537 | 3537 | ||
3538 | static struct queue_sysfs_entry queue_max_hw_sectors_entry = { | 3538 | static struct queue_sysfs_entry queue_max_hw_sectors_entry = { |
3539 | .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO }, | 3539 | .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO }, |
3540 | .show = queue_max_hw_sectors_show, | 3540 | .show = queue_max_hw_sectors_show, |
3541 | }; | 3541 | }; |
3542 | 3542 | ||
3543 | static struct queue_sysfs_entry queue_iosched_entry = { | 3543 | static struct queue_sysfs_entry queue_iosched_entry = { |
3544 | .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, | 3544 | .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, |
3545 | .show = elv_iosched_show, | 3545 | .show = elv_iosched_show, |
3546 | .store = elv_iosched_store, | 3546 | .store = elv_iosched_store, |
3547 | }; | 3547 | }; |
3548 | 3548 | ||
3549 | static struct attribute *default_attrs[] = { | 3549 | static struct attribute *default_attrs[] = { |
3550 | &queue_requests_entry.attr, | 3550 | &queue_requests_entry.attr, |
3551 | &queue_ra_entry.attr, | 3551 | &queue_ra_entry.attr, |
3552 | &queue_max_hw_sectors_entry.attr, | 3552 | &queue_max_hw_sectors_entry.attr, |
3553 | &queue_max_sectors_entry.attr, | 3553 | &queue_max_sectors_entry.attr, |
3554 | &queue_iosched_entry.attr, | 3554 | &queue_iosched_entry.attr, |
3555 | NULL, | 3555 | NULL, |
3556 | }; | 3556 | }; |
3557 | 3557 | ||
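Once blk_register_queue() (below) has run, the attributes listed in default_attrs[] appear under /sys/block/<disk>/queue. A small standalone reader for them (the disk name "sda" is only an example; any registered disk works):

#include <stdio.h>

/* Read and print one sysfs queue attribute, e.g. "nr_requests". */
static void show_queue_attr(const char *disk, const char *attr)
{
	char path[256], value[64];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/block/%s/queue/%s", disk, attr);
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return;
	}
	if (fgets(value, sizeof(value), f))
		printf("%s = %s", attr, value);	/* these attributes are newline-terminated */
	fclose(f);
}

int main(void)
{
	/* The names match the queue_sysfs_entry definitions above. */
	show_queue_attr("sda", "nr_requests");
	show_queue_attr("sda", "read_ahead_kb");
	show_queue_attr("sda", "max_sectors_kb");
	show_queue_attr("sda", "scheduler");
	return 0;
}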
3558 | #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) | 3558 | #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) |
3559 | 3559 | ||
3560 | static ssize_t | 3560 | static ssize_t |
3561 | queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) | 3561 | queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) |
3562 | { | 3562 | { |
3563 | struct queue_sysfs_entry *entry = to_queue(attr); | 3563 | struct queue_sysfs_entry *entry = to_queue(attr); |
3564 | struct request_queue *q; | 3564 | struct request_queue *q; |
3565 | 3565 | ||
3566 | q = container_of(kobj, struct request_queue, kobj); | 3566 | q = container_of(kobj, struct request_queue, kobj); |
3567 | if (!entry->show) | 3567 | if (!entry->show) |
3568 | return -EIO; | 3568 | return -EIO; |
3569 | 3569 | ||
3570 | return entry->show(q, page); | 3570 | return entry->show(q, page); |
3571 | } | 3571 | } |
3572 | 3572 | ||
3573 | static ssize_t | 3573 | static ssize_t |
3574 | queue_attr_store(struct kobject *kobj, struct attribute *attr, | 3574 | queue_attr_store(struct kobject *kobj, struct attribute *attr, |
3575 | const char *page, size_t length) | 3575 | const char *page, size_t length) |
3576 | { | 3576 | { |
3577 | struct queue_sysfs_entry *entry = to_queue(attr); | 3577 | struct queue_sysfs_entry *entry = to_queue(attr); |
3578 | struct request_queue *q; | 3578 | struct request_queue *q; |
3579 | 3579 | ||
3580 | q = container_of(kobj, struct request_queue, kobj); | 3580 | q = container_of(kobj, struct request_queue, kobj); |
3581 | if (!entry->store) | 3581 | if (!entry->store) |
3582 | return -EIO; | 3582 | return -EIO; |
3583 | 3583 | ||
3584 | return entry->store(q, page, length); | 3584 | return entry->store(q, page, length); |
3585 | } | 3585 | } |
3586 | 3586 | ||
3587 | static struct sysfs_ops queue_sysfs_ops = { | 3587 | static struct sysfs_ops queue_sysfs_ops = { |
3588 | .show = queue_attr_show, | 3588 | .show = queue_attr_show, |
3589 | .store = queue_attr_store, | 3589 | .store = queue_attr_store, |
3590 | }; | 3590 | }; |
3591 | 3591 | ||
3592 | static struct kobj_type queue_ktype = { | 3592 | static struct kobj_type queue_ktype = { |
3593 | .sysfs_ops = &queue_sysfs_ops, | 3593 | .sysfs_ops = &queue_sysfs_ops, |
3594 | .default_attrs = default_attrs, | 3594 | .default_attrs = default_attrs, |
3595 | }; | 3595 | }; |
3596 | 3596 | ||
3597 | int blk_register_queue(struct gendisk *disk) | 3597 | int blk_register_queue(struct gendisk *disk) |
3598 | { | 3598 | { |
3599 | int ret; | 3599 | int ret; |
3600 | 3600 | ||
3601 | request_queue_t *q = disk->queue; | 3601 | request_queue_t *q = disk->queue; |
3602 | 3602 | ||
3603 | if (!q || !q->request_fn) | 3603 | if (!q || !q->request_fn) |
3604 | return -ENXIO; | 3604 | return -ENXIO; |
3605 | 3605 | ||
3606 | q->kobj.parent = kobject_get(&disk->kobj); | 3606 | q->kobj.parent = kobject_get(&disk->kobj); |
3607 | if (!q->kobj.parent) | 3607 | if (!q->kobj.parent) |
3608 | return -EBUSY; | 3608 | return -EBUSY; |
3609 | 3609 | ||
3610 | snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue"); | 3610 | snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue"); |
3611 | q->kobj.ktype = &queue_ktype; | 3611 | q->kobj.ktype = &queue_ktype; |
3612 | 3612 | ||
3613 | ret = kobject_register(&q->kobj); | 3613 | ret = kobject_register(&q->kobj); |
3614 | if (ret < 0) | 3614 | if (ret < 0) |
3615 | return ret; | 3615 | return ret; |
3616 | 3616 | ||
3617 | ret = elv_register_queue(q); | 3617 | ret = elv_register_queue(q); |
3618 | if (ret) { | 3618 | if (ret) { |
3619 | kobject_unregister(&q->kobj); | 3619 | kobject_unregister(&q->kobj); |
3620 | return ret; | 3620 | return ret; |
3621 | } | 3621 | } |
3622 | 3622 | ||
3623 | return 0; | 3623 | return 0; |
3624 | } | 3624 | } |
3625 | 3625 | ||
3626 | void blk_unregister_queue(struct gendisk *disk) | 3626 | void blk_unregister_queue(struct gendisk *disk) |
3627 | { | 3627 | { |
3628 | request_queue_t *q = disk->queue; | 3628 | request_queue_t *q = disk->queue; |
3629 | 3629 | ||
3630 | if (q && q->request_fn) { | 3630 | if (q && q->request_fn) { |
3631 | elv_unregister_queue(q); | 3631 | elv_unregister_queue(q); |
3632 | 3632 | ||
3633 | kobject_unregister(&q->kobj); | 3633 | kobject_unregister(&q->kobj); |
3634 | kobject_put(&disk->kobj); | 3634 | kobject_put(&disk->kobj); |
3635 | } | 3635 | } |
3636 | } | 3636 | } |
3637 | 3637 |
block/scsi_ioctl.c
1 | /* | 1 | /* |
2 | * Copyright (C) 2001 Jens Axboe <axboe@suse.de> | 2 | * Copyright (C) 2001 Jens Axboe <axboe@suse.de> |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License version 2 as | 5 | * it under the terms of the GNU General Public License version 2 as |
6 | * published by the Free Software Foundation. | 6 | * published by the Free Software Foundation. |
7 | * | 7 | * |
8 | * This program is distributed in the hope that it will be useful, | 8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * | 10 | * |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public Licens | 14 | * You should have received a copy of the GNU General Public Licens |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- |
17 | * | 17 | * |
18 | */ | 18 | */ |
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/errno.h> | 20 | #include <linux/errno.h> |
21 | #include <linux/string.h> | 21 | #include <linux/string.h> |
22 | #include <linux/module.h> | 22 | #include <linux/module.h> |
23 | #include <linux/blkdev.h> | 23 | #include <linux/blkdev.h> |
24 | #include <linux/completion.h> | 24 | #include <linux/completion.h> |
25 | #include <linux/cdrom.h> | 25 | #include <linux/cdrom.h> |
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/times.h> | 27 | #include <linux/times.h> |
28 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
29 | 29 | ||
30 | #include <scsi/scsi.h> | 30 | #include <scsi/scsi.h> |
31 | #include <scsi/scsi_ioctl.h> | 31 | #include <scsi/scsi_ioctl.h> |
32 | #include <scsi/scsi_cmnd.h> | 32 | #include <scsi/scsi_cmnd.h> |
33 | 33 | ||
34 | /* Command group 3 is reserved and should never be used. */ | 34 | /* Command group 3 is reserved and should never be used. */ |
35 | const unsigned char scsi_command_size[8] = | 35 | const unsigned char scsi_command_size[8] = |
36 | { | 36 | { |
37 | 6, 10, 10, 12, | 37 | 6, 10, 10, 12, |
38 | 16, 12, 10, 10 | 38 | 16, 12, 10, 10 |
39 | }; | 39 | }; |
40 | 40 | ||
41 | EXPORT_SYMBOL(scsi_command_size); | 41 | EXPORT_SYMBOL(scsi_command_size); |
42 | 42 | ||
43 | #define BLK_DEFAULT_TIMEOUT (60 * HZ) | 43 | #define BLK_DEFAULT_TIMEOUT (60 * HZ) |
44 | 44 | ||
45 | #include <scsi/sg.h> | 45 | #include <scsi/sg.h> |
46 | 46 | ||
47 | static int sg_get_version(int __user *p) | 47 | static int sg_get_version(int __user *p) |
48 | { | 48 | { |
49 | static int sg_version_num = 30527; | 49 | static const int sg_version_num = 30527; |
50 | return put_user(sg_version_num, p); | 50 | return put_user(sg_version_num, p); |
51 | } | 51 | } |
52 | 52 | ||
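sg_get_version() now hands put_user() a static const int, so the compiler is free to fold the value into the call site instead of keeping a writable static object around. The effect can be seen in isolation with a standalone comparison (the variable names are invented; compile with optimization, e.g. -O2, and inspect the generated code):

#include <stdio.h>

/* With const, the compiler may substitute the literal 30527 at each use and
 * drop the object entirely; without const it must keep a writable static. */
static const int folded_version = 30527;
static int mutable_version = 30527;

int get_folded(void)  { return folded_version;  }
int get_mutable(void) { return mutable_version; }

int main(void)
{
	printf("%d %d\n", get_folded(), get_mutable());
	return 0;
}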
53 | static int scsi_get_idlun(request_queue_t *q, int __user *p) | 53 | static int scsi_get_idlun(request_queue_t *q, int __user *p) |
54 | { | 54 | { |
55 | return put_user(0, p); | 55 | return put_user(0, p); |
56 | } | 56 | } |
57 | 57 | ||
58 | static int scsi_get_bus(request_queue_t *q, int __user *p) | 58 | static int scsi_get_bus(request_queue_t *q, int __user *p) |
59 | { | 59 | { |
60 | return put_user(0, p); | 60 | return put_user(0, p); |
61 | } | 61 | } |
62 | 62 | ||
63 | static int sg_get_timeout(request_queue_t *q) | 63 | static int sg_get_timeout(request_queue_t *q) |
64 | { | 64 | { |
65 | return q->sg_timeout / (HZ / USER_HZ); | 65 | return q->sg_timeout / (HZ / USER_HZ); |
66 | } | 66 | } |
67 | 67 | ||
68 | static int sg_set_timeout(request_queue_t *q, int __user *p) | 68 | static int sg_set_timeout(request_queue_t *q, int __user *p) |
69 | { | 69 | { |
70 | int timeout, err = get_user(timeout, p); | 70 | int timeout, err = get_user(timeout, p); |
71 | 71 | ||
72 | if (!err) | 72 | if (!err) |
73 | q->sg_timeout = timeout * (HZ / USER_HZ); | 73 | q->sg_timeout = timeout * (HZ / USER_HZ); |
74 | 74 | ||
75 | return err; | 75 | return err; |
76 | } | 76 | } |
77 | 77 | ||
78 | static int sg_get_reserved_size(request_queue_t *q, int __user *p) | 78 | static int sg_get_reserved_size(request_queue_t *q, int __user *p) |
79 | { | 79 | { |
80 | return put_user(q->sg_reserved_size, p); | 80 | return put_user(q->sg_reserved_size, p); |
81 | } | 81 | } |
82 | 82 | ||
83 | static int sg_set_reserved_size(request_queue_t *q, int __user *p) | 83 | static int sg_set_reserved_size(request_queue_t *q, int __user *p) |
84 | { | 84 | { |
85 | int size, err = get_user(size, p); | 85 | int size, err = get_user(size, p); |
86 | 86 | ||
87 | if (err) | 87 | if (err) |
88 | return err; | 88 | return err; |
89 | 89 | ||
90 | if (size < 0) | 90 | if (size < 0) |
91 | return -EINVAL; | 91 | return -EINVAL; |
92 | if (size > (q->max_sectors << 9)) | 92 | if (size > (q->max_sectors << 9)) |
93 | size = q->max_sectors << 9; | 93 | size = q->max_sectors << 9; |
94 | 94 | ||
95 | q->sg_reserved_size = size; | 95 | q->sg_reserved_size = size; |
96 | return 0; | 96 | return 0; |
97 | } | 97 | } |
98 | 98 | ||
99 | /* | 99 | /* |
100 | * will always return that we are ATAPI even for a real SCSI drive, I'm not | 100 | * will always return that we are ATAPI even for a real SCSI drive, I'm not |
101 | * so sure this is worth doing anything about (why would you care??) | 101 | * so sure this is worth doing anything about (why would you care??) |
102 | */ | 102 | */ |
103 | static int sg_emulated_host(request_queue_t *q, int __user *p) | 103 | static int sg_emulated_host(request_queue_t *q, int __user *p) |
104 | { | 104 | { |
105 | return put_user(1, p); | 105 | return put_user(1, p); |
106 | } | 106 | } |
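The small helpers above back the corresponding SG_* cases dispatched in scsi_cmd_ioctl() further down. A userspace sketch of the round trip, assuming /dev/sda is a block device whose driver forwards ioctls to scsi_cmd_ioctl(); error handling is trimmed:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <scsi/sg.h>

int main(void)
{
	int fd = open("/dev/sda", O_RDONLY);		/* illustrative device */
	int version, reserved;
	int timeout = 60 * sysconf(_SC_CLK_TCK);	/* ~60 s in user ticks */

	if (fd < 0)
		return 1;

	if (ioctl(fd, SG_GET_VERSION_NUM, &version) == 0)
		printf("sg version %d\n", version);	/* 30527 from sg_get_version() */

	/* sg_set_timeout() scales this by HZ/USER_HZ before storing it */
	ioctl(fd, SG_SET_TIMEOUT, &timeout);

	/* SG_GET_TIMEOUT hands the value back as the ioctl return value */
	printf("timeout now %ld ticks\n", (long)ioctl(fd, SG_GET_TIMEOUT));

	if (ioctl(fd, SG_GET_RESERVED_SIZE, &reserved) == 0)
		printf("reserved size %d bytes\n", reserved);

	close(fd);
	return 0;
}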
107 | 107 | ||
108 | #define CMD_READ_SAFE 0x01 | 108 | #define CMD_READ_SAFE 0x01 |
109 | #define CMD_WRITE_SAFE 0x02 | 109 | #define CMD_WRITE_SAFE 0x02 |
110 | #define CMD_WARNED 0x04 | 110 | #define CMD_WARNED 0x04 |
111 | #define safe_for_read(cmd) [cmd] = CMD_READ_SAFE | 111 | #define safe_for_read(cmd) [cmd] = CMD_READ_SAFE |
112 | #define safe_for_write(cmd) [cmd] = CMD_WRITE_SAFE | 112 | #define safe_for_write(cmd) [cmd] = CMD_WRITE_SAFE |
113 | 113 | ||
114 | static int verify_command(struct file *file, unsigned char *cmd) | 114 | static int verify_command(struct file *file, unsigned char *cmd) |
115 | { | 115 | { |
116 | static unsigned char cmd_type[256] = { | 116 | static unsigned char cmd_type[256] = { |
117 | 117 | ||
118 | /* Basic read-only commands */ | 118 | /* Basic read-only commands */ |
119 | safe_for_read(TEST_UNIT_READY), | 119 | safe_for_read(TEST_UNIT_READY), |
120 | safe_for_read(REQUEST_SENSE), | 120 | safe_for_read(REQUEST_SENSE), |
121 | safe_for_read(READ_6), | 121 | safe_for_read(READ_6), |
122 | safe_for_read(READ_10), | 122 | safe_for_read(READ_10), |
123 | safe_for_read(READ_12), | 123 | safe_for_read(READ_12), |
124 | safe_for_read(READ_16), | 124 | safe_for_read(READ_16), |
125 | safe_for_read(READ_BUFFER), | 125 | safe_for_read(READ_BUFFER), |
126 | safe_for_read(READ_DEFECT_DATA), | 126 | safe_for_read(READ_DEFECT_DATA), |
127 | safe_for_read(READ_LONG), | 127 | safe_for_read(READ_LONG), |
128 | safe_for_read(INQUIRY), | 128 | safe_for_read(INQUIRY), |
129 | safe_for_read(MODE_SENSE), | 129 | safe_for_read(MODE_SENSE), |
130 | safe_for_read(MODE_SENSE_10), | 130 | safe_for_read(MODE_SENSE_10), |
131 | safe_for_read(LOG_SENSE), | 131 | safe_for_read(LOG_SENSE), |
132 | safe_for_read(START_STOP), | 132 | safe_for_read(START_STOP), |
133 | safe_for_read(GPCMD_VERIFY_10), | 133 | safe_for_read(GPCMD_VERIFY_10), |
134 | safe_for_read(VERIFY_16), | 134 | safe_for_read(VERIFY_16), |
135 | 135 | ||
136 | /* Audio CD commands */ | 136 | /* Audio CD commands */ |
137 | safe_for_read(GPCMD_PLAY_CD), | 137 | safe_for_read(GPCMD_PLAY_CD), |
138 | safe_for_read(GPCMD_PLAY_AUDIO_10), | 138 | safe_for_read(GPCMD_PLAY_AUDIO_10), |
139 | safe_for_read(GPCMD_PLAY_AUDIO_MSF), | 139 | safe_for_read(GPCMD_PLAY_AUDIO_MSF), |
140 | safe_for_read(GPCMD_PLAY_AUDIO_TI), | 140 | safe_for_read(GPCMD_PLAY_AUDIO_TI), |
141 | safe_for_read(GPCMD_PAUSE_RESUME), | 141 | safe_for_read(GPCMD_PAUSE_RESUME), |
142 | 142 | ||
143 | /* CD/DVD data reading */ | 143 | /* CD/DVD data reading */ |
144 | safe_for_read(GPCMD_READ_BUFFER_CAPACITY), | 144 | safe_for_read(GPCMD_READ_BUFFER_CAPACITY), |
145 | safe_for_read(GPCMD_READ_CD), | 145 | safe_for_read(GPCMD_READ_CD), |
146 | safe_for_read(GPCMD_READ_CD_MSF), | 146 | safe_for_read(GPCMD_READ_CD_MSF), |
147 | safe_for_read(GPCMD_READ_DISC_INFO), | 147 | safe_for_read(GPCMD_READ_DISC_INFO), |
148 | safe_for_read(GPCMD_READ_CDVD_CAPACITY), | 148 | safe_for_read(GPCMD_READ_CDVD_CAPACITY), |
149 | safe_for_read(GPCMD_READ_DVD_STRUCTURE), | 149 | safe_for_read(GPCMD_READ_DVD_STRUCTURE), |
150 | safe_for_read(GPCMD_READ_HEADER), | 150 | safe_for_read(GPCMD_READ_HEADER), |
151 | safe_for_read(GPCMD_READ_TRACK_RZONE_INFO), | 151 | safe_for_read(GPCMD_READ_TRACK_RZONE_INFO), |
152 | safe_for_read(GPCMD_READ_SUBCHANNEL), | 152 | safe_for_read(GPCMD_READ_SUBCHANNEL), |
153 | safe_for_read(GPCMD_READ_TOC_PMA_ATIP), | 153 | safe_for_read(GPCMD_READ_TOC_PMA_ATIP), |
154 | safe_for_read(GPCMD_REPORT_KEY), | 154 | safe_for_read(GPCMD_REPORT_KEY), |
155 | safe_for_read(GPCMD_SCAN), | 155 | safe_for_read(GPCMD_SCAN), |
156 | safe_for_read(GPCMD_GET_CONFIGURATION), | 156 | safe_for_read(GPCMD_GET_CONFIGURATION), |
157 | safe_for_read(GPCMD_READ_FORMAT_CAPACITIES), | 157 | safe_for_read(GPCMD_READ_FORMAT_CAPACITIES), |
158 | safe_for_read(GPCMD_GET_EVENT_STATUS_NOTIFICATION), | 158 | safe_for_read(GPCMD_GET_EVENT_STATUS_NOTIFICATION), |
159 | safe_for_read(GPCMD_GET_PERFORMANCE), | 159 | safe_for_read(GPCMD_GET_PERFORMANCE), |
160 | safe_for_read(GPCMD_SEEK), | 160 | safe_for_read(GPCMD_SEEK), |
161 | safe_for_read(GPCMD_STOP_PLAY_SCAN), | 161 | safe_for_read(GPCMD_STOP_PLAY_SCAN), |
162 | 162 | ||
163 | /* Basic writing commands */ | 163 | /* Basic writing commands */ |
164 | safe_for_write(WRITE_6), | 164 | safe_for_write(WRITE_6), |
165 | safe_for_write(WRITE_10), | 165 | safe_for_write(WRITE_10), |
166 | safe_for_write(WRITE_VERIFY), | 166 | safe_for_write(WRITE_VERIFY), |
167 | safe_for_write(WRITE_12), | 167 | safe_for_write(WRITE_12), |
168 | safe_for_write(WRITE_VERIFY_12), | 168 | safe_for_write(WRITE_VERIFY_12), |
169 | safe_for_write(WRITE_16), | 169 | safe_for_write(WRITE_16), |
170 | safe_for_write(WRITE_LONG), | 170 | safe_for_write(WRITE_LONG), |
171 | safe_for_write(WRITE_LONG_2), | 171 | safe_for_write(WRITE_LONG_2), |
172 | safe_for_write(ERASE), | 172 | safe_for_write(ERASE), |
173 | safe_for_write(GPCMD_MODE_SELECT_10), | 173 | safe_for_write(GPCMD_MODE_SELECT_10), |
174 | safe_for_write(MODE_SELECT), | 174 | safe_for_write(MODE_SELECT), |
175 | safe_for_write(LOG_SELECT), | 175 | safe_for_write(LOG_SELECT), |
176 | safe_for_write(GPCMD_BLANK), | 176 | safe_for_write(GPCMD_BLANK), |
177 | safe_for_write(GPCMD_CLOSE_TRACK), | 177 | safe_for_write(GPCMD_CLOSE_TRACK), |
178 | safe_for_write(GPCMD_FLUSH_CACHE), | 178 | safe_for_write(GPCMD_FLUSH_CACHE), |
179 | safe_for_write(GPCMD_FORMAT_UNIT), | 179 | safe_for_write(GPCMD_FORMAT_UNIT), |
180 | safe_for_write(GPCMD_REPAIR_RZONE_TRACK), | 180 | safe_for_write(GPCMD_REPAIR_RZONE_TRACK), |
181 | safe_for_write(GPCMD_RESERVE_RZONE_TRACK), | 181 | safe_for_write(GPCMD_RESERVE_RZONE_TRACK), |
182 | safe_for_write(GPCMD_SEND_DVD_STRUCTURE), | 182 | safe_for_write(GPCMD_SEND_DVD_STRUCTURE), |
183 | safe_for_write(GPCMD_SEND_EVENT), | 183 | safe_for_write(GPCMD_SEND_EVENT), |
184 | safe_for_write(GPCMD_SEND_KEY), | 184 | safe_for_write(GPCMD_SEND_KEY), |
185 | safe_for_write(GPCMD_SEND_OPC), | 185 | safe_for_write(GPCMD_SEND_OPC), |
186 | safe_for_write(GPCMD_SEND_CUE_SHEET), | 186 | safe_for_write(GPCMD_SEND_CUE_SHEET), |
187 | safe_for_write(GPCMD_SET_SPEED), | 187 | safe_for_write(GPCMD_SET_SPEED), |
188 | safe_for_write(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL), | 188 | safe_for_write(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL), |
189 | safe_for_write(GPCMD_LOAD_UNLOAD), | 189 | safe_for_write(GPCMD_LOAD_UNLOAD), |
190 | safe_for_write(GPCMD_SET_STREAMING), | 190 | safe_for_write(GPCMD_SET_STREAMING), |
191 | }; | 191 | }; |
192 | unsigned char type = cmd_type[cmd[0]]; | 192 | unsigned char type = cmd_type[cmd[0]]; |
193 | 193 | ||
194 | /* Anybody who can open the device can do a read-safe command */ | 194 | /* Anybody who can open the device can do a read-safe command */ |
195 | if (type & CMD_READ_SAFE) | 195 | if (type & CMD_READ_SAFE) |
196 | return 0; | 196 | return 0; |
197 | 197 | ||
198 | /* Write-safe commands just require a writable open.. */ | 198 | /* Write-safe commands just require a writable open.. */ |
199 | if (type & CMD_WRITE_SAFE) { | 199 | if (type & CMD_WRITE_SAFE) { |
200 | if (file->f_mode & FMODE_WRITE) | 200 | if (file->f_mode & FMODE_WRITE) |
201 | return 0; | 201 | return 0; |
202 | } | 202 | } |
203 | 203 | ||
204 | /* And root can do any command.. */ | 204 | /* And root can do any command.. */ |
205 | if (capable(CAP_SYS_RAWIO)) | 205 | if (capable(CAP_SYS_RAWIO)) |
206 | return 0; | 206 | return 0; |
207 | 207 | ||
208 | if (!type) { | 208 | if (!type) { |
209 | cmd_type[cmd[0]] = CMD_WARNED; | 209 | cmd_type[cmd[0]] = CMD_WARNED; |
210 | printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]); | 210 | printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]); |
211 | } | 211 | } |
212 | 212 | ||
213 | /* Otherwise fail it with an "Operation not permitted" */ | 213 | /* Otherwise fail it with an "Operation not permitted" */ |
214 | return -EPERM; | 214 | return -EPERM; |
215 | } | 215 | } |
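verify_command() enforces three tiers: read-safe opcodes for anyone who can open the device, write-safe opcodes only on a writable open, and everything else only with CAP_SYS_RAWIO. A condensed paraphrase of that policy, for reference only; the real function also warns once per unknown opcode via CMD_WARNED:

/* Reference paraphrase of the checks in verify_command() above. */
#define SK_CMD_READ_SAFE	0x01	/* mirrors CMD_READ_SAFE */
#define SK_CMD_WRITE_SAFE	0x02	/* mirrors CMD_WRITE_SAFE */

static int opcode_allowed_sketch(unsigned char type, int writable_open,
				 int has_sys_rawio)
{
	if (type & SK_CMD_READ_SAFE)			/* e.g. INQUIRY, READ_10 */
		return 1;
	if ((type & SK_CMD_WRITE_SAFE) && writable_open)	/* e.g. WRITE_10 */
		return 1;
	if (has_sys_rawio)				/* CAP_SYS_RAWIO overrides */
		return 1;
	return 0;					/* verify_command() gives -EPERM */
}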
216 | 216 | ||
217 | static int sg_io(struct file *file, request_queue_t *q, | 217 | static int sg_io(struct file *file, request_queue_t *q, |
218 | struct gendisk *bd_disk, struct sg_io_hdr *hdr) | 218 | struct gendisk *bd_disk, struct sg_io_hdr *hdr) |
219 | { | 219 | { |
220 | unsigned long start_time; | 220 | unsigned long start_time; |
221 | int writing = 0, ret = 0; | 221 | int writing = 0, ret = 0; |
222 | struct request *rq; | 222 | struct request *rq; |
223 | struct bio *bio; | 223 | struct bio *bio; |
224 | char sense[SCSI_SENSE_BUFFERSIZE]; | 224 | char sense[SCSI_SENSE_BUFFERSIZE]; |
225 | unsigned char cmd[BLK_MAX_CDB]; | 225 | unsigned char cmd[BLK_MAX_CDB]; |
226 | 226 | ||
227 | if (hdr->interface_id != 'S') | 227 | if (hdr->interface_id != 'S') |
228 | return -EINVAL; | 228 | return -EINVAL; |
229 | if (hdr->cmd_len > BLK_MAX_CDB) | 229 | if (hdr->cmd_len > BLK_MAX_CDB) |
230 | return -EINVAL; | 230 | return -EINVAL; |
231 | if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len)) | 231 | if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len)) |
232 | return -EFAULT; | 232 | return -EFAULT; |
233 | if (verify_command(file, cmd)) | 233 | if (verify_command(file, cmd)) |
234 | return -EPERM; | 234 | return -EPERM; |
235 | 235 | ||
236 | if (hdr->dxfer_len > (q->max_hw_sectors << 9)) | 236 | if (hdr->dxfer_len > (q->max_hw_sectors << 9)) |
237 | return -EIO; | 237 | return -EIO; |
238 | 238 | ||
239 | if (hdr->dxfer_len) | 239 | if (hdr->dxfer_len) |
240 | switch (hdr->dxfer_direction) { | 240 | switch (hdr->dxfer_direction) { |
241 | default: | 241 | default: |
242 | return -EINVAL; | 242 | return -EINVAL; |
243 | case SG_DXFER_TO_FROM_DEV: | 243 | case SG_DXFER_TO_FROM_DEV: |
244 | case SG_DXFER_TO_DEV: | 244 | case SG_DXFER_TO_DEV: |
245 | writing = 1; | 245 | writing = 1; |
246 | break; | 246 | break; |
247 | case SG_DXFER_FROM_DEV: | 247 | case SG_DXFER_FROM_DEV: |
248 | break; | 248 | break; |
249 | } | 249 | } |
250 | 250 | ||
251 | rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL); | 251 | rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL); |
252 | if (!rq) | 252 | if (!rq) |
253 | return -ENOMEM; | 253 | return -ENOMEM; |
254 | 254 | ||
255 | if (hdr->iovec_count) { | 255 | if (hdr->iovec_count) { |
256 | const int size = sizeof(struct sg_iovec) * hdr->iovec_count; | 256 | const int size = sizeof(struct sg_iovec) * hdr->iovec_count; |
257 | struct sg_iovec *iov; | 257 | struct sg_iovec *iov; |
258 | 258 | ||
259 | iov = kmalloc(size, GFP_KERNEL); | 259 | iov = kmalloc(size, GFP_KERNEL); |
260 | if (!iov) { | 260 | if (!iov) { |
261 | ret = -ENOMEM; | 261 | ret = -ENOMEM; |
262 | goto out; | 262 | goto out; |
263 | } | 263 | } |
264 | 264 | ||
265 | if (copy_from_user(iov, hdr->dxferp, size)) { | 265 | if (copy_from_user(iov, hdr->dxferp, size)) { |
266 | kfree(iov); | 266 | kfree(iov); |
267 | ret = -EFAULT; | 267 | ret = -EFAULT; |
268 | goto out; | 268 | goto out; |
269 | } | 269 | } |
270 | 270 | ||
271 | ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count); | 271 | ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count); |
272 | kfree(iov); | 272 | kfree(iov); |
273 | } else if (hdr->dxfer_len) | 273 | } else if (hdr->dxfer_len) |
274 | ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); | 274 | ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); |
275 | 275 | ||
276 | if (ret) | 276 | if (ret) |
277 | goto out; | 277 | goto out; |
278 | 278 | ||
279 | /* | 279 | /* |
280 | * fill in request structure | 280 | * fill in request structure |
281 | */ | 281 | */ |
282 | rq->cmd_len = hdr->cmd_len; | 282 | rq->cmd_len = hdr->cmd_len; |
283 | memcpy(rq->cmd, cmd, hdr->cmd_len); | 283 | memcpy(rq->cmd, cmd, hdr->cmd_len); |
284 | if (sizeof(rq->cmd) != hdr->cmd_len) | 284 | if (sizeof(rq->cmd) != hdr->cmd_len) |
285 | memset(rq->cmd + hdr->cmd_len, 0, sizeof(rq->cmd) - hdr->cmd_len); | 285 | memset(rq->cmd + hdr->cmd_len, 0, sizeof(rq->cmd) - hdr->cmd_len); |
286 | 286 | ||
287 | memset(sense, 0, sizeof(sense)); | 287 | memset(sense, 0, sizeof(sense)); |
288 | rq->sense = sense; | 288 | rq->sense = sense; |
289 | rq->sense_len = 0; | 289 | rq->sense_len = 0; |
290 | 290 | ||
291 | rq->flags |= REQ_BLOCK_PC; | 291 | rq->flags |= REQ_BLOCK_PC; |
292 | bio = rq->bio; | 292 | bio = rq->bio; |
293 | 293 | ||
294 | /* | 294 | /* |
295 | * bounce this after holding a reference to the original bio, it's | 295 | * bounce this after holding a reference to the original bio, it's |
296 | * needed for proper unmapping | 296 | * needed for proper unmapping |
297 | */ | 297 | */ |
298 | if (rq->bio) | 298 | if (rq->bio) |
299 | blk_queue_bounce(q, &rq->bio); | 299 | blk_queue_bounce(q, &rq->bio); |
300 | 300 | ||
301 | rq->timeout = (hdr->timeout * HZ) / 1000; | 301 | rq->timeout = (hdr->timeout * HZ) / 1000; |
302 | if (!rq->timeout) | 302 | if (!rq->timeout) |
303 | rq->timeout = q->sg_timeout; | 303 | rq->timeout = q->sg_timeout; |
304 | if (!rq->timeout) | 304 | if (!rq->timeout) |
305 | rq->timeout = BLK_DEFAULT_TIMEOUT; | 305 | rq->timeout = BLK_DEFAULT_TIMEOUT; |
306 | 306 | ||
307 | start_time = jiffies; | 307 | start_time = jiffies; |
308 | 308 | ||
309 | /* ignore return value. All information is passed back to caller | 309 | /* ignore return value. All information is passed back to caller |
310 | * (if he doesn't check that is his problem). | 310 | * (if he doesn't check that is his problem). |
311 | * N.B. a non-zero SCSI status is _not_ necessarily an error. | 311 | * N.B. a non-zero SCSI status is _not_ necessarily an error. |
312 | */ | 312 | */ |
313 | blk_execute_rq(q, bd_disk, rq, 0); | 313 | blk_execute_rq(q, bd_disk, rq, 0); |
314 | 314 | ||
315 | /* write to all output members */ | 315 | /* write to all output members */ |
316 | hdr->status = 0xff & rq->errors; | 316 | hdr->status = 0xff & rq->errors; |
317 | hdr->masked_status = status_byte(rq->errors); | 317 | hdr->masked_status = status_byte(rq->errors); |
318 | hdr->msg_status = msg_byte(rq->errors); | 318 | hdr->msg_status = msg_byte(rq->errors); |
319 | hdr->host_status = host_byte(rq->errors); | 319 | hdr->host_status = host_byte(rq->errors); |
320 | hdr->driver_status = driver_byte(rq->errors); | 320 | hdr->driver_status = driver_byte(rq->errors); |
321 | hdr->info = 0; | 321 | hdr->info = 0; |
322 | if (hdr->masked_status || hdr->host_status || hdr->driver_status) | 322 | if (hdr->masked_status || hdr->host_status || hdr->driver_status) |
323 | hdr->info |= SG_INFO_CHECK; | 323 | hdr->info |= SG_INFO_CHECK; |
324 | hdr->resid = rq->data_len; | 324 | hdr->resid = rq->data_len; |
325 | hdr->duration = ((jiffies - start_time) * 1000) / HZ; | 325 | hdr->duration = ((jiffies - start_time) * 1000) / HZ; |
326 | hdr->sb_len_wr = 0; | 326 | hdr->sb_len_wr = 0; |
327 | 327 | ||
328 | if (rq->sense_len && hdr->sbp) { | 328 | if (rq->sense_len && hdr->sbp) { |
329 | int len = min((unsigned int) hdr->mx_sb_len, rq->sense_len); | 329 | int len = min((unsigned int) hdr->mx_sb_len, rq->sense_len); |
330 | 330 | ||
331 | if (!copy_to_user(hdr->sbp, rq->sense, len)) | 331 | if (!copy_to_user(hdr->sbp, rq->sense, len)) |
332 | hdr->sb_len_wr = len; | 332 | hdr->sb_len_wr = len; |
333 | } | 333 | } |
334 | 334 | ||
335 | if (blk_rq_unmap_user(bio, hdr->dxfer_len)) | 335 | if (blk_rq_unmap_user(bio, hdr->dxfer_len)) |
336 | ret = -EFAULT; | 336 | ret = -EFAULT; |
337 | 337 | ||
338 | /* may not have succeeded, but output values written to control | 338 | /* may not have succeeded, but output values written to control |
339 | * structure (struct sg_io_hdr). */ | 339 | * structure (struct sg_io_hdr). */ |
340 | out: | 340 | out: |
341 | blk_put_request(rq); | 341 | blk_put_request(rq); |
342 | return ret; | 342 | return ret; |
343 | } | 343 | } |
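sg_io() implements the v3 sg_io_hdr interface for any block queue. A userspace sketch issuing a 6-byte INQUIRY through SG_IO; the device path and buffer sizes are illustrative:

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <scsi/sg.h>

int main(void)
{
	unsigned char cdb[6] = { 0x12, 0, 0, 0, 96, 0 };	/* INQUIRY, 96 bytes */
	unsigned char buf[96], sense[32];
	struct sg_io_hdr hdr;
	int fd = open("/dev/sda", O_RDONLY);	/* illustrative device */

	if (fd < 0)
		return 1;

	memset(&hdr, 0, sizeof(hdr));
	hdr.interface_id = 'S';			/* checked first in sg_io() */
	hdr.dxfer_direction = SG_DXFER_FROM_DEV;
	hdr.cmd_len = sizeof(cdb);
	hdr.cmdp = cdb;
	hdr.dxfer_len = sizeof(buf);
	hdr.dxferp = buf;
	hdr.mx_sb_len = sizeof(sense);
	hdr.sbp = sense;
	hdr.timeout = 5000;			/* milliseconds, converted to jiffies */

	if (ioctl(fd, SG_IO, &hdr) < 0)
		return 1;

	/* status, resid, duration etc. were filled in by sg_io() on the way out */
	printf("status 0x%x, vendor %.8s\n", hdr.status, (char *)buf + 8);
	close(fd);
	return 0;
}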
344 | 344 | ||
345 | #define OMAX_SB_LEN 16 /* For backward compatibility */ | 345 | #define OMAX_SB_LEN 16 /* For backward compatibility */ |
346 | 346 | ||
347 | static int sg_scsi_ioctl(struct file *file, request_queue_t *q, | 347 | static int sg_scsi_ioctl(struct file *file, request_queue_t *q, |
348 | struct gendisk *bd_disk, Scsi_Ioctl_Command __user *sic) | 348 | struct gendisk *bd_disk, Scsi_Ioctl_Command __user *sic) |
349 | { | 349 | { |
350 | struct request *rq; | 350 | struct request *rq; |
351 | int err; | 351 | int err; |
352 | unsigned int in_len, out_len, bytes, opcode, cmdlen; | 352 | unsigned int in_len, out_len, bytes, opcode, cmdlen; |
353 | char *buffer = NULL, sense[SCSI_SENSE_BUFFERSIZE]; | 353 | char *buffer = NULL, sense[SCSI_SENSE_BUFFERSIZE]; |
354 | 354 | ||
355 | /* | 355 | /* |
356 | * get in and out lengths, verify they don't exceed a page worth of data | 356 | * get in and out lengths, verify they don't exceed a page worth of data |
357 | */ | 357 | */ |
358 | if (get_user(in_len, &sic->inlen)) | 358 | if (get_user(in_len, &sic->inlen)) |
359 | return -EFAULT; | 359 | return -EFAULT; |
360 | if (get_user(out_len, &sic->outlen)) | 360 | if (get_user(out_len, &sic->outlen)) |
361 | return -EFAULT; | 361 | return -EFAULT; |
362 | if (in_len > PAGE_SIZE || out_len > PAGE_SIZE) | 362 | if (in_len > PAGE_SIZE || out_len > PAGE_SIZE) |
363 | return -EINVAL; | 363 | return -EINVAL; |
364 | if (get_user(opcode, sic->data)) | 364 | if (get_user(opcode, sic->data)) |
365 | return -EFAULT; | 365 | return -EFAULT; |
366 | 366 | ||
367 | bytes = max(in_len, out_len); | 367 | bytes = max(in_len, out_len); |
368 | if (bytes) { | 368 | if (bytes) { |
369 | buffer = kmalloc(bytes, q->bounce_gfp | GFP_USER| __GFP_NOWARN); | 369 | buffer = kmalloc(bytes, q->bounce_gfp | GFP_USER| __GFP_NOWARN); |
370 | if (!buffer) | 370 | if (!buffer) |
371 | return -ENOMEM; | 371 | return -ENOMEM; |
372 | 372 | ||
373 | memset(buffer, 0, bytes); | 373 | memset(buffer, 0, bytes); |
374 | } | 374 | } |
375 | 375 | ||
376 | rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT); | 376 | rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT); |
377 | 377 | ||
378 | cmdlen = COMMAND_SIZE(opcode); | 378 | cmdlen = COMMAND_SIZE(opcode); |
379 | 379 | ||
380 | /* | 380 | /* |
381 | * get command and data to send to device, if any | 381 | * get command and data to send to device, if any |
382 | */ | 382 | */ |
383 | err = -EFAULT; | 383 | err = -EFAULT; |
384 | rq->cmd_len = cmdlen; | 384 | rq->cmd_len = cmdlen; |
385 | if (copy_from_user(rq->cmd, sic->data, cmdlen)) | 385 | if (copy_from_user(rq->cmd, sic->data, cmdlen)) |
386 | goto error; | 386 | goto error; |
387 | 387 | ||
388 | if (copy_from_user(buffer, sic->data + cmdlen, in_len)) | 388 | if (copy_from_user(buffer, sic->data + cmdlen, in_len)) |
389 | goto error; | 389 | goto error; |
390 | 390 | ||
391 | err = verify_command(file, rq->cmd); | 391 | err = verify_command(file, rq->cmd); |
392 | if (err) | 392 | if (err) |
393 | goto error; | 393 | goto error; |
394 | 394 | ||
395 | switch (opcode) { | 395 | switch (opcode) { |
396 | case SEND_DIAGNOSTIC: | 396 | case SEND_DIAGNOSTIC: |
397 | case FORMAT_UNIT: | 397 | case FORMAT_UNIT: |
398 | rq->timeout = FORMAT_UNIT_TIMEOUT; | 398 | rq->timeout = FORMAT_UNIT_TIMEOUT; |
399 | break; | 399 | break; |
400 | case START_STOP: | 400 | case START_STOP: |
401 | rq->timeout = START_STOP_TIMEOUT; | 401 | rq->timeout = START_STOP_TIMEOUT; |
402 | break; | 402 | break; |
403 | case MOVE_MEDIUM: | 403 | case MOVE_MEDIUM: |
404 | rq->timeout = MOVE_MEDIUM_TIMEOUT; | 404 | rq->timeout = MOVE_MEDIUM_TIMEOUT; |
405 | break; | 405 | break; |
406 | case READ_ELEMENT_STATUS: | 406 | case READ_ELEMENT_STATUS: |
407 | rq->timeout = READ_ELEMENT_STATUS_TIMEOUT; | 407 | rq->timeout = READ_ELEMENT_STATUS_TIMEOUT; |
408 | break; | 408 | break; |
409 | case READ_DEFECT_DATA: | 409 | case READ_DEFECT_DATA: |
410 | rq->timeout = READ_DEFECT_DATA_TIMEOUT; | 410 | rq->timeout = READ_DEFECT_DATA_TIMEOUT; |
411 | break; | 411 | break; |
412 | default: | 412 | default: |
413 | rq->timeout = BLK_DEFAULT_TIMEOUT; | 413 | rq->timeout = BLK_DEFAULT_TIMEOUT; |
414 | break; | 414 | break; |
415 | } | 415 | } |
416 | 416 | ||
417 | memset(sense, 0, sizeof(sense)); | 417 | memset(sense, 0, sizeof(sense)); |
418 | rq->sense = sense; | 418 | rq->sense = sense; |
419 | rq->sense_len = 0; | 419 | rq->sense_len = 0; |
420 | 420 | ||
421 | rq->data = buffer; | 421 | rq->data = buffer; |
422 | rq->data_len = bytes; | 422 | rq->data_len = bytes; |
423 | rq->flags |= REQ_BLOCK_PC; | 423 | rq->flags |= REQ_BLOCK_PC; |
424 | 424 | ||
425 | blk_execute_rq(q, bd_disk, rq, 0); | 425 | blk_execute_rq(q, bd_disk, rq, 0); |
426 | err = rq->errors & 0xff; /* only 8 bit SCSI status */ | 426 | err = rq->errors & 0xff; /* only 8 bit SCSI status */ |
427 | if (err) { | 427 | if (err) { |
428 | if (rq->sense_len && rq->sense) { | 428 | if (rq->sense_len && rq->sense) { |
429 | bytes = (OMAX_SB_LEN > rq->sense_len) ? | 429 | bytes = (OMAX_SB_LEN > rq->sense_len) ? |
430 | rq->sense_len : OMAX_SB_LEN; | 430 | rq->sense_len : OMAX_SB_LEN; |
431 | if (copy_to_user(sic->data, rq->sense, bytes)) | 431 | if (copy_to_user(sic->data, rq->sense, bytes)) |
432 | err = -EFAULT; | 432 | err = -EFAULT; |
433 | } | 433 | } |
434 | } else { | 434 | } else { |
435 | if (copy_to_user(sic->data, buffer, out_len)) | 435 | if (copy_to_user(sic->data, buffer, out_len)) |
436 | err = -EFAULT; | 436 | err = -EFAULT; |
437 | } | 437 | } |
438 | 438 | ||
439 | error: | 439 | error: |
440 | kfree(buffer); | 440 | kfree(buffer); |
441 | blk_put_request(rq); | 441 | blk_put_request(rq); |
442 | return err; | 442 | return err; |
443 | } | 443 | } |
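sg_scsi_ioctl() serves the deprecated SCSI_IOCTL_SEND_COMMAND interface, whose argument is a pair of lengths followed immediately by the CDB and any payload. A hedged userspace sketch sending TEST_UNIT_READY; the struct layout is written out locally and assumed to match scsi/scsi_ioctl.h's scsi_ioctl_command, and the device path is illustrative:

#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <scsi/scsi_ioctl.h>	/* SCSI_IOCTL_SEND_COMMAND */

struct legacy_cmd_sketch {
	unsigned int inlen;	/* bytes following the CDB that go to the device */
	unsigned int outlen;	/* bytes expected back in data[] */
	unsigned char data[16];	/* CDB; must also hold up to 16 bytes of sense */
};

int main(void)
{
	struct legacy_cmd_sketch c;
	int fd = open("/dev/sda", O_RDONLY);	/* illustrative device */

	if (fd < 0)
		return 1;

	memset(&c, 0, sizeof(c));
	c.data[0] = 0x00;	/* TEST_UNIT_READY: 6-byte CDB of zeros */

	/* returns the (possibly non-zero) SCSI status or a negative errno;
	 * on a SCSI error up to OMAX_SB_LEN bytes of sense replace data[] */
	return ioctl(fd, SCSI_IOCTL_SEND_COMMAND, &c);
}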
444 | 444 | ||
445 | 445 | ||
446 | /* Send basic block requests */ | 446 | /* Send basic block requests */ |
447 | static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int cmd, int data) | 447 | static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int cmd, int data) |
448 | { | 448 | { |
449 | struct request *rq; | 449 | struct request *rq; |
450 | int err; | 450 | int err; |
451 | 451 | ||
452 | rq = blk_get_request(q, WRITE, __GFP_WAIT); | 452 | rq = blk_get_request(q, WRITE, __GFP_WAIT); |
453 | rq->flags |= REQ_BLOCK_PC; | 453 | rq->flags |= REQ_BLOCK_PC; |
454 | rq->data = NULL; | 454 | rq->data = NULL; |
455 | rq->data_len = 0; | 455 | rq->data_len = 0; |
456 | rq->timeout = BLK_DEFAULT_TIMEOUT; | 456 | rq->timeout = BLK_DEFAULT_TIMEOUT; |
457 | memset(rq->cmd, 0, sizeof(rq->cmd)); | 457 | memset(rq->cmd, 0, sizeof(rq->cmd)); |
458 | rq->cmd[0] = cmd; | 458 | rq->cmd[0] = cmd; |
459 | rq->cmd[4] = data; | 459 | rq->cmd[4] = data; |
460 | rq->cmd_len = 6; | 460 | rq->cmd_len = 6; |
461 | err = blk_execute_rq(q, bd_disk, rq, 0); | 461 | err = blk_execute_rq(q, bd_disk, rq, 0); |
462 | blk_put_request(rq); | 462 | blk_put_request(rq); |
463 | 463 | ||
464 | return err; | 464 | return err; |
465 | } | 465 | } |
466 | 466 | ||
467 | static inline int blk_send_start_stop(request_queue_t *q, struct gendisk *bd_disk, int data) | 467 | static inline int blk_send_start_stop(request_queue_t *q, struct gendisk *bd_disk, int data) |
468 | { | 468 | { |
469 | return __blk_send_generic(q, bd_disk, GPCMD_START_STOP_UNIT, data); | 469 | return __blk_send_generic(q, bd_disk, GPCMD_START_STOP_UNIT, data); |
470 | } | 470 | } |
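CDROMEJECT and CDROMCLOSETRAY in scsi_cmd_ioctl() below reduce to blk_send_start_stop(), i.e. a six-byte GPCMD_START_STOP_UNIT with the load/eject bits in byte 4 (0x02 ejects, 0x03 closes the tray). A userspace sketch for a device whose ioctl path lands in scsi_cmd_ioctl(); the device path is illustrative:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/cdrom.h>	/* CDROMEJECT, CDROMCLOSETRAY */

int main(void)
{
	int fd = open("/dev/sr0", O_RDONLY | O_NONBLOCK);	/* illustrative */

	if (fd < 0)
		return 1;

	/* handled above as __blk_send_generic(q, disk, GPCMD_START_STOP_UNIT, 0x02) */
	ioctl(fd, CDROMEJECT);
	close(fd);
	return 0;
}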
471 | 471 | ||
472 | int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd, void __user *arg) | 472 | int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd, void __user *arg) |
473 | { | 473 | { |
474 | request_queue_t *q; | 474 | request_queue_t *q; |
475 | int err; | 475 | int err; |
476 | 476 | ||
477 | q = bd_disk->queue; | 477 | q = bd_disk->queue; |
478 | if (!q) | 478 | if (!q) |
479 | return -ENXIO; | 479 | return -ENXIO; |
480 | 480 | ||
481 | if (blk_get_queue(q)) | 481 | if (blk_get_queue(q)) |
482 | return -ENXIO; | 482 | return -ENXIO; |
483 | 483 | ||
484 | switch (cmd) { | 484 | switch (cmd) { |
485 | /* | 485 | /* |
486 | * new sgv3 interface | 486 | * new sgv3 interface |
487 | */ | 487 | */ |
488 | case SG_GET_VERSION_NUM: | 488 | case SG_GET_VERSION_NUM: |
489 | err = sg_get_version(arg); | 489 | err = sg_get_version(arg); |
490 | break; | 490 | break; |
491 | case SCSI_IOCTL_GET_IDLUN: | 491 | case SCSI_IOCTL_GET_IDLUN: |
492 | err = scsi_get_idlun(q, arg); | 492 | err = scsi_get_idlun(q, arg); |
493 | break; | 493 | break; |
494 | case SCSI_IOCTL_GET_BUS_NUMBER: | 494 | case SCSI_IOCTL_GET_BUS_NUMBER: |
495 | err = scsi_get_bus(q, arg); | 495 | err = scsi_get_bus(q, arg); |
496 | break; | 496 | break; |
497 | case SG_SET_TIMEOUT: | 497 | case SG_SET_TIMEOUT: |
498 | err = sg_set_timeout(q, arg); | 498 | err = sg_set_timeout(q, arg); |
499 | break; | 499 | break; |
500 | case SG_GET_TIMEOUT: | 500 | case SG_GET_TIMEOUT: |
501 | err = sg_get_timeout(q); | 501 | err = sg_get_timeout(q); |
502 | break; | 502 | break; |
503 | case SG_GET_RESERVED_SIZE: | 503 | case SG_GET_RESERVED_SIZE: |
504 | err = sg_get_reserved_size(q, arg); | 504 | err = sg_get_reserved_size(q, arg); |
505 | break; | 505 | break; |
506 | case SG_SET_RESERVED_SIZE: | 506 | case SG_SET_RESERVED_SIZE: |
507 | err = sg_set_reserved_size(q, arg); | 507 | err = sg_set_reserved_size(q, arg); |
508 | break; | 508 | break; |
509 | case SG_EMULATED_HOST: | 509 | case SG_EMULATED_HOST: |
510 | err = sg_emulated_host(q, arg); | 510 | err = sg_emulated_host(q, arg); |
511 | break; | 511 | break; |
512 | case SG_IO: { | 512 | case SG_IO: { |
513 | struct sg_io_hdr hdr; | 513 | struct sg_io_hdr hdr; |
514 | 514 | ||
515 | err = -EFAULT; | 515 | err = -EFAULT; |
516 | if (copy_from_user(&hdr, arg, sizeof(hdr))) | 516 | if (copy_from_user(&hdr, arg, sizeof(hdr))) |
517 | break; | 517 | break; |
518 | err = sg_io(file, q, bd_disk, &hdr); | 518 | err = sg_io(file, q, bd_disk, &hdr); |
519 | if (err == -EFAULT) | 519 | if (err == -EFAULT) |
520 | break; | 520 | break; |
521 | 521 | ||
522 | if (copy_to_user(arg, &hdr, sizeof(hdr))) | 522 | if (copy_to_user(arg, &hdr, sizeof(hdr))) |
523 | err = -EFAULT; | 523 | err = -EFAULT; |
524 | break; | 524 | break; |
525 | } | 525 | } |
526 | case CDROM_SEND_PACKET: { | 526 | case CDROM_SEND_PACKET: { |
527 | struct cdrom_generic_command cgc; | 527 | struct cdrom_generic_command cgc; |
528 | struct sg_io_hdr hdr; | 528 | struct sg_io_hdr hdr; |
529 | 529 | ||
530 | err = -EFAULT; | 530 | err = -EFAULT; |
531 | if (copy_from_user(&cgc, arg, sizeof(cgc))) | 531 | if (copy_from_user(&cgc, arg, sizeof(cgc))) |
532 | break; | 532 | break; |
533 | cgc.timeout = clock_t_to_jiffies(cgc.timeout); | 533 | cgc.timeout = clock_t_to_jiffies(cgc.timeout); |
534 | memset(&hdr, 0, sizeof(hdr)); | 534 | memset(&hdr, 0, sizeof(hdr)); |
535 | hdr.interface_id = 'S'; | 535 | hdr.interface_id = 'S'; |
536 | hdr.cmd_len = sizeof(cgc.cmd); | 536 | hdr.cmd_len = sizeof(cgc.cmd); |
537 | hdr.dxfer_len = cgc.buflen; | 537 | hdr.dxfer_len = cgc.buflen; |
538 | err = 0; | 538 | err = 0; |
539 | switch (cgc.data_direction) { | 539 | switch (cgc.data_direction) { |
540 | case CGC_DATA_UNKNOWN: | 540 | case CGC_DATA_UNKNOWN: |
541 | hdr.dxfer_direction = SG_DXFER_UNKNOWN; | 541 | hdr.dxfer_direction = SG_DXFER_UNKNOWN; |
542 | break; | 542 | break; |
543 | case CGC_DATA_WRITE: | 543 | case CGC_DATA_WRITE: |
544 | hdr.dxfer_direction = SG_DXFER_TO_DEV; | 544 | hdr.dxfer_direction = SG_DXFER_TO_DEV; |
545 | break; | 545 | break; |
546 | case CGC_DATA_READ: | 546 | case CGC_DATA_READ: |
547 | hdr.dxfer_direction = SG_DXFER_FROM_DEV; | 547 | hdr.dxfer_direction = SG_DXFER_FROM_DEV; |
548 | break; | 548 | break; |
549 | case CGC_DATA_NONE: | 549 | case CGC_DATA_NONE: |
550 | hdr.dxfer_direction = SG_DXFER_NONE; | 550 | hdr.dxfer_direction = SG_DXFER_NONE; |
551 | break; | 551 | break; |
552 | default: | 552 | default: |
553 | err = -EINVAL; | 553 | err = -EINVAL; |
554 | } | 554 | } |
555 | if (err) | 555 | if (err) |
556 | break; | 556 | break; |
557 | 557 | ||
558 | hdr.dxferp = cgc.buffer; | 558 | hdr.dxferp = cgc.buffer; |
559 | hdr.sbp = cgc.sense; | 559 | hdr.sbp = cgc.sense; |
560 | if (hdr.sbp) | 560 | if (hdr.sbp) |
561 | hdr.mx_sb_len = sizeof(struct request_sense); | 561 | hdr.mx_sb_len = sizeof(struct request_sense); |
562 | hdr.timeout = cgc.timeout; | 562 | hdr.timeout = cgc.timeout; |
563 | hdr.cmdp = ((struct cdrom_generic_command __user*) arg)->cmd; | 563 | hdr.cmdp = ((struct cdrom_generic_command __user*) arg)->cmd; |
564 | hdr.cmd_len = sizeof(cgc.cmd); | 564 | hdr.cmd_len = sizeof(cgc.cmd); |
565 | 565 | ||
566 | err = sg_io(file, q, bd_disk, &hdr); | 566 | err = sg_io(file, q, bd_disk, &hdr); |
567 | if (err == -EFAULT) | 567 | if (err == -EFAULT) |
568 | break; | 568 | break; |
569 | 569 | ||
570 | if (hdr.status) | 570 | if (hdr.status) |
571 | err = -EIO; | 571 | err = -EIO; |
572 | 572 | ||
573 | cgc.stat = err; | 573 | cgc.stat = err; |
574 | cgc.buflen = hdr.resid; | 574 | cgc.buflen = hdr.resid; |
575 | if (copy_to_user(arg, &cgc, sizeof(cgc))) | 575 | if (copy_to_user(arg, &cgc, sizeof(cgc))) |
576 | err = -EFAULT; | 576 | err = -EFAULT; |
577 | 577 | ||
578 | break; | 578 | break; |
579 | } | 579 | } |
580 | 580 | ||
581 | /* | 581 | /* |
582 | * old junk scsi send command ioctl | 582 | * old junk scsi send command ioctl |
583 | */ | 583 | */ |
584 | case SCSI_IOCTL_SEND_COMMAND: | 584 | case SCSI_IOCTL_SEND_COMMAND: |
585 | printk(KERN_WARNING "program %s is using a deprecated SCSI ioctl, please convert it to SG_IO\n", current->comm); | 585 | printk(KERN_WARNING "program %s is using a deprecated SCSI ioctl, please convert it to SG_IO\n", current->comm); |
586 | err = -EINVAL; | 586 | err = -EINVAL; |
587 | if (!arg) | 587 | if (!arg) |
588 | break; | 588 | break; |
589 | 589 | ||
590 | err = sg_scsi_ioctl(file, q, bd_disk, arg); | 590 | err = sg_scsi_ioctl(file, q, bd_disk, arg); |
591 | break; | 591 | break; |
592 | case CDROMCLOSETRAY: | 592 | case CDROMCLOSETRAY: |
593 | err = blk_send_start_stop(q, bd_disk, 0x03); | 593 | err = blk_send_start_stop(q, bd_disk, 0x03); |
594 | break; | 594 | break; |
595 | case CDROMEJECT: | 595 | case CDROMEJECT: |
596 | err = blk_send_start_stop(q, bd_disk, 0x02); | 596 | err = blk_send_start_stop(q, bd_disk, 0x02); |
597 | break; | 597 | break; |
598 | default: | 598 | default: |
599 | err = -ENOTTY; | 599 | err = -ENOTTY; |
600 | } | 600 | } |
601 | 601 | ||
602 | blk_put_queue(q); | 602 | blk_put_queue(q); |
603 | return err; | 603 | return err; |
604 | } | 604 | } |
605 | 605 | ||
606 | EXPORT_SYMBOL(scsi_cmd_ioctl); | 606 | EXPORT_SYMBOL(scsi_cmd_ioctl); |
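scsi_cmd_ioctl() is exported so block drivers that speak SCSI/ATAPI can forward the generic SG and CDROM ioctls from their own ioctl methods. A sketch of such a caller; the driver name and surrounding glue are hypothetical, and only the scsi_cmd_ioctl() call itself reflects the export above:

/* Hypothetical driver glue: hand unrecognized ioctls to the block layer's
 * SCSI ioctl handler exported above. */
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>

static int mydrv_ioctl(struct inode *inode, struct file *file,
		       unsigned int cmd, unsigned long arg)
{
	struct gendisk *disk = inode->i_bdev->bd_disk;

	/* driver-private ioctls would be decoded here first ... */

	return scsi_cmd_ioctl(file, disk, cmd, (void __user *)arg);
}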
607 | 607 | ||