Commit 867d1191fca388a79e4bb500dd85a9e871c96b99

Authored by Tejun Heo
Committed by James Bottomley
1 parent beb6617d99

[SCSI] remove requeue feature from blk_insert_request()

blk_insert_request() has an unobvious feature of requeuing a
request while setting REQ_SPECIAL|REQ_SOFTBARRIER.  The SCSI
midlayer was the only user and, as previous patches removed
that usage, remove the feature from blk_insert_request().
Only special requests should be queued with
blk_insert_request().  All requeueing should go through
blk_requeue_request().

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
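
For illustration only (not part of the commit): a minimal sketch of the intended split after this change, assuming the post-patch prototypes blk_insert_request(q, rq, at_head, data) and blk_requeue_request(q, rq). The helper names below are hypothetical, and the usual queue locking rules still apply to the caller.

/* Issue a driver-private (REQ_SPECIAL) command: this is what
 * blk_insert_request() is still for after this patch. */
static void example_issue_special(request_queue_t *q, struct request *rq,
				  void *driver_data)
{
	blk_insert_request(q, rq, 1, driver_data);
}

/* Put an already-issued request back on the queue for a retry:
 * this now goes through blk_requeue_request(), never through
 * blk_insert_request(). */
static void example_requeue(request_queue_t *q, struct request *rq)
{
	blk_requeue_request(q, rq);
}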

Showing 5 changed files with 11 additions and 19 deletions

drivers/block/ll_rw_blk.c
1 /* 1 /*
2 * linux/drivers/block/ll_rw_blk.c 2 * linux/drivers/block/ll_rw_blk.c
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics 5 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
6 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 6 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
7 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> 7 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
8 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000 8 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July2000
9 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 9 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001
10 */ 10 */
11 11
12 /* 12 /*
13 * This handles all read/write requests to block devices 13 * This handles all read/write requests to block devices
14 */ 14 */
15 #include <linux/config.h> 15 #include <linux/config.h>
16 #include <linux/kernel.h> 16 #include <linux/kernel.h>
17 #include <linux/module.h> 17 #include <linux/module.h>
18 #include <linux/backing-dev.h> 18 #include <linux/backing-dev.h>
19 #include <linux/bio.h> 19 #include <linux/bio.h>
20 #include <linux/blkdev.h> 20 #include <linux/blkdev.h>
21 #include <linux/highmem.h> 21 #include <linux/highmem.h>
22 #include <linux/mm.h> 22 #include <linux/mm.h>
23 #include <linux/kernel_stat.h> 23 #include <linux/kernel_stat.h>
24 #include <linux/string.h> 24 #include <linux/string.h>
25 #include <linux/init.h> 25 #include <linux/init.h>
26 #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ 26 #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
27 #include <linux/completion.h> 27 #include <linux/completion.h>
28 #include <linux/slab.h> 28 #include <linux/slab.h>
29 #include <linux/swap.h> 29 #include <linux/swap.h>
30 #include <linux/writeback.h> 30 #include <linux/writeback.h>
31 31
32 /* 32 /*
33 * for max sense size 33 * for max sense size
34 */ 34 */
35 #include <scsi/scsi_cmnd.h> 35 #include <scsi/scsi_cmnd.h>
36 36
37 static void blk_unplug_work(void *data); 37 static void blk_unplug_work(void *data);
38 static void blk_unplug_timeout(unsigned long data); 38 static void blk_unplug_timeout(unsigned long data);
39 39
40 /* 40 /*
41 * For the allocated request tables 41 * For the allocated request tables
42 */ 42 */
43 static kmem_cache_t *request_cachep; 43 static kmem_cache_t *request_cachep;
44 44
45 /* 45 /*
46 * For queue allocation 46 * For queue allocation
47 */ 47 */
48 static kmem_cache_t *requestq_cachep; 48 static kmem_cache_t *requestq_cachep;
49 49
50 /* 50 /*
51 * For io context allocations 51 * For io context allocations
52 */ 52 */
53 static kmem_cache_t *iocontext_cachep; 53 static kmem_cache_t *iocontext_cachep;
54 54
55 static wait_queue_head_t congestion_wqh[2] = { 55 static wait_queue_head_t congestion_wqh[2] = {
56 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), 56 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
57 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) 57 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
58 }; 58 };
59 59
60 /* 60 /*
61 * Controlling structure to kblockd 61 * Controlling structure to kblockd
62 */ 62 */
63 static struct workqueue_struct *kblockd_workqueue; 63 static struct workqueue_struct *kblockd_workqueue;
64 64
65 unsigned long blk_max_low_pfn, blk_max_pfn; 65 unsigned long blk_max_low_pfn, blk_max_pfn;
66 66
67 EXPORT_SYMBOL(blk_max_low_pfn); 67 EXPORT_SYMBOL(blk_max_low_pfn);
68 EXPORT_SYMBOL(blk_max_pfn); 68 EXPORT_SYMBOL(blk_max_pfn);
69 69
70 /* Amount of time in which a process may batch requests */ 70 /* Amount of time in which a process may batch requests */
71 #define BLK_BATCH_TIME (HZ/50UL) 71 #define BLK_BATCH_TIME (HZ/50UL)
72 72
73 /* Number of requests a "batching" process may submit */ 73 /* Number of requests a "batching" process may submit */
74 #define BLK_BATCH_REQ 32 74 #define BLK_BATCH_REQ 32
75 75
76 /* 76 /*
77 * Return the threshold (number of used requests) at which the queue is 77 * Return the threshold (number of used requests) at which the queue is
78 * considered to be congested. It include a little hysteresis to keep the 78 * considered to be congested. It include a little hysteresis to keep the
79 * context switch rate down. 79 * context switch rate down.
80 */ 80 */
81 static inline int queue_congestion_on_threshold(struct request_queue *q) 81 static inline int queue_congestion_on_threshold(struct request_queue *q)
82 { 82 {
83 return q->nr_congestion_on; 83 return q->nr_congestion_on;
84 } 84 }
85 85
86 /* 86 /*
87 * The threshold at which a queue is considered to be uncongested 87 * The threshold at which a queue is considered to be uncongested
88 */ 88 */
89 static inline int queue_congestion_off_threshold(struct request_queue *q) 89 static inline int queue_congestion_off_threshold(struct request_queue *q)
90 { 90 {
91 return q->nr_congestion_off; 91 return q->nr_congestion_off;
92 } 92 }
93 93
94 static void blk_queue_congestion_threshold(struct request_queue *q) 94 static void blk_queue_congestion_threshold(struct request_queue *q)
95 { 95 {
96 int nr; 96 int nr;
97 97
98 nr = q->nr_requests - (q->nr_requests / 8) + 1; 98 nr = q->nr_requests - (q->nr_requests / 8) + 1;
99 if (nr > q->nr_requests) 99 if (nr > q->nr_requests)
100 nr = q->nr_requests; 100 nr = q->nr_requests;
101 q->nr_congestion_on = nr; 101 q->nr_congestion_on = nr;
102 102
103 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; 103 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
104 if (nr < 1) 104 if (nr < 1)
105 nr = 1; 105 nr = 1;
106 q->nr_congestion_off = nr; 106 q->nr_congestion_off = nr;
107 } 107 }
108 108
109 /* 109 /*
110 * A queue has just exitted congestion. Note this in the global counter of 110 * A queue has just exitted congestion. Note this in the global counter of
111 * congested queues, and wake up anyone who was waiting for requests to be 111 * congested queues, and wake up anyone who was waiting for requests to be
112 * put back. 112 * put back.
113 */ 113 */
114 static void clear_queue_congested(request_queue_t *q, int rw) 114 static void clear_queue_congested(request_queue_t *q, int rw)
115 { 115 {
116 enum bdi_state bit; 116 enum bdi_state bit;
117 wait_queue_head_t *wqh = &congestion_wqh[rw]; 117 wait_queue_head_t *wqh = &congestion_wqh[rw];
118 118
119 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; 119 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
120 clear_bit(bit, &q->backing_dev_info.state); 120 clear_bit(bit, &q->backing_dev_info.state);
121 smp_mb__after_clear_bit(); 121 smp_mb__after_clear_bit();
122 if (waitqueue_active(wqh)) 122 if (waitqueue_active(wqh))
123 wake_up(wqh); 123 wake_up(wqh);
124 } 124 }
125 125
126 /* 126 /*
127 * A queue has just entered congestion. Flag that in the queue's VM-visible 127 * A queue has just entered congestion. Flag that in the queue's VM-visible
128 * state flags and increment the global gounter of congested queues. 128 * state flags and increment the global gounter of congested queues.
129 */ 129 */
130 static void set_queue_congested(request_queue_t *q, int rw) 130 static void set_queue_congested(request_queue_t *q, int rw)
131 { 131 {
132 enum bdi_state bit; 132 enum bdi_state bit;
133 133
134 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; 134 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
135 set_bit(bit, &q->backing_dev_info.state); 135 set_bit(bit, &q->backing_dev_info.state);
136 } 136 }
137 137
138 /** 138 /**
139 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info 139 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
140 * @bdev: device 140 * @bdev: device
141 * 141 *
142 * Locates the passed device's request queue and returns the address of its 142 * Locates the passed device's request queue and returns the address of its
143 * backing_dev_info 143 * backing_dev_info
144 * 144 *
145 * Will return NULL if the request queue cannot be located. 145 * Will return NULL if the request queue cannot be located.
146 */ 146 */
147 struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) 147 struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
148 { 148 {
149 struct backing_dev_info *ret = NULL; 149 struct backing_dev_info *ret = NULL;
150 request_queue_t *q = bdev_get_queue(bdev); 150 request_queue_t *q = bdev_get_queue(bdev);
151 151
152 if (q) 152 if (q)
153 ret = &q->backing_dev_info; 153 ret = &q->backing_dev_info;
154 return ret; 154 return ret;
155 } 155 }
156 156
157 EXPORT_SYMBOL(blk_get_backing_dev_info); 157 EXPORT_SYMBOL(blk_get_backing_dev_info);
158 158
159 void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data) 159 void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data)
160 { 160 {
161 q->activity_fn = fn; 161 q->activity_fn = fn;
162 q->activity_data = data; 162 q->activity_data = data;
163 } 163 }
164 164
165 EXPORT_SYMBOL(blk_queue_activity_fn); 165 EXPORT_SYMBOL(blk_queue_activity_fn);
166 166
167 /** 167 /**
168 * blk_queue_prep_rq - set a prepare_request function for queue 168 * blk_queue_prep_rq - set a prepare_request function for queue
169 * @q: queue 169 * @q: queue
170 * @pfn: prepare_request function 170 * @pfn: prepare_request function
171 * 171 *
172 * It's possible for a queue to register a prepare_request callback which 172 * It's possible for a queue to register a prepare_request callback which
173 * is invoked before the request is handed to the request_fn. The goal of 173 * is invoked before the request is handed to the request_fn. The goal of
174 * the function is to prepare a request for I/O, it can be used to build a 174 * the function is to prepare a request for I/O, it can be used to build a
175 * cdb from the request data for instance. 175 * cdb from the request data for instance.
176 * 176 *
177 */ 177 */
178 void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn) 178 void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn)
179 { 179 {
180 q->prep_rq_fn = pfn; 180 q->prep_rq_fn = pfn;
181 } 181 }
182 182
183 EXPORT_SYMBOL(blk_queue_prep_rq); 183 EXPORT_SYMBOL(blk_queue_prep_rq);
184 184
185 /** 185 /**
186 * blk_queue_merge_bvec - set a merge_bvec function for queue 186 * blk_queue_merge_bvec - set a merge_bvec function for queue
187 * @q: queue 187 * @q: queue
188 * @mbfn: merge_bvec_fn 188 * @mbfn: merge_bvec_fn
189 * 189 *
190 * Usually queues have static limitations on the max sectors or segments that 190 * Usually queues have static limitations on the max sectors or segments that
191 * we can put in a request. Stacking drivers may have some settings that 191 * we can put in a request. Stacking drivers may have some settings that
192 * are dynamic, and thus we have to query the queue whether it is ok to 192 * are dynamic, and thus we have to query the queue whether it is ok to
193 * add a new bio_vec to a bio at a given offset or not. If the block device 193 * add a new bio_vec to a bio at a given offset or not. If the block device
194 * has such limitations, it needs to register a merge_bvec_fn to control 194 * has such limitations, it needs to register a merge_bvec_fn to control
195 * the size of bio's sent to it. Note that a block device *must* allow a 195 * the size of bio's sent to it. Note that a block device *must* allow a
196 * single page to be added to an empty bio. The block device driver may want 196 * single page to be added to an empty bio. The block device driver may want
197 * to use the bio_split() function to deal with these bio's. By default 197 * to use the bio_split() function to deal with these bio's. By default
198 * no merge_bvec_fn is defined for a queue, and only the fixed limits are 198 * no merge_bvec_fn is defined for a queue, and only the fixed limits are
199 * honored. 199 * honored.
200 */ 200 */
201 void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn) 201 void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn)
202 { 202 {
203 q->merge_bvec_fn = mbfn; 203 q->merge_bvec_fn = mbfn;
204 } 204 }
205 205
206 EXPORT_SYMBOL(blk_queue_merge_bvec); 206 EXPORT_SYMBOL(blk_queue_merge_bvec);
207 207
208 /** 208 /**
209 * blk_queue_make_request - define an alternate make_request function for a device 209 * blk_queue_make_request - define an alternate make_request function for a device
210 * @q: the request queue for the device to be affected 210 * @q: the request queue for the device to be affected
211 * @mfn: the alternate make_request function 211 * @mfn: the alternate make_request function
212 * 212 *
213 * Description: 213 * Description:
214 * The normal way for &struct bios to be passed to a device 214 * The normal way for &struct bios to be passed to a device
215 * driver is for them to be collected into requests on a request 215 * driver is for them to be collected into requests on a request
216 * queue, and then to allow the device driver to select requests 216 * queue, and then to allow the device driver to select requests
217 * off that queue when it is ready. This works well for many block 217 * off that queue when it is ready. This works well for many block
218 * devices. However some block devices (typically virtual devices 218 * devices. However some block devices (typically virtual devices
219 * such as md or lvm) do not benefit from the processing on the 219 * such as md or lvm) do not benefit from the processing on the
220 * request queue, and are served best by having the requests passed 220 * request queue, and are served best by having the requests passed
221 * directly to them. This can be achieved by providing a function 221 * directly to them. This can be achieved by providing a function
222 * to blk_queue_make_request(). 222 * to blk_queue_make_request().
223 * 223 *
224 * Caveat: 224 * Caveat:
225 * The driver that does this *must* be able to deal appropriately 225 * The driver that does this *must* be able to deal appropriately
226 * with buffers in "highmemory". This can be accomplished by either calling 226 * with buffers in "highmemory". This can be accomplished by either calling
227 * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling 227 * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling
228 * blk_queue_bounce() to create a buffer in normal memory. 228 * blk_queue_bounce() to create a buffer in normal memory.
229 **/ 229 **/
230 void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) 230 void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
231 { 231 {
232 /* 232 /*
233 * set defaults 233 * set defaults
234 */ 234 */
235 q->nr_requests = BLKDEV_MAX_RQ; 235 q->nr_requests = BLKDEV_MAX_RQ;
236 q->max_phys_segments = MAX_PHYS_SEGMENTS; 236 q->max_phys_segments = MAX_PHYS_SEGMENTS;
237 q->max_hw_segments = MAX_HW_SEGMENTS; 237 q->max_hw_segments = MAX_HW_SEGMENTS;
238 q->make_request_fn = mfn; 238 q->make_request_fn = mfn;
239 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 239 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
240 q->backing_dev_info.state = 0; 240 q->backing_dev_info.state = 0;
241 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; 241 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
242 blk_queue_max_sectors(q, MAX_SECTORS); 242 blk_queue_max_sectors(q, MAX_SECTORS);
243 blk_queue_hardsect_size(q, 512); 243 blk_queue_hardsect_size(q, 512);
244 blk_queue_dma_alignment(q, 511); 244 blk_queue_dma_alignment(q, 511);
245 blk_queue_congestion_threshold(q); 245 blk_queue_congestion_threshold(q);
246 q->nr_batching = BLK_BATCH_REQ; 246 q->nr_batching = BLK_BATCH_REQ;
247 247
248 q->unplug_thresh = 4; /* hmm */ 248 q->unplug_thresh = 4; /* hmm */
249 q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */ 249 q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */
250 if (q->unplug_delay == 0) 250 if (q->unplug_delay == 0)
251 q->unplug_delay = 1; 251 q->unplug_delay = 1;
252 252
253 INIT_WORK(&q->unplug_work, blk_unplug_work, q); 253 INIT_WORK(&q->unplug_work, blk_unplug_work, q);
254 254
255 q->unplug_timer.function = blk_unplug_timeout; 255 q->unplug_timer.function = blk_unplug_timeout;
256 q->unplug_timer.data = (unsigned long)q; 256 q->unplug_timer.data = (unsigned long)q;
257 257
258 /* 258 /*
259 * by default assume old behaviour and bounce for any highmem page 259 * by default assume old behaviour and bounce for any highmem page
260 */ 260 */
261 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); 261 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
262 262
263 blk_queue_activity_fn(q, NULL, NULL); 263 blk_queue_activity_fn(q, NULL, NULL);
264 264
265 INIT_LIST_HEAD(&q->drain_list); 265 INIT_LIST_HEAD(&q->drain_list);
266 } 266 }
267 267
268 EXPORT_SYMBOL(blk_queue_make_request); 268 EXPORT_SYMBOL(blk_queue_make_request);
269 269
270 static inline void rq_init(request_queue_t *q, struct request *rq) 270 static inline void rq_init(request_queue_t *q, struct request *rq)
271 { 271 {
272 INIT_LIST_HEAD(&rq->queuelist); 272 INIT_LIST_HEAD(&rq->queuelist);
273 273
274 rq->errors = 0; 274 rq->errors = 0;
275 rq->rq_status = RQ_ACTIVE; 275 rq->rq_status = RQ_ACTIVE;
276 rq->bio = rq->biotail = NULL; 276 rq->bio = rq->biotail = NULL;
277 rq->buffer = NULL; 277 rq->buffer = NULL;
278 rq->ref_count = 1; 278 rq->ref_count = 1;
279 rq->q = q; 279 rq->q = q;
280 rq->waiting = NULL; 280 rq->waiting = NULL;
281 rq->special = NULL; 281 rq->special = NULL;
282 rq->data_len = 0; 282 rq->data_len = 0;
283 rq->data = NULL; 283 rq->data = NULL;
284 rq->sense = NULL; 284 rq->sense = NULL;
285 rq->end_io = NULL; 285 rq->end_io = NULL;
286 rq->end_io_data = NULL; 286 rq->end_io_data = NULL;
287 } 287 }
288 288
289 /** 289 /**
290 * blk_queue_ordered - does this queue support ordered writes 290 * blk_queue_ordered - does this queue support ordered writes
291 * @q: the request queue 291 * @q: the request queue
292 * @flag: see below 292 * @flag: see below
293 * 293 *
294 * Description: 294 * Description:
295 * For journalled file systems, doing ordered writes on a commit 295 * For journalled file systems, doing ordered writes on a commit
296 * block instead of explicitly doing wait_on_buffer (which is bad 296 * block instead of explicitly doing wait_on_buffer (which is bad
297 * for performance) can be a big win. Block drivers supporting this 297 * for performance) can be a big win. Block drivers supporting this
298 * feature should call this function and indicate so. 298 * feature should call this function and indicate so.
299 * 299 *
300 **/ 300 **/
301 void blk_queue_ordered(request_queue_t *q, int flag) 301 void blk_queue_ordered(request_queue_t *q, int flag)
302 { 302 {
303 switch (flag) { 303 switch (flag) {
304 case QUEUE_ORDERED_NONE: 304 case QUEUE_ORDERED_NONE:
305 if (q->flush_rq) 305 if (q->flush_rq)
306 kmem_cache_free(request_cachep, q->flush_rq); 306 kmem_cache_free(request_cachep, q->flush_rq);
307 q->flush_rq = NULL; 307 q->flush_rq = NULL;
308 q->ordered = flag; 308 q->ordered = flag;
309 break; 309 break;
310 case QUEUE_ORDERED_TAG: 310 case QUEUE_ORDERED_TAG:
311 q->ordered = flag; 311 q->ordered = flag;
312 break; 312 break;
313 case QUEUE_ORDERED_FLUSH: 313 case QUEUE_ORDERED_FLUSH:
314 q->ordered = flag; 314 q->ordered = flag;
315 if (!q->flush_rq) 315 if (!q->flush_rq)
316 q->flush_rq = kmem_cache_alloc(request_cachep, 316 q->flush_rq = kmem_cache_alloc(request_cachep,
317 GFP_KERNEL); 317 GFP_KERNEL);
318 break; 318 break;
319 default: 319 default:
320 printk("blk_queue_ordered: bad value %d\n", flag); 320 printk("blk_queue_ordered: bad value %d\n", flag);
321 break; 321 break;
322 } 322 }
323 } 323 }
324 324
325 EXPORT_SYMBOL(blk_queue_ordered); 325 EXPORT_SYMBOL(blk_queue_ordered);
326 326
327 /** 327 /**
328 * blk_queue_issue_flush_fn - set function for issuing a flush 328 * blk_queue_issue_flush_fn - set function for issuing a flush
329 * @q: the request queue 329 * @q: the request queue
330 * @iff: the function to be called issuing the flush 330 * @iff: the function to be called issuing the flush
331 * 331 *
332 * Description: 332 * Description:
333 * If a driver supports issuing a flush command, the support is notified 333 * If a driver supports issuing a flush command, the support is notified
334 * to the block layer by defining it through this call. 334 * to the block layer by defining it through this call.
335 * 335 *
336 **/ 336 **/
337 void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff) 337 void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff)
338 { 338 {
339 q->issue_flush_fn = iff; 339 q->issue_flush_fn = iff;
340 } 340 }
341 341
342 EXPORT_SYMBOL(blk_queue_issue_flush_fn); 342 EXPORT_SYMBOL(blk_queue_issue_flush_fn);
343 343
344 /* 344 /*
345 * Cache flushing for ordered writes handling 345 * Cache flushing for ordered writes handling
346 */ 346 */
347 static void blk_pre_flush_end_io(struct request *flush_rq) 347 static void blk_pre_flush_end_io(struct request *flush_rq)
348 { 348 {
349 struct request *rq = flush_rq->end_io_data; 349 struct request *rq = flush_rq->end_io_data;
350 request_queue_t *q = rq->q; 350 request_queue_t *q = rq->q;
351 351
352 rq->flags |= REQ_BAR_PREFLUSH; 352 rq->flags |= REQ_BAR_PREFLUSH;
353 353
354 if (!flush_rq->errors) 354 if (!flush_rq->errors)
355 elv_requeue_request(q, rq); 355 elv_requeue_request(q, rq);
356 else { 356 else {
357 q->end_flush_fn(q, flush_rq); 357 q->end_flush_fn(q, flush_rq);
358 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); 358 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
359 q->request_fn(q); 359 q->request_fn(q);
360 } 360 }
361 } 361 }
362 362
363 static void blk_post_flush_end_io(struct request *flush_rq) 363 static void blk_post_flush_end_io(struct request *flush_rq)
364 { 364 {
365 struct request *rq = flush_rq->end_io_data; 365 struct request *rq = flush_rq->end_io_data;
366 request_queue_t *q = rq->q; 366 request_queue_t *q = rq->q;
367 367
368 rq->flags |= REQ_BAR_POSTFLUSH; 368 rq->flags |= REQ_BAR_POSTFLUSH;
369 369
370 q->end_flush_fn(q, flush_rq); 370 q->end_flush_fn(q, flush_rq);
371 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); 371 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
372 q->request_fn(q); 372 q->request_fn(q);
373 } 373 }
374 374
375 struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq) 375 struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq)
376 { 376 {
377 struct request *flush_rq = q->flush_rq; 377 struct request *flush_rq = q->flush_rq;
378 378
379 BUG_ON(!blk_barrier_rq(rq)); 379 BUG_ON(!blk_barrier_rq(rq));
380 380
381 if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags)) 381 if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags))
382 return NULL; 382 return NULL;
383 383
384 rq_init(q, flush_rq); 384 rq_init(q, flush_rq);
385 flush_rq->elevator_private = NULL; 385 flush_rq->elevator_private = NULL;
386 flush_rq->flags = REQ_BAR_FLUSH; 386 flush_rq->flags = REQ_BAR_FLUSH;
387 flush_rq->rq_disk = rq->rq_disk; 387 flush_rq->rq_disk = rq->rq_disk;
388 flush_rq->rl = NULL; 388 flush_rq->rl = NULL;
389 389
390 /* 390 /*
391 * prepare_flush returns 0 if no flush is needed, just mark both 391 * prepare_flush returns 0 if no flush is needed, just mark both
392 * pre and post flush as done in that case 392 * pre and post flush as done in that case
393 */ 393 */
394 if (!q->prepare_flush_fn(q, flush_rq)) { 394 if (!q->prepare_flush_fn(q, flush_rq)) {
395 rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH; 395 rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH;
396 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); 396 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
397 return rq; 397 return rq;
398 } 398 }
399 399
400 /* 400 /*
401 * some drivers dequeue requests right away, some only after io 401 * some drivers dequeue requests right away, some only after io
402 * completion. make sure the request is dequeued. 402 * completion. make sure the request is dequeued.
403 */ 403 */
404 if (!list_empty(&rq->queuelist)) 404 if (!list_empty(&rq->queuelist))
405 blkdev_dequeue_request(rq); 405 blkdev_dequeue_request(rq);
406 406
407 elv_deactivate_request(q, rq); 407 elv_deactivate_request(q, rq);
408 408
409 flush_rq->end_io_data = rq; 409 flush_rq->end_io_data = rq;
410 flush_rq->end_io = blk_pre_flush_end_io; 410 flush_rq->end_io = blk_pre_flush_end_io;
411 411
412 __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); 412 __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
413 return flush_rq; 413 return flush_rq;
414 } 414 }
415 415
416 static void blk_start_post_flush(request_queue_t *q, struct request *rq) 416 static void blk_start_post_flush(request_queue_t *q, struct request *rq)
417 { 417 {
418 struct request *flush_rq = q->flush_rq; 418 struct request *flush_rq = q->flush_rq;
419 419
420 BUG_ON(!blk_barrier_rq(rq)); 420 BUG_ON(!blk_barrier_rq(rq));
421 421
422 rq_init(q, flush_rq); 422 rq_init(q, flush_rq);
423 flush_rq->elevator_private = NULL; 423 flush_rq->elevator_private = NULL;
424 flush_rq->flags = REQ_BAR_FLUSH; 424 flush_rq->flags = REQ_BAR_FLUSH;
425 flush_rq->rq_disk = rq->rq_disk; 425 flush_rq->rq_disk = rq->rq_disk;
426 flush_rq->rl = NULL; 426 flush_rq->rl = NULL;
427 427
428 if (q->prepare_flush_fn(q, flush_rq)) { 428 if (q->prepare_flush_fn(q, flush_rq)) {
429 flush_rq->end_io_data = rq; 429 flush_rq->end_io_data = rq;
430 flush_rq->end_io = blk_post_flush_end_io; 430 flush_rq->end_io = blk_post_flush_end_io;
431 431
432 __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); 432 __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
433 q->request_fn(q); 433 q->request_fn(q);
434 } 434 }
435 } 435 }
436 436
437 static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq, 437 static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq,
438 int sectors) 438 int sectors)
439 { 439 {
440 if (sectors > rq->nr_sectors) 440 if (sectors > rq->nr_sectors)
441 sectors = rq->nr_sectors; 441 sectors = rq->nr_sectors;
442 442
443 rq->nr_sectors -= sectors; 443 rq->nr_sectors -= sectors;
444 return rq->nr_sectors; 444 return rq->nr_sectors;
445 } 445 }
446 446
447 static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq, 447 static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq,
448 int sectors, int queue_locked) 448 int sectors, int queue_locked)
449 { 449 {
450 if (q->ordered != QUEUE_ORDERED_FLUSH) 450 if (q->ordered != QUEUE_ORDERED_FLUSH)
451 return 0; 451 return 0;
452 if (!blk_fs_request(rq) || !blk_barrier_rq(rq)) 452 if (!blk_fs_request(rq) || !blk_barrier_rq(rq))
453 return 0; 453 return 0;
454 if (blk_barrier_postflush(rq)) 454 if (blk_barrier_postflush(rq))
455 return 0; 455 return 0;
456 456
457 if (!blk_check_end_barrier(q, rq, sectors)) { 457 if (!blk_check_end_barrier(q, rq, sectors)) {
458 unsigned long flags = 0; 458 unsigned long flags = 0;
459 459
460 if (!queue_locked) 460 if (!queue_locked)
461 spin_lock_irqsave(q->queue_lock, flags); 461 spin_lock_irqsave(q->queue_lock, flags);
462 462
463 blk_start_post_flush(q, rq); 463 blk_start_post_flush(q, rq);
464 464
465 if (!queue_locked) 465 if (!queue_locked)
466 spin_unlock_irqrestore(q->queue_lock, flags); 466 spin_unlock_irqrestore(q->queue_lock, flags);
467 } 467 }
468 468
469 return 1; 469 return 1;
470 } 470 }
471 471
472 /** 472 /**
473 * blk_complete_barrier_rq - complete possible barrier request 473 * blk_complete_barrier_rq - complete possible barrier request
474 * @q: the request queue for the device 474 * @q: the request queue for the device
475 * @rq: the request 475 * @rq: the request
476 * @sectors: number of sectors to complete 476 * @sectors: number of sectors to complete
477 * 477 *
478 * Description: 478 * Description:
479 * Used in driver end_io handling to determine whether to postpone 479 * Used in driver end_io handling to determine whether to postpone
480 * completion of a barrier request until a post flush has been done. This 480 * completion of a barrier request until a post flush has been done. This
481 * is the unlocked variant, used if the caller doesn't already hold the 481 * is the unlocked variant, used if the caller doesn't already hold the
482 * queue lock. 482 * queue lock.
483 **/ 483 **/
484 int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors) 484 int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors)
485 { 485 {
486 return __blk_complete_barrier_rq(q, rq, sectors, 0); 486 return __blk_complete_barrier_rq(q, rq, sectors, 0);
487 } 487 }
488 EXPORT_SYMBOL(blk_complete_barrier_rq); 488 EXPORT_SYMBOL(blk_complete_barrier_rq);
489 489
490 /** 490 /**
491 * blk_complete_barrier_rq_locked - complete possible barrier request 491 * blk_complete_barrier_rq_locked - complete possible barrier request
492 * @q: the request queue for the device 492 * @q: the request queue for the device
493 * @rq: the request 493 * @rq: the request
494 * @sectors: number of sectors to complete 494 * @sectors: number of sectors to complete
495 * 495 *
496 * Description: 496 * Description:
497 * See blk_complete_barrier_rq(). This variant must be used if the caller 497 * See blk_complete_barrier_rq(). This variant must be used if the caller
498 * holds the queue lock. 498 * holds the queue lock.
499 **/ 499 **/
500 int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq, 500 int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq,
501 int sectors) 501 int sectors)
502 { 502 {
503 return __blk_complete_barrier_rq(q, rq, sectors, 1); 503 return __blk_complete_barrier_rq(q, rq, sectors, 1);
504 } 504 }
505 EXPORT_SYMBOL(blk_complete_barrier_rq_locked); 505 EXPORT_SYMBOL(blk_complete_barrier_rq_locked);
506 506
507 /** 507 /**
508 * blk_queue_bounce_limit - set bounce buffer limit for queue 508 * blk_queue_bounce_limit - set bounce buffer limit for queue
509 * @q: the request queue for the device 509 * @q: the request queue for the device
510 * @dma_addr: bus address limit 510 * @dma_addr: bus address limit
511 * 511 *
512 * Description: 512 * Description:
513 * Different hardware can have different requirements as to what pages 513 * Different hardware can have different requirements as to what pages
514 * it can do I/O directly to. A low level driver can call 514 * it can do I/O directly to. A low level driver can call
515 * blk_queue_bounce_limit to have lower memory pages allocated as bounce 515 * blk_queue_bounce_limit to have lower memory pages allocated as bounce
516 * buffers for doing I/O to pages residing above @page. By default 516 * buffers for doing I/O to pages residing above @page. By default
517 * the block layer sets this to the highest numbered "low" memory page. 517 * the block layer sets this to the highest numbered "low" memory page.
518 **/ 518 **/
519 void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) 519 void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
520 { 520 {
521 unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; 521 unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
522 522
523 /* 523 /*
524 * set appropriate bounce gfp mask -- unfortunately we don't have a 524 * set appropriate bounce gfp mask -- unfortunately we don't have a
525 * full 4GB zone, so we have to resort to low memory for any bounces. 525 * full 4GB zone, so we have to resort to low memory for any bounces.
526 * ISA has its own < 16MB zone. 526 * ISA has its own < 16MB zone.
527 */ 527 */
528 if (bounce_pfn < blk_max_low_pfn) { 528 if (bounce_pfn < blk_max_low_pfn) {
529 BUG_ON(dma_addr < BLK_BOUNCE_ISA); 529 BUG_ON(dma_addr < BLK_BOUNCE_ISA);
530 init_emergency_isa_pool(); 530 init_emergency_isa_pool();
531 q->bounce_gfp = GFP_NOIO | GFP_DMA; 531 q->bounce_gfp = GFP_NOIO | GFP_DMA;
532 } else 532 } else
533 q->bounce_gfp = GFP_NOIO; 533 q->bounce_gfp = GFP_NOIO;
534 534
535 q->bounce_pfn = bounce_pfn; 535 q->bounce_pfn = bounce_pfn;
536 } 536 }
537 537
538 EXPORT_SYMBOL(blk_queue_bounce_limit); 538 EXPORT_SYMBOL(blk_queue_bounce_limit);
539 539
540 /** 540 /**
541 * blk_queue_max_sectors - set max sectors for a request for this queue 541 * blk_queue_max_sectors - set max sectors for a request for this queue
542 * @q: the request queue for the device 542 * @q: the request queue for the device
543 * @max_sectors: max sectors in the usual 512b unit 543 * @max_sectors: max sectors in the usual 512b unit
544 * 544 *
545 * Description: 545 * Description:
546 * Enables a low level driver to set an upper limit on the size of 546 * Enables a low level driver to set an upper limit on the size of
547 * received requests. 547 * received requests.
548 **/ 548 **/
549 void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors) 549 void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors)
550 { 550 {
551 if ((max_sectors << 9) < PAGE_CACHE_SIZE) { 551 if ((max_sectors << 9) < PAGE_CACHE_SIZE) {
552 max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); 552 max_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
553 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); 553 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors);
554 } 554 }
555 555
556 q->max_sectors = q->max_hw_sectors = max_sectors; 556 q->max_sectors = q->max_hw_sectors = max_sectors;
557 } 557 }
558 558
559 EXPORT_SYMBOL(blk_queue_max_sectors); 559 EXPORT_SYMBOL(blk_queue_max_sectors);
560 560
561 /** 561 /**
562 * blk_queue_max_phys_segments - set max phys segments for a request for this queue 562 * blk_queue_max_phys_segments - set max phys segments for a request for this queue
563 * @q: the request queue for the device 563 * @q: the request queue for the device
564 * @max_segments: max number of segments 564 * @max_segments: max number of segments
565 * 565 *
566 * Description: 566 * Description:
567 * Enables a low level driver to set an upper limit on the number of 567 * Enables a low level driver to set an upper limit on the number of
568 * physical data segments in a request. This would be the largest sized 568 * physical data segments in a request. This would be the largest sized
569 * scatter list the driver could handle. 569 * scatter list the driver could handle.
570 **/ 570 **/
571 void blk_queue_max_phys_segments(request_queue_t *q, unsigned short max_segments) 571 void blk_queue_max_phys_segments(request_queue_t *q, unsigned short max_segments)
572 { 572 {
573 if (!max_segments) { 573 if (!max_segments) {
574 max_segments = 1; 574 max_segments = 1;
575 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); 575 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);
576 } 576 }
577 577
578 q->max_phys_segments = max_segments; 578 q->max_phys_segments = max_segments;
579 } 579 }
580 580
581 EXPORT_SYMBOL(blk_queue_max_phys_segments); 581 EXPORT_SYMBOL(blk_queue_max_phys_segments);
582 582
583 /** 583 /**
584 * blk_queue_max_hw_segments - set max hw segments for a request for this queue 584 * blk_queue_max_hw_segments - set max hw segments for a request for this queue
585 * @q: the request queue for the device 585 * @q: the request queue for the device
586 * @max_segments: max number of segments 586 * @max_segments: max number of segments
587 * 587 *
588 * Description: 588 * Description:
589 * Enables a low level driver to set an upper limit on the number of 589 * Enables a low level driver to set an upper limit on the number of
590 * hw data segments in a request. This would be the largest number of 590 * hw data segments in a request. This would be the largest number of
591 * address/length pairs the host adapter can actually give as once 591 * address/length pairs the host adapter can actually give as once
592 * to the device. 592 * to the device.
593 **/ 593 **/
594 void blk_queue_max_hw_segments(request_queue_t *q, unsigned short max_segments) 594 void blk_queue_max_hw_segments(request_queue_t *q, unsigned short max_segments)
595 { 595 {
596 if (!max_segments) { 596 if (!max_segments) {
597 max_segments = 1; 597 max_segments = 1;
598 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); 598 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);
599 } 599 }
600 600
601 q->max_hw_segments = max_segments; 601 q->max_hw_segments = max_segments;
602 } 602 }
603 603
604 EXPORT_SYMBOL(blk_queue_max_hw_segments); 604 EXPORT_SYMBOL(blk_queue_max_hw_segments);
605 605
606 /** 606 /**
607 * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg 607 * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg
608 * @q: the request queue for the device 608 * @q: the request queue for the device
609 * @max_size: max size of segment in bytes 609 * @max_size: max size of segment in bytes
610 * 610 *
611 * Description: 611 * Description:
612 * Enables a low level driver to set an upper limit on the size of a 612 * Enables a low level driver to set an upper limit on the size of a
613 * coalesced segment 613 * coalesced segment
614 **/ 614 **/
615 void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size) 615 void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size)
616 { 616 {
617 if (max_size < PAGE_CACHE_SIZE) { 617 if (max_size < PAGE_CACHE_SIZE) {
618 max_size = PAGE_CACHE_SIZE; 618 max_size = PAGE_CACHE_SIZE;
619 printk("%s: set to minimum %d\n", __FUNCTION__, max_size); 619 printk("%s: set to minimum %d\n", __FUNCTION__, max_size);
620 } 620 }
621 621
622 q->max_segment_size = max_size; 622 q->max_segment_size = max_size;
623 } 623 }
624 624
625 EXPORT_SYMBOL(blk_queue_max_segment_size); 625 EXPORT_SYMBOL(blk_queue_max_segment_size);
626 626
627 /** 627 /**
628 * blk_queue_hardsect_size - set hardware sector size for the queue 628 * blk_queue_hardsect_size - set hardware sector size for the queue
629 * @q: the request queue for the device 629 * @q: the request queue for the device
630 * @size: the hardware sector size, in bytes 630 * @size: the hardware sector size, in bytes
631 * 631 *
632 * Description: 632 * Description:
633 * This should typically be set to the lowest possible sector size 633 * This should typically be set to the lowest possible sector size
634 * that the hardware can operate on (possible without reverting to 634 * that the hardware can operate on (possible without reverting to
635 * even internal read-modify-write operations). Usually the default 635 * even internal read-modify-write operations). Usually the default
636 * of 512 covers most hardware. 636 * of 512 covers most hardware.
637 **/ 637 **/
638 void blk_queue_hardsect_size(request_queue_t *q, unsigned short size) 638 void blk_queue_hardsect_size(request_queue_t *q, unsigned short size)
639 { 639 {
640 q->hardsect_size = size; 640 q->hardsect_size = size;
641 } 641 }
642 642
643 EXPORT_SYMBOL(blk_queue_hardsect_size); 643 EXPORT_SYMBOL(blk_queue_hardsect_size);
644 644
645 /* 645 /*
646 * Returns the minimum that is _not_ zero, unless both are zero. 646 * Returns the minimum that is _not_ zero, unless both are zero.
647 */ 647 */
648 #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) 648 #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
649 649
650 /** 650 /**
651 * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers 651 * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers
652 * @t: the stacking driver (top) 652 * @t: the stacking driver (top)
653 * @b: the underlying device (bottom) 653 * @b: the underlying device (bottom)
654 **/ 654 **/
655 void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b) 655 void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b)
656 { 656 {
657 /* zero is "infinity" */ 657 /* zero is "infinity" */
658 t->max_sectors = t->max_hw_sectors = 658 t->max_sectors = t->max_hw_sectors =
659 min_not_zero(t->max_sectors,b->max_sectors); 659 min_not_zero(t->max_sectors,b->max_sectors);
660 660
661 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); 661 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments);
662 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); 662 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
663 t->max_segment_size = min(t->max_segment_size,b->max_segment_size); 663 t->max_segment_size = min(t->max_segment_size,b->max_segment_size);
664 t->hardsect_size = max(t->hardsect_size,b->hardsect_size); 664 t->hardsect_size = max(t->hardsect_size,b->hardsect_size);
665 } 665 }
666 666
667 EXPORT_SYMBOL(blk_queue_stack_limits); 667 EXPORT_SYMBOL(blk_queue_stack_limits);
668 668
669 /** 669 /**
670 * blk_queue_segment_boundary - set boundary rules for segment merging 670 * blk_queue_segment_boundary - set boundary rules for segment merging
671 * @q: the request queue for the device 671 * @q: the request queue for the device
672 * @mask: the memory boundary mask 672 * @mask: the memory boundary mask
673 **/ 673 **/
674 void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask) 674 void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask)
675 { 675 {
676 if (mask < PAGE_CACHE_SIZE - 1) { 676 if (mask < PAGE_CACHE_SIZE - 1) {
677 mask = PAGE_CACHE_SIZE - 1; 677 mask = PAGE_CACHE_SIZE - 1;
678 printk("%s: set to minimum %lx\n", __FUNCTION__, mask); 678 printk("%s: set to minimum %lx\n", __FUNCTION__, mask);
679 } 679 }
680 680
681 q->seg_boundary_mask = mask; 681 q->seg_boundary_mask = mask;
682 } 682 }
683 683
684 EXPORT_SYMBOL(blk_queue_segment_boundary); 684 EXPORT_SYMBOL(blk_queue_segment_boundary);
685 685
686 /** 686 /**
687 * blk_queue_dma_alignment - set dma length and memory alignment 687 * blk_queue_dma_alignment - set dma length and memory alignment
688 * @q: the request queue for the device 688 * @q: the request queue for the device
689 * @mask: alignment mask 689 * @mask: alignment mask
690 * 690 *
691 * description: 691 * description:
692 * set required memory and length aligment for direct dma transactions. 692 * set required memory and length aligment for direct dma transactions.
693 * this is used when buiding direct io requests for the queue. 693 * this is used when buiding direct io requests for the queue.
694 * 694 *
695 **/ 695 **/
696 void blk_queue_dma_alignment(request_queue_t *q, int mask) 696 void blk_queue_dma_alignment(request_queue_t *q, int mask)
697 { 697 {
698 q->dma_alignment = mask; 698 q->dma_alignment = mask;
699 } 699 }
700 700
701 EXPORT_SYMBOL(blk_queue_dma_alignment); 701 EXPORT_SYMBOL(blk_queue_dma_alignment);
702 702
703 /** 703 /**
704 * blk_queue_find_tag - find a request by its tag and queue 704 * blk_queue_find_tag - find a request by its tag and queue
705 * 705 *
706 * @q: The request queue for the device 706 * @q: The request queue for the device
707 * @tag: The tag of the request 707 * @tag: The tag of the request
708 * 708 *
709 * Notes: 709 * Notes:
710 * Should be used when a device returns a tag and you want to match 710 * Should be used when a device returns a tag and you want to match
711 * it with a request. 711 * it with a request.
712 * 712 *
713 * no locks need be held. 713 * no locks need be held.
714 **/ 714 **/
715 struct request *blk_queue_find_tag(request_queue_t *q, int tag) 715 struct request *blk_queue_find_tag(request_queue_t *q, int tag)
716 { 716 {
717 struct blk_queue_tag *bqt = q->queue_tags; 717 struct blk_queue_tag *bqt = q->queue_tags;
718 718
719 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) 719 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))
720 return NULL; 720 return NULL;
721 721
722 return bqt->tag_index[tag]; 722 return bqt->tag_index[tag];
723 } 723 }
724 724
725 EXPORT_SYMBOL(blk_queue_find_tag); 725 EXPORT_SYMBOL(blk_queue_find_tag);
726 726
727 /** 727 /**
728 * __blk_queue_free_tags - release tag maintenance info 728 * __blk_queue_free_tags - release tag maintenance info
729 * @q: the request queue for the device 729 * @q: the request queue for the device
730 * 730 *
731 * Notes: 731 * Notes:
732 * blk_cleanup_queue() will take care of calling this function, if tagging 732 * blk_cleanup_queue() will take care of calling this function, if tagging
733 * has been used. So there's no need to call this directly. 733 * has been used. So there's no need to call this directly.
734 **/ 734 **/
735 static void __blk_queue_free_tags(request_queue_t *q) 735 static void __blk_queue_free_tags(request_queue_t *q)
736 { 736 {
737 struct blk_queue_tag *bqt = q->queue_tags; 737 struct blk_queue_tag *bqt = q->queue_tags;
738 738
739 if (!bqt) 739 if (!bqt)
740 return; 740 return;
741 741
742 if (atomic_dec_and_test(&bqt->refcnt)) { 742 if (atomic_dec_and_test(&bqt->refcnt)) {
743 BUG_ON(bqt->busy); 743 BUG_ON(bqt->busy);
744 BUG_ON(!list_empty(&bqt->busy_list)); 744 BUG_ON(!list_empty(&bqt->busy_list));
745 745
746 kfree(bqt->tag_index); 746 kfree(bqt->tag_index);
747 bqt->tag_index = NULL; 747 bqt->tag_index = NULL;
748 748
749 kfree(bqt->tag_map); 749 kfree(bqt->tag_map);
750 bqt->tag_map = NULL; 750 bqt->tag_map = NULL;
751 751
752 kfree(bqt); 752 kfree(bqt);
753 } 753 }
754 754
755 q->queue_tags = NULL; 755 q->queue_tags = NULL;
756 q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); 756 q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED);
757 } 757 }
758 758
759 /** 759 /**
760 * blk_queue_free_tags - release tag maintenance info 760 * blk_queue_free_tags - release tag maintenance info
761 * @q: the request queue for the device 761 * @q: the request queue for the device
762 * 762 *
763 * Notes: 763 * Notes:
764 * This is used to disabled tagged queuing to a device, yet leave 764 * This is used to disabled tagged queuing to a device, yet leave
765 * queue in function. 765 * queue in function.
766 **/ 766 **/
767 void blk_queue_free_tags(request_queue_t *q) 767 void blk_queue_free_tags(request_queue_t *q)
768 { 768 {
769 clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); 769 clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
770 } 770 }
771 771
772 EXPORT_SYMBOL(blk_queue_free_tags); 772 EXPORT_SYMBOL(blk_queue_free_tags);
773 773
774 static int 774 static int
775 init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth) 775 init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth)
776 { 776 {
777 int bits, i; 777 int bits, i;
778 struct request **tag_index; 778 struct request **tag_index;
779 unsigned long *tag_map; 779 unsigned long *tag_map;
780 780
781 if (depth > q->nr_requests * 2) { 781 if (depth > q->nr_requests * 2) {
782 depth = q->nr_requests * 2; 782 depth = q->nr_requests * 2;
783 printk(KERN_ERR "%s: adjusted depth to %d\n", 783 printk(KERN_ERR "%s: adjusted depth to %d\n",
784 __FUNCTION__, depth); 784 __FUNCTION__, depth);
785 } 785 }
786 786
787 tag_index = kmalloc(depth * sizeof(struct request *), GFP_ATOMIC); 787 tag_index = kmalloc(depth * sizeof(struct request *), GFP_ATOMIC);
788 if (!tag_index) 788 if (!tag_index)
789 goto fail; 789 goto fail;
790 790
791 bits = (depth / BLK_TAGS_PER_LONG) + 1; 791 bits = (depth / BLK_TAGS_PER_LONG) + 1;
792 tag_map = kmalloc(bits * sizeof(unsigned long), GFP_ATOMIC); 792 tag_map = kmalloc(bits * sizeof(unsigned long), GFP_ATOMIC);
793 if (!tag_map) 793 if (!tag_map)
794 goto fail; 794 goto fail;
795 795
796 memset(tag_index, 0, depth * sizeof(struct request *)); 796 memset(tag_index, 0, depth * sizeof(struct request *));
797 memset(tag_map, 0, bits * sizeof(unsigned long)); 797 memset(tag_map, 0, bits * sizeof(unsigned long));
798 tags->max_depth = depth; 798 tags->max_depth = depth;
799 tags->real_max_depth = bits * BITS_PER_LONG; 799 tags->real_max_depth = bits * BITS_PER_LONG;
800 tags->tag_index = tag_index; 800 tags->tag_index = tag_index;
801 tags->tag_map = tag_map; 801 tags->tag_map = tag_map;
802 802
803 /* 803 /*
804 * set the upper bits if the depth isn't a multiple of the word size 804 * set the upper bits if the depth isn't a multiple of the word size
805 */ 805 */
806 for (i = depth; i < bits * BLK_TAGS_PER_LONG; i++) 806 for (i = depth; i < bits * BLK_TAGS_PER_LONG; i++)
807 __set_bit(i, tag_map); 807 __set_bit(i, tag_map);
808 808
809 return 0; 809 return 0;
810 fail: 810 fail:
811 kfree(tag_index); 811 kfree(tag_index);
812 return -ENOMEM; 812 return -ENOMEM;
813 } 813 }
814 814
815 /** 815 /**
816 * blk_queue_init_tags - initialize the queue tag info 816 * blk_queue_init_tags - initialize the queue tag info
817 * @q: the request queue for the device 817 * @q: the request queue for the device
818 * @depth: the maximum queue depth supported 818 * @depth: the maximum queue depth supported
819 * @tags: the tag to use 819 * @tags: the tag to use
820 **/ 820 **/
821 int blk_queue_init_tags(request_queue_t *q, int depth, 821 int blk_queue_init_tags(request_queue_t *q, int depth,
822 struct blk_queue_tag *tags) 822 struct blk_queue_tag *tags)
823 { 823 {
824 int rc; 824 int rc;
825 825
826 BUG_ON(tags && q->queue_tags && tags != q->queue_tags); 826 BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
827 827
828 if (!tags && !q->queue_tags) { 828 if (!tags && !q->queue_tags) {
829 tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); 829 tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);
830 if (!tags) 830 if (!tags)
831 goto fail; 831 goto fail;
832 832
833 if (init_tag_map(q, tags, depth)) 833 if (init_tag_map(q, tags, depth))
834 goto fail; 834 goto fail;
835 835
836 INIT_LIST_HEAD(&tags->busy_list); 836 INIT_LIST_HEAD(&tags->busy_list);
837 tags->busy = 0; 837 tags->busy = 0;
838 atomic_set(&tags->refcnt, 1); 838 atomic_set(&tags->refcnt, 1);
839 } else if (q->queue_tags) { 839 } else if (q->queue_tags) {
840 if ((rc = blk_queue_resize_tags(q, depth))) 840 if ((rc = blk_queue_resize_tags(q, depth)))
841 return rc; 841 return rc;
842 set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); 842 set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
843 return 0; 843 return 0;
844 } else 844 } else
845 atomic_inc(&tags->refcnt); 845 atomic_inc(&tags->refcnt);
846 846
847 /* 847 /*
848 * assign it, all done 848 * assign it, all done
849 */ 849 */
850 q->queue_tags = tags; 850 q->queue_tags = tags;
851 q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); 851 q->queue_flags |= (1 << QUEUE_FLAG_QUEUED);
852 return 0; 852 return 0;
853 fail: 853 fail:
854 kfree(tags); 854 kfree(tags);
855 return -ENOMEM; 855 return -ENOMEM;
856 } 856 }
857 857
858 EXPORT_SYMBOL(blk_queue_init_tags); 858 EXPORT_SYMBOL(blk_queue_init_tags);
859 859
860 /** 860 /**
861 * blk_queue_resize_tags - change the queueing depth 861 * blk_queue_resize_tags - change the queueing depth
862 * @q: the request queue for the device 862 * @q: the request queue for the device
863 * @new_depth: the new max command queueing depth 863 * @new_depth: the new max command queueing depth
864 * 864 *
865 * Notes: 865 * Notes:
866 * Must be called with the queue lock held. 866 * Must be called with the queue lock held.
867 **/ 867 **/
868 int blk_queue_resize_tags(request_queue_t *q, int new_depth) 868 int blk_queue_resize_tags(request_queue_t *q, int new_depth)
869 { 869 {
870 struct blk_queue_tag *bqt = q->queue_tags; 870 struct blk_queue_tag *bqt = q->queue_tags;
871 struct request **tag_index; 871 struct request **tag_index;
872 unsigned long *tag_map; 872 unsigned long *tag_map;
873 int bits, max_depth; 873 int bits, max_depth;
874 874
875 if (!bqt) 875 if (!bqt)
876 return -ENXIO; 876 return -ENXIO;
877 877
878 /* 878 /*
879 * don't bother sizing down 879 * don't bother sizing down
880 */ 880 */
881 if (new_depth <= bqt->real_max_depth) { 881 if (new_depth <= bqt->real_max_depth) {
882 bqt->max_depth = new_depth; 882 bqt->max_depth = new_depth;
883 return 0; 883 return 0;
884 } 884 }
885 885
886 /* 886 /*
887 * save the old state info, so we can copy it back 887 * save the old state info, so we can copy it back
888 */ 888 */
889 tag_index = bqt->tag_index; 889 tag_index = bqt->tag_index;
890 tag_map = bqt->tag_map; 890 tag_map = bqt->tag_map;
891 max_depth = bqt->real_max_depth; 891 max_depth = bqt->real_max_depth;
892 892
893 if (init_tag_map(q, bqt, new_depth)) 893 if (init_tag_map(q, bqt, new_depth))
894 return -ENOMEM; 894 return -ENOMEM;
895 895
896 memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); 896 memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *));
897 bits = max_depth / BLK_TAGS_PER_LONG; 897 bits = max_depth / BLK_TAGS_PER_LONG;
898 memcpy(bqt->tag_map, tag_map, bits * sizeof(unsigned long)); 898 memcpy(bqt->tag_map, tag_map, bits * sizeof(unsigned long));
899 899
900 kfree(tag_index); 900 kfree(tag_index);
901 kfree(tag_map); 901 kfree(tag_map);
902 return 0; 902 return 0;
903 } 903 }
904 904
905 EXPORT_SYMBOL(blk_queue_resize_tags); 905 EXPORT_SYMBOL(blk_queue_resize_tags);
906 906
907 /** 907 /**
908 * blk_queue_end_tag - end tag operations for a request 908 * blk_queue_end_tag - end tag operations for a request
909 * @q: the request queue for the device 909 * @q: the request queue for the device
910 * @rq: the request that has completed 910 * @rq: the request that has completed
911 * 911 *
912 * Description: 912 * Description:
913 * Typically called when end_that_request_first() returns 0, meaning 913 * Typically called when end_that_request_first() returns 0, meaning
914 * all transfers have been done for a request. It's important to call 914 * all transfers have been done for a request. It's important to call
915 * this function before end_that_request_last(), as that will put the 915 * this function before end_that_request_last(), as that will put the
916 * request back on the free list thus corrupting the internal tag list. 916 * request back on the free list thus corrupting the internal tag list.
917 * 917 *
918 * Notes: 918 * Notes:
919 * queue lock must be held. 919 * queue lock must be held.
920 **/ 920 **/
921 void blk_queue_end_tag(request_queue_t *q, struct request *rq) 921 void blk_queue_end_tag(request_queue_t *q, struct request *rq)
922 { 922 {
923 struct blk_queue_tag *bqt = q->queue_tags; 923 struct blk_queue_tag *bqt = q->queue_tags;
924 int tag = rq->tag; 924 int tag = rq->tag;
925 925
926 BUG_ON(tag == -1); 926 BUG_ON(tag == -1);
927 927
928 if (unlikely(tag >= bqt->real_max_depth)) 928 if (unlikely(tag >= bqt->real_max_depth))
929 return; 929 return;
930 930
931 if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) { 931 if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) {
932 printk("attempt to clear non-busy tag (%d)\n", tag); 932 printk("attempt to clear non-busy tag (%d)\n", tag);
933 return; 933 return;
934 } 934 }
935 935
936 list_del_init(&rq->queuelist); 936 list_del_init(&rq->queuelist);
937 rq->flags &= ~REQ_QUEUED; 937 rq->flags &= ~REQ_QUEUED;
938 rq->tag = -1; 938 rq->tag = -1;
939 939
940 if (unlikely(bqt->tag_index[tag] == NULL)) 940 if (unlikely(bqt->tag_index[tag] == NULL))
941 printk("tag %d is missing\n", tag); 941 printk("tag %d is missing\n", tag);
942 942
943 bqt->tag_index[tag] = NULL; 943 bqt->tag_index[tag] = NULL;
944 bqt->busy--; 944 bqt->busy--;
945 } 945 }
946 946
947 EXPORT_SYMBOL(blk_queue_end_tag); 947 EXPORT_SYMBOL(blk_queue_end_tag);
948 948
949 /** 949 /**
950 * blk_queue_start_tag - find a free tag and assign it 950 * blk_queue_start_tag - find a free tag and assign it
951 * @q: the request queue for the device 951 * @q: the request queue for the device
952 * @rq: the block request that needs tagging 952 * @rq: the block request that needs tagging
953 * 953 *
954 * Description: 954 * Description:
955 * This can either be used as a stand-alone helper, or possibly be 955 * This can either be used as a stand-alone helper, or possibly be
956 * assigned as the queue &prep_rq_fn (in which case &struct request 956 * assigned as the queue &prep_rq_fn (in which case &struct request
957 * automagically gets a tag assigned). Note that this function 957 * automagically gets a tag assigned). Note that this function
958 * assumes that any type of request can be queued! If this is not 958 * assumes that any type of request can be queued! If this is not
959 * true for your device, you must check the request type before 959 * true for your device, you must check the request type before
960 * calling this function. The request will also be removed from 960 * calling this function. The request will also be removed from
961 * the request queue, so it's the driver's responsibility to re-add 961 * the request queue, so it's the driver's responsibility to re-add
962 * it if it should need to be restarted for some reason. 962 * it if it should need to be restarted for some reason.
963 * 963 *
964 * Notes: 964 * Notes:
965 * queue lock must be held. 965 * queue lock must be held.
966 **/ 966 **/
967 int blk_queue_start_tag(request_queue_t *q, struct request *rq) 967 int blk_queue_start_tag(request_queue_t *q, struct request *rq)
968 { 968 {
969 struct blk_queue_tag *bqt = q->queue_tags; 969 struct blk_queue_tag *bqt = q->queue_tags;
970 unsigned long *map = bqt->tag_map; 970 unsigned long *map = bqt->tag_map;
971 int tag = 0; 971 int tag = 0;
972 972
973 if (unlikely((rq->flags & REQ_QUEUED))) { 973 if (unlikely((rq->flags & REQ_QUEUED))) {
974 printk(KERN_ERR 974 printk(KERN_ERR
975 "request %p for device [%s] already tagged %d", 975 "request %p for device [%s] already tagged %d",
976 rq, rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag); 976 rq, rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag);
977 BUG(); 977 BUG();
978 } 978 }
979 979
980 for (map = bqt->tag_map; *map == -1UL; map++) { 980 for (map = bqt->tag_map; *map == -1UL; map++) {
981 tag += BLK_TAGS_PER_LONG; 981 tag += BLK_TAGS_PER_LONG;
982 982
983 if (tag >= bqt->max_depth) 983 if (tag >= bqt->max_depth)
984 return 1; 984 return 1;
985 } 985 }
986 986
987 tag += ffz(*map); 987 tag += ffz(*map);
988 __set_bit(tag, bqt->tag_map); 988 __set_bit(tag, bqt->tag_map);
989 989
990 rq->flags |= REQ_QUEUED; 990 rq->flags |= REQ_QUEUED;
991 rq->tag = tag; 991 rq->tag = tag;
992 bqt->tag_index[tag] = rq; 992 bqt->tag_index[tag] = rq;
993 blkdev_dequeue_request(rq); 993 blkdev_dequeue_request(rq);
994 list_add(&rq->queuelist, &bqt->busy_list); 994 list_add(&rq->queuelist, &bqt->busy_list);
995 bqt->busy++; 995 bqt->busy++;
996 return 0; 996 return 0;
997 } 997 }
998 998
999 EXPORT_SYMBOL(blk_queue_start_tag); 999 EXPORT_SYMBOL(blk_queue_start_tag);
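As a usage illustration (again a sketch, not from this patch), a tagged driver's request_fn might look roughly like this; mydev_issue_to_hw() is a hypothetical hardware-issue helper.

/* Called by the block layer with q->queue_lock held. */
static void mydev_tagged_request_fn(request_queue_t *q)
{
        struct request *rq;

        while ((rq = elv_next_request(q)) != NULL) {
                if (blk_queue_start_tag(q, rq))
                        break;                  /* no free tag, retry later */
                /* rq has been dequeued for us; rq->tag identifies it */
                mydev_issue_to_hw(rq);          /* hypothetical */
        }
}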
1000 1000
1001 /** 1001 /**
1002 * blk_queue_invalidate_tags - invalidate all pending tags 1002 * blk_queue_invalidate_tags - invalidate all pending tags
1003 * @q: the request queue for the device 1003 * @q: the request queue for the device
1004 * 1004 *
1005 * Description: 1005 * Description:
1006 * Hardware conditions may dictate a need to stop all pending requests. 1006 * Hardware conditions may dictate a need to stop all pending requests.
1007 * In this case, we will safely clear the block side of the tag queue and 1007 * In this case, we will safely clear the block side of the tag queue and
1008 * re-add all requests to the request queue in the right order. 1008 * re-add all requests to the request queue in the right order.
1009 * 1009 *
1010 * Notes: 1010 * Notes:
1011 * queue lock must be held. 1011 * queue lock must be held.
1012 **/ 1012 **/
1013 void blk_queue_invalidate_tags(request_queue_t *q) 1013 void blk_queue_invalidate_tags(request_queue_t *q)
1014 { 1014 {
1015 struct blk_queue_tag *bqt = q->queue_tags; 1015 struct blk_queue_tag *bqt = q->queue_tags;
1016 struct list_head *tmp, *n; 1016 struct list_head *tmp, *n;
1017 struct request *rq; 1017 struct request *rq;
1018 1018
1019 list_for_each_safe(tmp, n, &bqt->busy_list) { 1019 list_for_each_safe(tmp, n, &bqt->busy_list) {
1020 rq = list_entry_rq(tmp); 1020 rq = list_entry_rq(tmp);
1021 1021
1022 if (rq->tag == -1) { 1022 if (rq->tag == -1) {
1023 printk("bad tag found on list\n"); 1023 printk("bad tag found on list\n");
1024 list_del_init(&rq->queuelist); 1024 list_del_init(&rq->queuelist);
1025 rq->flags &= ~REQ_QUEUED; 1025 rq->flags &= ~REQ_QUEUED;
1026 } else 1026 } else
1027 blk_queue_end_tag(q, rq); 1027 blk_queue_end_tag(q, rq);
1028 1028
1029 rq->flags &= ~REQ_STARTED; 1029 rq->flags &= ~REQ_STARTED;
1030 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); 1030 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
1031 } 1031 }
1032 } 1032 }
1033 1033
1034 EXPORT_SYMBOL(blk_queue_invalidate_tags); 1034 EXPORT_SYMBOL(blk_queue_invalidate_tags);
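For illustration only, an error-recovery or bus-reset path might call it as sketched below; mydev_reset() is hypothetical and the queue lock is taken as the Notes demand.

static void mydev_reset(request_queue_t *q)
{
        unsigned long flags;

        spin_lock_irqsave(q->queue_lock, flags);
        blk_queue_invalidate_tags(q);   /* untag and requeue everything */
        spin_unlock_irqrestore(q->queue_lock, flags);
}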
1035 1035
1036 static char *rq_flags[] = { 1036 static char *rq_flags[] = {
1037 "REQ_RW", 1037 "REQ_RW",
1038 "REQ_FAILFAST", 1038 "REQ_FAILFAST",
1039 "REQ_SOFTBARRIER", 1039 "REQ_SOFTBARRIER",
1040 "REQ_HARDBARRIER", 1040 "REQ_HARDBARRIER",
1041 "REQ_CMD", 1041 "REQ_CMD",
1042 "REQ_NOMERGE", 1042 "REQ_NOMERGE",
1043 "REQ_STARTED", 1043 "REQ_STARTED",
1044 "REQ_DONTPREP", 1044 "REQ_DONTPREP",
1045 "REQ_QUEUED", 1045 "REQ_QUEUED",
1046 "REQ_PC", 1046 "REQ_PC",
1047 "REQ_BLOCK_PC", 1047 "REQ_BLOCK_PC",
1048 "REQ_SENSE", 1048 "REQ_SENSE",
1049 "REQ_FAILED", 1049 "REQ_FAILED",
1050 "REQ_QUIET", 1050 "REQ_QUIET",
1051 "REQ_SPECIAL", 1051 "REQ_SPECIAL",
1052 "REQ_DRIVE_CMD", 1052 "REQ_DRIVE_CMD",
1053 "REQ_DRIVE_TASK", 1053 "REQ_DRIVE_TASK",
1054 "REQ_DRIVE_TASKFILE", 1054 "REQ_DRIVE_TASKFILE",
1055 "REQ_PREEMPT", 1055 "REQ_PREEMPT",
1056 "REQ_PM_SUSPEND", 1056 "REQ_PM_SUSPEND",
1057 "REQ_PM_RESUME", 1057 "REQ_PM_RESUME",
1058 "REQ_PM_SHUTDOWN", 1058 "REQ_PM_SHUTDOWN",
1059 }; 1059 };
1060 1060
1061 void blk_dump_rq_flags(struct request *rq, char *msg) 1061 void blk_dump_rq_flags(struct request *rq, char *msg)
1062 { 1062 {
1063 int bit; 1063 int bit;
1064 1064
1065 printk("%s: dev %s: flags = ", msg, 1065 printk("%s: dev %s: flags = ", msg,
1066 rq->rq_disk ? rq->rq_disk->disk_name : "?"); 1066 rq->rq_disk ? rq->rq_disk->disk_name : "?");
1067 bit = 0; 1067 bit = 0;
1068 do { 1068 do {
1069 if (rq->flags & (1 << bit)) 1069 if (rq->flags & (1 << bit))
1070 printk("%s ", rq_flags[bit]); 1070 printk("%s ", rq_flags[bit]);
1071 bit++; 1071 bit++;
1072 } while (bit < __REQ_NR_BITS); 1072 } while (bit < __REQ_NR_BITS);
1073 1073
1074 printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, 1074 printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,
1075 rq->nr_sectors, 1075 rq->nr_sectors,
1076 rq->current_nr_sectors); 1076 rq->current_nr_sectors);
1077 printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); 1077 printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);
1078 1078
1079 if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) { 1079 if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) {
1080 printk("cdb: "); 1080 printk("cdb: ");
1081 for (bit = 0; bit < sizeof(rq->cmd); bit++) 1081 for (bit = 0; bit < sizeof(rq->cmd); bit++)
1082 printk("%02x ", rq->cmd[bit]); 1082 printk("%02x ", rq->cmd[bit]);
1083 printk("\n"); 1083 printk("\n");
1084 } 1084 }
1085 } 1085 }
1086 1086
1087 EXPORT_SYMBOL(blk_dump_rq_flags); 1087 EXPORT_SYMBOL(blk_dump_rq_flags);
1088 1088
1089 void blk_recount_segments(request_queue_t *q, struct bio *bio) 1089 void blk_recount_segments(request_queue_t *q, struct bio *bio)
1090 { 1090 {
1091 struct bio_vec *bv, *bvprv = NULL; 1091 struct bio_vec *bv, *bvprv = NULL;
1092 int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster; 1092 int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster;
1093 int high, highprv = 1; 1093 int high, highprv = 1;
1094 1094
1095 if (unlikely(!bio->bi_io_vec)) 1095 if (unlikely(!bio->bi_io_vec))
1096 return; 1096 return;
1097 1097
1098 cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); 1098 cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
1099 hw_seg_size = seg_size = nr_phys_segs = nr_hw_segs = 0; 1099 hw_seg_size = seg_size = nr_phys_segs = nr_hw_segs = 0;
1100 bio_for_each_segment(bv, bio, i) { 1100 bio_for_each_segment(bv, bio, i) {
1101 /* 1101 /*
1102 * the trick here is making sure that a high page is never 1102 * the trick here is making sure that a high page is never
1103 * considered part of another segment, since that might 1103 * considered part of another segment, since that might
1104 * change with the bounce page. 1104 * change with the bounce page.
1105 */ 1105 */
1106 high = page_to_pfn(bv->bv_page) >= q->bounce_pfn; 1106 high = page_to_pfn(bv->bv_page) >= q->bounce_pfn;
1107 if (high || highprv) 1107 if (high || highprv)
1108 goto new_hw_segment; 1108 goto new_hw_segment;
1109 if (cluster) { 1109 if (cluster) {
1110 if (seg_size + bv->bv_len > q->max_segment_size) 1110 if (seg_size + bv->bv_len > q->max_segment_size)
1111 goto new_segment; 1111 goto new_segment;
1112 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) 1112 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
1113 goto new_segment; 1113 goto new_segment;
1114 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) 1114 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
1115 goto new_segment; 1115 goto new_segment;
1116 if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) 1116 if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
1117 goto new_hw_segment; 1117 goto new_hw_segment;
1118 1118
1119 seg_size += bv->bv_len; 1119 seg_size += bv->bv_len;
1120 hw_seg_size += bv->bv_len; 1120 hw_seg_size += bv->bv_len;
1121 bvprv = bv; 1121 bvprv = bv;
1122 continue; 1122 continue;
1123 } 1123 }
1124 new_segment: 1124 new_segment:
1125 if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) && 1125 if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
1126 !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) { 1126 !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) {
1127 hw_seg_size += bv->bv_len; 1127 hw_seg_size += bv->bv_len;
1128 } else { 1128 } else {
1129 new_hw_segment: 1129 new_hw_segment:
1130 if (hw_seg_size > bio->bi_hw_front_size) 1130 if (hw_seg_size > bio->bi_hw_front_size)
1131 bio->bi_hw_front_size = hw_seg_size; 1131 bio->bi_hw_front_size = hw_seg_size;
1132 hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len; 1132 hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
1133 nr_hw_segs++; 1133 nr_hw_segs++;
1134 } 1134 }
1135 1135
1136 nr_phys_segs++; 1136 nr_phys_segs++;
1137 bvprv = bv; 1137 bvprv = bv;
1138 seg_size = bv->bv_len; 1138 seg_size = bv->bv_len;
1139 highprv = high; 1139 highprv = high;
1140 } 1140 }
1141 if (hw_seg_size > bio->bi_hw_back_size) 1141 if (hw_seg_size > bio->bi_hw_back_size)
1142 bio->bi_hw_back_size = hw_seg_size; 1142 bio->bi_hw_back_size = hw_seg_size;
1143 if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size) 1143 if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size)
1144 bio->bi_hw_front_size = hw_seg_size; 1144 bio->bi_hw_front_size = hw_seg_size;
1145 bio->bi_phys_segments = nr_phys_segs; 1145 bio->bi_phys_segments = nr_phys_segs;
1146 bio->bi_hw_segments = nr_hw_segs; 1146 bio->bi_hw_segments = nr_hw_segs;
1147 bio->bi_flags |= (1 << BIO_SEG_VALID); 1147 bio->bi_flags |= (1 << BIO_SEG_VALID);
1148 } 1148 }
1149 1149
1150 1150
1151 int blk_phys_contig_segment(request_queue_t *q, struct bio *bio, 1151 int blk_phys_contig_segment(request_queue_t *q, struct bio *bio,
1152 struct bio *nxt) 1152 struct bio *nxt)
1153 { 1153 {
1154 if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER))) 1154 if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER)))
1155 return 0; 1155 return 0;
1156 1156
1157 if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) 1157 if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
1158 return 0; 1158 return 0;
1159 if (bio->bi_size + nxt->bi_size > q->max_segment_size) 1159 if (bio->bi_size + nxt->bi_size > q->max_segment_size)
1160 return 0; 1160 return 0;
1161 1161
1162 /* 1162 /*
1163 * bio and nxt are contiguous in memory, check if the queue allows 1163 * bio and nxt are contiguous in memory, check if the queue allows
1164 * these two to be merged into one 1164 * these two to be merged into one
1165 */ 1165 */
1166 if (BIO_SEG_BOUNDARY(q, bio, nxt)) 1166 if (BIO_SEG_BOUNDARY(q, bio, nxt))
1167 return 1; 1167 return 1;
1168 1168
1169 return 0; 1169 return 0;
1170 } 1170 }
1171 1171
1172 EXPORT_SYMBOL(blk_phys_contig_segment); 1172 EXPORT_SYMBOL(blk_phys_contig_segment);
1173 1173
1174 int blk_hw_contig_segment(request_queue_t *q, struct bio *bio, 1174 int blk_hw_contig_segment(request_queue_t *q, struct bio *bio,
1175 struct bio *nxt) 1175 struct bio *nxt)
1176 { 1176 {
1177 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) 1177 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1178 blk_recount_segments(q, bio); 1178 blk_recount_segments(q, bio);
1179 if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID))) 1179 if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID)))
1180 blk_recount_segments(q, nxt); 1180 blk_recount_segments(q, nxt);
1181 if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || 1181 if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
1182 BIOVEC_VIRT_OVERSIZE(bio->bi_hw_front_size + bio->bi_hw_back_size)) 1182 BIOVEC_VIRT_OVERSIZE(bio->bi_hw_front_size + bio->bi_hw_back_size))
1183 return 0; 1183 return 0;
1184 if (bio->bi_size + nxt->bi_size > q->max_segment_size) 1184 if (bio->bi_size + nxt->bi_size > q->max_segment_size)
1185 return 0; 1185 return 0;
1186 1186
1187 return 1; 1187 return 1;
1188 } 1188 }
1189 1189
1190 EXPORT_SYMBOL(blk_hw_contig_segment); 1190 EXPORT_SYMBOL(blk_hw_contig_segment);
1191 1191
1192 /* 1192 /*
1193 * map a request to scatterlist, return number of sg entries setup. Caller 1193 * map a request to scatterlist, return number of sg entries setup. Caller
1194 * must make sure sg can hold rq->nr_phys_segments entries 1194 * must make sure sg can hold rq->nr_phys_segments entries
1195 */ 1195 */
1196 int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg) 1196 int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg)
1197 { 1197 {
1198 struct bio_vec *bvec, *bvprv; 1198 struct bio_vec *bvec, *bvprv;
1199 struct bio *bio; 1199 struct bio *bio;
1200 int nsegs, i, cluster; 1200 int nsegs, i, cluster;
1201 1201
1202 nsegs = 0; 1202 nsegs = 0;
1203 cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); 1203 cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
1204 1204
1205 /* 1205 /*
1206 * for each bio in rq 1206 * for each bio in rq
1207 */ 1207 */
1208 bvprv = NULL; 1208 bvprv = NULL;
1209 rq_for_each_bio(bio, rq) { 1209 rq_for_each_bio(bio, rq) {
1210 /* 1210 /*
1211 * for each segment in bio 1211 * for each segment in bio
1212 */ 1212 */
1213 bio_for_each_segment(bvec, bio, i) { 1213 bio_for_each_segment(bvec, bio, i) {
1214 int nbytes = bvec->bv_len; 1214 int nbytes = bvec->bv_len;
1215 1215
1216 if (bvprv && cluster) { 1216 if (bvprv && cluster) {
1217 if (sg[nsegs - 1].length + nbytes > q->max_segment_size) 1217 if (sg[nsegs - 1].length + nbytes > q->max_segment_size)
1218 goto new_segment; 1218 goto new_segment;
1219 1219
1220 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) 1220 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
1221 goto new_segment; 1221 goto new_segment;
1222 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) 1222 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
1223 goto new_segment; 1223 goto new_segment;
1224 1224
1225 sg[nsegs - 1].length += nbytes; 1225 sg[nsegs - 1].length += nbytes;
1226 } else { 1226 } else {
1227 new_segment: 1227 new_segment:
1228 memset(&sg[nsegs],0,sizeof(struct scatterlist)); 1228 memset(&sg[nsegs],0,sizeof(struct scatterlist));
1229 sg[nsegs].page = bvec->bv_page; 1229 sg[nsegs].page = bvec->bv_page;
1230 sg[nsegs].length = nbytes; 1230 sg[nsegs].length = nbytes;
1231 sg[nsegs].offset = bvec->bv_offset; 1231 sg[nsegs].offset = bvec->bv_offset;
1232 1232
1233 nsegs++; 1233 nsegs++;
1234 } 1234 }
1235 bvprv = bvec; 1235 bvprv = bvec;
1236 } /* segments in bio */ 1236 } /* segments in bio */
1237 } /* bios in rq */ 1237 } /* bios in rq */
1238 1238
1239 return nsegs; 1239 return nsegs;
1240 } 1240 }
1241 1241
1242 EXPORT_SYMBOL(blk_rq_map_sg); 1242 EXPORT_SYMBOL(blk_rq_map_sg);
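A hedged sketch of the usual caller pattern: the driver supplies a scatterlist with room for rq->nr_phys_segments entries, lets blk_rq_map_sg() fill it in, and then (commonly, though not required by this function) maps it for DMA. The mydev_* name and the hwdev parameter are illustrative.

/* sg[] must have at least rq->nr_phys_segments entries. */
static int mydev_build_sglist(struct device *hwdev, request_queue_t *q,
                              struct request *rq, struct scatterlist *sg)
{
        int nsegs = blk_rq_map_sg(q, rq, sg);

        return dma_map_sg(hwdev, sg, nsegs,
                          rq_data_dir(rq) == WRITE ?
                                DMA_TO_DEVICE : DMA_FROM_DEVICE);
}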
1243 1243
1244 /* 1244 /*
1245 * the standard queue merge functions, can be overridden with device 1245 * the standard queue merge functions, can be overridden with device
1246 * specific ones if so desired 1246 * specific ones if so desired
1247 */ 1247 */
1248 1248
1249 static inline int ll_new_mergeable(request_queue_t *q, 1249 static inline int ll_new_mergeable(request_queue_t *q,
1250 struct request *req, 1250 struct request *req,
1251 struct bio *bio) 1251 struct bio *bio)
1252 { 1252 {
1253 int nr_phys_segs = bio_phys_segments(q, bio); 1253 int nr_phys_segs = bio_phys_segments(q, bio);
1254 1254
1255 if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { 1255 if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1256 req->flags |= REQ_NOMERGE; 1256 req->flags |= REQ_NOMERGE;
1257 if (req == q->last_merge) 1257 if (req == q->last_merge)
1258 q->last_merge = NULL; 1258 q->last_merge = NULL;
1259 return 0; 1259 return 0;
1260 } 1260 }
1261 1261
1262 /* 1262 /*
1263 * A hw segment is just getting larger, bump just the phys 1263 * A hw segment is just getting larger, bump just the phys
1264 * counter. 1264 * counter.
1265 */ 1265 */
1266 req->nr_phys_segments += nr_phys_segs; 1266 req->nr_phys_segments += nr_phys_segs;
1267 return 1; 1267 return 1;
1268 } 1268 }
1269 1269
1270 static inline int ll_new_hw_segment(request_queue_t *q, 1270 static inline int ll_new_hw_segment(request_queue_t *q,
1271 struct request *req, 1271 struct request *req,
1272 struct bio *bio) 1272 struct bio *bio)
1273 { 1273 {
1274 int nr_hw_segs = bio_hw_segments(q, bio); 1274 int nr_hw_segs = bio_hw_segments(q, bio);
1275 int nr_phys_segs = bio_phys_segments(q, bio); 1275 int nr_phys_segs = bio_phys_segments(q, bio);
1276 1276
1277 if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments 1277 if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
1278 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { 1278 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1279 req->flags |= REQ_NOMERGE; 1279 req->flags |= REQ_NOMERGE;
1280 if (req == q->last_merge) 1280 if (req == q->last_merge)
1281 q->last_merge = NULL; 1281 q->last_merge = NULL;
1282 return 0; 1282 return 0;
1283 } 1283 }
1284 1284
1285 /* 1285 /*
1286 * This will form the start of a new hw segment. Bump both 1286 * This will form the start of a new hw segment. Bump both
1287 * counters. 1287 * counters.
1288 */ 1288 */
1289 req->nr_hw_segments += nr_hw_segs; 1289 req->nr_hw_segments += nr_hw_segs;
1290 req->nr_phys_segments += nr_phys_segs; 1290 req->nr_phys_segments += nr_phys_segs;
1291 return 1; 1291 return 1;
1292 } 1292 }
1293 1293
1294 static int ll_back_merge_fn(request_queue_t *q, struct request *req, 1294 static int ll_back_merge_fn(request_queue_t *q, struct request *req,
1295 struct bio *bio) 1295 struct bio *bio)
1296 { 1296 {
1297 int len; 1297 int len;
1298 1298
1299 if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) { 1299 if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) {
1300 req->flags |= REQ_NOMERGE; 1300 req->flags |= REQ_NOMERGE;
1301 if (req == q->last_merge) 1301 if (req == q->last_merge)
1302 q->last_merge = NULL; 1302 q->last_merge = NULL;
1303 return 0; 1303 return 0;
1304 } 1304 }
1305 if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID))) 1305 if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID)))
1306 blk_recount_segments(q, req->biotail); 1306 blk_recount_segments(q, req->biotail);
1307 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) 1307 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1308 blk_recount_segments(q, bio); 1308 blk_recount_segments(q, bio);
1309 len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size; 1309 len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
1310 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) && 1310 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) &&
1311 !BIOVEC_VIRT_OVERSIZE(len)) { 1311 !BIOVEC_VIRT_OVERSIZE(len)) {
1312 int mergeable = ll_new_mergeable(q, req, bio); 1312 int mergeable = ll_new_mergeable(q, req, bio);
1313 1313
1314 if (mergeable) { 1314 if (mergeable) {
1315 if (req->nr_hw_segments == 1) 1315 if (req->nr_hw_segments == 1)
1316 req->bio->bi_hw_front_size = len; 1316 req->bio->bi_hw_front_size = len;
1317 if (bio->bi_hw_segments == 1) 1317 if (bio->bi_hw_segments == 1)
1318 bio->bi_hw_back_size = len; 1318 bio->bi_hw_back_size = len;
1319 } 1319 }
1320 return mergeable; 1320 return mergeable;
1321 } 1321 }
1322 1322
1323 return ll_new_hw_segment(q, req, bio); 1323 return ll_new_hw_segment(q, req, bio);
1324 } 1324 }
1325 1325
1326 static int ll_front_merge_fn(request_queue_t *q, struct request *req, 1326 static int ll_front_merge_fn(request_queue_t *q, struct request *req,
1327 struct bio *bio) 1327 struct bio *bio)
1328 { 1328 {
1329 int len; 1329 int len;
1330 1330
1331 if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) { 1331 if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) {
1332 req->flags |= REQ_NOMERGE; 1332 req->flags |= REQ_NOMERGE;
1333 if (req == q->last_merge) 1333 if (req == q->last_merge)
1334 q->last_merge = NULL; 1334 q->last_merge = NULL;
1335 return 0; 1335 return 0;
1336 } 1336 }
1337 len = bio->bi_hw_back_size + req->bio->bi_hw_front_size; 1337 len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
1338 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) 1338 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1339 blk_recount_segments(q, bio); 1339 blk_recount_segments(q, bio);
1340 if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID))) 1340 if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID)))
1341 blk_recount_segments(q, req->bio); 1341 blk_recount_segments(q, req->bio);
1342 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && 1342 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
1343 !BIOVEC_VIRT_OVERSIZE(len)) { 1343 !BIOVEC_VIRT_OVERSIZE(len)) {
1344 int mergeable = ll_new_mergeable(q, req, bio); 1344 int mergeable = ll_new_mergeable(q, req, bio);
1345 1345
1346 if (mergeable) { 1346 if (mergeable) {
1347 if (bio->bi_hw_segments == 1) 1347 if (bio->bi_hw_segments == 1)
1348 bio->bi_hw_front_size = len; 1348 bio->bi_hw_front_size = len;
1349 if (req->nr_hw_segments == 1) 1349 if (req->nr_hw_segments == 1)
1350 req->biotail->bi_hw_back_size = len; 1350 req->biotail->bi_hw_back_size = len;
1351 } 1351 }
1352 return mergeable; 1352 return mergeable;
1353 } 1353 }
1354 1354
1355 return ll_new_hw_segment(q, req, bio); 1355 return ll_new_hw_segment(q, req, bio);
1356 } 1356 }
1357 1357
1358 static int ll_merge_requests_fn(request_queue_t *q, struct request *req, 1358 static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
1359 struct request *next) 1359 struct request *next)
1360 { 1360 {
1361 int total_phys_segments = req->nr_phys_segments +next->nr_phys_segments; 1361 int total_phys_segments = req->nr_phys_segments +next->nr_phys_segments;
1362 int total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; 1362 int total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
1363 1363
1364 /* 1364 /*
1365 * First check whether either of the requests is a re-queued 1365 * First check whether either of the requests is a re-queued
1366 * request. Can't merge them if they are. 1366 * request. Can't merge them if they are.
1367 */ 1367 */
1368 if (req->special || next->special) 1368 if (req->special || next->special)
1369 return 0; 1369 return 0;
1370 1370
1371 /* 1371 /*
1372 * Will it become too large? 1372 * Will it become too large?
1373 */ 1373 */
1374 if ((req->nr_sectors + next->nr_sectors) > q->max_sectors) 1374 if ((req->nr_sectors + next->nr_sectors) > q->max_sectors)
1375 return 0; 1375 return 0;
1376 1376
1377 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; 1377 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
1378 if (blk_phys_contig_segment(q, req->biotail, next->bio)) 1378 if (blk_phys_contig_segment(q, req->biotail, next->bio))
1379 total_phys_segments--; 1379 total_phys_segments--;
1380 1380
1381 if (total_phys_segments > q->max_phys_segments) 1381 if (total_phys_segments > q->max_phys_segments)
1382 return 0; 1382 return 0;
1383 1383
1384 total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; 1384 total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
1385 if (blk_hw_contig_segment(q, req->biotail, next->bio)) { 1385 if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
1386 int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size; 1386 int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size;
1387 /* 1387 /*
1388 * propagate the combined length to the end of the requests 1388 * propagate the combined length to the end of the requests
1389 */ 1389 */
1390 if (req->nr_hw_segments == 1) 1390 if (req->nr_hw_segments == 1)
1391 req->bio->bi_hw_front_size = len; 1391 req->bio->bi_hw_front_size = len;
1392 if (next->nr_hw_segments == 1) 1392 if (next->nr_hw_segments == 1)
1393 next->biotail->bi_hw_back_size = len; 1393 next->biotail->bi_hw_back_size = len;
1394 total_hw_segments--; 1394 total_hw_segments--;
1395 } 1395 }
1396 1396
1397 if (total_hw_segments > q->max_hw_segments) 1397 if (total_hw_segments > q->max_hw_segments)
1398 return 0; 1398 return 0;
1399 1399
1400 /* Merge is OK... */ 1400 /* Merge is OK... */
1401 req->nr_phys_segments = total_phys_segments; 1401 req->nr_phys_segments = total_phys_segments;
1402 req->nr_hw_segments = total_hw_segments; 1402 req->nr_hw_segments = total_hw_segments;
1403 return 1; 1403 return 1;
1404 } 1404 }
1405 1405
1406 /* 1406 /*
1407 * "plug" the device if there are no outstanding requests: this will 1407 * "plug" the device if there are no outstanding requests: this will
1408 * force the transfer to start only after we have put all the requests 1408 * force the transfer to start only after we have put all the requests
1409 * on the list. 1409 * on the list.
1410 * 1410 *
1411 * This is called with interrupts off and no requests on the queue and 1411 * This is called with interrupts off and no requests on the queue and
1412 * with the queue lock held. 1412 * with the queue lock held.
1413 */ 1413 */
1414 void blk_plug_device(request_queue_t *q) 1414 void blk_plug_device(request_queue_t *q)
1415 { 1415 {
1416 WARN_ON(!irqs_disabled()); 1416 WARN_ON(!irqs_disabled());
1417 1417
1418 /* 1418 /*
1419 * don't plug a stopped queue, it must be paired with blk_start_queue() 1419 * don't plug a stopped queue, it must be paired with blk_start_queue()
1420 * which will restart the queueing 1420 * which will restart the queueing
1421 */ 1421 */
1422 if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) 1422 if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
1423 return; 1423 return;
1424 1424
1425 if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) 1425 if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
1426 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); 1426 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
1427 } 1427 }
1428 1428
1429 EXPORT_SYMBOL(blk_plug_device); 1429 EXPORT_SYMBOL(blk_plug_device);
1430 1430
1431 /* 1431 /*
1432 * remove the queue from the plugged list, if present. called with 1432 * remove the queue from the plugged list, if present. called with
1433 * queue lock held and interrupts disabled. 1433 * queue lock held and interrupts disabled.
1434 */ 1434 */
1435 int blk_remove_plug(request_queue_t *q) 1435 int blk_remove_plug(request_queue_t *q)
1436 { 1436 {
1437 WARN_ON(!irqs_disabled()); 1437 WARN_ON(!irqs_disabled());
1438 1438
1439 if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) 1439 if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
1440 return 0; 1440 return 0;
1441 1441
1442 del_timer(&q->unplug_timer); 1442 del_timer(&q->unplug_timer);
1443 return 1; 1443 return 1;
1444 } 1444 }
1445 1445
1446 EXPORT_SYMBOL(blk_remove_plug); 1446 EXPORT_SYMBOL(blk_remove_plug);
1447 1447
1448 /* 1448 /*
1449 * remove the plug and let it rip.. 1449 * remove the plug and let it rip..
1450 */ 1450 */
1451 void __generic_unplug_device(request_queue_t *q) 1451 void __generic_unplug_device(request_queue_t *q)
1452 { 1452 {
1453 if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) 1453 if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
1454 return; 1454 return;
1455 1455
1456 if (!blk_remove_plug(q)) 1456 if (!blk_remove_plug(q))
1457 return; 1457 return;
1458 1458
1459 /* 1459 /*
1460 * was plugged, fire request_fn if queue has stuff to do 1460 * was plugged, fire request_fn if queue has stuff to do
1461 */ 1461 */
1462 if (elv_next_request(q)) 1462 if (elv_next_request(q))
1463 q->request_fn(q); 1463 q->request_fn(q);
1464 } 1464 }
1465 EXPORT_SYMBOL(__generic_unplug_device); 1465 EXPORT_SYMBOL(__generic_unplug_device);
1466 1466
1467 /** 1467 /**
1468 * generic_unplug_device - fire a request queue 1468 * generic_unplug_device - fire a request queue
1469 * @q: The &request_queue_t in question 1469 * @q: The &request_queue_t in question
1470 * 1470 *
1471 * Description: 1471 * Description:
1472 * Linux uses plugging to build bigger request queues before letting 1472 * Linux uses plugging to build bigger request queues before letting
1473 * the device have at them. If a queue is plugged, the I/O scheduler 1473 * the device have at them. If a queue is plugged, the I/O scheduler
1474 * is still adding and merging requests on the queue. Once the queue 1474 * is still adding and merging requests on the queue. Once the queue
1475 * gets unplugged, the request_fn defined for the queue is invoked and 1475 * gets unplugged, the request_fn defined for the queue is invoked and
1476 * transfers started. 1476 * transfers started.
1477 **/ 1477 **/
1478 void generic_unplug_device(request_queue_t *q) 1478 void generic_unplug_device(request_queue_t *q)
1479 { 1479 {
1480 spin_lock_irq(q->queue_lock); 1480 spin_lock_irq(q->queue_lock);
1481 __generic_unplug_device(q); 1481 __generic_unplug_device(q);
1482 spin_unlock_irq(q->queue_lock); 1482 spin_unlock_irq(q->queue_lock);
1483 } 1483 }
1484 EXPORT_SYMBOL(generic_unplug_device); 1484 EXPORT_SYMBOL(generic_unplug_device);
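get_request_wait() further down uses exactly this call to force dispatch before sleeping; a caller outside the block core could do the same kind of thing. The sketch below is illustrative only, and the mydev_* name is hypothetical.

/* Push queued I/O out to the driver before blocking on its completion.
 * generic_unplug_device() takes q->queue_lock itself, so the caller must
 * not hold it here. */
static void mydev_flush_and_wait(request_queue_t *q, struct completion *done)
{
        generic_unplug_device(q);
        wait_for_completion(done);
}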
1485 1485
1486 static void blk_backing_dev_unplug(struct backing_dev_info *bdi, 1486 static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
1487 struct page *page) 1487 struct page *page)
1488 { 1488 {
1489 request_queue_t *q = bdi->unplug_io_data; 1489 request_queue_t *q = bdi->unplug_io_data;
1490 1490
1491 /* 1491 /*
1492 * devices don't necessarily have an ->unplug_fn defined 1492 * devices don't necessarily have an ->unplug_fn defined
1493 */ 1493 */
1494 if (q->unplug_fn) 1494 if (q->unplug_fn)
1495 q->unplug_fn(q); 1495 q->unplug_fn(q);
1496 } 1496 }
1497 1497
1498 static void blk_unplug_work(void *data) 1498 static void blk_unplug_work(void *data)
1499 { 1499 {
1500 request_queue_t *q = data; 1500 request_queue_t *q = data;
1501 1501
1502 q->unplug_fn(q); 1502 q->unplug_fn(q);
1503 } 1503 }
1504 1504
1505 static void blk_unplug_timeout(unsigned long data) 1505 static void blk_unplug_timeout(unsigned long data)
1506 { 1506 {
1507 request_queue_t *q = (request_queue_t *)data; 1507 request_queue_t *q = (request_queue_t *)data;
1508 1508
1509 kblockd_schedule_work(&q->unplug_work); 1509 kblockd_schedule_work(&q->unplug_work);
1510 } 1510 }
1511 1511
1512 /** 1512 /**
1513 * blk_start_queue - restart a previously stopped queue 1513 * blk_start_queue - restart a previously stopped queue
1514 * @q: The &request_queue_t in question 1514 * @q: The &request_queue_t in question
1515 * 1515 *
1516 * Description: 1516 * Description:
1517 * blk_start_queue() will clear the stop flag on the queue, and call 1517 * blk_start_queue() will clear the stop flag on the queue, and call
1518 * the request_fn for the queue if it was in a stopped state when 1518 * the request_fn for the queue if it was in a stopped state when
1519 * entered. Also see blk_stop_queue(). Queue lock must be held. 1519 * entered. Also see blk_stop_queue(). Queue lock must be held.
1520 **/ 1520 **/
1521 void blk_start_queue(request_queue_t *q) 1521 void blk_start_queue(request_queue_t *q)
1522 { 1522 {
1523 clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); 1523 clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
1524 1524
1525 /* 1525 /*
1526 * one level of recursion is ok and is much faster than kicking 1526 * one level of recursion is ok and is much faster than kicking
1527 * the unplug handling 1527 * the unplug handling
1528 */ 1528 */
1529 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { 1529 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
1530 q->request_fn(q); 1530 q->request_fn(q);
1531 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); 1531 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
1532 } else { 1532 } else {
1533 blk_plug_device(q); 1533 blk_plug_device(q);
1534 kblockd_schedule_work(&q->unplug_work); 1534 kblockd_schedule_work(&q->unplug_work);
1535 } 1535 }
1536 } 1536 }
1537 1537
1538 EXPORT_SYMBOL(blk_start_queue); 1538 EXPORT_SYMBOL(blk_start_queue);
1539 1539
1540 /** 1540 /**
1541 * blk_stop_queue - stop a queue 1541 * blk_stop_queue - stop a queue
1542 * @q: The &request_queue_t in question 1542 * @q: The &request_queue_t in question
1543 * 1543 *
1544 * Description: 1544 * Description:
1545 * The Linux block layer assumes that a block driver will consume all 1545 * The Linux block layer assumes that a block driver will consume all
1546 * entries on the request queue when the request_fn strategy is called. 1546 * entries on the request queue when the request_fn strategy is called.
1547 * Often this will not happen, because of hardware limitations (queue 1547 * Often this will not happen, because of hardware limitations (queue
1548 * depth settings). If a device driver gets a 'queue full' response, 1548 * depth settings). If a device driver gets a 'queue full' response,
1549 * or if it simply chooses not to queue more I/O at one point, it can 1549 * or if it simply chooses not to queue more I/O at one point, it can
1550 * call this function to prevent the request_fn from being called until 1550 * call this function to prevent the request_fn from being called until
1551 * the driver has signalled it's ready to go again. This happens by calling 1551 * the driver has signalled it's ready to go again. This happens by calling
1552 * blk_start_queue() to restart queue operations. Queue lock must be held. 1552 * blk_start_queue() to restart queue operations. Queue lock must be held.
1553 **/ 1553 **/
1554 void blk_stop_queue(request_queue_t *q) 1554 void blk_stop_queue(request_queue_t *q)
1555 { 1555 {
1556 blk_remove_plug(q); 1556 blk_remove_plug(q);
1557 set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); 1557 set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
1558 } 1558 }
1559 EXPORT_SYMBOL(blk_stop_queue); 1559 EXPORT_SYMBOL(blk_stop_queue);
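The stop/start pairing described above, as a rough sketch; the mydev_* helpers are hypothetical and both block layer calls sit under q->queue_lock.

static void mydev_hw_request_fn(request_queue_t *q)
{
        struct request *rq;

        while ((rq = elv_next_request(q)) != NULL) {
                if (mydev_hw_full()) {          /* hypothetical 'queue full' test */
                        blk_stop_queue(q);      /* no more request_fn calls */
                        break;
                }
                blkdev_dequeue_request(rq);
                mydev_send_to_hw(rq);           /* hypothetical */
        }
}

/* From the command-completion interrupt, with q->queue_lock held: */
static void mydev_hw_drained(request_queue_t *q)
{
        blk_start_queue(q);                     /* ready to go again */
}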
1560 1560
1561 /** 1561 /**
1562 * blk_sync_queue - cancel any pending callbacks on a queue 1562 * blk_sync_queue - cancel any pending callbacks on a queue
1563 * @q: the queue 1563 * @q: the queue
1564 * 1564 *
1565 * Description: 1565 * Description:
1566 * The block layer may perform asynchronous callback activity 1566 * The block layer may perform asynchronous callback activity
1567 * on a queue, such as calling the unplug function after a timeout. 1567 * on a queue, such as calling the unplug function after a timeout.
1568 * A block device may call blk_sync_queue to ensure that any 1568 * A block device may call blk_sync_queue to ensure that any
1569 * such activity is cancelled, thus allowing it to release resources 1569 * such activity is cancelled, thus allowing it to release resources
1570 * the callbacks might use. The caller must already have made sure 1570 * the callbacks might use. The caller must already have made sure
1571 * that its ->make_request_fn will not re-add plugging prior to calling 1571 * that its ->make_request_fn will not re-add plugging prior to calling
1572 * this function. 1572 * this function.
1573 * 1573 *
1574 */ 1574 */
1575 void blk_sync_queue(struct request_queue *q) 1575 void blk_sync_queue(struct request_queue *q)
1576 { 1576 {
1577 del_timer_sync(&q->unplug_timer); 1577 del_timer_sync(&q->unplug_timer);
1578 kblockd_flush(); 1578 kblockd_flush();
1579 } 1579 }
1580 EXPORT_SYMBOL(blk_sync_queue); 1580 EXPORT_SYMBOL(blk_sync_queue);
1581 1581
1582 /** 1582 /**
1583 * blk_run_queue - run a single device queue 1583 * blk_run_queue - run a single device queue
1584 * @q: The queue to run 1584 * @q: The queue to run
1585 */ 1585 */
1586 void blk_run_queue(struct request_queue *q) 1586 void blk_run_queue(struct request_queue *q)
1587 { 1587 {
1588 unsigned long flags; 1588 unsigned long flags;
1589 1589
1590 spin_lock_irqsave(q->queue_lock, flags); 1590 spin_lock_irqsave(q->queue_lock, flags);
1591 blk_remove_plug(q); 1591 blk_remove_plug(q);
1592 if (!elv_queue_empty(q)) 1592 if (!elv_queue_empty(q))
1593 q->request_fn(q); 1593 q->request_fn(q);
1594 spin_unlock_irqrestore(q->queue_lock, flags); 1594 spin_unlock_irqrestore(q->queue_lock, flags);
1595 } 1595 }
1596 EXPORT_SYMBOL(blk_run_queue); 1596 EXPORT_SYMBOL(blk_run_queue);
1597 1597
1598 /** 1598 /**
1599 * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed 1599 * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
1600 * @q: the request queue to be released 1600 * @q: the request queue to be released
1601 * 1601 *
1602 * Description: 1602 * Description:
1603 * blk_cleanup_queue is the pair to blk_init_queue() or 1603 * blk_cleanup_queue is the pair to blk_init_queue() or
1604 * blk_queue_make_request(). It should be called when a request queue is 1604 * blk_queue_make_request(). It should be called when a request queue is
1605 * being released; typically when a block device is being de-registered. 1605 * being released; typically when a block device is being de-registered.
1606 * Currently, its primary task is to free all the &struct request 1606 * Currently, its primary task is to free all the &struct request
1607 * structures that were allocated to the queue and the queue itself. 1607 * structures that were allocated to the queue and the queue itself.
1608 * 1608 *
1609 * Caveat: 1609 * Caveat:
1610 * Hopefully the low level driver will have finished any 1610 * Hopefully the low level driver will have finished any
1611 * outstanding requests first... 1611 * outstanding requests first...
1612 **/ 1612 **/
1613 void blk_cleanup_queue(request_queue_t * q) 1613 void blk_cleanup_queue(request_queue_t * q)
1614 { 1614 {
1615 struct request_list *rl = &q->rq; 1615 struct request_list *rl = &q->rq;
1616 1616
1617 if (!atomic_dec_and_test(&q->refcnt)) 1617 if (!atomic_dec_and_test(&q->refcnt))
1618 return; 1618 return;
1619 1619
1620 if (q->elevator) 1620 if (q->elevator)
1621 elevator_exit(q->elevator); 1621 elevator_exit(q->elevator);
1622 1622
1623 blk_sync_queue(q); 1623 blk_sync_queue(q);
1624 1624
1625 if (rl->rq_pool) 1625 if (rl->rq_pool)
1626 mempool_destroy(rl->rq_pool); 1626 mempool_destroy(rl->rq_pool);
1627 1627
1628 if (q->queue_tags) 1628 if (q->queue_tags)
1629 __blk_queue_free_tags(q); 1629 __blk_queue_free_tags(q);
1630 1630
1631 blk_queue_ordered(q, QUEUE_ORDERED_NONE); 1631 blk_queue_ordered(q, QUEUE_ORDERED_NONE);
1632 1632
1633 kmem_cache_free(requestq_cachep, q); 1633 kmem_cache_free(requestq_cachep, q);
1634 } 1634 }
1635 1635
1636 EXPORT_SYMBOL(blk_cleanup_queue); 1636 EXPORT_SYMBOL(blk_cleanup_queue);
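A typical teardown ordering, sketched for a hypothetical driver; mydev_disk and mydev_queue stand in for module-level objects created at init time.

static struct gendisk *mydev_disk;
static request_queue_t *mydev_queue;

static void __exit mydev_exit(void)
{
        del_gendisk(mydev_disk);        /* no new I/O reaches the queue */
        blk_cleanup_queue(mydev_queue); /* drop our reference to the queue */
        put_disk(mydev_disk);
}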
1637 1637
1638 static int blk_init_free_list(request_queue_t *q) 1638 static int blk_init_free_list(request_queue_t *q)
1639 { 1639 {
1640 struct request_list *rl = &q->rq; 1640 struct request_list *rl = &q->rq;
1641 1641
1642 rl->count[READ] = rl->count[WRITE] = 0; 1642 rl->count[READ] = rl->count[WRITE] = 0;
1643 rl->starved[READ] = rl->starved[WRITE] = 0; 1643 rl->starved[READ] = rl->starved[WRITE] = 0;
1644 init_waitqueue_head(&rl->wait[READ]); 1644 init_waitqueue_head(&rl->wait[READ]);
1645 init_waitqueue_head(&rl->wait[WRITE]); 1645 init_waitqueue_head(&rl->wait[WRITE]);
1646 init_waitqueue_head(&rl->drain); 1646 init_waitqueue_head(&rl->drain);
1647 1647
1648 rl->rq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, request_cachep); 1648 rl->rq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, request_cachep);
1649 1649
1650 if (!rl->rq_pool) 1650 if (!rl->rq_pool)
1651 return -ENOMEM; 1651 return -ENOMEM;
1652 1652
1653 return 0; 1653 return 0;
1654 } 1654 }
1655 1655
1656 static int __make_request(request_queue_t *, struct bio *); 1656 static int __make_request(request_queue_t *, struct bio *);
1657 1657
1658 request_queue_t *blk_alloc_queue(int gfp_mask) 1658 request_queue_t *blk_alloc_queue(int gfp_mask)
1659 { 1659 {
1660 request_queue_t *q = kmem_cache_alloc(requestq_cachep, gfp_mask); 1660 request_queue_t *q = kmem_cache_alloc(requestq_cachep, gfp_mask);
1661 1661
1662 if (!q) 1662 if (!q)
1663 return NULL; 1663 return NULL;
1664 1664
1665 memset(q, 0, sizeof(*q)); 1665 memset(q, 0, sizeof(*q));
1666 init_timer(&q->unplug_timer); 1666 init_timer(&q->unplug_timer);
1667 atomic_set(&q->refcnt, 1); 1667 atomic_set(&q->refcnt, 1);
1668 1668
1669 q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; 1669 q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
1670 q->backing_dev_info.unplug_io_data = q; 1670 q->backing_dev_info.unplug_io_data = q;
1671 1671
1672 return q; 1672 return q;
1673 } 1673 }
1674 1674
1675 EXPORT_SYMBOL(blk_alloc_queue); 1675 EXPORT_SYMBOL(blk_alloc_queue);
1676 1676
1677 /** 1677 /**
1678 * blk_init_queue - prepare a request queue for use with a block device 1678 * blk_init_queue - prepare a request queue for use with a block device
1679 * @rfn: The function to be called to process requests that have been 1679 * @rfn: The function to be called to process requests that have been
1680 * placed on the queue. 1680 * placed on the queue.
1681 * @lock: Request queue spin lock 1681 * @lock: Request queue spin lock
1682 * 1682 *
1683 * Description: 1683 * Description:
1684 * If a block device wishes to use the standard request handling procedures, 1684 * If a block device wishes to use the standard request handling procedures,
1685 * which sorts requests and coalesces adjacent requests, then it must 1685 * which sorts requests and coalesces adjacent requests, then it must
1686 * call blk_init_queue(). The function @rfn will be called when there 1686 * call blk_init_queue(). The function @rfn will be called when there
1687 * are requests on the queue that need to be processed. If the device 1687 * are requests on the queue that need to be processed. If the device
1688 * supports plugging, then @rfn may not be called immediately when requests 1688 * supports plugging, then @rfn may not be called immediately when requests
1689 * are available on the queue, but may be called at some time later instead. 1689 * are available on the queue, but may be called at some time later instead.
1690 * Plugged queues are generally unplugged when a buffer belonging to one 1690 * Plugged queues are generally unplugged when a buffer belonging to one
1691 * of the requests on the queue is needed, or due to memory pressure. 1691 * of the requests on the queue is needed, or due to memory pressure.
1692 * 1692 *
1693 * @rfn is not required, or even expected, to remove all requests off the 1693 * @rfn is not required, or even expected, to remove all requests off the
1694 * queue, but only as many as it can handle at a time. If it does leave 1694 * queue, but only as many as it can handle at a time. If it does leave
1695 * requests on the queue, it is responsible for arranging that the requests 1695 * requests on the queue, it is responsible for arranging that the requests
1696 * get dealt with eventually. 1696 * get dealt with eventually.
1697 * 1697 *
1698 * The queue spin lock must be held while manipulating the requests on the 1698 * The queue spin lock must be held while manipulating the requests on the
1699 * request queue. 1699 * request queue.
1700 * 1700 *
1701 * Function returns a pointer to the initialized request queue, or NULL if 1701 * Function returns a pointer to the initialized request queue, or NULL if
1702 * it didn't succeed. 1702 * it didn't succeed.
1703 * 1703 *
1704 * Note: 1704 * Note:
1705 * blk_init_queue() must be paired with a blk_cleanup_queue() call 1705 * blk_init_queue() must be paired with a blk_cleanup_queue() call
1706 * when the block device is deactivated (such as at module unload). 1706 * when the block device is deactivated (such as at module unload).
1707 **/ 1707 **/
1708 request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) 1708 request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
1709 { 1709 {
1710 request_queue_t *q = blk_alloc_queue(GFP_KERNEL); 1710 request_queue_t *q = blk_alloc_queue(GFP_KERNEL);
1711 1711
1712 if (!q) 1712 if (!q)
1713 return NULL; 1713 return NULL;
1714 1714
1715 if (blk_init_free_list(q)) 1715 if (blk_init_free_list(q))
1716 goto out_init; 1716 goto out_init;
1717 1717
1718 /* 1718 /*
1719 * if caller didn't supply a lock, they get per-queue locking with 1719 * if caller didn't supply a lock, they get per-queue locking with
1720 * our embedded lock 1720 * our embedded lock
1721 */ 1721 */
1722 if (!lock) { 1722 if (!lock) {
1723 spin_lock_init(&q->__queue_lock); 1723 spin_lock_init(&q->__queue_lock);
1724 lock = &q->__queue_lock; 1724 lock = &q->__queue_lock;
1725 } 1725 }
1726 1726
1727 q->request_fn = rfn; 1727 q->request_fn = rfn;
1728 q->back_merge_fn = ll_back_merge_fn; 1728 q->back_merge_fn = ll_back_merge_fn;
1729 q->front_merge_fn = ll_front_merge_fn; 1729 q->front_merge_fn = ll_front_merge_fn;
1730 q->merge_requests_fn = ll_merge_requests_fn; 1730 q->merge_requests_fn = ll_merge_requests_fn;
1731 q->prep_rq_fn = NULL; 1731 q->prep_rq_fn = NULL;
1732 q->unplug_fn = generic_unplug_device; 1732 q->unplug_fn = generic_unplug_device;
1733 q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); 1733 q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);
1734 q->queue_lock = lock; 1734 q->queue_lock = lock;
1735 1735
1736 blk_queue_segment_boundary(q, 0xffffffff); 1736 blk_queue_segment_boundary(q, 0xffffffff);
1737 1737
1738 blk_queue_make_request(q, __make_request); 1738 blk_queue_make_request(q, __make_request);
1739 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); 1739 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
1740 1740
1741 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); 1741 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
1742 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); 1742 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
1743 1743
1744 /* 1744 /*
1745 * all done 1745 * all done
1746 */ 1746 */
1747 if (!elevator_init(q, NULL)) { 1747 if (!elevator_init(q, NULL)) {
1748 blk_queue_congestion_threshold(q); 1748 blk_queue_congestion_threshold(q);
1749 return q; 1749 return q;
1750 } 1750 }
1751 1751
1752 blk_cleanup_queue(q); 1752 blk_cleanup_queue(q);
1753 out_init: 1753 out_init:
1754 kmem_cache_free(requestq_cachep, q); 1754 kmem_cache_free(requestq_cachep, q);
1755 return NULL; 1755 return NULL;
1756 } 1756 }
1757 1757
1758 EXPORT_SYMBOL(blk_init_queue); 1758 EXPORT_SYMBOL(blk_init_queue);
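Putting the description above together, a minimal and purely illustrative driver setup might look like the following; mydev_transfer() is a hypothetical data-copy helper and error handling is trimmed for brevity.

static spinlock_t mydev_lock;
static request_queue_t *mydev_queue;

static void mydev_request_fn(request_queue_t *q)
{
        struct request *rq;

        while ((rq = elv_next_request(q)) != NULL) {
                if (!(rq->flags & REQ_CMD)) {   /* not a regular fs request */
                        end_request(rq, 0);
                        continue;
                }
                /* move rq->current_nr_sectors sectors at rq->sector
                 * to/from rq->buffer, direction from rq_data_dir(rq) */
                mydev_transfer(rq);             /* hypothetical */
                end_request(rq, 1);             /* completes the current chunk */
        }
}

static int __init mydev_init(void)
{
        spin_lock_init(&mydev_lock);
        mydev_queue = blk_init_queue(mydev_request_fn, &mydev_lock);
        if (!mydev_queue)
                return -ENOMEM;
        /* attach the queue to a gendisk, set capacity, add_disk(), ... */
        return 0;
}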
1759 1759
1760 int blk_get_queue(request_queue_t *q) 1760 int blk_get_queue(request_queue_t *q)
1761 { 1761 {
1762 if (!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { 1762 if (!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
1763 atomic_inc(&q->refcnt); 1763 atomic_inc(&q->refcnt);
1764 return 0; 1764 return 0;
1765 } 1765 }
1766 1766
1767 return 1; 1767 return 1;
1768 } 1768 }
1769 1769
1770 EXPORT_SYMBOL(blk_get_queue); 1770 EXPORT_SYMBOL(blk_get_queue);
1771 1771
1772 static inline void blk_free_request(request_queue_t *q, struct request *rq) 1772 static inline void blk_free_request(request_queue_t *q, struct request *rq)
1773 { 1773 {
1774 elv_put_request(q, rq); 1774 elv_put_request(q, rq);
1775 mempool_free(rq, q->rq.rq_pool); 1775 mempool_free(rq, q->rq.rq_pool);
1776 } 1776 }
1777 1777
1778 static inline struct request *blk_alloc_request(request_queue_t *q, int rw, 1778 static inline struct request *blk_alloc_request(request_queue_t *q, int rw,
1779 int gfp_mask) 1779 int gfp_mask)
1780 { 1780 {
1781 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); 1781 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
1782 1782
1783 if (!rq) 1783 if (!rq)
1784 return NULL; 1784 return NULL;
1785 1785
1786 /* 1786 /*
1787 * first three bits are identical in rq->flags and bio->bi_rw, 1787 * first three bits are identical in rq->flags and bio->bi_rw,
1788 * see bio.h and blkdev.h 1788 * see bio.h and blkdev.h
1789 */ 1789 */
1790 rq->flags = rw; 1790 rq->flags = rw;
1791 1791
1792 if (!elv_set_request(q, rq, gfp_mask)) 1792 if (!elv_set_request(q, rq, gfp_mask))
1793 return rq; 1793 return rq;
1794 1794
1795 mempool_free(rq, q->rq.rq_pool); 1795 mempool_free(rq, q->rq.rq_pool);
1796 return NULL; 1796 return NULL;
1797 } 1797 }
1798 1798
1799 /* 1799 /*
1800 * ioc_batching returns true if the ioc is a valid batching request and 1800 * ioc_batching returns true if the ioc is a valid batching request and
1801 * should be given priority access to a request. 1801 * should be given priority access to a request.
1802 */ 1802 */
1803 static inline int ioc_batching(request_queue_t *q, struct io_context *ioc) 1803 static inline int ioc_batching(request_queue_t *q, struct io_context *ioc)
1804 { 1804 {
1805 if (!ioc) 1805 if (!ioc)
1806 return 0; 1806 return 0;
1807 1807
1808 /* 1808 /*
1809 * Make sure the process is able to allocate at least 1 request 1809 * Make sure the process is able to allocate at least 1 request
1810 * even if the batch times out, otherwise we could theoretically 1810 * even if the batch times out, otherwise we could theoretically
1811 * lose wakeups. 1811 * lose wakeups.
1812 */ 1812 */
1813 return ioc->nr_batch_requests == q->nr_batching || 1813 return ioc->nr_batch_requests == q->nr_batching ||
1814 (ioc->nr_batch_requests > 0 1814 (ioc->nr_batch_requests > 0
1815 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); 1815 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
1816 } 1816 }
1817 1817
1818 /* 1818 /*
1819 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This 1819 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This
1820 * will cause the process to be a "batcher" on all queues in the system. This 1820 * will cause the process to be a "batcher" on all queues in the system. This
1821 * is the behaviour we want though - once it gets a wakeup it should be given 1821 * is the behaviour we want though - once it gets a wakeup it should be given
1822 * a nice run. 1822 * a nice run.
1823 */ 1823 */
1824 void ioc_set_batching(request_queue_t *q, struct io_context *ioc) 1824 void ioc_set_batching(request_queue_t *q, struct io_context *ioc)
1825 { 1825 {
1826 if (!ioc || ioc_batching(q, ioc)) 1826 if (!ioc || ioc_batching(q, ioc))
1827 return; 1827 return;
1828 1828
1829 ioc->nr_batch_requests = q->nr_batching; 1829 ioc->nr_batch_requests = q->nr_batching;
1830 ioc->last_waited = jiffies; 1830 ioc->last_waited = jiffies;
1831 } 1831 }
1832 1832
1833 static void __freed_request(request_queue_t *q, int rw) 1833 static void __freed_request(request_queue_t *q, int rw)
1834 { 1834 {
1835 struct request_list *rl = &q->rq; 1835 struct request_list *rl = &q->rq;
1836 1836
1837 if (rl->count[rw] < queue_congestion_off_threshold(q)) 1837 if (rl->count[rw] < queue_congestion_off_threshold(q))
1838 clear_queue_congested(q, rw); 1838 clear_queue_congested(q, rw);
1839 1839
1840 if (rl->count[rw] + 1 <= q->nr_requests) { 1840 if (rl->count[rw] + 1 <= q->nr_requests) {
1841 smp_mb(); 1841 smp_mb();
1842 if (waitqueue_active(&rl->wait[rw])) 1842 if (waitqueue_active(&rl->wait[rw]))
1843 wake_up(&rl->wait[rw]); 1843 wake_up(&rl->wait[rw]);
1844 1844
1845 blk_clear_queue_full(q, rw); 1845 blk_clear_queue_full(q, rw);
1846 } 1846 }
1847 } 1847 }
1848 1848
1849 /* 1849 /*
1850 * A request has just been released. Account for it, update the full and 1850 * A request has just been released. Account for it, update the full and
1851 * congestion status, wake up any waiters. Called under q->queue_lock. 1851 * congestion status, wake up any waiters. Called under q->queue_lock.
1852 */ 1852 */
1853 static void freed_request(request_queue_t *q, int rw) 1853 static void freed_request(request_queue_t *q, int rw)
1854 { 1854 {
1855 struct request_list *rl = &q->rq; 1855 struct request_list *rl = &q->rq;
1856 1856
1857 rl->count[rw]--; 1857 rl->count[rw]--;
1858 1858
1859 __freed_request(q, rw); 1859 __freed_request(q, rw);
1860 1860
1861 if (unlikely(rl->starved[rw ^ 1])) 1861 if (unlikely(rl->starved[rw ^ 1]))
1862 __freed_request(q, rw ^ 1); 1862 __freed_request(q, rw ^ 1);
1863 1863
1864 if (!rl->count[READ] && !rl->count[WRITE]) { 1864 if (!rl->count[READ] && !rl->count[WRITE]) {
1865 smp_mb(); 1865 smp_mb();
1866 if (unlikely(waitqueue_active(&rl->drain))) 1866 if (unlikely(waitqueue_active(&rl->drain)))
1867 wake_up(&rl->drain); 1867 wake_up(&rl->drain);
1868 } 1868 }
1869 } 1869 }
1870 1870
1871 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) 1871 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
1872 /* 1872 /*
1873 * Get a free request, queue_lock must not be held 1873 * Get a free request, queue_lock must not be held
1874 */ 1874 */
1875 static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) 1875 static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
1876 { 1876 {
1877 struct request *rq = NULL; 1877 struct request *rq = NULL;
1878 struct request_list *rl = &q->rq; 1878 struct request_list *rl = &q->rq;
1879 struct io_context *ioc = get_io_context(gfp_mask); 1879 struct io_context *ioc = get_io_context(gfp_mask);
1880 1880
1881 if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) 1881 if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)))
1882 goto out; 1882 goto out;
1883 1883
1884 spin_lock_irq(q->queue_lock); 1884 spin_lock_irq(q->queue_lock);
1885 if (rl->count[rw]+1 >= q->nr_requests) { 1885 if (rl->count[rw]+1 >= q->nr_requests) {
1886 /* 1886 /*
1887 * The queue will fill after this allocation, so set it as 1887 * The queue will fill after this allocation, so set it as
1888 * full, and mark this process as "batching". This process 1888 * full, and mark this process as "batching". This process
1889 * will be allowed to complete a batch of requests, others 1889 * will be allowed to complete a batch of requests, others
1890 * will be blocked. 1890 * will be blocked.
1891 */ 1891 */
1892 if (!blk_queue_full(q, rw)) { 1892 if (!blk_queue_full(q, rw)) {
1893 ioc_set_batching(q, ioc); 1893 ioc_set_batching(q, ioc);
1894 blk_set_queue_full(q, rw); 1894 blk_set_queue_full(q, rw);
1895 } 1895 }
1896 } 1896 }
1897 1897
1898 switch (elv_may_queue(q, rw)) { 1898 switch (elv_may_queue(q, rw)) {
1899 case ELV_MQUEUE_NO: 1899 case ELV_MQUEUE_NO:
1900 goto rq_starved; 1900 goto rq_starved;
1901 case ELV_MQUEUE_MAY: 1901 case ELV_MQUEUE_MAY:
1902 break; 1902 break;
1903 case ELV_MQUEUE_MUST: 1903 case ELV_MQUEUE_MUST:
1904 goto get_rq; 1904 goto get_rq;
1905 } 1905 }
1906 1906
1907 if (blk_queue_full(q, rw) && !ioc_batching(q, ioc)) { 1907 if (blk_queue_full(q, rw) && !ioc_batching(q, ioc)) {
1908 /* 1908 /*
1909 * The queue is full and the allocating process is not a 1909 * The queue is full and the allocating process is not a
1910 * "batcher", and not exempted by the IO scheduler 1910 * "batcher", and not exempted by the IO scheduler
1911 */ 1911 */
1912 spin_unlock_irq(q->queue_lock); 1912 spin_unlock_irq(q->queue_lock);
1913 goto out; 1913 goto out;
1914 } 1914 }
1915 1915
1916 get_rq: 1916 get_rq:
1917 rl->count[rw]++; 1917 rl->count[rw]++;
1918 rl->starved[rw] = 0; 1918 rl->starved[rw] = 0;
1919 if (rl->count[rw] >= queue_congestion_on_threshold(q)) 1919 if (rl->count[rw] >= queue_congestion_on_threshold(q))
1920 set_queue_congested(q, rw); 1920 set_queue_congested(q, rw);
1921 spin_unlock_irq(q->queue_lock); 1921 spin_unlock_irq(q->queue_lock);
1922 1922
1923 rq = blk_alloc_request(q, rw, gfp_mask); 1923 rq = blk_alloc_request(q, rw, gfp_mask);
1924 if (!rq) { 1924 if (!rq) {
1925 /* 1925 /*
1926 * Allocation failed presumably due to memory. Undo anything 1926 * Allocation failed presumably due to memory. Undo anything
1927 * we might have messed up. 1927 * we might have messed up.
1928 * 1928 *
1929 * Allocating task should really be put onto the front of the 1929 * Allocating task should really be put onto the front of the
1930 * wait queue, but this is pretty rare. 1930 * wait queue, but this is pretty rare.
1931 */ 1931 */
1932 spin_lock_irq(q->queue_lock); 1932 spin_lock_irq(q->queue_lock);
1933 freed_request(q, rw); 1933 freed_request(q, rw);
1934 1934
1935 /* 1935 /*
1936 * in the very unlikely event that allocation failed and no 1936 * in the very unlikely event that allocation failed and no
1937 * requests for this direction were pending, mark us starved 1937 * requests for this direction were pending, mark us starved
1938 * so that freeing of a request in the other direction will 1938 * so that freeing of a request in the other direction will
1939 * notice us. another possible fix would be to split the 1939 * notice us. another possible fix would be to split the
1940 * rq mempool into READ and WRITE 1940 * rq mempool into READ and WRITE
1941 */ 1941 */
1942 rq_starved: 1942 rq_starved:
1943 if (unlikely(rl->count[rw] == 0)) 1943 if (unlikely(rl->count[rw] == 0))
1944 rl->starved[rw] = 1; 1944 rl->starved[rw] = 1;
1945 1945
1946 spin_unlock_irq(q->queue_lock); 1946 spin_unlock_irq(q->queue_lock);
1947 goto out; 1947 goto out;
1948 } 1948 }
1949 1949
1950 if (ioc_batching(q, ioc)) 1950 if (ioc_batching(q, ioc))
1951 ioc->nr_batch_requests--; 1951 ioc->nr_batch_requests--;
1952 1952
1953 rq_init(q, rq); 1953 rq_init(q, rq);
1954 rq->rl = rl; 1954 rq->rl = rl;
1955 out: 1955 out:
1956 put_io_context(ioc); 1956 put_io_context(ioc);
1957 return rq; 1957 return rq;
1958 } 1958 }
1959 1959
1960 /* 1960 /*
1961 * No available requests for this queue, unplug the device and wait for some 1961 * No available requests for this queue, unplug the device and wait for some
1962 * requests to become available. 1962 * requests to become available.
1963 */ 1963 */
1964 static struct request *get_request_wait(request_queue_t *q, int rw) 1964 static struct request *get_request_wait(request_queue_t *q, int rw)
1965 { 1965 {
1966 DEFINE_WAIT(wait); 1966 DEFINE_WAIT(wait);
1967 struct request *rq; 1967 struct request *rq;
1968 1968
1969 generic_unplug_device(q); 1969 generic_unplug_device(q);
1970 do { 1970 do {
1971 struct request_list *rl = &q->rq; 1971 struct request_list *rl = &q->rq;
1972 1972
1973 prepare_to_wait_exclusive(&rl->wait[rw], &wait, 1973 prepare_to_wait_exclusive(&rl->wait[rw], &wait,
1974 TASK_UNINTERRUPTIBLE); 1974 TASK_UNINTERRUPTIBLE);
1975 1975
1976 rq = get_request(q, rw, GFP_NOIO); 1976 rq = get_request(q, rw, GFP_NOIO);
1977 1977
1978 if (!rq) { 1978 if (!rq) {
1979 struct io_context *ioc; 1979 struct io_context *ioc;
1980 1980
1981 io_schedule(); 1981 io_schedule();
1982 1982
1983 /* 1983 /*
1984 * After sleeping, we become a "batching" process and 1984 * After sleeping, we become a "batching" process and
1985 * will be able to allocate at least one request, and 1985 * will be able to allocate at least one request, and
1986 * up to a big batch of them for a small period of time. 1986 * up to a big batch of them for a small period of time.
1987 * See ioc_batching, ioc_set_batching 1987 * See ioc_batching, ioc_set_batching
1988 */ 1988 */
1989 ioc = get_io_context(GFP_NOIO); 1989 ioc = get_io_context(GFP_NOIO);
1990 ioc_set_batching(q, ioc); 1990 ioc_set_batching(q, ioc);
1991 put_io_context(ioc); 1991 put_io_context(ioc);
1992 } 1992 }
1993 finish_wait(&rl->wait[rw], &wait); 1993 finish_wait(&rl->wait[rw], &wait);
1994 } while (!rq); 1994 } while (!rq);
1995 1995
1996 return rq; 1996 return rq;
1997 } 1997 }
1998 1998
1999 struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask) 1999 struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask)
2000 { 2000 {
2001 struct request *rq; 2001 struct request *rq;
2002 2002
2003 BUG_ON(rw != READ && rw != WRITE); 2003 BUG_ON(rw != READ && rw != WRITE);
2004 2004
2005 if (gfp_mask & __GFP_WAIT) 2005 if (gfp_mask & __GFP_WAIT)
2006 rq = get_request_wait(q, rw); 2006 rq = get_request_wait(q, rw);
2007 else 2007 else
2008 rq = get_request(q, rw, gfp_mask); 2008 rq = get_request(q, rw, gfp_mask);
2009 2009
2010 return rq; 2010 return rq;
2011 } 2011 }
2012 2012
2013 EXPORT_SYMBOL(blk_get_request); 2013 EXPORT_SYMBOL(blk_get_request);
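As a quick caller-side sketch, a no-data SCSI command (TEST UNIT READY, opcode 0x00) can be allocated here and run synchronously with blk_execute_rq(), defined later in this file; the shape mirrors blkdev_scsi_issue_flush_fn() further down, and mydev_test_unit_ready() is a hypothetical name:

static int mydev_test_unit_ready(request_queue_t *q, struct gendisk *disk)
{
	struct request *rq = blk_get_request(q, WRITE, __GFP_WAIT);
	int ret;

	rq->flags |= REQ_BLOCK_PC;
	memset(rq->cmd, 0, sizeof(rq->cmd));
	rq->cmd[0] = 0x00;		/* TEST UNIT READY */
	rq->cmd_len = 6;
	rq->data = NULL;
	rq->data_len = 0;
	rq->timeout = 30 * HZ;

	ret = blk_execute_rq(q, disk, rq);	/* sleeps until completion */
	blk_put_request(rq);
	return ret;
}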
2014 2014
2015 /** 2015 /**
2016 * blk_requeue_request - put a request back on queue 2016 * blk_requeue_request - put a request back on queue
2017 * @q: request queue where request should be inserted 2017 * @q: request queue where request should be inserted
2018 * @rq: request to be inserted 2018 * @rq: request to be inserted
2019 * 2019 *
2020 * Description: 2020 * Description:
2021 * Drivers often keep queueing requests until the hardware cannot accept 2021 * Drivers often keep queueing requests until the hardware cannot accept
2022 * more. When that condition happens we need to put the request back 2022 * more. When that condition happens we need to put the request back
2023 * on the queue. Must be called with queue lock held. 2023 * on the queue. Must be called with queue lock held.
2024 */ 2024 */
2025 void blk_requeue_request(request_queue_t *q, struct request *rq) 2025 void blk_requeue_request(request_queue_t *q, struct request *rq)
2026 { 2026 {
2027 if (blk_rq_tagged(rq)) 2027 if (blk_rq_tagged(rq))
2028 blk_queue_end_tag(q, rq); 2028 blk_queue_end_tag(q, rq);
2029 2029
2030 elv_requeue_request(q, rq); 2030 elv_requeue_request(q, rq);
2031 } 2031 }
2032 2032
2033 EXPORT_SYMBOL(blk_requeue_request); 2033 EXPORT_SYMBOL(blk_requeue_request);
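A minimal sketch of the pattern the description above has in mind, inside a driver's request_fn (so the queue lock is already held, as required); mydev_start_cmd() is a hypothetical helper that returns -EBUSY when the hardware can take no more work:

static void mydev_request_fn(request_queue_t *q)
{
	struct request *rq;

	while ((rq = elv_next_request(q)) != NULL) {
		blkdev_dequeue_request(rq);
		if (mydev_start_cmd(rq) == -EBUSY) {
			/* hardware full: put it back at the front and stop */
			blk_requeue_request(q, rq);
			break;
		}
	}
}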
2034 2034
2035 /** 2035 /**
2036 * blk_insert_request - insert a special request in to a request queue 2036 * blk_insert_request - insert a special request in to a request queue
2037 * @q: request queue where request should be inserted 2037 * @q: request queue where request should be inserted
2038 * @rq: request to be inserted 2038 * @rq: request to be inserted
2039 * @at_head: insert request at head or tail of queue 2039 * @at_head: insert request at head or tail of queue
2040 * @data: private data 2040 * @data: private data
2041 * @reinsert: true if request it a reinsertion of previously processed one
2042 * 2041 *
2043 * Description: 2042 * Description:
2044 * Many block devices need to execute commands asynchronously, so they don't 2043 * Many block devices need to execute commands asynchronously, so they don't
2045 * block the whole kernel from preemption during request execution. This is 2044 * block the whole kernel from preemption during request execution. This is
2046 * accomplished normally by inserting artificial requests tagged as 2045 * accomplished normally by inserting artificial requests tagged as
2047 * REQ_SPECIAL in to the corresponding request queue, and letting them be 2046 * REQ_SPECIAL in to the corresponding request queue, and letting them be
2048 * scheduled for actual execution by the request queue. 2047 * scheduled for actual execution by the request queue.
2049 * 2048 *
2050 * We have the option of inserting the head or the tail of the queue. 2049 * We have the option of inserting the head or the tail of the queue.
2051 * Typically we use the tail for new ioctls and so forth. We use the head 2050 * Typically we use the tail for new ioctls and so forth. We use the head
2052 * of the queue for things like a QUEUE_FULL message from a device, or a 2051 * of the queue for things like a QUEUE_FULL message from a device, or a
2053 * host that is unable to accept a particular command. 2052 * host that is unable to accept a particular command.
2054 */ 2053 */
2055 void blk_insert_request(request_queue_t *q, struct request *rq, 2054 void blk_insert_request(request_queue_t *q, struct request *rq,
2056 int at_head, void *data, int reinsert) 2055 int at_head, void *data)
2057 { 2056 {
2057 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
2058 unsigned long flags; 2058 unsigned long flags;
2059 2059
2060 /* 2060 /*
2061 * tell I/O scheduler that this isn't a regular read/write (ie it 2061 * tell I/O scheduler that this isn't a regular read/write (ie it
2062 * must not attempt merges on this) and that it acts as a soft 2062 * must not attempt merges on this) and that it acts as a soft
2063 * barrier 2063 * barrier
2064 */ 2064 */
2065 rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER; 2065 rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER;
2066 2066
2067 rq->special = data; 2067 rq->special = data;
2068 2068
2069 spin_lock_irqsave(q->queue_lock, flags); 2069 spin_lock_irqsave(q->queue_lock, flags);
2070 2070
2071 /* 2071 /*
2072 * If command is tagged, release the tag 2072 * If command is tagged, release the tag
2073 */ 2073 */
2074 if (reinsert) 2074 if (blk_rq_tagged(rq))
2075 blk_requeue_request(q, rq); 2075 blk_queue_end_tag(q, rq);
2076 else {
2077 int where = ELEVATOR_INSERT_BACK;
2078 2076
2079 if (at_head) 2077 drive_stat_acct(rq, rq->nr_sectors, 1);
2080 where = ELEVATOR_INSERT_FRONT; 2078 __elv_add_request(q, rq, where, 0);
2081 2079
2082 if (blk_rq_tagged(rq))
2083 blk_queue_end_tag(q, rq);
2084
2085 drive_stat_acct(rq, rq->nr_sectors, 1);
2086 __elv_add_request(q, rq, where, 0);
2087 }
2088 if (blk_queue_plugged(q)) 2080 if (blk_queue_plugged(q))
2089 __generic_unplug_device(q); 2081 __generic_unplug_device(q);
2090 else 2082 else
2091 q->request_fn(q); 2083 q->request_fn(q);
2092 spin_unlock_irqrestore(q->queue_lock, flags); 2084 spin_unlock_irqrestore(q->queue_lock, flags);
2093 } 2085 }
2094 2086
2095 EXPORT_SYMBOL(blk_insert_request); 2087 EXPORT_SYMBOL(blk_insert_request);
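With the @reinsert branch gone (the old code above called blk_requeue_request() for that case; callers must now call it directly), queueing a special command reduces to a sketch like the following, where the void *cmd payload and mydev_send_special() are hypothetical:

static void mydev_send_special(request_queue_t *q, void *cmd)
{
	struct request *rq = blk_get_request(q, WRITE, __GFP_WAIT);

	/*
	 * blk_insert_request() marks rq REQ_SPECIAL|REQ_SOFTBARRIER and
	 * stashes @cmd in rq->special; at_head=1 puts it ahead of queued
	 * fs requests, as the kernel-doc suggests for urgent commands.
	 */
	blk_insert_request(q, rq, 1, cmd);
}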
2096 2088
2097 /** 2089 /**
2098 * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage 2090 * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage
2099 * @q: request queue where request should be inserted 2091 * @q: request queue where request should be inserted
2100 * @rw: READ or WRITE data 2092 * @rw: READ or WRITE data
2101 * @ubuf: the user buffer 2093 * @ubuf: the user buffer
2102 * @len: length of user data 2094 * @len: length of user data
2103 * 2095 *
2104 * Description: 2096 * Description:
2105 * Data will be mapped directly for zero copy io, if possible. Otherwise 2097 * Data will be mapped directly for zero copy io, if possible. Otherwise
2106 * a kernel bounce buffer is used. 2098 * a kernel bounce buffer is used.
2107 * 2099 *
2108 * A matching blk_rq_unmap_user() must be issued at the end of io, while 2100 * A matching blk_rq_unmap_user() must be issued at the end of io, while
2109 * still in process context. 2101 * still in process context.
2110 * 2102 *
2111 * Note: The mapped bio may need to be bounced through blk_queue_bounce() 2103 * Note: The mapped bio may need to be bounced through blk_queue_bounce()
2112 * before being submitted to the device, as pages mapped may be out of 2104 * before being submitted to the device, as pages mapped may be out of
2113 * reach. It's the caller's responsibility to make sure this happens. The 2105 * reach. It's the caller's responsibility to make sure this happens. The
2114 * original bio must be passed back in to blk_rq_unmap_user() for proper 2106 * original bio must be passed back in to blk_rq_unmap_user() for proper
2115 * unmapping. 2107 * unmapping.
2116 */ 2108 */
2117 struct request *blk_rq_map_user(request_queue_t *q, int rw, void __user *ubuf, 2109 struct request *blk_rq_map_user(request_queue_t *q, int rw, void __user *ubuf,
2118 unsigned int len) 2110 unsigned int len)
2119 { 2111 {
2120 unsigned long uaddr; 2112 unsigned long uaddr;
2121 struct request *rq; 2113 struct request *rq;
2122 struct bio *bio; 2114 struct bio *bio;
2123 2115
2124 if (len > (q->max_sectors << 9)) 2116 if (len > (q->max_sectors << 9))
2125 return ERR_PTR(-EINVAL); 2117 return ERR_PTR(-EINVAL);
2126 if ((!len && ubuf) || (len && !ubuf)) 2118 if ((!len && ubuf) || (len && !ubuf))
2127 return ERR_PTR(-EINVAL); 2119 return ERR_PTR(-EINVAL);
2128 2120
2129 rq = blk_get_request(q, rw, __GFP_WAIT); 2121 rq = blk_get_request(q, rw, __GFP_WAIT);
2130 if (!rq) 2122 if (!rq)
2131 return ERR_PTR(-ENOMEM); 2123 return ERR_PTR(-ENOMEM);
2132 2124
2133 /* 2125 /*
2134 * if alignment requirement is satisfied, map in user pages for 2126 * if alignment requirement is satisfied, map in user pages for
2135 * direct dma. else, set up kernel bounce buffers 2127 * direct dma. else, set up kernel bounce buffers
2136 */ 2128 */
2137 uaddr = (unsigned long) ubuf; 2129 uaddr = (unsigned long) ubuf;
2138 if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q))) 2130 if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q)))
2139 bio = bio_map_user(q, NULL, uaddr, len, rw == READ); 2131 bio = bio_map_user(q, NULL, uaddr, len, rw == READ);
2140 else 2132 else
2141 bio = bio_copy_user(q, uaddr, len, rw == READ); 2133 bio = bio_copy_user(q, uaddr, len, rw == READ);
2142 2134
2143 if (!IS_ERR(bio)) { 2135 if (!IS_ERR(bio)) {
2144 rq->bio = rq->biotail = bio; 2136 rq->bio = rq->biotail = bio;
2145 blk_rq_bio_prep(q, rq, bio); 2137 blk_rq_bio_prep(q, rq, bio);
2146 2138
2147 rq->buffer = rq->data = NULL; 2139 rq->buffer = rq->data = NULL;
2148 rq->data_len = len; 2140 rq->data_len = len;
2149 return rq; 2141 return rq;
2150 } 2142 }
2151 2143
2152 /* 2144 /*
2153 * bio is the err-ptr 2145 * bio is the err-ptr
2154 */ 2146 */
2155 blk_put_request(rq); 2147 blk_put_request(rq);
2156 return (struct request *) bio; 2148 return (struct request *) bio;
2157 } 2149 }
2158 2150
2159 EXPORT_SYMBOL(blk_rq_map_user); 2151 EXPORT_SYMBOL(blk_rq_map_user);
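Roughly how an ioctl-style passthrough path strings the mapping, execution and unmapping together (a sketch with error handling trimmed and bouncing omitted; the 6-byte cdb, the gendisk and mydev_pc_read() are assumptions):

static int mydev_pc_read(request_queue_t *q, struct gendisk *disk,
			 unsigned char *cdb, void __user *ubuf,
			 unsigned int len)
{
	struct request *rq;
	struct bio *bio;
	int err;

	rq = blk_rq_map_user(q, READ, ubuf, len);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	bio = rq->bio;				/* kept for unmapping */
	rq->flags |= REQ_BLOCK_PC;
	memcpy(rq->cmd, cdb, 6);
	rq->cmd_len = 6;
	rq->timeout = 60 * HZ;

	err = blk_execute_rq(q, disk, rq);	/* sleeps until done */

	/* blk_rq_unmap_user() also drops the request reference */
	if (blk_rq_unmap_user(rq, bio, len) && !err)
		err = -EFAULT;
	return err;
}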
2160 2152
2161 /** 2153 /**
2162 * blk_rq_unmap_user - unmap a request with user data 2154 * blk_rq_unmap_user - unmap a request with user data
2163 * @rq: request to be unmapped 2155 * @rq: request to be unmapped
2164 * @bio: bio for the request 2156 * @bio: bio for the request
2165 * @ulen: length of user buffer 2157 * @ulen: length of user buffer
2166 * 2158 *
2167 * Description: 2159 * Description:
2168 * Unmap a request previously mapped by blk_rq_map_user(). 2160 * Unmap a request previously mapped by blk_rq_map_user().
2169 */ 2161 */
2170 int blk_rq_unmap_user(struct request *rq, struct bio *bio, unsigned int ulen) 2162 int blk_rq_unmap_user(struct request *rq, struct bio *bio, unsigned int ulen)
2171 { 2163 {
2172 int ret = 0; 2164 int ret = 0;
2173 2165
2174 if (bio) { 2166 if (bio) {
2175 if (bio_flagged(bio, BIO_USER_MAPPED)) 2167 if (bio_flagged(bio, BIO_USER_MAPPED))
2176 bio_unmap_user(bio); 2168 bio_unmap_user(bio);
2177 else 2169 else
2178 ret = bio_uncopy_user(bio); 2170 ret = bio_uncopy_user(bio);
2179 } 2171 }
2180 2172
2181 blk_put_request(rq); 2173 blk_put_request(rq);
2182 return ret; 2174 return ret;
2183 } 2175 }
2184 2176
2185 EXPORT_SYMBOL(blk_rq_unmap_user); 2177 EXPORT_SYMBOL(blk_rq_unmap_user);
2186 2178
2187 /** 2179 /**
2188 * blk_execute_rq - insert a request into queue for execution 2180 * blk_execute_rq - insert a request into queue for execution
2189 * @q: queue to insert the request in 2181 * @q: queue to insert the request in
2190 * @bd_disk: matching gendisk 2182 * @bd_disk: matching gendisk
2191 * @rq: request to insert 2183 * @rq: request to insert
2192 * 2184 *
2193 * Description: 2185 * Description:
2194 * Insert a fully prepared request at the back of the io scheduler queue 2186 * Insert a fully prepared request at the back of the io scheduler queue
2195 * for execution. 2187 * for execution.
2196 */ 2188 */
2197 int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, 2189 int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
2198 struct request *rq) 2190 struct request *rq)
2199 { 2191 {
2200 DECLARE_COMPLETION(wait); 2192 DECLARE_COMPLETION(wait);
2201 char sense[SCSI_SENSE_BUFFERSIZE]; 2193 char sense[SCSI_SENSE_BUFFERSIZE];
2202 int err = 0; 2194 int err = 0;
2203 2195
2204 rq->rq_disk = bd_disk; 2196 rq->rq_disk = bd_disk;
2205 2197
2206 /* 2198 /*
2207 * we need an extra reference to the request, so we can look at 2199 * we need an extra reference to the request, so we can look at
2208 * it after io completion 2200 * it after io completion
2209 */ 2201 */
2210 rq->ref_count++; 2202 rq->ref_count++;
2211 2203
2212 if (!rq->sense) { 2204 if (!rq->sense) {
2213 memset(sense, 0, sizeof(sense)); 2205 memset(sense, 0, sizeof(sense));
2214 rq->sense = sense; 2206 rq->sense = sense;
2215 rq->sense_len = 0; 2207 rq->sense_len = 0;
2216 } 2208 }
2217 2209
2218 rq->flags |= REQ_NOMERGE; 2210 rq->flags |= REQ_NOMERGE;
2219 rq->waiting = &wait; 2211 rq->waiting = &wait;
2220 rq->end_io = blk_end_sync_rq; 2212 rq->end_io = blk_end_sync_rq;
2221 elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1); 2213 elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
2222 generic_unplug_device(q); 2214 generic_unplug_device(q);
2223 wait_for_completion(&wait); 2215 wait_for_completion(&wait);
2224 rq->waiting = NULL; 2216 rq->waiting = NULL;
2225 2217
2226 if (rq->errors) 2218 if (rq->errors)
2227 err = -EIO; 2219 err = -EIO;
2228 2220
2229 return err; 2221 return err;
2230 } 2222 }
2231 2223
2232 EXPORT_SYMBOL(blk_execute_rq); 2224 EXPORT_SYMBOL(blk_execute_rq);
2233 2225
2234 /** 2226 /**
2235 * blkdev_issue_flush - queue a flush 2227 * blkdev_issue_flush - queue a flush
2236 * @bdev: blockdev to issue flush for 2228 * @bdev: blockdev to issue flush for
2237 * @error_sector: error sector 2229 * @error_sector: error sector
2238 * 2230 *
2239 * Description: 2231 * Description:
2240 * Issue a flush for the block device in question. Caller can supply 2232 * Issue a flush for the block device in question. Caller can supply
2241 * room for storing the error offset in case of a flush error, if they 2233 * room for storing the error offset in case of a flush error, if they
2242 * wish to. Caller must run wait_for_completion() on its own. 2234 * wish to. Caller must run wait_for_completion() on its own.
2243 */ 2235 */
2244 int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) 2236 int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
2245 { 2237 {
2246 request_queue_t *q; 2238 request_queue_t *q;
2247 2239
2248 if (bdev->bd_disk == NULL) 2240 if (bdev->bd_disk == NULL)
2249 return -ENXIO; 2241 return -ENXIO;
2250 2242
2251 q = bdev_get_queue(bdev); 2243 q = bdev_get_queue(bdev);
2252 if (!q) 2244 if (!q)
2253 return -ENXIO; 2245 return -ENXIO;
2254 if (!q->issue_flush_fn) 2246 if (!q->issue_flush_fn)
2255 return -EOPNOTSUPP; 2247 return -EOPNOTSUPP;
2256 2248
2257 return q->issue_flush_fn(q, bdev->bd_disk, error_sector); 2249 return q->issue_flush_fn(q, bdev->bd_disk, error_sector);
2258 } 2250 }
2259 2251
2260 EXPORT_SYMBOL(blkdev_issue_flush); 2252 EXPORT_SYMBOL(blkdev_issue_flush);
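On the caller side the helper is one call plus error handling; a sketch assuming the caller already holds a struct block_device (mydev_flush_cache() is hypothetical):

static int mydev_flush_cache(struct block_device *bdev)
{
	sector_t bad_sector;
	int err = blkdev_issue_flush(bdev, &bad_sector);

	if (err == -EOPNOTSUPP)
		return 0;	/* device/queue has no flush support */
	if (err)
		printk(KERN_ERR "cache flush failed near sector %llu\n",
		       (unsigned long long)bad_sector);
	return err;
}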
2261 2253
2262 /** 2254 /**
2263 * blkdev_scsi_issue_flush_fn - issue flush for SCSI devices 2255 * blkdev_scsi_issue_flush_fn - issue flush for SCSI devices
2264 * @q: device queue 2256 * @q: device queue
2265 * @disk: gendisk 2257 * @disk: gendisk
2266 * @error_sector: error offset 2258 * @error_sector: error offset
2267 * 2259 *
2268 * Description: 2260 * Description:
2269 * Devices understanding the SCSI command set can use this function as 2261 * Devices understanding the SCSI command set can use this function as
2270 * a helper for issuing a cache flush. Note: driver is required to store 2262 * a helper for issuing a cache flush. Note: driver is required to store
2271 * the error offset (in case of error flushing) in ->sector of struct 2263 * the error offset (in case of error flushing) in ->sector of struct
2272 * request. 2264 * request.
2273 */ 2265 */
2274 int blkdev_scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk, 2266 int blkdev_scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk,
2275 sector_t *error_sector) 2267 sector_t *error_sector)
2276 { 2268 {
2277 struct request *rq = blk_get_request(q, WRITE, __GFP_WAIT); 2269 struct request *rq = blk_get_request(q, WRITE, __GFP_WAIT);
2278 int ret; 2270 int ret;
2279 2271
2280 rq->flags |= REQ_BLOCK_PC | REQ_SOFTBARRIER; 2272 rq->flags |= REQ_BLOCK_PC | REQ_SOFTBARRIER;
2281 rq->sector = 0; 2273 rq->sector = 0;
2282 memset(rq->cmd, 0, sizeof(rq->cmd)); 2274 memset(rq->cmd, 0, sizeof(rq->cmd));
2283 rq->cmd[0] = 0x35; 2275 rq->cmd[0] = 0x35;
2284 rq->cmd_len = 12; 2276 rq->cmd_len = 12;
2285 rq->data = NULL; 2277 rq->data = NULL;
2286 rq->data_len = 0; 2278 rq->data_len = 0;
2287 rq->timeout = 60 * HZ; 2279 rq->timeout = 60 * HZ;
2288 2280
2289 ret = blk_execute_rq(q, disk, rq); 2281 ret = blk_execute_rq(q, disk, rq);
2290 2282
2291 if (ret && error_sector) 2283 if (ret && error_sector)
2292 *error_sector = rq->sector; 2284 *error_sector = rq->sector;
2293 2285
2294 blk_put_request(rq); 2286 blk_put_request(rq);
2295 return ret; 2287 return ret;
2296 } 2288 }
2297 2289
2298 EXPORT_SYMBOL(blkdev_scsi_issue_flush_fn); 2290 EXPORT_SYMBOL(blkdev_scsi_issue_flush_fn);
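For a device that speaks the SCSI command set, wiring this helper up is a single assignment at queue-setup time; a sketch (a blk_queue_* setter may also exist for this, the direct assignment simply mirrors the q->issue_flush_fn check in blkdev_issue_flush() above, and mydev_init_queue() is hypothetical):

static void mydev_init_queue(request_queue_t *q)
{
	/* lets blkdev_issue_flush() reach the device via the helper above */
	q->issue_flush_fn = blkdev_scsi_issue_flush_fn;
}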
2299 2291
2300 void drive_stat_acct(struct request *rq, int nr_sectors, int new_io) 2292 void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)
2301 { 2293 {
2302 int rw = rq_data_dir(rq); 2294 int rw = rq_data_dir(rq);
2303 2295
2304 if (!blk_fs_request(rq) || !rq->rq_disk) 2296 if (!blk_fs_request(rq) || !rq->rq_disk)
2305 return; 2297 return;
2306 2298
2307 if (rw == READ) { 2299 if (rw == READ) {
2308 __disk_stat_add(rq->rq_disk, read_sectors, nr_sectors); 2300 __disk_stat_add(rq->rq_disk, read_sectors, nr_sectors);
2309 if (!new_io) 2301 if (!new_io)
2310 __disk_stat_inc(rq->rq_disk, read_merges); 2302 __disk_stat_inc(rq->rq_disk, read_merges);
2311 } else if (rw == WRITE) { 2303 } else if (rw == WRITE) {
2312 __disk_stat_add(rq->rq_disk, write_sectors, nr_sectors); 2304 __disk_stat_add(rq->rq_disk, write_sectors, nr_sectors);
2313 if (!new_io) 2305 if (!new_io)
2314 __disk_stat_inc(rq->rq_disk, write_merges); 2306 __disk_stat_inc(rq->rq_disk, write_merges);
2315 } 2307 }
2316 if (new_io) { 2308 if (new_io) {
2317 disk_round_stats(rq->rq_disk); 2309 disk_round_stats(rq->rq_disk);
2318 rq->rq_disk->in_flight++; 2310 rq->rq_disk->in_flight++;
2319 } 2311 }
2320 } 2312 }
2321 2313
2322 /* 2314 /*
2323 * add-request adds a request to the linked list. 2315 * add-request adds a request to the linked list.
2324 * queue lock is held and interrupts disabled, as we muck with the 2316 * queue lock is held and interrupts disabled, as we muck with the
2325 * request queue list. 2317 * request queue list.
2326 */ 2318 */
2327 static inline void add_request(request_queue_t * q, struct request * req) 2319 static inline void add_request(request_queue_t * q, struct request * req)
2328 { 2320 {
2329 drive_stat_acct(req, req->nr_sectors, 1); 2321 drive_stat_acct(req, req->nr_sectors, 1);
2330 2322
2331 if (q->activity_fn) 2323 if (q->activity_fn)
2332 q->activity_fn(q->activity_data, rq_data_dir(req)); 2324 q->activity_fn(q->activity_data, rq_data_dir(req));
2333 2325
2334 /* 2326 /*
2335 * elevator indicated where it wants this request to be 2327 * elevator indicated where it wants this request to be
2336 * inserted at elevator_merge time 2328 * inserted at elevator_merge time
2337 */ 2329 */
2338 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); 2330 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
2339 } 2331 }
2340 2332
2341 /* 2333 /*
2342 * disk_round_stats() - Round off the performance stats on a struct 2334 * disk_round_stats() - Round off the performance stats on a struct
2343 * disk_stats. 2335 * disk_stats.
2344 * 2336 *
2345 * The average IO queue length and utilisation statistics are maintained 2337 * The average IO queue length and utilisation statistics are maintained
2346 * by observing the current state of the queue length and the amount of 2338 * by observing the current state of the queue length and the amount of
2347 * time it has been in this state for. 2339 * time it has been in this state for.
2348 * 2340 *
2349 * Normally, that accounting is done on IO completion, but that can result 2341 * Normally, that accounting is done on IO completion, but that can result
2350 * in more than a second's worth of IO being accounted for within any one 2342 * in more than a second's worth of IO being accounted for within any one
2351 * second, leading to >100% utilisation. To deal with that, we call this 2343 * second, leading to >100% utilisation. To deal with that, we call this
2352 * function to do a round-off before returning the results when reading 2344 * function to do a round-off before returning the results when reading
2353 * /proc/diskstats. This accounts immediately for all queue usage up to 2345 * /proc/diskstats. This accounts immediately for all queue usage up to
2354 * the current jiffies and restarts the counters again. 2346 * the current jiffies and restarts the counters again.
2355 */ 2347 */
2356 void disk_round_stats(struct gendisk *disk) 2348 void disk_round_stats(struct gendisk *disk)
2357 { 2349 {
2358 unsigned long now = jiffies; 2350 unsigned long now = jiffies;
2359 2351
2360 __disk_stat_add(disk, time_in_queue, 2352 __disk_stat_add(disk, time_in_queue,
2361 disk->in_flight * (now - disk->stamp)); 2353 disk->in_flight * (now - disk->stamp));
2362 disk->stamp = now; 2354 disk->stamp = now;
2363 2355
2364 if (disk->in_flight) 2356 if (disk->in_flight)
2365 __disk_stat_add(disk, io_ticks, (now - disk->stamp_idle)); 2357 __disk_stat_add(disk, io_ticks, (now - disk->stamp_idle));
2366 disk->stamp_idle = now; 2358 disk->stamp_idle = now;
2367 } 2359 }
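A rough worked instance of the bookkeeping above: with disk->in_flight == 3 and 40 ms worth of jiffies elapsed since disk->stamp, time_in_queue gains 3 * 40 = 120 ms while io_ticks gains only the ~40 ms the disk was actually busy; doing this round-off when /proc/diskstats is read, rather than only at IO completion, is what keeps reported utilisation at or below 100% per sampling interval.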
2368 2360
2369 /* 2361 /*
2370 * queue lock must be held 2362 * queue lock must be held
2371 */ 2363 */
2372 static void __blk_put_request(request_queue_t *q, struct request *req) 2364 static void __blk_put_request(request_queue_t *q, struct request *req)
2373 { 2365 {
2374 struct request_list *rl = req->rl; 2366 struct request_list *rl = req->rl;
2375 2367
2376 if (unlikely(!q)) 2368 if (unlikely(!q))
2377 return; 2369 return;
2378 if (unlikely(--req->ref_count)) 2370 if (unlikely(--req->ref_count))
2379 return; 2371 return;
2380 2372
2381 req->rq_status = RQ_INACTIVE; 2373 req->rq_status = RQ_INACTIVE;
2382 req->q = NULL; 2374 req->q = NULL;
2383 req->rl = NULL; 2375 req->rl = NULL;
2384 2376
2385 /* 2377 /*
2386 * Request may not have originated from ll_rw_blk. if not, 2378 * Request may not have originated from ll_rw_blk. if not,
2387 * it didn't come out of our reserved rq pools 2379 * it didn't come out of our reserved rq pools
2388 */ 2380 */
2389 if (rl) { 2381 if (rl) {
2390 int rw = rq_data_dir(req); 2382 int rw = rq_data_dir(req);
2391 2383
2392 elv_completed_request(q, req); 2384 elv_completed_request(q, req);
2393 2385
2394 BUG_ON(!list_empty(&req->queuelist)); 2386 BUG_ON(!list_empty(&req->queuelist));
2395 2387
2396 blk_free_request(q, req); 2388 blk_free_request(q, req);
2397 freed_request(q, rw); 2389 freed_request(q, rw);
2398 } 2390 }
2399 } 2391 }
2400 2392
2401 void blk_put_request(struct request *req) 2393 void blk_put_request(struct request *req)
2402 { 2394 {
2403 /* 2395 /*
2404 * if req->rl isn't set, this request didn't originate from the 2396 * if req->rl isn't set, this request didn't originate from the
2405 * block layer, so it's safe to just disregard it 2397 * block layer, so it's safe to just disregard it
2406 */ 2398 */
2407 if (req->rl) { 2399 if (req->rl) {
2408 unsigned long flags; 2400 unsigned long flags;
2409 request_queue_t *q = req->q; 2401 request_queue_t *q = req->q;
2410 2402
2411 spin_lock_irqsave(q->queue_lock, flags); 2403 spin_lock_irqsave(q->queue_lock, flags);
2412 __blk_put_request(q, req); 2404 __blk_put_request(q, req);
2413 spin_unlock_irqrestore(q->queue_lock, flags); 2405 spin_unlock_irqrestore(q->queue_lock, flags);
2414 } 2406 }
2415 } 2407 }
2416 2408
2417 EXPORT_SYMBOL(blk_put_request); 2409 EXPORT_SYMBOL(blk_put_request);
2418 2410
2419 /** 2411 /**
2420 * blk_end_sync_rq - executes a completion event on a request 2412 * blk_end_sync_rq - executes a completion event on a request
2421 * @rq: request to complete 2413 * @rq: request to complete
2422 */ 2414 */
2423 void blk_end_sync_rq(struct request *rq) 2415 void blk_end_sync_rq(struct request *rq)
2424 { 2416 {
2425 struct completion *waiting = rq->waiting; 2417 struct completion *waiting = rq->waiting;
2426 2418
2427 rq->waiting = NULL; 2419 rq->waiting = NULL;
2428 __blk_put_request(rq->q, rq); 2420 __blk_put_request(rq->q, rq);
2429 2421
2430 /* 2422 /*
2431 * complete last, if this is a stack request the process (and thus 2423 * complete last, if this is a stack request the process (and thus
2432 * the rq pointer) could be invalid right after this complete() 2424 * the rq pointer) could be invalid right after this complete()
2433 */ 2425 */
2434 complete(waiting); 2426 complete(waiting);
2435 } 2427 }
2436 EXPORT_SYMBOL(blk_end_sync_rq); 2428 EXPORT_SYMBOL(blk_end_sync_rq);
2437 2429
2438 /** 2430 /**
2439 * blk_congestion_wait - wait for a queue to become uncongested 2431 * blk_congestion_wait - wait for a queue to become uncongested
2440 * @rw: READ or WRITE 2432 * @rw: READ or WRITE
2441 * @timeout: timeout in jiffies 2433 * @timeout: timeout in jiffies
2442 * 2434 *
2443 * Waits for up to @timeout jiffies for a queue (any queue) to exit congestion. 2435 * Waits for up to @timeout jiffies for a queue (any queue) to exit congestion.
2444 * If no queues are congested then just wait for the next request to be 2436 * If no queues are congested then just wait for the next request to be
2445 * returned. 2437 * returned.
2446 */ 2438 */
2447 long blk_congestion_wait(int rw, long timeout) 2439 long blk_congestion_wait(int rw, long timeout)
2448 { 2440 {
2449 long ret; 2441 long ret;
2450 DEFINE_WAIT(wait); 2442 DEFINE_WAIT(wait);
2451 wait_queue_head_t *wqh = &congestion_wqh[rw]; 2443 wait_queue_head_t *wqh = &congestion_wqh[rw];
2452 2444
2453 prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); 2445 prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
2454 ret = io_schedule_timeout(timeout); 2446 ret = io_schedule_timeout(timeout);
2455 finish_wait(wqh, &wait); 2447 finish_wait(wqh, &wait);
2456 return ret; 2448 return ret;
2457 } 2449 }
2458 2450
2459 EXPORT_SYMBOL(blk_congestion_wait); 2451 EXPORT_SYMBOL(blk_congestion_wait);
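On the caller side this is a throttling primitive; a sketch of a write-heavy path backing off (mydev_too_much_dirty() and the HZ/10 backoff are illustrative assumptions, not mandated by the API):

static void mydev_throttle_writes(void)
{
	/* sleep until some queue leaves write congestion, or for ~100 ms */
	while (mydev_too_much_dirty())
		blk_congestion_wait(WRITE, HZ / 10);
}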
2460 2452
2461 /* 2453 /*
2462 * Has to be called with the request spinlock acquired 2454 * Has to be called with the request spinlock acquired
2463 */ 2455 */
2464 static int attempt_merge(request_queue_t *q, struct request *req, 2456 static int attempt_merge(request_queue_t *q, struct request *req,
2465 struct request *next) 2457 struct request *next)
2466 { 2458 {
2467 if (!rq_mergeable(req) || !rq_mergeable(next)) 2459 if (!rq_mergeable(req) || !rq_mergeable(next))
2468 return 0; 2460 return 0;
2469 2461
2470 /* 2462 /*
2471 * not contiguous 2463 * not contiguous
2472 */ 2464 */
2473 if (req->sector + req->nr_sectors != next->sector) 2465 if (req->sector + req->nr_sectors != next->sector)
2474 return 0; 2466 return 0;
2475 2467
2476 if (rq_data_dir(req) != rq_data_dir(next) 2468 if (rq_data_dir(req) != rq_data_dir(next)
2477 || req->rq_disk != next->rq_disk 2469 || req->rq_disk != next->rq_disk
2478 || next->waiting || next->special) 2470 || next->waiting || next->special)
2479 return 0; 2471 return 0;
2480 2472
2481 /* 2473 /*
2482 * If we are allowed to merge, then append bio list 2474 * If we are allowed to merge, then append bio list
2483 * from next to rq and release next. merge_requests_fn 2475 * from next to rq and release next. merge_requests_fn
2484 * will have updated segment counts, update sector 2476 * will have updated segment counts, update sector
2485 * counts here. 2477 * counts here.
2486 */ 2478 */
2487 if (!q->merge_requests_fn(q, req, next)) 2479 if (!q->merge_requests_fn(q, req, next))
2488 return 0; 2480 return 0;
2489 2481
2490 /* 2482 /*
2491 * At this point we have either done a back merge 2483 * At this point we have either done a back merge
2492 * or front merge. We need the smaller start_time of 2484 * or front merge. We need the smaller start_time of
2493 * the merged requests to be the current request 2485 * the merged requests to be the current request
2494 * for accounting purposes. 2486 * for accounting purposes.
2495 */ 2487 */
2496 if (time_after(req->start_time, next->start_time)) 2488 if (time_after(req->start_time, next->start_time))
2497 req->start_time = next->start_time; 2489 req->start_time = next->start_time;
2498 2490
2499 req->biotail->bi_next = next->bio; 2491 req->biotail->bi_next = next->bio;
2500 req->biotail = next->biotail; 2492 req->biotail = next->biotail;
2501 2493
2502 req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; 2494 req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
2503 2495
2504 elv_merge_requests(q, req, next); 2496 elv_merge_requests(q, req, next);
2505 2497
2506 if (req->rq_disk) { 2498 if (req->rq_disk) {
2507 disk_round_stats(req->rq_disk); 2499 disk_round_stats(req->rq_disk);
2508 req->rq_disk->in_flight--; 2500 req->rq_disk->in_flight--;
2509 } 2501 }
2510 2502
2511 __blk_put_request(q, next); 2503 __blk_put_request(q, next);
2512 return 1; 2504 return 1;
2513 } 2505 }
2514 2506
2515 static inline int attempt_back_merge(request_queue_t *q, struct request *rq) 2507 static inline int attempt_back_merge(request_queue_t *q, struct request *rq)
2516 { 2508 {
2517 struct request *next = elv_latter_request(q, rq); 2509 struct request *next = elv_latter_request(q, rq);
2518 2510
2519 if (next) 2511 if (next)
2520 return attempt_merge(q, rq, next); 2512 return attempt_merge(q, rq, next);
2521 2513
2522 return 0; 2514 return 0;
2523 } 2515 }
2524 2516
2525 static inline int attempt_front_merge(request_queue_t *q, struct request *rq) 2517 static inline int attempt_front_merge(request_queue_t *q, struct request *rq)
2526 { 2518 {
2527 struct request *prev = elv_former_request(q, rq); 2519 struct request *prev = elv_former_request(q, rq);
2528 2520
2529 if (prev) 2521 if (prev)
2530 return attempt_merge(q, prev, rq); 2522 return attempt_merge(q, prev, rq);
2531 2523
2532 return 0; 2524 return 0;
2533 } 2525 }
2534 2526
2535 /** 2527 /**
2536 * blk_attempt_remerge - attempt to remerge active head with next request 2528 * blk_attempt_remerge - attempt to remerge active head with next request
2537 * @q: The &request_queue_t belonging to the device 2529 * @q: The &request_queue_t belonging to the device
2538 * @rq: The head request (usually) 2530 * @rq: The head request (usually)
2539 * 2531 *
2540 * Description: 2532 * Description:
2541 * For head-active devices, the queue can easily be unplugged so quickly 2533 * For head-active devices, the queue can easily be unplugged so quickly
2542 * that proper merging is not done on the front request. This may hurt 2534 * that proper merging is not done on the front request. This may hurt
2543 * performance greatly for some devices. The block layer cannot safely 2535 * performance greatly for some devices. The block layer cannot safely
2544 * do merging on that first request for these queues, but the driver can 2536 * do merging on that first request for these queues, but the driver can
2545 * call this function and make it happen anyway. Only the driver knows 2537 * call this function and make it happen anyway. Only the driver knows
2546 * when it is safe to do so. 2538 * when it is safe to do so.
2547 **/ 2539 **/
2548 void blk_attempt_remerge(request_queue_t *q, struct request *rq) 2540 void blk_attempt_remerge(request_queue_t *q, struct request *rq)
2549 { 2541 {
2550 unsigned long flags; 2542 unsigned long flags;
2551 2543
2552 spin_lock_irqsave(q->queue_lock, flags); 2544 spin_lock_irqsave(q->queue_lock, flags);
2553 attempt_back_merge(q, rq); 2545 attempt_back_merge(q, rq);
2554 spin_unlock_irqrestore(q->queue_lock, flags); 2546 spin_unlock_irqrestore(q->queue_lock, flags);
2555 } 2547 }
2556 2548
2557 EXPORT_SYMBOL(blk_attempt_remerge); 2549 EXPORT_SYMBOL(blk_attempt_remerge);
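A sketch of the call site the kernel-doc describes: the driver, in a context where q->queue_lock is not held (blk_attempt_remerge() takes it itself), knows its head request rq has not been started yet and gives the block layer one more merge opportunity; mydev_issue() is hypothetical:

static void mydev_kick_head_request(request_queue_t *q, struct request *rq)
{
	blk_attempt_remerge(q, rq);	/* locks the queue internally */
	mydev_issue(rq);
}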
2558 2550
2559 /* 2551 /*
2560 * Non-locking blk_attempt_remerge variant. 2552 * Non-locking blk_attempt_remerge variant.
2561 */ 2553 */
2562 void __blk_attempt_remerge(request_queue_t *q, struct request *rq) 2554 void __blk_attempt_remerge(request_queue_t *q, struct request *rq)
2563 { 2555 {
2564 attempt_back_merge(q, rq); 2556 attempt_back_merge(q, rq);
2565 } 2557 }
2566 2558
2567 EXPORT_SYMBOL(__blk_attempt_remerge); 2559 EXPORT_SYMBOL(__blk_attempt_remerge);
2568 2560
2569 static int __make_request(request_queue_t *q, struct bio *bio) 2561 static int __make_request(request_queue_t *q, struct bio *bio)
2570 { 2562 {
2571 struct request *req, *freereq = NULL; 2563 struct request *req, *freereq = NULL;
2572 int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync; 2564 int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync;
2573 sector_t sector; 2565 sector_t sector;
2574 2566
2575 sector = bio->bi_sector; 2567 sector = bio->bi_sector;
2576 nr_sectors = bio_sectors(bio); 2568 nr_sectors = bio_sectors(bio);
2577 cur_nr_sectors = bio_cur_sectors(bio); 2569 cur_nr_sectors = bio_cur_sectors(bio);
2578 2570
2579 rw = bio_data_dir(bio); 2571 rw = bio_data_dir(bio);
2580 sync = bio_sync(bio); 2572 sync = bio_sync(bio);
2581 2573
2582 /* 2574 /*
2583 * low level driver can indicate that it wants pages above a 2575 * low level driver can indicate that it wants pages above a
2584 * certain limit bounced to low memory (ie for highmem, or even 2576 * certain limit bounced to low memory (ie for highmem, or even
2585 * ISA dma in theory) 2577 * ISA dma in theory)
2586 */ 2578 */
2587 blk_queue_bounce(q, &bio); 2579 blk_queue_bounce(q, &bio);
2588 2580
2589 spin_lock_prefetch(q->queue_lock); 2581 spin_lock_prefetch(q->queue_lock);
2590 2582
2591 barrier = bio_barrier(bio); 2583 barrier = bio_barrier(bio);
2592 if (barrier && (q->ordered == QUEUE_ORDERED_NONE)) { 2584 if (barrier && (q->ordered == QUEUE_ORDERED_NONE)) {
2593 err = -EOPNOTSUPP; 2585 err = -EOPNOTSUPP;
2594 goto end_io; 2586 goto end_io;
2595 } 2587 }
2596 2588
2597 again: 2589 again:
2598 spin_lock_irq(q->queue_lock); 2590 spin_lock_irq(q->queue_lock);
2599 2591
2600 if (elv_queue_empty(q)) { 2592 if (elv_queue_empty(q)) {
2601 blk_plug_device(q); 2593 blk_plug_device(q);
2602 goto get_rq; 2594 goto get_rq;
2603 } 2595 }
2604 if (barrier) 2596 if (barrier)
2605 goto get_rq; 2597 goto get_rq;
2606 2598
2607 el_ret = elv_merge(q, &req, bio); 2599 el_ret = elv_merge(q, &req, bio);
2608 switch (el_ret) { 2600 switch (el_ret) {
2609 case ELEVATOR_BACK_MERGE: 2601 case ELEVATOR_BACK_MERGE:
2610 BUG_ON(!rq_mergeable(req)); 2602 BUG_ON(!rq_mergeable(req));
2611 2603
2612 if (!q->back_merge_fn(q, req, bio)) 2604 if (!q->back_merge_fn(q, req, bio))
2613 break; 2605 break;
2614 2606
2615 req->biotail->bi_next = bio; 2607 req->biotail->bi_next = bio;
2616 req->biotail = bio; 2608 req->biotail = bio;
2617 req->nr_sectors = req->hard_nr_sectors += nr_sectors; 2609 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
2618 drive_stat_acct(req, nr_sectors, 0); 2610 drive_stat_acct(req, nr_sectors, 0);
2619 if (!attempt_back_merge(q, req)) 2611 if (!attempt_back_merge(q, req))
2620 elv_merged_request(q, req); 2612 elv_merged_request(q, req);
2621 goto out; 2613 goto out;
2622 2614
2623 case ELEVATOR_FRONT_MERGE: 2615 case ELEVATOR_FRONT_MERGE:
2624 BUG_ON(!rq_mergeable(req)); 2616 BUG_ON(!rq_mergeable(req));
2625 2617
2626 if (!q->front_merge_fn(q, req, bio)) 2618 if (!q->front_merge_fn(q, req, bio))
2627 break; 2619 break;
2628 2620
2629 bio->bi_next = req->bio; 2621 bio->bi_next = req->bio;
2630 req->bio = bio; 2622 req->bio = bio;
2631 2623
2632 /* 2624 /*
2633 * may not be valid. if the low level driver said 2625 * may not be valid. if the low level driver said
2634 * it didn't need a bounce buffer then it better 2626 * it didn't need a bounce buffer then it better
2635 * not touch req->buffer either... 2627 * not touch req->buffer either...
2636 */ 2628 */
2637 req->buffer = bio_data(bio); 2629 req->buffer = bio_data(bio);
2638 req->current_nr_sectors = cur_nr_sectors; 2630 req->current_nr_sectors = cur_nr_sectors;
2639 req->hard_cur_sectors = cur_nr_sectors; 2631 req->hard_cur_sectors = cur_nr_sectors;
2640 req->sector = req->hard_sector = sector; 2632 req->sector = req->hard_sector = sector;
2641 req->nr_sectors = req->hard_nr_sectors += nr_sectors; 2633 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
2642 drive_stat_acct(req, nr_sectors, 0); 2634 drive_stat_acct(req, nr_sectors, 0);
2643 if (!attempt_front_merge(q, req)) 2635 if (!attempt_front_merge(q, req))
2644 elv_merged_request(q, req); 2636 elv_merged_request(q, req);
2645 goto out; 2637 goto out;
2646 2638
2647 /* 2639 /*
2648 * elevator says don't/can't merge. get new request 2640 * elevator says don't/can't merge. get new request
2649 */ 2641 */
2650 case ELEVATOR_NO_MERGE: 2642 case ELEVATOR_NO_MERGE:
2651 break; 2643 break;
2652 2644
2653 default: 2645 default:
2654 printk("elevator returned crap (%d)\n", el_ret); 2646 printk("elevator returned crap (%d)\n", el_ret);
2655 BUG(); 2647 BUG();
2656 } 2648 }
2657 2649
2658 /* 2650 /*
2659 * Grab a free request from the freelist - if that is empty, check 2651 * Grab a free request from the freelist - if that is empty, check
2660 * if we are doing read ahead and abort instead of blocking for 2652 * if we are doing read ahead and abort instead of blocking for
2661 * a free slot. 2653 * a free slot.
2662 */ 2654 */
2663 get_rq: 2655 get_rq:
2664 if (freereq) { 2656 if (freereq) {
2665 req = freereq; 2657 req = freereq;
2666 freereq = NULL; 2658 freereq = NULL;
2667 } else { 2659 } else {
2668 spin_unlock_irq(q->queue_lock); 2660 spin_unlock_irq(q->queue_lock);
2669 if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) { 2661 if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) {
2670 /* 2662 /*
2671 * READA bit set 2663 * READA bit set
2672 */ 2664 */
2673 err = -EWOULDBLOCK; 2665 err = -EWOULDBLOCK;
2674 if (bio_rw_ahead(bio)) 2666 if (bio_rw_ahead(bio))
2675 goto end_io; 2667 goto end_io;
2676 2668
2677 freereq = get_request_wait(q, rw); 2669 freereq = get_request_wait(q, rw);
2678 } 2670 }
2679 goto again; 2671 goto again;
2680 } 2672 }
2681 2673
2682 req->flags |= REQ_CMD; 2674 req->flags |= REQ_CMD;
2683 2675
2684 /* 2676 /*
2685 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) 2677 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
2686 */ 2678 */
2687 if (bio_rw_ahead(bio) || bio_failfast(bio)) 2679 if (bio_rw_ahead(bio) || bio_failfast(bio))
2688 req->flags |= REQ_FAILFAST; 2680 req->flags |= REQ_FAILFAST;
2689 2681
2690 /* 2682 /*
2691 * REQ_BARRIER implies no merging, but lets make it explicit 2683 * REQ_BARRIER implies no merging, but lets make it explicit
2692 */ 2684 */
2693 if (barrier) 2685 if (barrier)
2694 req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); 2686 req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
2695 2687
2696 req->errors = 0; 2688 req->errors = 0;
2697 req->hard_sector = req->sector = sector; 2689 req->hard_sector = req->sector = sector;
2698 req->hard_nr_sectors = req->nr_sectors = nr_sectors; 2690 req->hard_nr_sectors = req->nr_sectors = nr_sectors;
2699 req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors; 2691 req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors;
2700 req->nr_phys_segments = bio_phys_segments(q, bio); 2692 req->nr_phys_segments = bio_phys_segments(q, bio);
2701 req->nr_hw_segments = bio_hw_segments(q, bio); 2693 req->nr_hw_segments = bio_hw_segments(q, bio);
2702 req->buffer = bio_data(bio); /* see ->buffer comment above */ 2694 req->buffer = bio_data(bio); /* see ->buffer comment above */
2703 req->waiting = NULL; 2695 req->waiting = NULL;
2704 req->bio = req->biotail = bio; 2696 req->bio = req->biotail = bio;
2705 req->rq_disk = bio->bi_bdev->bd_disk; 2697 req->rq_disk = bio->bi_bdev->bd_disk;
2706 req->start_time = jiffies; 2698 req->start_time = jiffies;
2707 2699
2708 add_request(q, req); 2700 add_request(q, req);
2709 out: 2701 out:
2710 if (freereq) 2702 if (freereq)
2711 __blk_put_request(q, freereq); 2703 __blk_put_request(q, freereq);
2712 if (sync) 2704 if (sync)
2713 __generic_unplug_device(q); 2705 __generic_unplug_device(q);
2714 2706
2715 spin_unlock_irq(q->queue_lock); 2707 spin_unlock_irq(q->queue_lock);
2716 return 0; 2708 return 0;
2717 2709
2718 end_io: 2710 end_io:
2719 bio_endio(bio, nr_sectors << 9, err); 2711 bio_endio(bio, nr_sectors << 9, err);
2720 return 0; 2712 return 0;
2721 } 2713 }
2722 2714
2723 /* 2715 /*
2724 * If bio->bi_dev is a partition, remap the location 2716 * If bio->bi_dev is a partition, remap the location
2725 */ 2717 */
2726 static inline void blk_partition_remap(struct bio *bio) 2718 static inline void blk_partition_remap(struct bio *bio)
2727 { 2719 {
2728 struct block_device *bdev = bio->bi_bdev; 2720 struct block_device *bdev = bio->bi_bdev;
2729 2721
2730 if (bdev != bdev->bd_contains) { 2722 if (bdev != bdev->bd_contains) {
2731 struct hd_struct *p = bdev->bd_part; 2723 struct hd_struct *p = bdev->bd_part;
2732 2724
2733 switch (bio->bi_rw) { 2725 switch (bio->bi_rw) {
2734 case READ: 2726 case READ:
2735 p->read_sectors += bio_sectors(bio); 2727 p->read_sectors += bio_sectors(bio);
2736 p->reads++; 2728 p->reads++;
2737 break; 2729 break;
2738 case WRITE: 2730 case WRITE:
2739 p->write_sectors += bio_sectors(bio); 2731 p->write_sectors += bio_sectors(bio);
2740 p->writes++; 2732 p->writes++;
2741 break; 2733 break;
2742 } 2734 }
2743 bio->bi_sector += p->start_sect; 2735 bio->bi_sector += p->start_sect;
2744 bio->bi_bdev = bdev->bd_contains; 2736 bio->bi_bdev = bdev->bd_contains;
2745 } 2737 }
2746 } 2738 }
2747 2739
2748 void blk_finish_queue_drain(request_queue_t *q) 2740 void blk_finish_queue_drain(request_queue_t *q)
2749 { 2741 {
2750 struct request_list *rl = &q->rq; 2742 struct request_list *rl = &q->rq;
2751 struct request *rq; 2743 struct request *rq;
2752 2744
2753 spin_lock_irq(q->queue_lock); 2745 spin_lock_irq(q->queue_lock);
2754 clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags); 2746 clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
2755 2747
2756 while (!list_empty(&q->drain_list)) { 2748 while (!list_empty(&q->drain_list)) {
2757 rq = list_entry_rq(q->drain_list.next); 2749 rq = list_entry_rq(q->drain_list.next);
2758 2750
2759 list_del_init(&rq->queuelist); 2751 list_del_init(&rq->queuelist);
2760 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1); 2752 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
2761 } 2753 }
2762 2754
2763 spin_unlock_irq(q->queue_lock); 2755 spin_unlock_irq(q->queue_lock);
2764 2756
2765 wake_up(&rl->wait[0]); 2757 wake_up(&rl->wait[0]);
2766 wake_up(&rl->wait[1]); 2758 wake_up(&rl->wait[1]);
2767 wake_up(&rl->drain); 2759 wake_up(&rl->drain);
2768 } 2760 }
2769 2761
2770 static int wait_drain(request_queue_t *q, struct request_list *rl, int dispatch) 2762 static int wait_drain(request_queue_t *q, struct request_list *rl, int dispatch)
2771 { 2763 {
2772 int wait = rl->count[READ] + rl->count[WRITE]; 2764 int wait = rl->count[READ] + rl->count[WRITE];
2773 2765
2774 if (dispatch) 2766 if (dispatch)
2775 wait += !list_empty(&q->queue_head); 2767 wait += !list_empty(&q->queue_head);
2776 2768
2777 return wait; 2769 return wait;
2778 } 2770 }
2779 2771
2780 /* 2772 /*
2781 * We rely on the fact that only requests allocated through blk_alloc_request() 2773 * We rely on the fact that only requests allocated through blk_alloc_request()
2782 * have io scheduler private data structures associated with them. Any other 2774 * have io scheduler private data structures associated with them. Any other
2783 * type of request (allocated on stack or through kmalloc()) should not go 2775 * type of request (allocated on stack or through kmalloc()) should not go
2784 * to the io scheduler core, but be attached to the queue head instead. 2776 * to the io scheduler core, but be attached to the queue head instead.
2785 */ 2777 */
2786 void blk_wait_queue_drained(request_queue_t *q, int wait_dispatch) 2778 void blk_wait_queue_drained(request_queue_t *q, int wait_dispatch)
2787 { 2779 {
2788 struct request_list *rl = &q->rq; 2780 struct request_list *rl = &q->rq;
2789 DEFINE_WAIT(wait); 2781 DEFINE_WAIT(wait);
2790 2782
2791 spin_lock_irq(q->queue_lock); 2783 spin_lock_irq(q->queue_lock);
2792 set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags); 2784 set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
2793 2785
2794 while (wait_drain(q, rl, wait_dispatch)) { 2786 while (wait_drain(q, rl, wait_dispatch)) {
2795 prepare_to_wait(&rl->drain, &wait, TASK_UNINTERRUPTIBLE); 2787 prepare_to_wait(&rl->drain, &wait, TASK_UNINTERRUPTIBLE);
2796 2788
2797 if (wait_drain(q, rl, wait_dispatch)) { 2789 if (wait_drain(q, rl, wait_dispatch)) {
2798 __generic_unplug_device(q); 2790 __generic_unplug_device(q);
2799 spin_unlock_irq(q->queue_lock); 2791 spin_unlock_irq(q->queue_lock);
2800 io_schedule(); 2792 io_schedule();
2801 spin_lock_irq(q->queue_lock); 2793 spin_lock_irq(q->queue_lock);
2802 } 2794 }
2803 2795
2804 finish_wait(&rl->drain, &wait); 2796 finish_wait(&rl->drain, &wait);
2805 } 2797 }
2806 2798
2807 spin_unlock_irq(q->queue_lock); 2799 spin_unlock_irq(q->queue_lock);
2808 } 2800 }
2809 2801
2810 /* 2802 /*
2811 * block waiting for the io scheduler being started again. 2803 * block waiting for the io scheduler being started again.
2812 */ 2804 */
2813 static inline void block_wait_queue_running(request_queue_t *q) 2805 static inline void block_wait_queue_running(request_queue_t *q)
2814 { 2806 {
2815 DEFINE_WAIT(wait); 2807 DEFINE_WAIT(wait);
2816 2808
2817 while (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) { 2809 while (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) {
2818 struct request_list *rl = &q->rq; 2810 struct request_list *rl = &q->rq;
2819 2811
2820 prepare_to_wait_exclusive(&rl->drain, &wait, 2812 prepare_to_wait_exclusive(&rl->drain, &wait,
2821 TASK_UNINTERRUPTIBLE); 2813 TASK_UNINTERRUPTIBLE);
2822 2814
2823 /* 2815 /*
2824 * re-check the condition. avoids using prepare_to_wait() 2816 * re-check the condition. avoids using prepare_to_wait()
2825 * in the fast path (queue is running) 2817 * in the fast path (queue is running)
2826 */ 2818 */
2827 if (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) 2819 if (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))
2828 io_schedule(); 2820 io_schedule();
2829 2821
2830 finish_wait(&rl->drain, &wait); 2822 finish_wait(&rl->drain, &wait);
2831 } 2823 }
2832 } 2824 }
2833 2825
2834 static void handle_bad_sector(struct bio *bio) 2826 static void handle_bad_sector(struct bio *bio)
2835 { 2827 {
2836 char b[BDEVNAME_SIZE]; 2828 char b[BDEVNAME_SIZE];
2837 2829
2838 printk(KERN_INFO "attempt to access beyond end of device\n"); 2830 printk(KERN_INFO "attempt to access beyond end of device\n");
2839 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", 2831 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
2840 bdevname(bio->bi_bdev, b), 2832 bdevname(bio->bi_bdev, b),
2841 bio->bi_rw, 2833 bio->bi_rw,
2842 (unsigned long long)bio->bi_sector + bio_sectors(bio), 2834 (unsigned long long)bio->bi_sector + bio_sectors(bio),
2843 (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); 2835 (long long)(bio->bi_bdev->bd_inode->i_size >> 9));
2844 2836
2845 set_bit(BIO_EOF, &bio->bi_flags); 2837 set_bit(BIO_EOF, &bio->bi_flags);
2846 } 2838 }
2847 2839
2848 /** 2840 /**
2849 * generic_make_request: hand a buffer to its device driver for I/O 2841 * generic_make_request: hand a buffer to its device driver for I/O
2850 * @bio: The bio describing the location in memory and on the device. 2842 * @bio: The bio describing the location in memory and on the device.
2851 * 2843 *
2852 * generic_make_request() is used to make I/O requests of block 2844 * generic_make_request() is used to make I/O requests of block
2853 * devices. It is passed a &struct bio, which describes the I/O that needs 2845 * devices. It is passed a &struct bio, which describes the I/O that needs
2854 * to be done. 2846 * to be done.
2855 * 2847 *
2856 * generic_make_request() does not return any status. The 2848 * generic_make_request() does not return any status. The
2857 * success/failure status of the request, along with notification of 2849 * success/failure status of the request, along with notification of
2858 * completion, is delivered asynchronously through the bio->bi_end_io 2850 * completion, is delivered asynchronously through the bio->bi_end_io
2859 * function described (one day) elsewhere. 2851 * function described (one day) elsewhere.
2860 * 2852 *
2861 * The caller of generic_make_request must make sure that bi_io_vec 2853 * The caller of generic_make_request must make sure that bi_io_vec
2862 * are set to describe the memory buffer, and that bi_dev and bi_sector are 2854 * are set to describe the memory buffer, and that bi_dev and bi_sector are
2863 * set to describe the device address, and the 2855 * set to describe the device address, and the
2864 * bi_end_io and optionally bi_private are set to describe how 2856 * bi_end_io and optionally bi_private are set to describe how
2865 * completion notification should be signaled. 2857 * completion notification should be signaled.
2866 * 2858 *
2867 * generic_make_request and the drivers it calls may use bi_next if this 2859 * generic_make_request and the drivers it calls may use bi_next if this
2868 * bio happens to be merged with someone else, and may change bi_dev and 2860 * bio happens to be merged with someone else, and may change bi_dev and
2869 * bi_sector for remaps as it sees fit. So the values of these fields 2861 * bi_sector for remaps as it sees fit. So the values of these fields
2870 * should NOT be depended on after the call to generic_make_request. 2862 * should NOT be depended on after the call to generic_make_request.
2871 */ 2863 */
2872 void generic_make_request(struct bio *bio) 2864 void generic_make_request(struct bio *bio)
2873 { 2865 {
2874 request_queue_t *q; 2866 request_queue_t *q;
2875 sector_t maxsector; 2867 sector_t maxsector;
2876 int ret, nr_sectors = bio_sectors(bio); 2868 int ret, nr_sectors = bio_sectors(bio);
2877 2869
2878 might_sleep(); 2870 might_sleep();
2879 /* Test device or partition size, when known. */ 2871 /* Test device or partition size, when known. */
2880 maxsector = bio->bi_bdev->bd_inode->i_size >> 9; 2872 maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
2881 if (maxsector) { 2873 if (maxsector) {
2882 sector_t sector = bio->bi_sector; 2874 sector_t sector = bio->bi_sector;
2883 2875
2884 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { 2876 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
2885 /* 2877 /*
2886 * This may well happen - the kernel calls bread() 2878 * This may well happen - the kernel calls bread()
2887 * without checking the size of the device, e.g., when 2879 * without checking the size of the device, e.g., when
2888 * mounting a device. 2880 * mounting a device.
2889 */ 2881 */
2890 handle_bad_sector(bio); 2882 handle_bad_sector(bio);
2891 goto end_io; 2883 goto end_io;
2892 } 2884 }
2893 } 2885 }
2894 2886
2895 /* 2887 /*
2896 * Resolve the mapping until finished. (drivers are 2888 * Resolve the mapping until finished. (drivers are
2897 * still free to implement/resolve their own stacking 2889 * still free to implement/resolve their own stacking
2898 * by explicitly returning 0) 2890 * by explicitly returning 0)
2899 * 2891 *
2900 * NOTE: we don't repeat the blk_size check for each new device. 2892 * NOTE: we don't repeat the blk_size check for each new device.
2901 * Stacking drivers are expected to know what they are doing. 2893 * Stacking drivers are expected to know what they are doing.
2902 */ 2894 */
2903 do { 2895 do {
2904 char b[BDEVNAME_SIZE]; 2896 char b[BDEVNAME_SIZE];
2905 2897
2906 q = bdev_get_queue(bio->bi_bdev); 2898 q = bdev_get_queue(bio->bi_bdev);
2907 if (!q) { 2899 if (!q) {
2908 printk(KERN_ERR 2900 printk(KERN_ERR
2909 "generic_make_request: Trying to access " 2901 "generic_make_request: Trying to access "
2910 "nonexistent block-device %s (%Lu)\n", 2902 "nonexistent block-device %s (%Lu)\n",
2911 bdevname(bio->bi_bdev, b), 2903 bdevname(bio->bi_bdev, b),
2912 (long long) bio->bi_sector); 2904 (long long) bio->bi_sector);
2913 end_io: 2905 end_io:
2914 bio_endio(bio, bio->bi_size, -EIO); 2906 bio_endio(bio, bio->bi_size, -EIO);
2915 break; 2907 break;
2916 } 2908 }
2917 2909
2918 if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) { 2910 if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) {
2919 printk("bio too big device %s (%u > %u)\n", 2911 printk("bio too big device %s (%u > %u)\n",
2920 bdevname(bio->bi_bdev, b), 2912 bdevname(bio->bi_bdev, b),
2921 bio_sectors(bio), 2913 bio_sectors(bio),
2922 q->max_hw_sectors); 2914 q->max_hw_sectors);
2923 goto end_io; 2915 goto end_io;
2924 } 2916 }
2925 2917
2926 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) 2918 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))
2927 goto end_io; 2919 goto end_io;
2928 2920
2929 block_wait_queue_running(q); 2921 block_wait_queue_running(q);
2930 2922
2931 /* 2923 /*
2932 * If this device has partitions, remap block n 2924 * If this device has partitions, remap block n
2933 * of partition p to block n+start(p) of the disk. 2925 * of partition p to block n+start(p) of the disk.
2934 */ 2926 */
2935 blk_partition_remap(bio); 2927 blk_partition_remap(bio);
2936 2928
2937 ret = q->make_request_fn(q, bio); 2929 ret = q->make_request_fn(q, bio);
2938 } while (ret); 2930 } while (ret);
2939 } 2931 }
2940 2932
2941 EXPORT_SYMBOL(generic_make_request); 2933 EXPORT_SYMBOL(generic_make_request);
2942 2934
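The remap loop above is what stacking drivers rely on: a make_request_fn may rewrite bio->bi_bdev and bio->bi_sector and return a non-zero value, and generic_make_request() will look up the new queue and resubmit the bio there (returning 0 means the driver has taken care of the bio itself). A minimal sketch of that pattern, assuming a hypothetical struct my_dev hung off q->queuedata; this is illustrative, not code from this patch:

        struct my_dev {
                struct block_device *backing_bdev;  /* device we stack on top of */
                sector_t start;                     /* sector offset of this mapping */
        };

        static int my_remap_make_request(request_queue_t *q, struct bio *bio)
        {
                struct my_dev *dev = q->queuedata;

                /* redirect the bio and translate the sector */
                bio->bi_bdev = dev->backing_bdev;
                bio->bi_sector += dev->start;

                /* non-zero: let the loop in generic_make_request() resolve
                 * the backing device's queue and submit the bio there */
                return 1;
        }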
2943 /** 2935 /**
2944 * submit_bio: submit a bio to the block device layer for I/O 2936 * submit_bio: submit a bio to the block device layer for I/O
2945 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) 2937 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
2946 * @bio: The &struct bio which describes the I/O 2938 * @bio: The &struct bio which describes the I/O
2947 * 2939 *
2948 * submit_bio() is very similar in purpose to generic_make_request(), and 2940 * submit_bio() is very similar in purpose to generic_make_request(), and
2949 * uses that function to do most of the work. Both are fairly rough 2941 * uses that function to do most of the work. Both are fairly rough
2950 * interfaces, @bio must be set up and ready for I/O. 2942 * interfaces, @bio must be set up and ready for I/O.
2951 * 2943 *
2952 */ 2944 */
2953 void submit_bio(int rw, struct bio *bio) 2945 void submit_bio(int rw, struct bio *bio)
2954 { 2946 {
2955 int count = bio_sectors(bio); 2947 int count = bio_sectors(bio);
2956 2948
2957 BIO_BUG_ON(!bio->bi_size); 2949 BIO_BUG_ON(!bio->bi_size);
2958 BIO_BUG_ON(!bio->bi_io_vec); 2950 BIO_BUG_ON(!bio->bi_io_vec);
2959 bio->bi_rw = rw; 2951 bio->bi_rw = rw;
2960 if (rw & WRITE) 2952 if (rw & WRITE)
2961 mod_page_state(pgpgout, count); 2953 mod_page_state(pgpgout, count);
2962 else 2954 else
2963 mod_page_state(pgpgin, count); 2955 mod_page_state(pgpgin, count);
2964 2956
2965 if (unlikely(block_dump)) { 2957 if (unlikely(block_dump)) {
2966 char b[BDEVNAME_SIZE]; 2958 char b[BDEVNAME_SIZE];
2967 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", 2959 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
2968 current->comm, current->pid, 2960 current->comm, current->pid,
2969 (rw & WRITE) ? "WRITE" : "READ", 2961 (rw & WRITE) ? "WRITE" : "READ",
2970 (unsigned long long)bio->bi_sector, 2962 (unsigned long long)bio->bi_sector,
2971 bdevname(bio->bi_bdev,b)); 2963 bdevname(bio->bi_bdev,b));
2972 } 2964 }
2973 2965
2974 generic_make_request(bio); 2966 generic_make_request(bio);
2975 } 2967 }
2976 2968
2977 EXPORT_SYMBOL(submit_bio); 2969 EXPORT_SYMBOL(submit_bio);
2978 2970
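As the kernel-doc above notes, the bio handed to submit_bio() must already be fully prepared. A hedged sketch of the usual setup for a single-page read; bdev, sector, page, my_end_io and my_cookie are placeholders supplied by the caller, and error handling is omitted:

        static void my_read_one_page(struct block_device *bdev, sector_t sector,
                                     struct page *page, bio_end_io_t *my_end_io,
                                     void *my_cookie)
        {
                struct bio *bio = bio_alloc(GFP_KERNEL, 1);

                bio->bi_bdev = bdev;                    /* target device */
                bio->bi_sector = sector;                /* in 512-byte sectors */
                bio->bi_end_io = my_end_io;             /* completion callback */
                bio->bi_private = my_cookie;
                bio_add_page(bio, page, PAGE_SIZE, 0);  /* attach the data buffer */

                submit_bio(READ, bio);                  /* accounted as pgpgin above */
        }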
2979 void blk_recalc_rq_segments(struct request *rq) 2971 void blk_recalc_rq_segments(struct request *rq)
2980 { 2972 {
2981 struct bio *bio, *prevbio = NULL; 2973 struct bio *bio, *prevbio = NULL;
2982 int nr_phys_segs, nr_hw_segs; 2974 int nr_phys_segs, nr_hw_segs;
2983 unsigned int phys_size, hw_size; 2975 unsigned int phys_size, hw_size;
2984 request_queue_t *q = rq->q; 2976 request_queue_t *q = rq->q;
2985 2977
2986 if (!rq->bio) 2978 if (!rq->bio)
2987 return; 2979 return;
2988 2980
2989 phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; 2981 phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
2990 rq_for_each_bio(bio, rq) { 2982 rq_for_each_bio(bio, rq) {
2991 /* Force bio hw/phys segs to be recalculated. */ 2983 /* Force bio hw/phys segs to be recalculated. */
2992 bio->bi_flags &= ~(1 << BIO_SEG_VALID); 2984 bio->bi_flags &= ~(1 << BIO_SEG_VALID);
2993 2985
2994 nr_phys_segs += bio_phys_segments(q, bio); 2986 nr_phys_segs += bio_phys_segments(q, bio);
2995 nr_hw_segs += bio_hw_segments(q, bio); 2987 nr_hw_segs += bio_hw_segments(q, bio);
2996 if (prevbio) { 2988 if (prevbio) {
2997 int pseg = phys_size + prevbio->bi_size + bio->bi_size; 2989 int pseg = phys_size + prevbio->bi_size + bio->bi_size;
2998 int hseg = hw_size + prevbio->bi_size + bio->bi_size; 2990 int hseg = hw_size + prevbio->bi_size + bio->bi_size;
2999 2991
3000 if (blk_phys_contig_segment(q, prevbio, bio) && 2992 if (blk_phys_contig_segment(q, prevbio, bio) &&
3001 pseg <= q->max_segment_size) { 2993 pseg <= q->max_segment_size) {
3002 nr_phys_segs--; 2994 nr_phys_segs--;
3003 phys_size += prevbio->bi_size + bio->bi_size; 2995 phys_size += prevbio->bi_size + bio->bi_size;
3004 } else 2996 } else
3005 phys_size = 0; 2997 phys_size = 0;
3006 2998
3007 if (blk_hw_contig_segment(q, prevbio, bio) && 2999 if (blk_hw_contig_segment(q, prevbio, bio) &&
3008 hseg <= q->max_segment_size) { 3000 hseg <= q->max_segment_size) {
3009 nr_hw_segs--; 3001 nr_hw_segs--;
3010 hw_size += prevbio->bi_size + bio->bi_size; 3002 hw_size += prevbio->bi_size + bio->bi_size;
3011 } else 3003 } else
3012 hw_size = 0; 3004 hw_size = 0;
3013 } 3005 }
3014 prevbio = bio; 3006 prevbio = bio;
3015 } 3007 }
3016 3008
3017 rq->nr_phys_segments = nr_phys_segs; 3009 rq->nr_phys_segments = nr_phys_segs;
3018 rq->nr_hw_segments = nr_hw_segs; 3010 rq->nr_hw_segments = nr_hw_segs;
3019 } 3011 }
3020 3012
3021 void blk_recalc_rq_sectors(struct request *rq, int nsect) 3013 void blk_recalc_rq_sectors(struct request *rq, int nsect)
3022 { 3014 {
3023 if (blk_fs_request(rq)) { 3015 if (blk_fs_request(rq)) {
3024 rq->hard_sector += nsect; 3016 rq->hard_sector += nsect;
3025 rq->hard_nr_sectors -= nsect; 3017 rq->hard_nr_sectors -= nsect;
3026 3018
3027 /* 3019 /*
3028 * Move the I/O submission pointers ahead if required. 3020 * Move the I/O submission pointers ahead if required.
3029 */ 3021 */
3030 if ((rq->nr_sectors >= rq->hard_nr_sectors) && 3022 if ((rq->nr_sectors >= rq->hard_nr_sectors) &&
3031 (rq->sector <= rq->hard_sector)) { 3023 (rq->sector <= rq->hard_sector)) {
3032 rq->sector = rq->hard_sector; 3024 rq->sector = rq->hard_sector;
3033 rq->nr_sectors = rq->hard_nr_sectors; 3025 rq->nr_sectors = rq->hard_nr_sectors;
3034 rq->hard_cur_sectors = bio_cur_sectors(rq->bio); 3026 rq->hard_cur_sectors = bio_cur_sectors(rq->bio);
3035 rq->current_nr_sectors = rq->hard_cur_sectors; 3027 rq->current_nr_sectors = rq->hard_cur_sectors;
3036 rq->buffer = bio_data(rq->bio); 3028 rq->buffer = bio_data(rq->bio);
3037 } 3029 }
3038 3030
3039 /* 3031 /*
3040 * if total number of sectors is less than the first segment 3032 * if total number of sectors is less than the first segment
3041 * size, something has gone terribly wrong 3033 * size, something has gone terribly wrong
3042 */ 3034 */
3043 if (rq->nr_sectors < rq->current_nr_sectors) { 3035 if (rq->nr_sectors < rq->current_nr_sectors) {
3044 printk("blk: request botched\n"); 3036 printk("blk: request botched\n");
3045 rq->nr_sectors = rq->current_nr_sectors; 3037 rq->nr_sectors = rq->current_nr_sectors;
3046 } 3038 }
3047 } 3039 }
3048 } 3040 }
3049 3041
3050 static int __end_that_request_first(struct request *req, int uptodate, 3042 static int __end_that_request_first(struct request *req, int uptodate,
3051 int nr_bytes) 3043 int nr_bytes)
3052 { 3044 {
3053 int total_bytes, bio_nbytes, error, next_idx = 0; 3045 int total_bytes, bio_nbytes, error, next_idx = 0;
3054 struct bio *bio; 3046 struct bio *bio;
3055 3047
3056 /* 3048 /*
3057 * extend uptodate bool to allow < 0 value to be direct io error 3049 * extend uptodate bool to allow < 0 value to be direct io error
3058 */ 3050 */
3059 error = 0; 3051 error = 0;
3060 if (end_io_error(uptodate)) 3052 if (end_io_error(uptodate))
3061 error = !uptodate ? -EIO : uptodate; 3053 error = !uptodate ? -EIO : uptodate;
3062 3054
3063 /* 3055 /*
3064 * for a REQ_BLOCK_PC request, we want to carry any eventual 3056 * for a REQ_BLOCK_PC request, we want to carry any eventual
3065 * sense key with us all the way through 3057 * sense key with us all the way through
3066 */ 3058 */
3067 if (!blk_pc_request(req)) 3059 if (!blk_pc_request(req))
3068 req->errors = 0; 3060 req->errors = 0;
3069 3061
3070 if (!uptodate) { 3062 if (!uptodate) {
3071 if (blk_fs_request(req) && !(req->flags & REQ_QUIET)) 3063 if (blk_fs_request(req) && !(req->flags & REQ_QUIET))
3072 printk("end_request: I/O error, dev %s, sector %llu\n", 3064 printk("end_request: I/O error, dev %s, sector %llu\n",
3073 req->rq_disk ? req->rq_disk->disk_name : "?", 3065 req->rq_disk ? req->rq_disk->disk_name : "?",
3074 (unsigned long long)req->sector); 3066 (unsigned long long)req->sector);
3075 } 3067 }
3076 3068
3077 total_bytes = bio_nbytes = 0; 3069 total_bytes = bio_nbytes = 0;
3078 while ((bio = req->bio) != NULL) { 3070 while ((bio = req->bio) != NULL) {
3079 int nbytes; 3071 int nbytes;
3080 3072
3081 if (nr_bytes >= bio->bi_size) { 3073 if (nr_bytes >= bio->bi_size) {
3082 req->bio = bio->bi_next; 3074 req->bio = bio->bi_next;
3083 nbytes = bio->bi_size; 3075 nbytes = bio->bi_size;
3084 bio_endio(bio, nbytes, error); 3076 bio_endio(bio, nbytes, error);
3085 next_idx = 0; 3077 next_idx = 0;
3086 bio_nbytes = 0; 3078 bio_nbytes = 0;
3087 } else { 3079 } else {
3088 int idx = bio->bi_idx + next_idx; 3080 int idx = bio->bi_idx + next_idx;
3089 3081
3090 if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { 3082 if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
3091 blk_dump_rq_flags(req, "__end_that"); 3083 blk_dump_rq_flags(req, "__end_that");
3092 printk("%s: bio idx %d >= vcnt %d\n", 3084 printk("%s: bio idx %d >= vcnt %d\n",
3093 __FUNCTION__, 3085 __FUNCTION__,
3094 bio->bi_idx, bio->bi_vcnt); 3086 bio->bi_idx, bio->bi_vcnt);
3095 break; 3087 break;
3096 } 3088 }
3097 3089
3098 nbytes = bio_iovec_idx(bio, idx)->bv_len; 3090 nbytes = bio_iovec_idx(bio, idx)->bv_len;
3099 BIO_BUG_ON(nbytes > bio->bi_size); 3091 BIO_BUG_ON(nbytes > bio->bi_size);
3100 3092
3101 /* 3093 /*
3102 * not a complete bvec done 3094 * not a complete bvec done
3103 */ 3095 */
3104 if (unlikely(nbytes > nr_bytes)) { 3096 if (unlikely(nbytes > nr_bytes)) {
3105 bio_nbytes += nr_bytes; 3097 bio_nbytes += nr_bytes;
3106 total_bytes += nr_bytes; 3098 total_bytes += nr_bytes;
3107 break; 3099 break;
3108 } 3100 }
3109 3101
3110 /* 3102 /*
3111 * advance to the next vector 3103 * advance to the next vector
3112 */ 3104 */
3113 next_idx++; 3105 next_idx++;
3114 bio_nbytes += nbytes; 3106 bio_nbytes += nbytes;
3115 } 3107 }
3116 3108
3117 total_bytes += nbytes; 3109 total_bytes += nbytes;
3118 nr_bytes -= nbytes; 3110 nr_bytes -= nbytes;
3119 3111
3120 if ((bio = req->bio)) { 3112 if ((bio = req->bio)) {
3121 /* 3113 /*
3122 * end more in this run, or just return 'not-done' 3114 * end more in this run, or just return 'not-done'
3123 */ 3115 */
3124 if (unlikely(nr_bytes <= 0)) 3116 if (unlikely(nr_bytes <= 0))
3125 break; 3117 break;
3126 } 3118 }
3127 } 3119 }
3128 3120
3129 /* 3121 /*
3130 * completely done 3122 * completely done
3131 */ 3123 */
3132 if (!req->bio) 3124 if (!req->bio)
3133 return 0; 3125 return 0;
3134 3126
3135 /* 3127 /*
3136 * if the request wasn't completed, update state 3128 * if the request wasn't completed, update state
3137 */ 3129 */
3138 if (bio_nbytes) { 3130 if (bio_nbytes) {
3139 bio_endio(bio, bio_nbytes, error); 3131 bio_endio(bio, bio_nbytes, error);
3140 bio->bi_idx += next_idx; 3132 bio->bi_idx += next_idx;
3141 bio_iovec(bio)->bv_offset += nr_bytes; 3133 bio_iovec(bio)->bv_offset += nr_bytes;
3142 bio_iovec(bio)->bv_len -= nr_bytes; 3134 bio_iovec(bio)->bv_len -= nr_bytes;
3143 } 3135 }
3144 3136
3145 blk_recalc_rq_sectors(req, total_bytes >> 9); 3137 blk_recalc_rq_sectors(req, total_bytes >> 9);
3146 blk_recalc_rq_segments(req); 3138 blk_recalc_rq_segments(req);
3147 return 1; 3139 return 1;
3148 } 3140 }
3149 3141
3150 /** 3142 /**
3151 * end_that_request_first - end I/O on a request 3143 * end_that_request_first - end I/O on a request
3152 * @req: the request being processed 3144 * @req: the request being processed
3153 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error 3145 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error
3154 * @nr_sectors: number of sectors to end I/O on 3146 * @nr_sectors: number of sectors to end I/O on
3155 * 3147 *
3156 * Description: 3148 * Description:
3157 * Ends I/O on a number of sectors attached to @req, and sets it up 3149 * Ends I/O on a number of sectors attached to @req, and sets it up
3158 * for the next range of segments (if any) in the cluster. 3150 * for the next range of segments (if any) in the cluster.
3159 * 3151 *
3160 * Return: 3152 * Return:
3161 * 0 - we are done with this request, call end_that_request_last() 3153 * 0 - we are done with this request, call end_that_request_last()
3162 * 1 - still buffers pending for this request 3154 * 1 - still buffers pending for this request
3163 **/ 3155 **/
3164 int end_that_request_first(struct request *req, int uptodate, int nr_sectors) 3156 int end_that_request_first(struct request *req, int uptodate, int nr_sectors)
3165 { 3157 {
3166 return __end_that_request_first(req, uptodate, nr_sectors << 9); 3158 return __end_that_request_first(req, uptodate, nr_sectors << 9);
3167 } 3159 }
3168 3160
3169 EXPORT_SYMBOL(end_that_request_first); 3161 EXPORT_SYMBOL(end_that_request_first);
3170 3162
3171 /** 3163 /**
3172 * end_that_request_chunk - end I/O on a request 3164 * end_that_request_chunk - end I/O on a request
3173 * @req: the request being processed 3165 * @req: the request being processed
3174 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error 3166 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error
3175 * @nr_bytes: number of bytes to complete 3167 * @nr_bytes: number of bytes to complete
3176 * 3168 *
3177 * Description: 3169 * Description:
3178 * Ends I/O on a number of bytes attached to @req, and sets it up 3170 * Ends I/O on a number of bytes attached to @req, and sets it up
3179 * for the next range of segments (if any). Like end_that_request_first(), 3171 * for the next range of segments (if any). Like end_that_request_first(),
3180 * but deals with bytes instead of sectors. 3172 * but deals with bytes instead of sectors.
3181 * 3173 *
3182 * Return: 3174 * Return:
3183 * 0 - we are done with this request, call end_that_request_last() 3175 * 0 - we are done with this request, call end_that_request_last()
3184 * 1 - still buffers pending for this request 3176 * 1 - still buffers pending for this request
3185 **/ 3177 **/
3186 int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes) 3178 int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes)
3187 { 3179 {
3188 return __end_that_request_first(req, uptodate, nr_bytes); 3180 return __end_that_request_first(req, uptodate, nr_bytes);
3189 } 3181 }
3190 3182
3191 EXPORT_SYMBOL(end_that_request_chunk); 3183 EXPORT_SYMBOL(end_that_request_chunk);
3192 3184
3193 /* 3185 /*
3194 * queue lock must be held 3186 * queue lock must be held
3195 */ 3187 */
3196 void end_that_request_last(struct request *req) 3188 void end_that_request_last(struct request *req)
3197 { 3189 {
3198 struct gendisk *disk = req->rq_disk; 3190 struct gendisk *disk = req->rq_disk;
3199 3191
3200 if (unlikely(laptop_mode) && blk_fs_request(req)) 3192 if (unlikely(laptop_mode) && blk_fs_request(req))
3201 laptop_io_completion(); 3193 laptop_io_completion();
3202 3194
3203 if (disk && blk_fs_request(req)) { 3195 if (disk && blk_fs_request(req)) {
3204 unsigned long duration = jiffies - req->start_time; 3196 unsigned long duration = jiffies - req->start_time;
3205 switch (rq_data_dir(req)) { 3197 switch (rq_data_dir(req)) {
3206 case WRITE: 3198 case WRITE:
3207 __disk_stat_inc(disk, writes); 3199 __disk_stat_inc(disk, writes);
3208 __disk_stat_add(disk, write_ticks, duration); 3200 __disk_stat_add(disk, write_ticks, duration);
3209 break; 3201 break;
3210 case READ: 3202 case READ:
3211 __disk_stat_inc(disk, reads); 3203 __disk_stat_inc(disk, reads);
3212 __disk_stat_add(disk, read_ticks, duration); 3204 __disk_stat_add(disk, read_ticks, duration);
3213 break; 3205 break;
3214 } 3206 }
3215 disk_round_stats(disk); 3207 disk_round_stats(disk);
3216 disk->in_flight--; 3208 disk->in_flight--;
3217 } 3209 }
3218 if (req->end_io) 3210 if (req->end_io)
3219 req->end_io(req); 3211 req->end_io(req);
3220 else 3212 else
3221 __blk_put_request(req->q, req); 3213 __blk_put_request(req->q, req);
3222 } 3214 }
3223 3215
3224 EXPORT_SYMBOL(end_that_request_last); 3216 EXPORT_SYMBOL(end_that_request_last);
3225 3217
3226 void end_request(struct request *req, int uptodate) 3218 void end_request(struct request *req, int uptodate)
3227 { 3219 {
3228 if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { 3220 if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) {
3229 add_disk_randomness(req->rq_disk); 3221 add_disk_randomness(req->rq_disk);
3230 blkdev_dequeue_request(req); 3222 blkdev_dequeue_request(req);
3231 end_that_request_last(req); 3223 end_that_request_last(req);
3232 } 3224 }
3233 } 3225 }
3234 3226
3235 EXPORT_SYMBOL(end_request); 3227 EXPORT_SYMBOL(end_request);
3236 3228
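end_request() is the convenience helper most old-style drivers call from their request_fn, which runs with the queue lock held, so the "queue lock must be held" rule for end_that_request_last() is satisfied automatically. A hypothetical sketch; the device-specific data transfer is elided:

        static void my_request_fn(request_queue_t *q)
        {
                struct request *req;

                while ((req = elv_next_request(q)) != NULL) {
                        if (!blk_fs_request(req)) {
                                end_request(req, 0);    /* fail non-fs requests */
                                continue;
                        }
                        /* transfer req->current_nr_sectors sectors at req->sector
                         * to/from req->buffer here (device specific, omitted) */
                        end_request(req, 1);    /* complete this segment; the request
                                                 * stays queued until all of it is done */
                }
        }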
3237 void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio) 3229 void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio)
3238 { 3230 {
3239 /* first three bits are identical in rq->flags and bio->bi_rw */ 3231 /* first three bits are identical in rq->flags and bio->bi_rw */
3240 rq->flags |= (bio->bi_rw & 7); 3232 rq->flags |= (bio->bi_rw & 7);
3241 3233
3242 rq->nr_phys_segments = bio_phys_segments(q, bio); 3234 rq->nr_phys_segments = bio_phys_segments(q, bio);
3243 rq->nr_hw_segments = bio_hw_segments(q, bio); 3235 rq->nr_hw_segments = bio_hw_segments(q, bio);
3244 rq->current_nr_sectors = bio_cur_sectors(bio); 3236 rq->current_nr_sectors = bio_cur_sectors(bio);
3245 rq->hard_cur_sectors = rq->current_nr_sectors; 3237 rq->hard_cur_sectors = rq->current_nr_sectors;
3246 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); 3238 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
3247 rq->buffer = bio_data(bio); 3239 rq->buffer = bio_data(bio);
3248 3240
3249 rq->bio = rq->biotail = bio; 3241 rq->bio = rq->biotail = bio;
3250 } 3242 }
3251 3243
3252 EXPORT_SYMBOL(blk_rq_bio_prep); 3244 EXPORT_SYMBOL(blk_rq_bio_prep);
3253 3245
3254 int kblockd_schedule_work(struct work_struct *work) 3246 int kblockd_schedule_work(struct work_struct *work)
3255 { 3247 {
3256 return queue_work(kblockd_workqueue, work); 3248 return queue_work(kblockd_workqueue, work);
3257 } 3249 }
3258 3250
3259 EXPORT_SYMBOL(kblockd_schedule_work); 3251 EXPORT_SYMBOL(kblockd_schedule_work);
3260 3252
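kblockd_schedule_work() exists so block-layer housekeeping (unplug work and similar) runs on the dedicated kblockd thread rather than on keventd. A hedged sketch of deferring a queue run to it, using the three-argument INIT_WORK() of this kernel vintage; my_work, my_work_fn and my_kick_queue are illustrative:

        static struct work_struct my_work;

        static void my_work_fn(void *data)
        {
                request_queue_t *q = data;

                blk_run_queue(q);       /* restart the queue from process context */
        }

        static void my_kick_queue(request_queue_t *q)
        {
                INIT_WORK(&my_work, my_work_fn, q);
                kblockd_schedule_work(&my_work);
        }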
3261 void kblockd_flush(void) 3253 void kblockd_flush(void)
3262 { 3254 {
3263 flush_workqueue(kblockd_workqueue); 3255 flush_workqueue(kblockd_workqueue);
3264 } 3256 }
3265 EXPORT_SYMBOL(kblockd_flush); 3257 EXPORT_SYMBOL(kblockd_flush);
3266 3258
3267 int __init blk_dev_init(void) 3259 int __init blk_dev_init(void)
3268 { 3260 {
3269 kblockd_workqueue = create_workqueue("kblockd"); 3261 kblockd_workqueue = create_workqueue("kblockd");
3270 if (!kblockd_workqueue) 3262 if (!kblockd_workqueue)
3271 panic("Failed to create kblockd\n"); 3263 panic("Failed to create kblockd\n");
3272 3264
3273 request_cachep = kmem_cache_create("blkdev_requests", 3265 request_cachep = kmem_cache_create("blkdev_requests",
3274 sizeof(struct request), 0, SLAB_PANIC, NULL, NULL); 3266 sizeof(struct request), 0, SLAB_PANIC, NULL, NULL);
3275 3267
3276 requestq_cachep = kmem_cache_create("blkdev_queue", 3268 requestq_cachep = kmem_cache_create("blkdev_queue",
3277 sizeof(request_queue_t), 0, SLAB_PANIC, NULL, NULL); 3269 sizeof(request_queue_t), 0, SLAB_PANIC, NULL, NULL);
3278 3270
3279 iocontext_cachep = kmem_cache_create("blkdev_ioc", 3271 iocontext_cachep = kmem_cache_create("blkdev_ioc",
3280 sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); 3272 sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL);
3281 3273
3282 blk_max_low_pfn = max_low_pfn; 3274 blk_max_low_pfn = max_low_pfn;
3283 blk_max_pfn = max_pfn; 3275 blk_max_pfn = max_pfn;
3284 3276
3285 return 0; 3277 return 0;
3286 } 3278 }
3287 3279
3288 /* 3280 /*
3289 * IO Context helper functions 3281 * IO Context helper functions
3290 */ 3282 */
3291 void put_io_context(struct io_context *ioc) 3283 void put_io_context(struct io_context *ioc)
3292 { 3284 {
3293 if (ioc == NULL) 3285 if (ioc == NULL)
3294 return; 3286 return;
3295 3287
3296 BUG_ON(atomic_read(&ioc->refcount) == 0); 3288 BUG_ON(atomic_read(&ioc->refcount) == 0);
3297 3289
3298 if (atomic_dec_and_test(&ioc->refcount)) { 3290 if (atomic_dec_and_test(&ioc->refcount)) {
3299 if (ioc->aic && ioc->aic->dtor) 3291 if (ioc->aic && ioc->aic->dtor)
3300 ioc->aic->dtor(ioc->aic); 3292 ioc->aic->dtor(ioc->aic);
3301 if (ioc->cic && ioc->cic->dtor) 3293 if (ioc->cic && ioc->cic->dtor)
3302 ioc->cic->dtor(ioc->cic); 3294 ioc->cic->dtor(ioc->cic);
3303 3295
3304 kmem_cache_free(iocontext_cachep, ioc); 3296 kmem_cache_free(iocontext_cachep, ioc);
3305 } 3297 }
3306 } 3298 }
3307 EXPORT_SYMBOL(put_io_context); 3299 EXPORT_SYMBOL(put_io_context);
3308 3300
3309 /* Called by the exiting task */ 3301 /* Called by the exiting task */
3310 void exit_io_context(void) 3302 void exit_io_context(void)
3311 { 3303 {
3312 unsigned long flags; 3304 unsigned long flags;
3313 struct io_context *ioc; 3305 struct io_context *ioc;
3314 3306
3315 local_irq_save(flags); 3307 local_irq_save(flags);
3316 ioc = current->io_context; 3308 ioc = current->io_context;
3317 current->io_context = NULL; 3309 current->io_context = NULL;
3318 local_irq_restore(flags); 3310 local_irq_restore(flags);
3319 3311
3320 if (ioc->aic && ioc->aic->exit) 3312 if (ioc->aic && ioc->aic->exit)
3321 ioc->aic->exit(ioc->aic); 3313 ioc->aic->exit(ioc->aic);
3322 if (ioc->cic && ioc->cic->exit) 3314 if (ioc->cic && ioc->cic->exit)
3323 ioc->cic->exit(ioc->cic); 3315 ioc->cic->exit(ioc->cic);
3324 3316
3325 put_io_context(ioc); 3317 put_io_context(ioc);
3326 } 3318 }
3327 3319
3328 /* 3320 /*
3329 * If the current task has no IO context then create one and initialise it. 3321 * If the current task has no IO context then create one and initialise it.
3330 * If it does have a context, take a ref on it. 3322 * If it does have a context, take a ref on it.
3331 * 3323 *
3332 * This is always called in the context of the task which submitted the I/O. 3324 * This is always called in the context of the task which submitted the I/O.
3333 * But weird things happen, so we disable local interrupts to ensure exclusive 3325 * But weird things happen, so we disable local interrupts to ensure exclusive
3334 * access to *current. 3326 * access to *current.
3335 */ 3327 */
3336 struct io_context *get_io_context(int gfp_flags) 3328 struct io_context *get_io_context(int gfp_flags)
3337 { 3329 {
3338 struct task_struct *tsk = current; 3330 struct task_struct *tsk = current;
3339 unsigned long flags; 3331 unsigned long flags;
3340 struct io_context *ret; 3332 struct io_context *ret;
3341 3333
3342 local_irq_save(flags); 3334 local_irq_save(flags);
3343 ret = tsk->io_context; 3335 ret = tsk->io_context;
3344 if (ret) 3336 if (ret)
3345 goto out; 3337 goto out;
3346 3338
3347 local_irq_restore(flags); 3339 local_irq_restore(flags);
3348 3340
3349 ret = kmem_cache_alloc(iocontext_cachep, gfp_flags); 3341 ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
3350 if (ret) { 3342 if (ret) {
3351 atomic_set(&ret->refcount, 1); 3343 atomic_set(&ret->refcount, 1);
3352 ret->pid = tsk->pid; 3344 ret->pid = tsk->pid;
3353 ret->last_waited = jiffies; /* doesn't matter... */ 3345 ret->last_waited = jiffies; /* doesn't matter... */
3354 ret->nr_batch_requests = 0; /* because this is 0 */ 3346 ret->nr_batch_requests = 0; /* because this is 0 */
3355 ret->aic = NULL; 3347 ret->aic = NULL;
3356 ret->cic = NULL; 3348 ret->cic = NULL;
3357 spin_lock_init(&ret->lock); 3349 spin_lock_init(&ret->lock);
3358 3350
3359 local_irq_save(flags); 3351 local_irq_save(flags);
3360 3352
3361 /* 3353 /*
3362 * very unlikely, someone raced with us in setting up the task 3354 * very unlikely, someone raced with us in setting up the task
3363 * io context. free new context and just grab a reference. 3355 * io context. free new context and just grab a reference.
3364 */ 3356 */
3365 if (!tsk->io_context) 3357 if (!tsk->io_context)
3366 tsk->io_context = ret; 3358 tsk->io_context = ret;
3367 else { 3359 else {
3368 kmem_cache_free(iocontext_cachep, ret); 3360 kmem_cache_free(iocontext_cachep, ret);
3369 ret = tsk->io_context; 3361 ret = tsk->io_context;
3370 } 3362 }
3371 3363
3372 out: 3364 out:
3373 atomic_inc(&ret->refcount); 3365 atomic_inc(&ret->refcount);
3374 local_irq_restore(flags); 3366 local_irq_restore(flags);
3375 } 3367 }
3376 3368
3377 return ret; 3369 return ret;
3378 } 3370 }
3379 EXPORT_SYMBOL(get_io_context); 3371 EXPORT_SYMBOL(get_io_context);
3380 3372
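The comment above get_io_context() spells out the contract: the current task's io_context is returned with an extra reference, allocating one on first use, and the caller balances it with put_io_context(). A minimal hypothetical use:

        static void my_peek_io_context(void)
        {
                struct io_context *ioc = get_io_context(GFP_KERNEL);

                if (!ioc)               /* allocation can fail */
                        return;
                /* e.g. an I/O scheduler can hang per-task state off ioc->cic */
                put_io_context(ioc);    /* drop the reference taken above */
        }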
3381 void copy_io_context(struct io_context **pdst, struct io_context **psrc) 3373 void copy_io_context(struct io_context **pdst, struct io_context **psrc)
3382 { 3374 {
3383 struct io_context *src = *psrc; 3375 struct io_context *src = *psrc;
3384 struct io_context *dst = *pdst; 3376 struct io_context *dst = *pdst;
3385 3377
3386 if (src) { 3378 if (src) {
3387 BUG_ON(atomic_read(&src->refcount) == 0); 3379 BUG_ON(atomic_read(&src->refcount) == 0);
3388 atomic_inc(&src->refcount); 3380 atomic_inc(&src->refcount);
3389 put_io_context(dst); 3381 put_io_context(dst);
3390 *pdst = src; 3382 *pdst = src;
3391 } 3383 }
3392 } 3384 }
3393 EXPORT_SYMBOL(copy_io_context); 3385 EXPORT_SYMBOL(copy_io_context);
3394 3386
3395 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2) 3387 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2)
3396 { 3388 {
3397 struct io_context *temp; 3389 struct io_context *temp;
3398 temp = *ioc1; 3390 temp = *ioc1;
3399 *ioc1 = *ioc2; 3391 *ioc1 = *ioc2;
3400 *ioc2 = temp; 3392 *ioc2 = temp;
3401 } 3393 }
3402 EXPORT_SYMBOL(swap_io_context); 3394 EXPORT_SYMBOL(swap_io_context);
3403 3395
3404 /* 3396 /*
3405 * sysfs parts below 3397 * sysfs parts below
3406 */ 3398 */
3407 struct queue_sysfs_entry { 3399 struct queue_sysfs_entry {
3408 struct attribute attr; 3400 struct attribute attr;
3409 ssize_t (*show)(struct request_queue *, char *); 3401 ssize_t (*show)(struct request_queue *, char *);
3410 ssize_t (*store)(struct request_queue *, const char *, size_t); 3402 ssize_t (*store)(struct request_queue *, const char *, size_t);
3411 }; 3403 };
3412 3404
3413 static ssize_t 3405 static ssize_t
3414 queue_var_show(unsigned int var, char *page) 3406 queue_var_show(unsigned int var, char *page)
3415 { 3407 {
3416 return sprintf(page, "%d\n", var); 3408 return sprintf(page, "%d\n", var);
3417 } 3409 }
3418 3410
3419 static ssize_t 3411 static ssize_t
3420 queue_var_store(unsigned long *var, const char *page, size_t count) 3412 queue_var_store(unsigned long *var, const char *page, size_t count)
3421 { 3413 {
3422 char *p = (char *) page; 3414 char *p = (char *) page;
3423 3415
3424 *var = simple_strtoul(p, &p, 10); 3416 *var = simple_strtoul(p, &p, 10);
3425 return count; 3417 return count;
3426 } 3418 }
3427 3419
3428 static ssize_t queue_requests_show(struct request_queue *q, char *page) 3420 static ssize_t queue_requests_show(struct request_queue *q, char *page)
3429 { 3421 {
3430 return queue_var_show(q->nr_requests, (page)); 3422 return queue_var_show(q->nr_requests, (page));
3431 } 3423 }
3432 3424
3433 static ssize_t 3425 static ssize_t
3434 queue_requests_store(struct request_queue *q, const char *page, size_t count) 3426 queue_requests_store(struct request_queue *q, const char *page, size_t count)
3435 { 3427 {
3436 struct request_list *rl = &q->rq; 3428 struct request_list *rl = &q->rq;
3437 3429
3438 int ret = queue_var_store(&q->nr_requests, page, count); 3430 int ret = queue_var_store(&q->nr_requests, page, count);
3439 if (q->nr_requests < BLKDEV_MIN_RQ) 3431 if (q->nr_requests < BLKDEV_MIN_RQ)
3440 q->nr_requests = BLKDEV_MIN_RQ; 3432 q->nr_requests = BLKDEV_MIN_RQ;
3441 blk_queue_congestion_threshold(q); 3433 blk_queue_congestion_threshold(q);
3442 3434
3443 if (rl->count[READ] >= queue_congestion_on_threshold(q)) 3435 if (rl->count[READ] >= queue_congestion_on_threshold(q))
3444 set_queue_congested(q, READ); 3436 set_queue_congested(q, READ);
3445 else if (rl->count[READ] < queue_congestion_off_threshold(q)) 3437 else if (rl->count[READ] < queue_congestion_off_threshold(q))
3446 clear_queue_congested(q, READ); 3438 clear_queue_congested(q, READ);
3447 3439
3448 if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) 3440 if (rl->count[WRITE] >= queue_congestion_on_threshold(q))
3449 set_queue_congested(q, WRITE); 3441 set_queue_congested(q, WRITE);
3450 else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) 3442 else if (rl->count[WRITE] < queue_congestion_off_threshold(q))
3451 clear_queue_congested(q, WRITE); 3443 clear_queue_congested(q, WRITE);
3452 3444
3453 if (rl->count[READ] >= q->nr_requests) { 3445 if (rl->count[READ] >= q->nr_requests) {
3454 blk_set_queue_full(q, READ); 3446 blk_set_queue_full(q, READ);
3455 } else if (rl->count[READ]+1 <= q->nr_requests) { 3447 } else if (rl->count[READ]+1 <= q->nr_requests) {
3456 blk_clear_queue_full(q, READ); 3448 blk_clear_queue_full(q, READ);
3457 wake_up(&rl->wait[READ]); 3449 wake_up(&rl->wait[READ]);
3458 } 3450 }
3459 3451
3460 if (rl->count[WRITE] >= q->nr_requests) { 3452 if (rl->count[WRITE] >= q->nr_requests) {
3461 blk_set_queue_full(q, WRITE); 3453 blk_set_queue_full(q, WRITE);
3462 } else if (rl->count[WRITE]+1 <= q->nr_requests) { 3454 } else if (rl->count[WRITE]+1 <= q->nr_requests) {
3463 blk_clear_queue_full(q, WRITE); 3455 blk_clear_queue_full(q, WRITE);
3464 wake_up(&rl->wait[WRITE]); 3456 wake_up(&rl->wait[WRITE]);
3465 } 3457 }
3466 return ret; 3458 return ret;
3467 } 3459 }
3468 3460
3469 static ssize_t queue_ra_show(struct request_queue *q, char *page) 3461 static ssize_t queue_ra_show(struct request_queue *q, char *page)
3470 { 3462 {
3471 int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); 3463 int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);
3472 3464
3473 return queue_var_show(ra_kb, (page)); 3465 return queue_var_show(ra_kb, (page));
3474 } 3466 }
3475 3467
3476 static ssize_t 3468 static ssize_t
3477 queue_ra_store(struct request_queue *q, const char *page, size_t count) 3469 queue_ra_store(struct request_queue *q, const char *page, size_t count)
3478 { 3470 {
3479 unsigned long ra_kb; 3471 unsigned long ra_kb;
3480 ssize_t ret = queue_var_store(&ra_kb, page, count); 3472 ssize_t ret = queue_var_store(&ra_kb, page, count);
3481 3473
3482 spin_lock_irq(q->queue_lock); 3474 spin_lock_irq(q->queue_lock);
3483 if (ra_kb > (q->max_sectors >> 1)) 3475 if (ra_kb > (q->max_sectors >> 1))
3484 ra_kb = (q->max_sectors >> 1); 3476 ra_kb = (q->max_sectors >> 1);
3485 3477
3486 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); 3478 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
3487 spin_unlock_irq(q->queue_lock); 3479 spin_unlock_irq(q->queue_lock);
3488 3480
3489 return ret; 3481 return ret;
3490 } 3482 }
3491 3483
3492 static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) 3484 static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
3493 { 3485 {
3494 int max_sectors_kb = q->max_sectors >> 1; 3486 int max_sectors_kb = q->max_sectors >> 1;
3495 3487
3496 return queue_var_show(max_sectors_kb, (page)); 3488 return queue_var_show(max_sectors_kb, (page));
3497 } 3489 }
3498 3490
3499 static ssize_t 3491 static ssize_t
3500 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) 3492 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
3501 { 3493 {
3502 unsigned long max_sectors_kb, 3494 unsigned long max_sectors_kb,
3503 max_hw_sectors_kb = q->max_hw_sectors >> 1, 3495 max_hw_sectors_kb = q->max_hw_sectors >> 1,
3504 page_kb = 1 << (PAGE_CACHE_SHIFT - 10); 3496 page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
3505 ssize_t ret = queue_var_store(&max_sectors_kb, page, count); 3497 ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
3506 int ra_kb; 3498 int ra_kb;
3507 3499
3508 if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) 3500 if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
3509 return -EINVAL; 3501 return -EINVAL;
3510 /* 3502 /*
3511 * Take the queue lock to update the readahead and max_sectors 3503 * Take the queue lock to update the readahead and max_sectors
3512 * values synchronously: 3504 * values synchronously:
3513 */ 3505 */
3514 spin_lock_irq(q->queue_lock); 3506 spin_lock_irq(q->queue_lock);
3515 /* 3507 /*
3516 * Trim readahead window as well, if necessary: 3508 * Trim readahead window as well, if necessary:
3517 */ 3509 */
3518 ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); 3510 ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);
3519 if (ra_kb > max_sectors_kb) 3511 if (ra_kb > max_sectors_kb)
3520 q->backing_dev_info.ra_pages = 3512 q->backing_dev_info.ra_pages =
3521 max_sectors_kb >> (PAGE_CACHE_SHIFT - 10); 3513 max_sectors_kb >> (PAGE_CACHE_SHIFT - 10);
3522 3514
3523 q->max_sectors = max_sectors_kb << 1; 3515 q->max_sectors = max_sectors_kb << 1;
3524 spin_unlock_irq(q->queue_lock); 3516 spin_unlock_irq(q->queue_lock);
3525 3517
3526 return ret; 3518 return ret;
3527 } 3519 }
3528 3520
3529 static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) 3521 static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
3530 { 3522 {
3531 int max_hw_sectors_kb = q->max_hw_sectors >> 1; 3523 int max_hw_sectors_kb = q->max_hw_sectors >> 1;
3532 3524
3533 return queue_var_show(max_hw_sectors_kb, (page)); 3525 return queue_var_show(max_hw_sectors_kb, (page));
3534 } 3526 }
3535 3527
3536 3528
3537 static struct queue_sysfs_entry queue_requests_entry = { 3529 static struct queue_sysfs_entry queue_requests_entry = {
3538 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, 3530 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
3539 .show = queue_requests_show, 3531 .show = queue_requests_show,
3540 .store = queue_requests_store, 3532 .store = queue_requests_store,
3541 }; 3533 };
3542 3534
3543 static struct queue_sysfs_entry queue_ra_entry = { 3535 static struct queue_sysfs_entry queue_ra_entry = {
3544 .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR }, 3536 .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
3545 .show = queue_ra_show, 3537 .show = queue_ra_show,
3546 .store = queue_ra_store, 3538 .store = queue_ra_store,
3547 }; 3539 };
3548 3540
3549 static struct queue_sysfs_entry queue_max_sectors_entry = { 3541 static struct queue_sysfs_entry queue_max_sectors_entry = {
3550 .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR }, 3542 .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },
3551 .show = queue_max_sectors_show, 3543 .show = queue_max_sectors_show,
3552 .store = queue_max_sectors_store, 3544 .store = queue_max_sectors_store,
3553 }; 3545 };
3554 3546
3555 static struct queue_sysfs_entry queue_max_hw_sectors_entry = { 3547 static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
3556 .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO }, 3548 .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },
3557 .show = queue_max_hw_sectors_show, 3549 .show = queue_max_hw_sectors_show,
3558 }; 3550 };
3559 3551
3560 static struct queue_sysfs_entry queue_iosched_entry = { 3552 static struct queue_sysfs_entry queue_iosched_entry = {
3561 .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, 3553 .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
3562 .show = elv_iosched_show, 3554 .show = elv_iosched_show,
3563 .store = elv_iosched_store, 3555 .store = elv_iosched_store,
3564 }; 3556 };
3565 3557
3566 static struct attribute *default_attrs[] = { 3558 static struct attribute *default_attrs[] = {
3567 &queue_requests_entry.attr, 3559 &queue_requests_entry.attr,
3568 &queue_ra_entry.attr, 3560 &queue_ra_entry.attr,
3569 &queue_max_hw_sectors_entry.attr, 3561 &queue_max_hw_sectors_entry.attr,
3570 &queue_max_sectors_entry.attr, 3562 &queue_max_sectors_entry.attr,
3571 &queue_iosched_entry.attr, 3563 &queue_iosched_entry.attr,
3572 NULL, 3564 NULL,
3573 }; 3565 };
3574 3566
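Each queue_sysfs_entry above turns into a file under /sys/block/<disk>/queue, dispatched through queue_attr_show()/queue_attr_store() below. A new attribute only needs a show (and optionally store) routine plus an entry in default_attrs[]; a hypothetical read-only example, not part of this patch, with the name and routine invented for illustration:

        static ssize_t queue_max_hw_segments_show(struct request_queue *q, char *page)
        {
                return queue_var_show(q->max_hw_segments, page);
        }

        static struct queue_sysfs_entry queue_max_hw_segments_entry = {
                .attr = {.name = "max_hw_segments", .mode = S_IRUGO },
                .show = queue_max_hw_segments_show,
        };

        /* ...and &queue_max_hw_segments_entry.attr would be added to
         * default_attrs[] before the terminating NULL. */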
3575 #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) 3567 #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)
3576 3568
3577 static ssize_t 3569 static ssize_t
3578 queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) 3570 queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3579 { 3571 {
3580 struct queue_sysfs_entry *entry = to_queue(attr); 3572 struct queue_sysfs_entry *entry = to_queue(attr);
3581 struct request_queue *q; 3573 struct request_queue *q;
3582 3574
3583 q = container_of(kobj, struct request_queue, kobj); 3575 q = container_of(kobj, struct request_queue, kobj);
3584 if (!entry->show) 3576 if (!entry->show)
3585 return 0; 3577 return 0;
3586 3578
3587 return entry->show(q, page); 3579 return entry->show(q, page);
3588 } 3580 }
3589 3581
3590 static ssize_t 3582 static ssize_t
3591 queue_attr_store(struct kobject *kobj, struct attribute *attr, 3583 queue_attr_store(struct kobject *kobj, struct attribute *attr,
3592 const char *page, size_t length) 3584 const char *page, size_t length)
3593 { 3585 {
3594 struct queue_sysfs_entry *entry = to_queue(attr); 3586 struct queue_sysfs_entry *entry = to_queue(attr);
3595 struct request_queue *q; 3587 struct request_queue *q;
3596 3588
3597 q = container_of(kobj, struct request_queue, kobj); 3589 q = container_of(kobj, struct request_queue, kobj);
3598 if (!entry->store) 3590 if (!entry->store)
3599 return -EINVAL; 3591 return -EINVAL;
3600 3592
3601 return entry->store(q, page, length); 3593 return entry->store(q, page, length);
3602 } 3594 }
3603 3595
3604 static struct sysfs_ops queue_sysfs_ops = { 3596 static struct sysfs_ops queue_sysfs_ops = {
3605 .show = queue_attr_show, 3597 .show = queue_attr_show,
3606 .store = queue_attr_store, 3598 .store = queue_attr_store,
3607 }; 3599 };
3608 3600
3609 struct kobj_type queue_ktype = { 3601 struct kobj_type queue_ktype = {
3610 .sysfs_ops = &queue_sysfs_ops, 3602 .sysfs_ops = &queue_sysfs_ops,
3611 .default_attrs = default_attrs, 3603 .default_attrs = default_attrs,
3612 }; 3604 };
3613 3605
3614 int blk_register_queue(struct gendisk *disk) 3606 int blk_register_queue(struct gendisk *disk)
3615 { 3607 {
3616 int ret; 3608 int ret;
3617 3609
3618 request_queue_t *q = disk->queue; 3610 request_queue_t *q = disk->queue;
3619 3611
3620 if (!q || !q->request_fn) 3612 if (!q || !q->request_fn)
3621 return -ENXIO; 3613 return -ENXIO;
3622 3614
3623 q->kobj.parent = kobject_get(&disk->kobj); 3615 q->kobj.parent = kobject_get(&disk->kobj);
3624 if (!q->kobj.parent) 3616 if (!q->kobj.parent)
3625 return -EBUSY; 3617 return -EBUSY;
3626 3618
3627 snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue"); 3619 snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue");
3628 q->kobj.ktype = &queue_ktype; 3620 q->kobj.ktype = &queue_ktype;
3629 3621
3630 ret = kobject_register(&q->kobj); 3622 ret = kobject_register(&q->kobj);
3631 if (ret < 0) 3623 if (ret < 0)
3632 return ret; 3624 return ret;
3633 3625
3634 ret = elv_register_queue(q); 3626 ret = elv_register_queue(q);
3635 if (ret) { 3627 if (ret) {
3636 kobject_unregister(&q->kobj); 3628 kobject_unregister(&q->kobj);
3637 return ret; 3629 return ret;
3638 } 3630 }
3639 3631
3640 return 0; 3632 return 0;
3641 } 3633 }
3642 3634
3643 void blk_unregister_queue(struct gendisk *disk) 3635 void blk_unregister_queue(struct gendisk *disk)
3644 { 3636 {
3645 request_queue_t *q = disk->queue; 3637 request_queue_t *q = disk->queue;
3646 3638
3647 if (q && q->request_fn) { 3639 if (q && q->request_fn) {
3648 elv_unregister_queue(q); 3640 elv_unregister_queue(q);
3649 3641
3650 kobject_unregister(&q->kobj); 3642 kobject_unregister(&q->kobj);
3651 kobject_put(&disk->kobj); 3643 kobject_put(&disk->kobj);
3652 } 3644 }
3653 } 3645 }
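Drivers rarely call blk_register_queue()/blk_unregister_queue() themselves: they are invoked on their behalf by add_disk() and del_gendisk() once disk->queue has been set. A hedged sketch of the usual registration sequence; mydev_request, mydev_lock and the single-minor disk are illustrative, and major registration, fops, capacity and error handling are all omitted:

        static DEFINE_SPINLOCK(mydev_lock);

        static void mydev_request(request_queue_t *q)
        {
                /* device-specific service loop, elided */
        }

        static int __init mydev_init(void)
        {
                struct gendisk *gd = alloc_disk(1);

                gd->queue = blk_init_queue(mydev_request, &mydev_lock);
                add_disk(gd);   /* this path is what calls blk_register_queue() */
                return 0;
        }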
drivers/block/paride/pd.c
1 /* 1 /*
2 pd.c (c) 1997-8 Grant R. Guenther <grant@torque.net> 2 pd.c (c) 1997-8 Grant R. Guenther <grant@torque.net>
3 Under the terms of the GNU General Public License. 3 Under the terms of the GNU General Public License.
4 4
5 This is the high-level driver for parallel port IDE hard 5 This is the high-level driver for parallel port IDE hard
6 drives based on chips supported by the paride module. 6 drives based on chips supported by the paride module.
7 7
8 By default, the driver will autoprobe for a single parallel 8 By default, the driver will autoprobe for a single parallel
9 port IDE drive, but if their individual parameters are 9 port IDE drive, but if their individual parameters are
10 specified, the driver can handle up to 4 drives. 10 specified, the driver can handle up to 4 drives.
11 11
12 The behaviour of the pd driver can be altered by setting 12 The behaviour of the pd driver can be altered by setting
13 some parameters from the insmod command line. The following 13 some parameters from the insmod command line. The following
14 parameters are adjustable: 14 parameters are adjustable:
15 15
16 drive0 These four arguments can be arrays of 16 drive0 These four arguments can be arrays of
17 drive1 1-8 integers as follows: 17 drive1 1-8 integers as follows:
18 drive2 18 drive2
19 drive3 <prt>,<pro>,<uni>,<mod>,<geo>,<sby>,<dly>,<slv> 19 drive3 <prt>,<pro>,<uni>,<mod>,<geo>,<sby>,<dly>,<slv>
20 20
21 Where, 21 Where,
22 22
23 <prt> is the base of the parallel port address for 23 <prt> is the base of the parallel port address for
24 the corresponding drive. (required) 24 the corresponding drive. (required)
25 25
26 <pro> is the protocol number for the adapter that 26 <pro> is the protocol number for the adapter that
27 supports this drive. These numbers are 27 supports this drive. These numbers are
28 logged by 'paride' when the protocol modules 28 logged by 'paride' when the protocol modules
29 are initialised. (0 if not given) 29 are initialised. (0 if not given)
30 30
31 <uni> for those adapters that support chained 31 <uni> for those adapters that support chained
32 devices, this is the unit selector for the 32 devices, this is the unit selector for the
33 chain of devices on the given port. It should 33 chain of devices on the given port. It should
34 be zero for devices that don't support chaining. 34 be zero for devices that don't support chaining.
35 (0 if not given) 35 (0 if not given)
36 36
37 <mod> this can be -1 to choose the best mode, or one 37 <mod> this can be -1 to choose the best mode, or one
38 of the mode numbers supported by the adapter. 38 of the mode numbers supported by the adapter.
39 (-1 if not given) 39 (-1 if not given)
40 40
41 <geo> this defaults to 0 to indicate that the driver 41 <geo> this defaults to 0 to indicate that the driver
42 should use the CHS geometry provided by the drive 42 should use the CHS geometry provided by the drive
43 itself. If set to 1, the driver will provide 43 itself. If set to 1, the driver will provide
44 a logical geometry with 64 heads and 32 sectors 44 a logical geometry with 64 heads and 32 sectors
45 per track, to be consistent with most SCSI 45 per track, to be consistent with most SCSI
46 drivers. (0 if not given) 46 drivers. (0 if not given)
47 47
48 <sby> set this to zero to disable the power saving 48 <sby> set this to zero to disable the power saving
49 standby mode, if needed. (1 if not given) 49 standby mode, if needed. (1 if not given)
50 50
51 <dly> some parallel ports require the driver to 51 <dly> some parallel ports require the driver to
52 go more slowly. -1 sets a default value that 52 go more slowly. -1 sets a default value that
53 should work with the chosen protocol. Otherwise, 53 should work with the chosen protocol. Otherwise,
54 set this to a small integer, the larger it is 54 set this to a small integer, the larger it is
55 the slower the port i/o. In some cases, setting 55 the slower the port i/o. In some cases, setting
56 this to zero will speed up the device. (default -1) 56 this to zero will speed up the device. (default -1)
57 57
58 <slv> IDE disks can be jumpered to master or slave. 58 <slv> IDE disks can be jumpered to master or slave.
59 Set this to 0 to choose the master drive, 1 to 59 Set this to 0 to choose the master drive, 1 to
60 choose the slave, -1 (the default) to choose the 60 choose the slave, -1 (the default) to choose the
61 first drive found. 61 first drive found.
62 62
63 63
64 major You may use this parameter to override the 64 major You may use this parameter to override the
65 default major number (45) that this driver 65 default major number (45) that this driver
66 will use. Be sure to change the device 66 will use. Be sure to change the device
67 name as well. 67 name as well.
68 68
69 name This parameter is a character string that 69 name This parameter is a character string that
70 contains the name the kernel will use for this 70 contains the name the kernel will use for this
71 device (in /proc output, for instance). 71 device (in /proc output, for instance).
72 (default "pd") 72 (default "pd")
73 73
74 cluster The driver will attempt to aggregate requests 74 cluster The driver will attempt to aggregate requests
75 for adjacent blocks into larger multi-block 75 for adjacent blocks into larger multi-block
76 clusters. The maximum cluster size (in 512 76 clusters. The maximum cluster size (in 512
77 byte sectors) is set with this parameter. 77 byte sectors) is set with this parameter.
78 (default 64) 78 (default 64)
79 79
80 verbose This parameter controls the amount of logging 80 verbose This parameter controls the amount of logging
81 that the driver will do. Set it to 0 for 81 that the driver will do. Set it to 0 for
82 normal operation, 1 to see autoprobe progress 82 normal operation, 1 to see autoprobe progress
83 messages, or 2 to see additional debugging 83 messages, or 2 to see additional debugging
84 output. (default 0) 84 output. (default 0)
85 85
86 nice This parameter controls the driver's use of 86 nice This parameter controls the driver's use of
87 idle CPU time, at the expense of some speed. 87 idle CPU time, at the expense of some speed.
88 88
89 If this driver is built into the kernel, you can use the 89 If this driver is built into the kernel, you can use the
90 following kernel command line parameters, with the same values 90 following kernel command line parameters, with the same values
91 as the corresponding module parameters listed above: 91 as the corresponding module parameters listed above:
92 92
93 pd.drive0 93 pd.drive0
94 pd.drive1 94 pd.drive1
95 pd.drive2 95 pd.drive2
96 pd.drive3 96 pd.drive3
97 pd.cluster 97 pd.cluster
98 pd.nice 98 pd.nice
99 99
100 In addition, you can use the parameter pd.disable to disable 100 In addition, you can use the parameter pd.disable to disable
101 the driver entirely. 101 the driver entirely.
102 102
103 */ 103 */
104 104
105 /* Changes: 105 /* Changes:
106 106
107 1.01 GRG 1997.01.24 Restored pd_reset() 107 1.01 GRG 1997.01.24 Restored pd_reset()
108 Added eject ioctl 108 Added eject ioctl
109 1.02 GRG 1998.05.06 SMP spinlock changes, 109 1.02 GRG 1998.05.06 SMP spinlock changes,
110 Added slave support 110 Added slave support
111 1.03 GRG 1998.06.16 Eliminate an Ugh. 111 1.03 GRG 1998.06.16 Eliminate an Ugh.
112 1.04 GRG 1998.08.15 Extra debugging, use HZ in loop timing 112 1.04 GRG 1998.08.15 Extra debugging, use HZ in loop timing
113 1.05 GRG 1998.09.24 Added jumbo support 113 1.05 GRG 1998.09.24 Added jumbo support
114 114
115 */ 115 */
116 116
117 #define PD_VERSION "1.05" 117 #define PD_VERSION "1.05"
118 #define PD_MAJOR 45 118 #define PD_MAJOR 45
119 #define PD_NAME "pd" 119 #define PD_NAME "pd"
120 #define PD_UNITS 4 120 #define PD_UNITS 4
121 121
122 /* Here are things one can override from the insmod command. 122 /* Here are things one can override from the insmod command.
123 Most are autoprobed by paride unless set here. Verbose is off 123 Most are autoprobed by paride unless set here. Verbose is off
124 by default. 124 by default.
125 125
126 */ 126 */
127 127
128 static int verbose = 0; 128 static int verbose = 0;
129 static int major = PD_MAJOR; 129 static int major = PD_MAJOR;
130 static char *name = PD_NAME; 130 static char *name = PD_NAME;
131 static int cluster = 64; 131 static int cluster = 64;
132 static int nice = 0; 132 static int nice = 0;
133 static int disable = 0; 133 static int disable = 0;
134 134
135 static int drive0[8] = { 0, 0, 0, -1, 0, 1, -1, -1 }; 135 static int drive0[8] = { 0, 0, 0, -1, 0, 1, -1, -1 };
136 static int drive1[8] = { 0, 0, 0, -1, 0, 1, -1, -1 }; 136 static int drive1[8] = { 0, 0, 0, -1, 0, 1, -1, -1 };
137 static int drive2[8] = { 0, 0, 0, -1, 0, 1, -1, -1 }; 137 static int drive2[8] = { 0, 0, 0, -1, 0, 1, -1, -1 };
138 static int drive3[8] = { 0, 0, 0, -1, 0, 1, -1, -1 }; 138 static int drive3[8] = { 0, 0, 0, -1, 0, 1, -1, -1 };
139 139
140 static int (*drives[4])[8] = {&drive0, &drive1, &drive2, &drive3}; 140 static int (*drives[4])[8] = {&drive0, &drive1, &drive2, &drive3};
141 141
142 enum {D_PRT, D_PRO, D_UNI, D_MOD, D_GEO, D_SBY, D_DLY, D_SLV}; 142 enum {D_PRT, D_PRO, D_UNI, D_MOD, D_GEO, D_SBY, D_DLY, D_SLV};
143 143
144 /* end of parameters */ 144 /* end of parameters */
145 145
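Tying the parameter description above to the defaults listed just before this point: the eight integers are read in D_PRT..D_SLV order. As a hypothetical example, a single drive on a parallel port at the conventional 0x378 base, with autoprobed protocol and unit, best mode, the SCSI-style 64x32 logical geometry and the remaining defaults, would be given as pd.drive0=0x378,0,0,-1,1,1,-1,-1 on the kernel command line (or the equivalent insmod arguments), leaving drive0[] holding:

        { 0x378, 0,  0,  -1,  1,  1,  -1,  -1 }
        /* PRT  PRO UNI  MOD GEO SBY  DLY  SLV */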
146 #include <linux/init.h> 146 #include <linux/init.h>
147 #include <linux/module.h> 147 #include <linux/module.h>
148 #include <linux/fs.h> 148 #include <linux/fs.h>
149 #include <linux/delay.h> 149 #include <linux/delay.h>
150 #include <linux/hdreg.h> 150 #include <linux/hdreg.h>
151 #include <linux/cdrom.h> /* for the eject ioctl */ 151 #include <linux/cdrom.h> /* for the eject ioctl */
152 #include <linux/blkdev.h> 152 #include <linux/blkdev.h>
153 #include <linux/blkpg.h> 153 #include <linux/blkpg.h>
154 #include <asm/uaccess.h> 154 #include <asm/uaccess.h>
155 #include <linux/sched.h> 155 #include <linux/sched.h>
156 #include <linux/workqueue.h> 156 #include <linux/workqueue.h>
157 157
158 static DEFINE_SPINLOCK(pd_lock); 158 static DEFINE_SPINLOCK(pd_lock);
159 159
160 module_param(verbose, bool, 0); 160 module_param(verbose, bool, 0);
161 module_param(major, int, 0); 161 module_param(major, int, 0);
162 module_param(name, charp, 0); 162 module_param(name, charp, 0);
163 module_param(cluster, int, 0); 163 module_param(cluster, int, 0);
164 module_param(nice, int, 0); 164 module_param(nice, int, 0);
165 module_param_array(drive0, int, NULL, 0); 165 module_param_array(drive0, int, NULL, 0);
166 module_param_array(drive1, int, NULL, 0); 166 module_param_array(drive1, int, NULL, 0);
167 module_param_array(drive2, int, NULL, 0); 167 module_param_array(drive2, int, NULL, 0);
168 module_param_array(drive3, int, NULL, 0); 168 module_param_array(drive3, int, NULL, 0);
169 169
170 #include "paride.h" 170 #include "paride.h"
171 171
172 #define PD_BITS 4 172 #define PD_BITS 4
173 173
174 /* numbers for "SCSI" geometry */ 174 /* numbers for "SCSI" geometry */
175 175
176 #define PD_LOG_HEADS 64 176 #define PD_LOG_HEADS 64
177 #define PD_LOG_SECTS 32 177 #define PD_LOG_SECTS 32
178 178
179 #define PD_ID_OFF 54 179 #define PD_ID_OFF 54
180 #define PD_ID_LEN 14 180 #define PD_ID_LEN 14
181 181
182 #define PD_MAX_RETRIES 5 182 #define PD_MAX_RETRIES 5
183 #define PD_TMO 800 /* interrupt timeout in jiffies */ 183 #define PD_TMO 800 /* interrupt timeout in jiffies */
184 #define PD_SPIN_DEL 50 /* spin delay in micro-seconds */ 184 #define PD_SPIN_DEL 50 /* spin delay in micro-seconds */
185 185
186 #define PD_SPIN (1000000*PD_TMO)/(HZ*PD_SPIN_DEL) 186 #define PD_SPIN (1000000*PD_TMO)/(HZ*PD_SPIN_DEL)
187 187
188 #define STAT_ERR 0x00001 188 #define STAT_ERR 0x00001
189 #define STAT_INDEX 0x00002 189 #define STAT_INDEX 0x00002
190 #define STAT_ECC 0x00004 190 #define STAT_ECC 0x00004
191 #define STAT_DRQ 0x00008 191 #define STAT_DRQ 0x00008
192 #define STAT_SEEK 0x00010 192 #define STAT_SEEK 0x00010
193 #define STAT_WRERR 0x00020 193 #define STAT_WRERR 0x00020
194 #define STAT_READY 0x00040 194 #define STAT_READY 0x00040
195 #define STAT_BUSY 0x00080 195 #define STAT_BUSY 0x00080
196 196
197 #define ERR_AMNF 0x00100 197 #define ERR_AMNF 0x00100
198 #define ERR_TK0NF 0x00200 198 #define ERR_TK0NF 0x00200
199 #define ERR_ABRT 0x00400 199 #define ERR_ABRT 0x00400
200 #define ERR_MCR 0x00800 200 #define ERR_MCR 0x00800
201 #define ERR_IDNF 0x01000 201 #define ERR_IDNF 0x01000
202 #define ERR_MC 0x02000 202 #define ERR_MC 0x02000
203 #define ERR_UNC 0x04000 203 #define ERR_UNC 0x04000
204 #define ERR_TMO 0x10000 204 #define ERR_TMO 0x10000
205 205
206 #define IDE_READ 0x20 206 #define IDE_READ 0x20
207 #define IDE_WRITE 0x30 207 #define IDE_WRITE 0x30
208 #define IDE_READ_VRFY 0x40 208 #define IDE_READ_VRFY 0x40
209 #define IDE_INIT_DEV_PARMS 0x91 209 #define IDE_INIT_DEV_PARMS 0x91
210 #define IDE_STANDBY 0x96 210 #define IDE_STANDBY 0x96
211 #define IDE_ACKCHANGE 0xdb 211 #define IDE_ACKCHANGE 0xdb
212 #define IDE_DOORLOCK 0xde 212 #define IDE_DOORLOCK 0xde
213 #define IDE_DOORUNLOCK 0xdf 213 #define IDE_DOORUNLOCK 0xdf
214 #define IDE_IDENTIFY 0xec 214 #define IDE_IDENTIFY 0xec
215 #define IDE_EJECT 0xed 215 #define IDE_EJECT 0xed
216 216
217 #define PD_NAMELEN 8 217 #define PD_NAMELEN 8
218 218
219 struct pd_unit { 219 struct pd_unit {
220 struct pi_adapter pia; /* interface to paride layer */ 220 struct pi_adapter pia; /* interface to paride layer */
221 struct pi_adapter *pi; 221 struct pi_adapter *pi;
222 int access; /* count of active opens ... */ 222 int access; /* count of active opens ... */
223 int capacity; /* Size of this volume in sectors */ 223 int capacity; /* Size of this volume in sectors */
224 int heads; /* physical geometry */ 224 int heads; /* physical geometry */
225 int sectors; 225 int sectors;
226 int cylinders; 226 int cylinders;
227 int can_lba; 227 int can_lba;
228 int drive; /* master=0 slave=1 */ 228 int drive; /* master=0 slave=1 */
229 int changed; /* Have we seen a disk change ? */ 229 int changed; /* Have we seen a disk change ? */
230 int removable; /* removable media device ? */ 230 int removable; /* removable media device ? */
231 int standby; 231 int standby;
232 int alt_geom; 232 int alt_geom;
233 char name[PD_NAMELEN]; /* pda, pdb, etc ... */ 233 char name[PD_NAMELEN]; /* pda, pdb, etc ... */
234 struct gendisk *gd; 234 struct gendisk *gd;
235 }; 235 };
236 236
237 static struct pd_unit pd[PD_UNITS]; 237 static struct pd_unit pd[PD_UNITS];
238 238
239 static char pd_scratch[512]; /* scratch block buffer */ 239 static char pd_scratch[512]; /* scratch block buffer */
240 240
241 static char *pd_errs[17] = { "ERR", "INDEX", "ECC", "DRQ", "SEEK", "WRERR", 241 static char *pd_errs[17] = { "ERR", "INDEX", "ECC", "DRQ", "SEEK", "WRERR",
242 "READY", "BUSY", "AMNF", "TK0NF", "ABRT", "MCR", 242 "READY", "BUSY", "AMNF", "TK0NF", "ABRT", "MCR",
243 "IDNF", "MC", "UNC", "???", "TMO" 243 "IDNF", "MC", "UNC", "???", "TMO"
244 }; 244 };
245 245
246 static inline int status_reg(struct pd_unit *disk) 246 static inline int status_reg(struct pd_unit *disk)
247 { 247 {
248 return pi_read_regr(disk->pi, 1, 6); 248 return pi_read_regr(disk->pi, 1, 6);
249 } 249 }
250 250
251 static inline int read_reg(struct pd_unit *disk, int reg) 251 static inline int read_reg(struct pd_unit *disk, int reg)
252 { 252 {
253 return pi_read_regr(disk->pi, 0, reg); 253 return pi_read_regr(disk->pi, 0, reg);
254 } 254 }
255 255
256 static inline void write_status(struct pd_unit *disk, int val) 256 static inline void write_status(struct pd_unit *disk, int val)
257 { 257 {
258 pi_write_regr(disk->pi, 1, 6, val); 258 pi_write_regr(disk->pi, 1, 6, val);
259 } 259 }
260 260
261 static inline void write_reg(struct pd_unit *disk, int reg, int val) 261 static inline void write_reg(struct pd_unit *disk, int reg, int val)
262 { 262 {
263 pi_write_regr(disk->pi, 0, reg, val); 263 pi_write_regr(disk->pi, 0, reg, val);
264 } 264 }
265 265
266 static inline u8 DRIVE(struct pd_unit *disk) 266 static inline u8 DRIVE(struct pd_unit *disk)
267 { 267 {
268 return 0xa0+0x10*disk->drive; 268 return 0xa0+0x10*disk->drive;
269 } 269 }
270 270
271 /* ide command interface */ 271 /* ide command interface */
272 272
273 static void pd_print_error(struct pd_unit *disk, char *msg, int status) 273 static void pd_print_error(struct pd_unit *disk, char *msg, int status)
274 { 274 {
275 int i; 275 int i;
276 276
277 printk("%s: %s: status = 0x%x =", disk->name, msg, status); 277 printk("%s: %s: status = 0x%x =", disk->name, msg, status);
278 for (i = 0; i < 18; i++) 278 for (i = 0; i < 18; i++)
279 if (status & (1 << i)) 279 if (status & (1 << i))
280 printk(" %s", pd_errs[i]); 280 printk(" %s", pd_errs[i]);
281 printk("\n"); 281 printk("\n");
282 } 282 }
283 283
284 static void pd_reset(struct pd_unit *disk) 284 static void pd_reset(struct pd_unit *disk)
285 { /* called only for MASTER drive */ 285 { /* called only for MASTER drive */
286 write_status(disk, 4); 286 write_status(disk, 4);
287 udelay(50); 287 udelay(50);
288 write_status(disk, 0); 288 write_status(disk, 0);
289 udelay(250); 289 udelay(250);
290 } 290 }
291 291
292 #define DBMSG(msg) ((verbose>1)?(msg):NULL) 292 #define DBMSG(msg) ((verbose>1)?(msg):NULL)
293 293
294 static int pd_wait_for(struct pd_unit *disk, int w, char *msg) 294 static int pd_wait_for(struct pd_unit *disk, int w, char *msg)
295 { /* polled wait */ 295 { /* polled wait */
296 int k, r, e; 296 int k, r, e;
297 297
298 k = 0; 298 k = 0;
299 while (k < PD_SPIN) { 299 while (k < PD_SPIN) {
300 r = status_reg(disk); 300 r = status_reg(disk);
301 k++; 301 k++;
302 if (((r & w) == w) && !(r & STAT_BUSY)) 302 if (((r & w) == w) && !(r & STAT_BUSY))
303 break; 303 break;
304 udelay(PD_SPIN_DEL); 304 udelay(PD_SPIN_DEL);
305 } 305 }
306 e = (read_reg(disk, 1) << 8) + read_reg(disk, 7); 306 e = (read_reg(disk, 1) << 8) + read_reg(disk, 7);
307 if (k >= PD_SPIN) 307 if (k >= PD_SPIN)
308 e |= ERR_TMO; 308 e |= ERR_TMO;
309 if ((e & (STAT_ERR | ERR_TMO)) && (msg != NULL)) 309 if ((e & (STAT_ERR | ERR_TMO)) && (msg != NULL))
310 pd_print_error(disk, msg, e); 310 pd_print_error(disk, msg, e);
311 return e; 311 return e;
312 } 312 }
313 313
314 static void pd_send_command(struct pd_unit *disk, int n, int s, int h, int c0, int c1, int func) 314 static void pd_send_command(struct pd_unit *disk, int n, int s, int h, int c0, int c1, int func)
315 { 315 {
316 write_reg(disk, 6, DRIVE(disk) + h); 316 write_reg(disk, 6, DRIVE(disk) + h);
317 write_reg(disk, 1, 0); /* the IDE task file */ 317 write_reg(disk, 1, 0); /* the IDE task file */
318 write_reg(disk, 2, n); 318 write_reg(disk, 2, n);
319 write_reg(disk, 3, s); 319 write_reg(disk, 3, s);
320 write_reg(disk, 4, c0); 320 write_reg(disk, 4, c0);
321 write_reg(disk, 5, c1); 321 write_reg(disk, 5, c1);
322 write_reg(disk, 7, func); 322 write_reg(disk, 7, func);
323 323
324 udelay(1); 324 udelay(1);
325 } 325 }
326 326
327 static void pd_ide_command(struct pd_unit *disk, int func, int block, int count) 327 static void pd_ide_command(struct pd_unit *disk, int func, int block, int count)
328 { 328 {
329 int c1, c0, h, s; 329 int c1, c0, h, s;
330 330
331 if (disk->can_lba) { 331 if (disk->can_lba) {
332 s = block & 255; 332 s = block & 255;
333 c0 = (block >>= 8) & 255; 333 c0 = (block >>= 8) & 255;
334 c1 = (block >>= 8) & 255; 334 c1 = (block >>= 8) & 255;
335 h = ((block >>= 8) & 15) + 0x40; 335 h = ((block >>= 8) & 15) + 0x40;
336 } else { 336 } else {
337 s = (block % disk->sectors) + 1; 337 s = (block % disk->sectors) + 1;
338 h = (block /= disk->sectors) % disk->heads; 338 h = (block /= disk->sectors) % disk->heads;
339 c0 = (block /= disk->heads) % 256; 339 c0 = (block /= disk->heads) % 256;
340 c1 = (block >>= 8); 340 c1 = (block >>= 8);
341 } 341 }
342 pd_send_command(disk, count, s, h, c0, c1, func); 342 pd_send_command(disk, count, s, h, c0, c1, func);
343 } 343 }
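pd_ide_command() packs the block address either as a 28-bit LBA (low byte into the sector register, the next two bytes into the cylinder registers, the top nibble plus the 0x40 LBA-select bit into the head register) or as CHS derived from the drive geometry. The fragment below is only a worked restatement of that arithmetic, using a made-up block number and a 16-head, 63-sector geometry; it is not part of the patch.

        /* Worked example of the LBA and CHS encodings used by pd_ide_command(). */
        #include <assert.h>

        int main(void)
        {
                int block, s, h, c0, c1;

                /* LBA: block 0x01234567 splits into the four task-file registers */
                block = 0x01234567;
                s  = block & 255;                    /* 0x67 */
                c0 = (block >>= 8) & 255;            /* 0x45 */
                c1 = (block >>= 8) & 255;            /* 0x23 */
                h  = ((block >>= 8) & 15) + 0x40;    /* 0x41: LBA bit + high nibble */
                assert(s == 0x67 && c0 == 0x45 && c1 == 0x23 && h == 0x41);

                /* CHS: block 5000 on a 16-head, 63-sector drive */
                block = 5000;
                s  = (block % 63) + 1;               /* sector 24 (1-based) */
                h  = (block /= 63) % 16;             /* head 15             */
                c0 = (block /= 16) % 256;            /* cylinder low: 4     */
                c1 = (block >>= 8);                  /* cylinder high: 0    */
                assert(s == 24 && h == 15 && c0 == 4 && c1 == 0);
                return 0;
        }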
344 344
345 /* The i/o request engine */ 345 /* The i/o request engine */
346 346
347 enum action {Fail = 0, Ok = 1, Hold, Wait}; 347 enum action {Fail = 0, Ok = 1, Hold, Wait};
348 348
349 static struct request *pd_req; /* current request */ 349 static struct request *pd_req; /* current request */
350 static enum action (*phase)(void); 350 static enum action (*phase)(void);
351 351
352 static void run_fsm(void); 352 static void run_fsm(void);
353 353
354 static void ps_tq_int( void *data); 354 static void ps_tq_int( void *data);
355 355
356 static DECLARE_WORK(fsm_tq, ps_tq_int, NULL); 356 static DECLARE_WORK(fsm_tq, ps_tq_int, NULL);
357 357
358 static void schedule_fsm(void) 358 static void schedule_fsm(void)
359 { 359 {
360 if (!nice) 360 if (!nice)
361 schedule_work(&fsm_tq); 361 schedule_work(&fsm_tq);
362 else 362 else
363 schedule_delayed_work(&fsm_tq, nice-1); 363 schedule_delayed_work(&fsm_tq, nice-1);
364 } 364 }
365 365
366 static void ps_tq_int(void *data) 366 static void ps_tq_int(void *data)
367 { 367 {
368 run_fsm(); 368 run_fsm();
369 } 369 }
370 370
371 static enum action do_pd_io_start(void); 371 static enum action do_pd_io_start(void);
372 static enum action pd_special(void); 372 static enum action pd_special(void);
373 static enum action do_pd_read_start(void); 373 static enum action do_pd_read_start(void);
374 static enum action do_pd_write_start(void); 374 static enum action do_pd_write_start(void);
375 static enum action do_pd_read_drq(void); 375 static enum action do_pd_read_drq(void);
376 static enum action do_pd_write_done(void); 376 static enum action do_pd_write_done(void);
377 377
378 static struct request_queue *pd_queue; 378 static struct request_queue *pd_queue;
379 static int pd_claimed; 379 static int pd_claimed;
380 380
381 static struct pd_unit *pd_current; /* current request's drive */ 381 static struct pd_unit *pd_current; /* current request's drive */
382 static PIA *pi_current; /* current request's PIA */ 382 static PIA *pi_current; /* current request's PIA */
383 383
384 static void run_fsm(void) 384 static void run_fsm(void)
385 { 385 {
386 while (1) { 386 while (1) {
387 enum action res; 387 enum action res;
388 unsigned long saved_flags; 388 unsigned long saved_flags;
389 int stop = 0; 389 int stop = 0;
390 390
391 if (!phase) { 391 if (!phase) {
392 pd_current = pd_req->rq_disk->private_data; 392 pd_current = pd_req->rq_disk->private_data;
393 pi_current = pd_current->pi; 393 pi_current = pd_current->pi;
394 phase = do_pd_io_start; 394 phase = do_pd_io_start;
395 } 395 }
396 396
397 switch (pd_claimed) { 397 switch (pd_claimed) {
398 case 0: 398 case 0:
399 pd_claimed = 1; 399 pd_claimed = 1;
400 if (!pi_schedule_claimed(pi_current, run_fsm)) 400 if (!pi_schedule_claimed(pi_current, run_fsm))
401 return; 401 return;
402 case 1: 402 case 1:
403 pd_claimed = 2; 403 pd_claimed = 2;
404 pi_current->proto->connect(pi_current); 404 pi_current->proto->connect(pi_current);
405 } 405 }
406 406
407 switch(res = phase()) { 407 switch(res = phase()) {
408 case Ok: case Fail: 408 case Ok: case Fail:
409 pi_disconnect(pi_current); 409 pi_disconnect(pi_current);
410 pd_claimed = 0; 410 pd_claimed = 0;
411 phase = NULL; 411 phase = NULL;
412 spin_lock_irqsave(&pd_lock, saved_flags); 412 spin_lock_irqsave(&pd_lock, saved_flags);
413 end_request(pd_req, res); 413 end_request(pd_req, res);
414 pd_req = elv_next_request(pd_queue); 414 pd_req = elv_next_request(pd_queue);
415 if (!pd_req) 415 if (!pd_req)
416 stop = 1; 416 stop = 1;
417 spin_unlock_irqrestore(&pd_lock, saved_flags); 417 spin_unlock_irqrestore(&pd_lock, saved_flags);
418 if (stop) 418 if (stop)
419 return; 419 return;
420 case Hold: 420 case Hold:
421 schedule_fsm(); 421 schedule_fsm();
422 return; 422 return;
423 case Wait: 423 case Wait:
424 pi_disconnect(pi_current); 424 pi_disconnect(pi_current);
425 pd_claimed = 0; 425 pd_claimed = 0;
426 } 426 }
427 } 427 }
428 } 428 }
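The switch fall-throughs in run_fsm() are deliberate: pd_claimed steps 0 -> 1 -> 2 as the paride adapter is first claimed and then connected, a finished phase (Ok or Fail) falls through into Hold so the state machine is rescheduled for the next request, and Wait releases the adapter so the retried phase re-claims it on the next loop iteration. The userspace fragment below is only a rough model of that claim/connect/run progression; claim(), connect() and run_phase() are hypothetical stand-ins, not paride calls.

        /* Illustrative only: models the claim -> connect -> run fall-through. */
        #include <stdio.h>

        static int claimed;                       /* mirrors pd_claimed: 0, 1 or 2 */

        static int  claim(void)     { puts("claim bus");  return 1; } /* 1 = got it now */
        static void connect(void)   { puts("connect");               }
        static int  run_phase(void) { puts("run phase");  return 1; } /* 1 = Ok or Fail */

        static void fsm_step(void)
        {
                switch (claimed) {
                case 0:
                        claimed = 1;
                        if (!claim())        /* would return and wait for a callback */
                                return;
                        /* fall through */
                case 1:
                        claimed = 2;
                        connect();
                        /* fall through */
                }

                if (run_phase()) {           /* phase finished: release and reset */
                        puts("disconnect");
                        claimed = 0;
                }
        }

        int main(void)
        {
                fsm_step();
                return 0;
        }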
429 429
430 static int pd_retries = 0; /* i/o error retry count */ 430 static int pd_retries = 0; /* i/o error retry count */
431 static int pd_block; /* address of next requested block */ 431 static int pd_block; /* address of next requested block */
432 static int pd_count; /* number of blocks still to do */ 432 static int pd_count; /* number of blocks still to do */
433 static int pd_run; /* sectors in current cluster */ 433 static int pd_run; /* sectors in current cluster */
434 static int pd_cmd; /* current command READ/WRITE */ 434 static int pd_cmd; /* current command READ/WRITE */
435 static char *pd_buf; /* buffer for request in progress */ 435 static char *pd_buf; /* buffer for request in progress */
436 436
437 static enum action do_pd_io_start(void) 437 static enum action do_pd_io_start(void)
438 { 438 {
439 if (pd_req->flags & REQ_SPECIAL) { 439 if (pd_req->flags & REQ_SPECIAL) {
440 phase = pd_special; 440 phase = pd_special;
441 return pd_special(); 441 return pd_special();
442 } 442 }
443 443
444 pd_cmd = rq_data_dir(pd_req); 444 pd_cmd = rq_data_dir(pd_req);
445 if (pd_cmd == READ || pd_cmd == WRITE) { 445 if (pd_cmd == READ || pd_cmd == WRITE) {
446 pd_block = pd_req->sector; 446 pd_block = pd_req->sector;
447 pd_count = pd_req->current_nr_sectors; 447 pd_count = pd_req->current_nr_sectors;
448 if (pd_block + pd_count > get_capacity(pd_req->rq_disk)) 448 if (pd_block + pd_count > get_capacity(pd_req->rq_disk))
449 return Fail; 449 return Fail;
450 pd_run = pd_req->nr_sectors; 450 pd_run = pd_req->nr_sectors;
451 pd_buf = pd_req->buffer; 451 pd_buf = pd_req->buffer;
452 pd_retries = 0; 452 pd_retries = 0;
453 if (pd_cmd == READ) 453 if (pd_cmd == READ)
454 return do_pd_read_start(); 454 return do_pd_read_start();
455 else 455 else
456 return do_pd_write_start(); 456 return do_pd_write_start();
457 } 457 }
458 return Fail; 458 return Fail;
459 } 459 }
460 460
461 static enum action pd_special(void) 461 static enum action pd_special(void)
462 { 462 {
463 enum action (*func)(struct pd_unit *) = pd_req->special; 463 enum action (*func)(struct pd_unit *) = pd_req->special;
464 return func(pd_current); 464 return func(pd_current);
465 } 465 }
466 466
467 static int pd_next_buf(void) 467 static int pd_next_buf(void)
468 { 468 {
469 unsigned long saved_flags; 469 unsigned long saved_flags;
470 470
471 pd_count--; 471 pd_count--;
472 pd_run--; 472 pd_run--;
473 pd_buf += 512; 473 pd_buf += 512;
474 pd_block++; 474 pd_block++;
475 if (!pd_run) 475 if (!pd_run)
476 return 1; 476 return 1;
477 if (pd_count) 477 if (pd_count)
478 return 0; 478 return 0;
479 spin_lock_irqsave(&pd_lock, saved_flags); 479 spin_lock_irqsave(&pd_lock, saved_flags);
480 end_request(pd_req, 1); 480 end_request(pd_req, 1);
481 pd_count = pd_req->current_nr_sectors; 481 pd_count = pd_req->current_nr_sectors;
482 pd_buf = pd_req->buffer; 482 pd_buf = pd_req->buffer;
483 spin_unlock_irqrestore(&pd_lock, saved_flags); 483 spin_unlock_irqrestore(&pd_lock, saved_flags);
484 return 0; 484 return 0;
485 } 485 }
486 486
487 static unsigned long pd_timeout; 487 static unsigned long pd_timeout;
488 488
489 static enum action do_pd_read_start(void) 489 static enum action do_pd_read_start(void)
490 { 490 {
491 if (pd_wait_for(pd_current, STAT_READY, "do_pd_read") & STAT_ERR) { 491 if (pd_wait_for(pd_current, STAT_READY, "do_pd_read") & STAT_ERR) {
492 if (pd_retries < PD_MAX_RETRIES) { 492 if (pd_retries < PD_MAX_RETRIES) {
493 pd_retries++; 493 pd_retries++;
494 return Wait; 494 return Wait;
495 } 495 }
496 return Fail; 496 return Fail;
497 } 497 }
498 pd_ide_command(pd_current, IDE_READ, pd_block, pd_run); 498 pd_ide_command(pd_current, IDE_READ, pd_block, pd_run);
499 phase = do_pd_read_drq; 499 phase = do_pd_read_drq;
500 pd_timeout = jiffies + PD_TMO; 500 pd_timeout = jiffies + PD_TMO;
501 return Hold; 501 return Hold;
502 } 502 }
503 503
504 static enum action do_pd_write_start(void) 504 static enum action do_pd_write_start(void)
505 { 505 {
506 if (pd_wait_for(pd_current, STAT_READY, "do_pd_write") & STAT_ERR) { 506 if (pd_wait_for(pd_current, STAT_READY, "do_pd_write") & STAT_ERR) {
507 if (pd_retries < PD_MAX_RETRIES) { 507 if (pd_retries < PD_MAX_RETRIES) {
508 pd_retries++; 508 pd_retries++;
509 return Wait; 509 return Wait;
510 } 510 }
511 return Fail; 511 return Fail;
512 } 512 }
513 pd_ide_command(pd_current, IDE_WRITE, pd_block, pd_run); 513 pd_ide_command(pd_current, IDE_WRITE, pd_block, pd_run);
514 while (1) { 514 while (1) {
515 if (pd_wait_for(pd_current, STAT_DRQ, "do_pd_write_drq") & STAT_ERR) { 515 if (pd_wait_for(pd_current, STAT_DRQ, "do_pd_write_drq") & STAT_ERR) {
516 if (pd_retries < PD_MAX_RETRIES) { 516 if (pd_retries < PD_MAX_RETRIES) {
517 pd_retries++; 517 pd_retries++;
518 return Wait; 518 return Wait;
519 } 519 }
520 return Fail; 520 return Fail;
521 } 521 }
522 pi_write_block(pd_current->pi, pd_buf, 512); 522 pi_write_block(pd_current->pi, pd_buf, 512);
523 if (pd_next_buf()) 523 if (pd_next_buf())
524 break; 524 break;
525 } 525 }
526 phase = do_pd_write_done; 526 phase = do_pd_write_done;
527 pd_timeout = jiffies + PD_TMO; 527 pd_timeout = jiffies + PD_TMO;
528 return Hold; 528 return Hold;
529 } 529 }
530 530
531 static inline int pd_ready(void) 531 static inline int pd_ready(void)
532 { 532 {
533 return !(status_reg(pd_current) & STAT_BUSY); 533 return !(status_reg(pd_current) & STAT_BUSY);
534 } 534 }
535 535
536 static enum action do_pd_read_drq(void) 536 static enum action do_pd_read_drq(void)
537 { 537 {
538 if (!pd_ready() && !time_after_eq(jiffies, pd_timeout)) 538 if (!pd_ready() && !time_after_eq(jiffies, pd_timeout))
539 return Hold; 539 return Hold;
540 540
541 while (1) { 541 while (1) {
542 if (pd_wait_for(pd_current, STAT_DRQ, "do_pd_read_drq") & STAT_ERR) { 542 if (pd_wait_for(pd_current, STAT_DRQ, "do_pd_read_drq") & STAT_ERR) {
543 if (pd_retries < PD_MAX_RETRIES) { 543 if (pd_retries < PD_MAX_RETRIES) {
544 pd_retries++; 544 pd_retries++;
545 phase = do_pd_read_start; 545 phase = do_pd_read_start;
546 return Wait; 546 return Wait;
547 } 547 }
548 return Fail; 548 return Fail;
549 } 549 }
550 pi_read_block(pd_current->pi, pd_buf, 512); 550 pi_read_block(pd_current->pi, pd_buf, 512);
551 if (pd_next_buf()) 551 if (pd_next_buf())
552 break; 552 break;
553 } 553 }
554 return Ok; 554 return Ok;
555 } 555 }
556 556
557 static enum action do_pd_write_done(void) 557 static enum action do_pd_write_done(void)
558 { 558 {
559 if (!pd_ready() && !time_after_eq(jiffies, pd_timeout)) 559 if (!pd_ready() && !time_after_eq(jiffies, pd_timeout))
560 return Hold; 560 return Hold;
561 561
562 if (pd_wait_for(pd_current, STAT_READY, "do_pd_write_done") & STAT_ERR) { 562 if (pd_wait_for(pd_current, STAT_READY, "do_pd_write_done") & STAT_ERR) {
563 if (pd_retries < PD_MAX_RETRIES) { 563 if (pd_retries < PD_MAX_RETRIES) {
564 pd_retries++; 564 pd_retries++;
565 phase = do_pd_write_start; 565 phase = do_pd_write_start;
566 return Wait; 566 return Wait;
567 } 567 }
568 return Fail; 568 return Fail;
569 } 569 }
570 return Ok; 570 return Ok;
571 } 571 }
572 572
573 /* special io requests */ 573 /* special io requests */
574 574
575 /* According to the ATA standard, the default CHS geometry should be 575 /* According to the ATA standard, the default CHS geometry should be
576 available following a reset. Some Western Digital drives come up 576 available following a reset. Some Western Digital drives come up
577 in a mode where only LBA addresses are accepted until the device 577 in a mode where only LBA addresses are accepted until the device
578 parameters are initialised. 578 parameters are initialised.
579 */ 579 */
580 580
581 static void pd_init_dev_parms(struct pd_unit *disk) 581 static void pd_init_dev_parms(struct pd_unit *disk)
582 { 582 {
583 pd_wait_for(disk, 0, DBMSG("before init_dev_parms")); 583 pd_wait_for(disk, 0, DBMSG("before init_dev_parms"));
584 pd_send_command(disk, disk->sectors, 0, disk->heads - 1, 0, 0, 584 pd_send_command(disk, disk->sectors, 0, disk->heads - 1, 0, 0,
585 IDE_INIT_DEV_PARMS); 585 IDE_INIT_DEV_PARMS);
586 udelay(300); 586 udelay(300);
587 pd_wait_for(disk, 0, "Initialise device parameters"); 587 pd_wait_for(disk, 0, "Initialise device parameters");
588 } 588 }
589 589
590 static enum action pd_door_lock(struct pd_unit *disk) 590 static enum action pd_door_lock(struct pd_unit *disk)
591 { 591 {
592 if (!(pd_wait_for(disk, STAT_READY, "Lock") & STAT_ERR)) { 592 if (!(pd_wait_for(disk, STAT_READY, "Lock") & STAT_ERR)) {
593 pd_send_command(disk, 1, 0, 0, 0, 0, IDE_DOORLOCK); 593 pd_send_command(disk, 1, 0, 0, 0, 0, IDE_DOORLOCK);
594 pd_wait_for(disk, STAT_READY, "Lock done"); 594 pd_wait_for(disk, STAT_READY, "Lock done");
595 } 595 }
596 return Ok; 596 return Ok;
597 } 597 }
598 598
599 static enum action pd_door_unlock(struct pd_unit *disk) 599 static enum action pd_door_unlock(struct pd_unit *disk)
600 { 600 {
601 if (!(pd_wait_for(disk, STAT_READY, "Lock") & STAT_ERR)) { 601 if (!(pd_wait_for(disk, STAT_READY, "Lock") & STAT_ERR)) {
602 pd_send_command(disk, 1, 0, 0, 0, 0, IDE_DOORUNLOCK); 602 pd_send_command(disk, 1, 0, 0, 0, 0, IDE_DOORUNLOCK);
603 pd_wait_for(disk, STAT_READY, "Lock done"); 603 pd_wait_for(disk, STAT_READY, "Lock done");
604 } 604 }
605 return Ok; 605 return Ok;
606 } 606 }
607 607
608 static enum action pd_eject(struct pd_unit *disk) 608 static enum action pd_eject(struct pd_unit *disk)
609 { 609 {
610 pd_wait_for(disk, 0, DBMSG("before unlock on eject")); 610 pd_wait_for(disk, 0, DBMSG("before unlock on eject"));
611 pd_send_command(disk, 1, 0, 0, 0, 0, IDE_DOORUNLOCK); 611 pd_send_command(disk, 1, 0, 0, 0, 0, IDE_DOORUNLOCK);
612 pd_wait_for(disk, 0, DBMSG("after unlock on eject")); 612 pd_wait_for(disk, 0, DBMSG("after unlock on eject"));
613 pd_wait_for(disk, 0, DBMSG("before eject")); 613 pd_wait_for(disk, 0, DBMSG("before eject"));
614 pd_send_command(disk, 0, 0, 0, 0, 0, IDE_EJECT); 614 pd_send_command(disk, 0, 0, 0, 0, 0, IDE_EJECT);
615 pd_wait_for(disk, 0, DBMSG("after eject")); 615 pd_wait_for(disk, 0, DBMSG("after eject"));
616 return Ok; 616 return Ok;
617 } 617 }
618 618
619 static enum action pd_media_check(struct pd_unit *disk) 619 static enum action pd_media_check(struct pd_unit *disk)
620 { 620 {
621 int r = pd_wait_for(disk, STAT_READY, DBMSG("before media_check")); 621 int r = pd_wait_for(disk, STAT_READY, DBMSG("before media_check"));
622 if (!(r & STAT_ERR)) { 622 if (!(r & STAT_ERR)) {
623 pd_send_command(disk, 1, 1, 0, 0, 0, IDE_READ_VRFY); 623 pd_send_command(disk, 1, 1, 0, 0, 0, IDE_READ_VRFY);
624 r = pd_wait_for(disk, STAT_READY, DBMSG("RDY after READ_VRFY")); 624 r = pd_wait_for(disk, STAT_READY, DBMSG("RDY after READ_VRFY"));
625 } else 625 } else
626 disk->changed = 1; /* say changed if other error */ 626 disk->changed = 1; /* say changed if other error */
627 if (r & ERR_MC) { 627 if (r & ERR_MC) {
628 disk->changed = 1; 628 disk->changed = 1;
629 pd_send_command(disk, 1, 0, 0, 0, 0, IDE_ACKCHANGE); 629 pd_send_command(disk, 1, 0, 0, 0, 0, IDE_ACKCHANGE);
630 pd_wait_for(disk, STAT_READY, DBMSG("RDY after ACKCHANGE")); 630 pd_wait_for(disk, STAT_READY, DBMSG("RDY after ACKCHANGE"));
631 pd_send_command(disk, 1, 1, 0, 0, 0, IDE_READ_VRFY); 631 pd_send_command(disk, 1, 1, 0, 0, 0, IDE_READ_VRFY);
632 r = pd_wait_for(disk, STAT_READY, DBMSG("RDY after VRFY")); 632 r = pd_wait_for(disk, STAT_READY, DBMSG("RDY after VRFY"));
633 } 633 }
634 return Ok; 634 return Ok;
635 } 635 }
636 636
637 static void pd_standby_off(struct pd_unit *disk) 637 static void pd_standby_off(struct pd_unit *disk)
638 { 638 {
639 pd_wait_for(disk, 0, DBMSG("before STANDBY")); 639 pd_wait_for(disk, 0, DBMSG("before STANDBY"));
640 pd_send_command(disk, 0, 0, 0, 0, 0, IDE_STANDBY); 640 pd_send_command(disk, 0, 0, 0, 0, 0, IDE_STANDBY);
641 pd_wait_for(disk, 0, DBMSG("after STANDBY")); 641 pd_wait_for(disk, 0, DBMSG("after STANDBY"));
642 } 642 }
643 643
644 static enum action pd_identify(struct pd_unit *disk) 644 static enum action pd_identify(struct pd_unit *disk)
645 { 645 {
646 int j; 646 int j;
647 char id[PD_ID_LEN + 1]; 647 char id[PD_ID_LEN + 1];
648 648
649 /* WARNING: here there may be dragons. reset() applies to both drives, 649 /* WARNING: here there may be dragons. reset() applies to both drives,
650 but we call it only on probing the MASTER. This should allow most 650 but we call it only on probing the MASTER. This should allow most
651 common configurations to work, but be warned that a reset can clear 651 common configurations to work, but be warned that a reset can clear
652 settings on the SLAVE drive. 652 settings on the SLAVE drive.
653 */ 653 */
654 654
655 if (disk->drive == 0) 655 if (disk->drive == 0)
656 pd_reset(disk); 656 pd_reset(disk);
657 657
658 write_reg(disk, 6, DRIVE(disk)); 658 write_reg(disk, 6, DRIVE(disk));
659 pd_wait_for(disk, 0, DBMSG("before IDENT")); 659 pd_wait_for(disk, 0, DBMSG("before IDENT"));
660 pd_send_command(disk, 1, 0, 0, 0, 0, IDE_IDENTIFY); 660 pd_send_command(disk, 1, 0, 0, 0, 0, IDE_IDENTIFY);
661 661
662 if (pd_wait_for(disk, STAT_DRQ, DBMSG("IDENT DRQ")) & STAT_ERR) 662 if (pd_wait_for(disk, STAT_DRQ, DBMSG("IDENT DRQ")) & STAT_ERR)
663 return Fail; 663 return Fail;
664 pi_read_block(disk->pi, pd_scratch, 512); 664 pi_read_block(disk->pi, pd_scratch, 512);
665 disk->can_lba = pd_scratch[99] & 2; 665 disk->can_lba = pd_scratch[99] & 2;
666 disk->sectors = le16_to_cpu(*(u16 *) (pd_scratch + 12)); 666 disk->sectors = le16_to_cpu(*(u16 *) (pd_scratch + 12));
667 disk->heads = le16_to_cpu(*(u16 *) (pd_scratch + 6)); 667 disk->heads = le16_to_cpu(*(u16 *) (pd_scratch + 6));
668 disk->cylinders = le16_to_cpu(*(u16 *) (pd_scratch + 2)); 668 disk->cylinders = le16_to_cpu(*(u16 *) (pd_scratch + 2));
669 if (disk->can_lba) 669 if (disk->can_lba)
670 disk->capacity = le32_to_cpu(*(u32 *) (pd_scratch + 120)); 670 disk->capacity = le32_to_cpu(*(u32 *) (pd_scratch + 120));
671 else 671 else
672 disk->capacity = disk->sectors * disk->heads * disk->cylinders; 672 disk->capacity = disk->sectors * disk->heads * disk->cylinders;
673 673
674 for (j = 0; j < PD_ID_LEN; j++) 674 for (j = 0; j < PD_ID_LEN; j++)
675 id[j ^ 1] = pd_scratch[j + PD_ID_OFF]; 675 id[j ^ 1] = pd_scratch[j + PD_ID_OFF];
676 j = PD_ID_LEN - 1; 676 j = PD_ID_LEN - 1;
677 while ((j >= 0) && (id[j] <= 0x20)) 677 while ((j >= 0) && (id[j] <= 0x20))
678 j--; 678 j--;
679 j++; 679 j++;
680 id[j] = 0; 680 id[j] = 0;
681 681
682 disk->removable = pd_scratch[0] & 0x80; 682 disk->removable = pd_scratch[0] & 0x80;
683 683
684 printk("%s: %s, %s, %d blocks [%dM], (%d/%d/%d), %s media\n", 684 printk("%s: %s, %s, %d blocks [%dM], (%d/%d/%d), %s media\n",
685 disk->name, id, 685 disk->name, id,
686 disk->drive ? "slave" : "master", 686 disk->drive ? "slave" : "master",
687 disk->capacity, disk->capacity / 2048, 687 disk->capacity, disk->capacity / 2048,
688 disk->cylinders, disk->heads, disk->sectors, 688 disk->cylinders, disk->heads, disk->sectors,
689 disk->removable ? "removable" : "fixed"); 689 disk->removable ? "removable" : "fixed");
690 690
691 if (disk->capacity) 691 if (disk->capacity)
692 pd_init_dev_parms(disk); 692 pd_init_dev_parms(disk);
693 if (!disk->standby) 693 if (!disk->standby)
694 pd_standby_off(disk); 694 pd_standby_off(disk);
695 695
696 return Ok; 696 return Ok;
697 } 697 }
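pd_identify() reads the geometry straight out of the IDENTIFY block: words 1, 3 and 6 (byte offsets 2, 6 and 12) hold the default cylinders, heads and sectors, bit 9 of word 49 (bit 1 of byte 99) flags LBA support, and words 60-61 (offset 120) give the LBA capacity. The id[j ^ 1] copy undoes the byte order of the ATA model string, whose characters arrive two per 16-bit word with the first character in the high byte, so a little-endian dump shows each pair swapped. A tiny standalone demonstration of that swap, with a made-up model string, follows.

        /* Demonstrates the id[j ^ 1] swap applied to the ATA IDENTIFY model string. */
        #include <stdio.h>

        int main(void)
        {
                /* "Generic " as it appears in a little-endian dump of the IDENTIFY words */
                const char raw[] = { 'e', 'G', 'e', 'n', 'i', 'r', ' ', 'c' };
                char id[sizeof(raw) + 1];
                int j;

                for (j = 0; j < (int)sizeof(raw); j++)
                        id[j ^ 1] = raw[j];
                id[sizeof(raw)] = '\0';

                printf("%s\n", id);     /* prints "Generic " */
                return 0;
        }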
698 698
699 /* end of io request engine */ 699 /* end of io request engine */
700 700
701 static void do_pd_request(request_queue_t * q) 701 static void do_pd_request(request_queue_t * q)
702 { 702 {
703 if (pd_req) 703 if (pd_req)
704 return; 704 return;
705 pd_req = elv_next_request(q); 705 pd_req = elv_next_request(q);
706 if (!pd_req) 706 if (!pd_req)
707 return; 707 return;
708 708
709 schedule_fsm(); 709 schedule_fsm();
710 } 710 }
711 711
712 static int pd_special_command(struct pd_unit *disk, 712 static int pd_special_command(struct pd_unit *disk,
713 enum action (*func)(struct pd_unit *disk)) 713 enum action (*func)(struct pd_unit *disk))
714 { 714 {
715 DECLARE_COMPLETION(wait); 715 DECLARE_COMPLETION(wait);
716 struct request rq; 716 struct request rq;
717 int err = 0; 717 int err = 0;
718 718
719 memset(&rq, 0, sizeof(rq)); 719 memset(&rq, 0, sizeof(rq));
720 rq.errors = 0; 720 rq.errors = 0;
721 rq.rq_status = RQ_ACTIVE; 721 rq.rq_status = RQ_ACTIVE;
722 rq.rq_disk = disk->gd; 722 rq.rq_disk = disk->gd;
723 rq.ref_count = 1; 723 rq.ref_count = 1;
724 rq.waiting = &wait; 724 rq.waiting = &wait;
725 rq.end_io = blk_end_sync_rq; 725 rq.end_io = blk_end_sync_rq;
726 blk_insert_request(disk->gd->queue, &rq, 0, func, 0); 726 blk_insert_request(disk->gd->queue, &rq, 0, func);
727 wait_for_completion(&wait); 727 wait_for_completion(&wait);
728 rq.waiting = NULL; 728 rq.waiting = NULL;
729 if (rq.errors) 729 if (rq.errors)
730 err = -EIO; 730 err = -EIO;
731 blk_put_request(&rq); 731 blk_put_request(&rq);
732 return err; 732 return err;
733 } 733 }
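This hunk is the pd.c side of the interface change in this commit: the trailing requeue flag is dropped, so blk_insert_request() now takes four arguments and only queues driver-private special requests, with the data argument landing in rq->special, which is exactly what pd_special() above reads back as the handler. The sketch below restates the post-patch calling convention; the queue and handler names are placeholders, and it assumes the same 2.6-era headers and request fields used by pd_special_command().

        /* Sketch: issue one synchronous special request on queue q (post-patch API).
         * 'my_handler' is a hypothetical driver callback of the kind pd_special() runs. */
        static int issue_special(request_queue_t *q, void *my_handler)
        {
                DECLARE_COMPLETION(wait);
                struct request rq;
                int err;

                memset(&rq, 0, sizeof(rq));
                rq.rq_status = RQ_ACTIVE;
                rq.ref_count = 1;
                rq.waiting = &wait;
                rq.end_io = blk_end_sync_rq;

                /* four arguments now: queue, request, at_head, data (-> rq->special) */
                blk_insert_request(q, &rq, 0, my_handler);

                wait_for_completion(&wait);
                rq.waiting = NULL;
                err = rq.errors ? -EIO : 0;
                blk_put_request(&rq);
                return err;
        }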
734 734
735 /* kernel glue structures */ 735 /* kernel glue structures */
736 736
737 static int pd_open(struct inode *inode, struct file *file) 737 static int pd_open(struct inode *inode, struct file *file)
738 { 738 {
739 struct pd_unit *disk = inode->i_bdev->bd_disk->private_data; 739 struct pd_unit *disk = inode->i_bdev->bd_disk->private_data;
740 740
741 disk->access++; 741 disk->access++;
742 742
743 if (disk->removable) { 743 if (disk->removable) {
744 pd_special_command(disk, pd_media_check); 744 pd_special_command(disk, pd_media_check);
745 pd_special_command(disk, pd_door_lock); 745 pd_special_command(disk, pd_door_lock);
746 } 746 }
747 return 0; 747 return 0;
748 } 748 }
749 749
750 static int pd_ioctl(struct inode *inode, struct file *file, 750 static int pd_ioctl(struct inode *inode, struct file *file,
751 unsigned int cmd, unsigned long arg) 751 unsigned int cmd, unsigned long arg)
752 { 752 {
753 struct pd_unit *disk = inode->i_bdev->bd_disk->private_data; 753 struct pd_unit *disk = inode->i_bdev->bd_disk->private_data;
754 struct hd_geometry __user *geo = (struct hd_geometry __user *) arg; 754 struct hd_geometry __user *geo = (struct hd_geometry __user *) arg;
755 struct hd_geometry g; 755 struct hd_geometry g;
756 756
757 switch (cmd) { 757 switch (cmd) {
758 case CDROMEJECT: 758 case CDROMEJECT:
759 if (disk->access == 1) 759 if (disk->access == 1)
760 pd_special_command(disk, pd_eject); 760 pd_special_command(disk, pd_eject);
761 return 0; 761 return 0;
762 case HDIO_GETGEO: 762 case HDIO_GETGEO:
763 if (disk->alt_geom) { 763 if (disk->alt_geom) {
764 g.heads = PD_LOG_HEADS; 764 g.heads = PD_LOG_HEADS;
765 g.sectors = PD_LOG_SECTS; 765 g.sectors = PD_LOG_SECTS;
766 g.cylinders = disk->capacity / (g.heads * g.sectors); 766 g.cylinders = disk->capacity / (g.heads * g.sectors);
767 } else { 767 } else {
768 g.heads = disk->heads; 768 g.heads = disk->heads;
769 g.sectors = disk->sectors; 769 g.sectors = disk->sectors;
770 g.cylinders = disk->cylinders; 770 g.cylinders = disk->cylinders;
771 } 771 }
772 g.start = get_start_sect(inode->i_bdev); 772 g.start = get_start_sect(inode->i_bdev);
773 if (copy_to_user(geo, &g, sizeof(struct hd_geometry))) 773 if (copy_to_user(geo, &g, sizeof(struct hd_geometry)))
774 return -EFAULT; 774 return -EFAULT;
775 return 0; 775 return 0;
776 default: 776 default:
777 return -EINVAL; 777 return -EINVAL;
778 } 778 }
779 } 779 }
780 780
781 static int pd_release(struct inode *inode, struct file *file) 781 static int pd_release(struct inode *inode, struct file *file)
782 { 782 {
783 struct pd_unit *disk = inode->i_bdev->bd_disk->private_data; 783 struct pd_unit *disk = inode->i_bdev->bd_disk->private_data;
784 784
785 if (!--disk->access && disk->removable) 785 if (!--disk->access && disk->removable)
786 pd_special_command(disk, pd_door_unlock); 786 pd_special_command(disk, pd_door_unlock);
787 787
788 return 0; 788 return 0;
789 } 789 }
790 790
791 static int pd_check_media(struct gendisk *p) 791 static int pd_check_media(struct gendisk *p)
792 { 792 {
793 struct pd_unit *disk = p->private_data; 793 struct pd_unit *disk = p->private_data;
794 int r; 794 int r;
795 if (!disk->removable) 795 if (!disk->removable)
796 return 0; 796 return 0;
797 pd_special_command(disk, pd_media_check); 797 pd_special_command(disk, pd_media_check);
798 r = disk->changed; 798 r = disk->changed;
799 disk->changed = 0; 799 disk->changed = 0;
800 return r; 800 return r;
801 } 801 }
802 802
803 static int pd_revalidate(struct gendisk *p) 803 static int pd_revalidate(struct gendisk *p)
804 { 804 {
805 struct pd_unit *disk = p->private_data; 805 struct pd_unit *disk = p->private_data;
806 if (pd_special_command(disk, pd_identify) == 0) 806 if (pd_special_command(disk, pd_identify) == 0)
807 set_capacity(p, disk->capacity); 807 set_capacity(p, disk->capacity);
808 else 808 else
809 set_capacity(p, 0); 809 set_capacity(p, 0);
810 return 0; 810 return 0;
811 } 811 }
812 812
813 static struct block_device_operations pd_fops = { 813 static struct block_device_operations pd_fops = {
814 .owner = THIS_MODULE, 814 .owner = THIS_MODULE,
815 .open = pd_open, 815 .open = pd_open,
816 .release = pd_release, 816 .release = pd_release,
817 .ioctl = pd_ioctl, 817 .ioctl = pd_ioctl,
818 .media_changed = pd_check_media, 818 .media_changed = pd_check_media,
819 .revalidate_disk= pd_revalidate 819 .revalidate_disk= pd_revalidate
820 }; 820 };
821 821
822 /* probing */ 822 /* probing */
823 823
824 static void pd_probe_drive(struct pd_unit *disk) 824 static void pd_probe_drive(struct pd_unit *disk)
825 { 825 {
826 struct gendisk *p = alloc_disk(1 << PD_BITS); 826 struct gendisk *p = alloc_disk(1 << PD_BITS);
827 if (!p) 827 if (!p)
828 return; 828 return;
829 strcpy(p->disk_name, disk->name); 829 strcpy(p->disk_name, disk->name);
830 p->fops = &pd_fops; 830 p->fops = &pd_fops;
831 p->major = major; 831 p->major = major;
832 p->first_minor = (disk - pd) << PD_BITS; 832 p->first_minor = (disk - pd) << PD_BITS;
833 disk->gd = p; 833 disk->gd = p;
834 p->private_data = disk; 834 p->private_data = disk;
835 p->queue = pd_queue; 835 p->queue = pd_queue;
836 836
837 if (disk->drive == -1) { 837 if (disk->drive == -1) {
838 for (disk->drive = 0; disk->drive <= 1; disk->drive++) 838 for (disk->drive = 0; disk->drive <= 1; disk->drive++)
839 if (pd_special_command(disk, pd_identify) == 0) 839 if (pd_special_command(disk, pd_identify) == 0)
840 return; 840 return;
841 } else if (pd_special_command(disk, pd_identify) == 0) 841 } else if (pd_special_command(disk, pd_identify) == 0)
842 return; 842 return;
843 disk->gd = NULL; 843 disk->gd = NULL;
844 put_disk(p); 844 put_disk(p);
845 } 845 }
846 846
847 static int pd_detect(void) 847 static int pd_detect(void)
848 { 848 {
849 int found = 0, unit, pd_drive_count = 0; 849 int found = 0, unit, pd_drive_count = 0;
850 struct pd_unit *disk; 850 struct pd_unit *disk;
851 851
852 for (unit = 0; unit < PD_UNITS; unit++) { 852 for (unit = 0; unit < PD_UNITS; unit++) {
853 int *parm = *drives[unit]; 853 int *parm = *drives[unit];
854 struct pd_unit *disk = pd + unit; 854 struct pd_unit *disk = pd + unit;
855 disk->pi = &disk->pia; 855 disk->pi = &disk->pia;
856 disk->access = 0; 856 disk->access = 0;
857 disk->changed = 1; 857 disk->changed = 1;
858 disk->capacity = 0; 858 disk->capacity = 0;
859 disk->drive = parm[D_SLV]; 859 disk->drive = parm[D_SLV];
860 snprintf(disk->name, PD_NAMELEN, "%s%c", name, 'a'+unit); 860 snprintf(disk->name, PD_NAMELEN, "%s%c", name, 'a'+unit);
861 disk->alt_geom = parm[D_GEO]; 861 disk->alt_geom = parm[D_GEO];
862 disk->standby = parm[D_SBY]; 862 disk->standby = parm[D_SBY];
863 if (parm[D_PRT]) 863 if (parm[D_PRT])
864 pd_drive_count++; 864 pd_drive_count++;
865 } 865 }
866 866
867 if (pd_drive_count == 0) { /* nothing spec'd - so autoprobe for 1 */ 867 if (pd_drive_count == 0) { /* nothing spec'd - so autoprobe for 1 */
868 disk = pd; 868 disk = pd;
869 if (pi_init(disk->pi, 1, -1, -1, -1, -1, -1, pd_scratch, 869 if (pi_init(disk->pi, 1, -1, -1, -1, -1, -1, pd_scratch,
870 PI_PD, verbose, disk->name)) { 870 PI_PD, verbose, disk->name)) {
871 pd_probe_drive(disk); 871 pd_probe_drive(disk);
872 if (!disk->gd) 872 if (!disk->gd)
873 pi_release(disk->pi); 873 pi_release(disk->pi);
874 } 874 }
875 875
876 } else { 876 } else {
877 for (unit = 0, disk = pd; unit < PD_UNITS; unit++, disk++) { 877 for (unit = 0, disk = pd; unit < PD_UNITS; unit++, disk++) {
878 int *parm = *drives[unit]; 878 int *parm = *drives[unit];
879 if (!parm[D_PRT]) 879 if (!parm[D_PRT])
880 continue; 880 continue;
881 if (pi_init(disk->pi, 0, parm[D_PRT], parm[D_MOD], 881 if (pi_init(disk->pi, 0, parm[D_PRT], parm[D_MOD],
882 parm[D_UNI], parm[D_PRO], parm[D_DLY], 882 parm[D_UNI], parm[D_PRO], parm[D_DLY],
883 pd_scratch, PI_PD, verbose, disk->name)) { 883 pd_scratch, PI_PD, verbose, disk->name)) {
884 pd_probe_drive(disk); 884 pd_probe_drive(disk);
885 if (!disk->gd) 885 if (!disk->gd)
886 pi_release(disk->pi); 886 pi_release(disk->pi);
887 } 887 }
888 } 888 }
889 } 889 }
890 for (unit = 0, disk = pd; unit < PD_UNITS; unit++, disk++) { 890 for (unit = 0, disk = pd; unit < PD_UNITS; unit++, disk++) {
891 if (disk->gd) { 891 if (disk->gd) {
892 set_capacity(disk->gd, disk->capacity); 892 set_capacity(disk->gd, disk->capacity);
893 add_disk(disk->gd); 893 add_disk(disk->gd);
894 found = 1; 894 found = 1;
895 } 895 }
896 } 896 }
897 if (!found) 897 if (!found)
898 printk("%s: no valid drive found\n", name); 898 printk("%s: no valid drive found\n", name);
899 return found; 899 return found;
900 } 900 }
901 901
902 static int __init pd_init(void) 902 static int __init pd_init(void)
903 { 903 {
904 if (disable) 904 if (disable)
905 goto out1; 905 goto out1;
906 906
907 pd_queue = blk_init_queue(do_pd_request, &pd_lock); 907 pd_queue = blk_init_queue(do_pd_request, &pd_lock);
908 if (!pd_queue) 908 if (!pd_queue)
909 goto out1; 909 goto out1;
910 910
911 blk_queue_max_sectors(pd_queue, cluster); 911 blk_queue_max_sectors(pd_queue, cluster);
912 912
913 if (register_blkdev(major, name)) 913 if (register_blkdev(major, name))
914 goto out2; 914 goto out2;
915 915
916 printk("%s: %s version %s, major %d, cluster %d, nice %d\n", 916 printk("%s: %s version %s, major %d, cluster %d, nice %d\n",
917 name, name, PD_VERSION, major, cluster, nice); 917 name, name, PD_VERSION, major, cluster, nice);
918 if (!pd_detect()) 918 if (!pd_detect())
919 goto out3; 919 goto out3;
920 920
921 return 0; 921 return 0;
922 922
923 out3: 923 out3:
924 unregister_blkdev(major, name); 924 unregister_blkdev(major, name);
925 out2: 925 out2:
926 blk_cleanup_queue(pd_queue); 926 blk_cleanup_queue(pd_queue);
927 out1: 927 out1:
928 return -ENODEV; 928 return -ENODEV;
929 } 929 }
930 930
931 static void __exit pd_exit(void) 931 static void __exit pd_exit(void)
932 { 932 {
933 struct pd_unit *disk; 933 struct pd_unit *disk;
934 int unit; 934 int unit;
935 unregister_blkdev(major, name); 935 unregister_blkdev(major, name);
936 for (unit = 0, disk = pd; unit < PD_UNITS; unit++, disk++) { 936 for (unit = 0, disk = pd; unit < PD_UNITS; unit++, disk++) {
937 struct gendisk *p = disk->gd; 937 struct gendisk *p = disk->gd;
938 if (p) { 938 if (p) {
939 disk->gd = NULL; 939 disk->gd = NULL;
940 del_gendisk(p); 940 del_gendisk(p);
941 put_disk(p); 941 put_disk(p);
942 pi_release(disk->pi); 942 pi_release(disk->pi);
943 } 943 }
944 } 944 }
945 blk_cleanup_queue(pd_queue); 945 blk_cleanup_queue(pd_queue);
946 } 946 }
947 947
948 MODULE_LICENSE("GPL"); 948 MODULE_LICENSE("GPL");
949 module_init(pd_init) 949 module_init(pd_init)
950 module_exit(pd_exit) 950 module_exit(pd_exit)
951 951
drivers/block/sx8.c
1 /* 1 /*
2 * sx8.c: Driver for Promise SATA SX8 looks-like-I2O hardware 2 * sx8.c: Driver for Promise SATA SX8 looks-like-I2O hardware
3 * 3 *
4 * Copyright 2004 Red Hat, Inc. 4 * Copyright 2004 Red Hat, Inc.
5 * 5 *
6 * Author/maintainer: Jeff Garzik <jgarzik@pobox.com> 6 * Author/maintainer: Jeff Garzik <jgarzik@pobox.com>
7 * 7 *
8 * This file is subject to the terms and conditions of the GNU General Public 8 * This file is subject to the terms and conditions of the GNU General Public
9 * License. See the file "COPYING" in the main directory of this archive 9 * License. See the file "COPYING" in the main directory of this archive
10 * for more details. 10 * for more details.
11 */ 11 */
12 12
13 #include <linux/kernel.h> 13 #include <linux/kernel.h>
14 #include <linux/module.h> 14 #include <linux/module.h>
15 #include <linux/init.h> 15 #include <linux/init.h>
16 #include <linux/pci.h> 16 #include <linux/pci.h>
17 #include <linux/slab.h> 17 #include <linux/slab.h>
18 #include <linux/spinlock.h> 18 #include <linux/spinlock.h>
19 #include <linux/blkdev.h> 19 #include <linux/blkdev.h>
20 #include <linux/sched.h> 20 #include <linux/sched.h>
21 #include <linux/devfs_fs_kernel.h> 21 #include <linux/devfs_fs_kernel.h>
22 #include <linux/interrupt.h> 22 #include <linux/interrupt.h>
23 #include <linux/compiler.h> 23 #include <linux/compiler.h>
24 #include <linux/workqueue.h> 24 #include <linux/workqueue.h>
25 #include <linux/bitops.h> 25 #include <linux/bitops.h>
26 #include <linux/delay.h> 26 #include <linux/delay.h>
27 #include <linux/time.h> 27 #include <linux/time.h>
28 #include <linux/hdreg.h> 28 #include <linux/hdreg.h>
29 #include <asm/io.h> 29 #include <asm/io.h>
30 #include <asm/semaphore.h> 30 #include <asm/semaphore.h>
31 #include <asm/uaccess.h> 31 #include <asm/uaccess.h>
32 32
33 MODULE_AUTHOR("Jeff Garzik"); 33 MODULE_AUTHOR("Jeff Garzik");
34 MODULE_LICENSE("GPL"); 34 MODULE_LICENSE("GPL");
35 MODULE_DESCRIPTION("Promise SATA SX8 block driver"); 35 MODULE_DESCRIPTION("Promise SATA SX8 block driver");
36 36
37 #if 0 37 #if 0
38 #define CARM_DEBUG 38 #define CARM_DEBUG
39 #define CARM_VERBOSE_DEBUG 39 #define CARM_VERBOSE_DEBUG
40 #else 40 #else
41 #undef CARM_DEBUG 41 #undef CARM_DEBUG
42 #undef CARM_VERBOSE_DEBUG 42 #undef CARM_VERBOSE_DEBUG
43 #endif 43 #endif
44 #undef CARM_NDEBUG 44 #undef CARM_NDEBUG
45 45
46 #define DRV_NAME "sx8" 46 #define DRV_NAME "sx8"
47 #define DRV_VERSION "0.8" 47 #define DRV_VERSION "0.8"
48 #define PFX DRV_NAME ": " 48 #define PFX DRV_NAME ": "
49 49
50 #define NEXT_RESP(idx) ((idx + 1) % RMSG_Q_LEN) 50 #define NEXT_RESP(idx) ((idx + 1) % RMSG_Q_LEN)
51 51
52 /* 0xf is just arbitrary, non-zero noise; this is sorta like poisoning */ 52 /* 0xf is just arbitrary, non-zero noise; this is sorta like poisoning */
53 #define TAG_ENCODE(tag) (((tag) << 16) | 0xf) 53 #define TAG_ENCODE(tag) (((tag) << 16) | 0xf)
54 #define TAG_DECODE(tag) (((tag) >> 16) & 0x1f) 54 #define TAG_DECODE(tag) (((tag) >> 16) & 0x1f)
55 #define TAG_VALID(tag) ((((tag) & 0xf) == 0xf) && (TAG_DECODE(tag) < 32)) 55 #define TAG_VALID(tag) ((((tag) & 0xf) == 0xf) && (TAG_DECODE(tag) < 32))
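A quick worked example of the tag scheme above: TAG_ENCODE(5) gives (5 << 16) | 0xf = 0x5000f, TAG_DECODE(0x5000f) recovers 5, and TAG_VALID accepts it because the low nibble is 0xf and the decoded tag is below 32. The small standalone check below just restates that arithmetic with the same macros.

        /* Restates the tag arithmetic from the TAG_* macros above. */
        #include <assert.h>

        #define TAG_ENCODE(tag) (((tag) << 16) | 0xf)
        #define TAG_DECODE(tag) (((tag) >> 16) & 0x1f)
        #define TAG_VALID(tag)  ((((tag) & 0xf) == 0xf) && (TAG_DECODE(tag) < 32))

        int main(void)
        {
                unsigned int handle = TAG_ENCODE(5);    /* 0x0005000f */

                assert(handle == 0x5000f);
                assert(TAG_DECODE(handle) == 5);
                assert(TAG_VALID(handle));
                return 0;
        }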
56 56
57 /* note: prints function name for you */ 57 /* note: prints function name for you */
58 #ifdef CARM_DEBUG 58 #ifdef CARM_DEBUG
59 #define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args) 59 #define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args)
60 #ifdef CARM_VERBOSE_DEBUG 60 #ifdef CARM_VERBOSE_DEBUG
61 #define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args) 61 #define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args)
62 #else 62 #else
63 #define VPRINTK(fmt, args...) 63 #define VPRINTK(fmt, args...)
64 #endif /* CARM_VERBOSE_DEBUG */ 64 #endif /* CARM_VERBOSE_DEBUG */
65 #else 65 #else
66 #define DPRINTK(fmt, args...) 66 #define DPRINTK(fmt, args...)
67 #define VPRINTK(fmt, args...) 67 #define VPRINTK(fmt, args...)
68 #endif /* CARM_DEBUG */ 68 #endif /* CARM_DEBUG */
69 69
70 #ifdef CARM_NDEBUG 70 #ifdef CARM_NDEBUG
71 #define assert(expr) 71 #define assert(expr)
72 #else 72 #else
73 #define assert(expr) \ 73 #define assert(expr) \
74 if(unlikely(!(expr))) { \ 74 if(unlikely(!(expr))) { \
75 printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \ 75 printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
76 #expr,__FILE__,__FUNCTION__,__LINE__); \ 76 #expr,__FILE__,__FUNCTION__,__LINE__); \
77 } 77 }
78 #endif 78 #endif
79 79
80 /* defines only for the constants which don't work well as enums */ 80 /* defines only for the constants which don't work well as enums */
81 struct carm_host; 81 struct carm_host;
82 82
83 enum { 83 enum {
84 /* adapter-wide limits */ 84 /* adapter-wide limits */
85 CARM_MAX_PORTS = 8, 85 CARM_MAX_PORTS = 8,
86 CARM_SHM_SIZE = (4096 << 7), 86 CARM_SHM_SIZE = (4096 << 7),
87 CARM_MINORS_PER_MAJOR = 256 / CARM_MAX_PORTS, 87 CARM_MINORS_PER_MAJOR = 256 / CARM_MAX_PORTS,
88 CARM_MAX_WAIT_Q = CARM_MAX_PORTS + 1, 88 CARM_MAX_WAIT_Q = CARM_MAX_PORTS + 1,
89 89
90 /* command message queue limits */ 90 /* command message queue limits */
91 CARM_MAX_REQ = 64, /* max command msgs per host */ 91 CARM_MAX_REQ = 64, /* max command msgs per host */
92 CARM_MAX_Q = 1, /* one command at a time */ 92 CARM_MAX_Q = 1, /* one command at a time */
93 CARM_MSG_LOW_WATER = (CARM_MAX_REQ / 4), /* refill mark */ 93 CARM_MSG_LOW_WATER = (CARM_MAX_REQ / 4), /* refill mark */
94 94
95 /* S/G limits, host-wide and per-request */ 95 /* S/G limits, host-wide and per-request */
96 CARM_MAX_REQ_SG = 32, /* max s/g entries per request */ 96 CARM_MAX_REQ_SG = 32, /* max s/g entries per request */
97 CARM_SG_BOUNDARY = 0xffffUL, /* s/g segment boundary */ 97 CARM_SG_BOUNDARY = 0xffffUL, /* s/g segment boundary */
98 CARM_MAX_HOST_SG = 600, /* max s/g entries per host */ 98 CARM_MAX_HOST_SG = 600, /* max s/g entries per host */
99 CARM_SG_LOW_WATER = (CARM_MAX_HOST_SG / 4), /* re-fill mark */ 99 CARM_SG_LOW_WATER = (CARM_MAX_HOST_SG / 4), /* re-fill mark */
100 100
101 /* hardware registers */ 101 /* hardware registers */
102 CARM_IHQP = 0x1c, 102 CARM_IHQP = 0x1c,
103 CARM_INT_STAT = 0x10, /* interrupt status */ 103 CARM_INT_STAT = 0x10, /* interrupt status */
104 CARM_INT_MASK = 0x14, /* interrupt mask */ 104 CARM_INT_MASK = 0x14, /* interrupt mask */
105 CARM_HMUC = 0x18, /* host message unit control */ 105 CARM_HMUC = 0x18, /* host message unit control */
106 RBUF_ADDR_LO = 0x20, /* response msg DMA buf low 32 bits */ 106 RBUF_ADDR_LO = 0x20, /* response msg DMA buf low 32 bits */
107 RBUF_ADDR_HI = 0x24, /* response msg DMA buf high 32 bits */ 107 RBUF_ADDR_HI = 0x24, /* response msg DMA buf high 32 bits */
108 RBUF_BYTE_SZ = 0x28, 108 RBUF_BYTE_SZ = 0x28,
109 CARM_RESP_IDX = 0x2c, 109 CARM_RESP_IDX = 0x2c,
110 CARM_CMS0 = 0x30, /* command message size reg 0 */ 110 CARM_CMS0 = 0x30, /* command message size reg 0 */
111 CARM_LMUC = 0x48, 111 CARM_LMUC = 0x48,
112 CARM_HMPHA = 0x6c, 112 CARM_HMPHA = 0x6c,
113 CARM_INITC = 0xb5, 113 CARM_INITC = 0xb5,
114 114
115 /* bits in CARM_INT_{STAT,MASK} */ 115 /* bits in CARM_INT_{STAT,MASK} */
116 INT_RESERVED = 0xfffffff0, 116 INT_RESERVED = 0xfffffff0,
117 INT_WATCHDOG = (1 << 3), /* watchdog timer */ 117 INT_WATCHDOG = (1 << 3), /* watchdog timer */
118 INT_Q_OVERFLOW = (1 << 2), /* cmd msg q overflow */ 118 INT_Q_OVERFLOW = (1 << 2), /* cmd msg q overflow */
119 INT_Q_AVAILABLE = (1 << 1), /* cmd msg q has free space */ 119 INT_Q_AVAILABLE = (1 << 1), /* cmd msg q has free space */
120 INT_RESPONSE = (1 << 0), /* response msg available */ 120 INT_RESPONSE = (1 << 0), /* response msg available */
121 INT_ACK_MASK = INT_WATCHDOG | INT_Q_OVERFLOW, 121 INT_ACK_MASK = INT_WATCHDOG | INT_Q_OVERFLOW,
122 INT_DEF_MASK = INT_RESERVED | INT_Q_OVERFLOW | 122 INT_DEF_MASK = INT_RESERVED | INT_Q_OVERFLOW |
123 INT_RESPONSE, 123 INT_RESPONSE,
124 124
125 /* command messages, and related register bits */ 125 /* command messages, and related register bits */
126 CARM_HAVE_RESP = 0x01, 126 CARM_HAVE_RESP = 0x01,
127 CARM_MSG_READ = 1, 127 CARM_MSG_READ = 1,
128 CARM_MSG_WRITE = 2, 128 CARM_MSG_WRITE = 2,
129 CARM_MSG_VERIFY = 3, 129 CARM_MSG_VERIFY = 3,
130 CARM_MSG_GET_CAPACITY = 4, 130 CARM_MSG_GET_CAPACITY = 4,
131 CARM_MSG_FLUSH = 5, 131 CARM_MSG_FLUSH = 5,
132 CARM_MSG_IOCTL = 6, 132 CARM_MSG_IOCTL = 6,
133 CARM_MSG_ARRAY = 8, 133 CARM_MSG_ARRAY = 8,
134 CARM_MSG_MISC = 9, 134 CARM_MSG_MISC = 9,
135 CARM_CME = (1 << 2), 135 CARM_CME = (1 << 2),
136 CARM_RME = (1 << 1), 136 CARM_RME = (1 << 1),
137 CARM_WZBC = (1 << 0), 137 CARM_WZBC = (1 << 0),
138 CARM_RMI = (1 << 0), 138 CARM_RMI = (1 << 0),
139 CARM_Q_FULL = (1 << 3), 139 CARM_Q_FULL = (1 << 3),
140 CARM_MSG_SIZE = 288, 140 CARM_MSG_SIZE = 288,
141 CARM_Q_LEN = 48, 141 CARM_Q_LEN = 48,
142 142
143 /* CARM_MSG_IOCTL messages */ 143 /* CARM_MSG_IOCTL messages */
144 CARM_IOC_SCAN_CHAN = 5, /* scan channels for devices */ 144 CARM_IOC_SCAN_CHAN = 5, /* scan channels for devices */
145 CARM_IOC_GET_TCQ = 13, /* get tcq/ncq depth */ 145 CARM_IOC_GET_TCQ = 13, /* get tcq/ncq depth */
146 CARM_IOC_SET_TCQ = 14, /* set tcq/ncq depth */ 146 CARM_IOC_SET_TCQ = 14, /* set tcq/ncq depth */
147 147
148 IOC_SCAN_CHAN_NODEV = 0x1f, 148 IOC_SCAN_CHAN_NODEV = 0x1f,
149 IOC_SCAN_CHAN_OFFSET = 0x40, 149 IOC_SCAN_CHAN_OFFSET = 0x40,
150 150
151 /* CARM_MSG_ARRAY messages */ 151 /* CARM_MSG_ARRAY messages */
152 CARM_ARRAY_INFO = 0, 152 CARM_ARRAY_INFO = 0,
153 153
154 ARRAY_NO_EXIST = (1 << 31), 154 ARRAY_NO_EXIST = (1 << 31),
155 155
156 /* response messages */ 156 /* response messages */
157 RMSG_SZ = 8, /* sizeof(struct carm_response) */ 157 RMSG_SZ = 8, /* sizeof(struct carm_response) */
158 RMSG_Q_LEN = 48, /* resp. msg list length */ 158 RMSG_Q_LEN = 48, /* resp. msg list length */
159 RMSG_OK = 1, /* bit indicating msg was successful */ 159 RMSG_OK = 1, /* bit indicating msg was successful */
160 /* length of entire resp. msg buffer */ 160 /* length of entire resp. msg buffer */
161 RBUF_LEN = RMSG_SZ * RMSG_Q_LEN, 161 RBUF_LEN = RMSG_SZ * RMSG_Q_LEN,
162 162
163 PDC_SHM_SIZE = (4096 << 7), /* length of entire h/w buffer */ 163 PDC_SHM_SIZE = (4096 << 7), /* length of entire h/w buffer */
164 164
165 /* CARM_MSG_MISC messages */ 165 /* CARM_MSG_MISC messages */
166 MISC_GET_FW_VER = 2, 166 MISC_GET_FW_VER = 2,
167 MISC_ALLOC_MEM = 3, 167 MISC_ALLOC_MEM = 3,
168 MISC_SET_TIME = 5, 168 MISC_SET_TIME = 5,
169 169
170 /* MISC_GET_FW_VER feature bits */ 170 /* MISC_GET_FW_VER feature bits */
171 FW_VER_4PORT = (1 << 2), /* 1=4 ports, 0=8 ports */ 171 FW_VER_4PORT = (1 << 2), /* 1=4 ports, 0=8 ports */
172 FW_VER_NON_RAID = (1 << 1), /* 1=non-RAID firmware, 0=RAID */ 172 FW_VER_NON_RAID = (1 << 1), /* 1=non-RAID firmware, 0=RAID */
173 FW_VER_ZCR = (1 << 0), /* zero channel RAID (whatever that is) */ 173 FW_VER_ZCR = (1 << 0), /* zero channel RAID (whatever that is) */
174 174
175 /* carm_host flags */ 175 /* carm_host flags */
176 FL_NON_RAID = FW_VER_NON_RAID, 176 FL_NON_RAID = FW_VER_NON_RAID,
177 FL_4PORT = FW_VER_4PORT, 177 FL_4PORT = FW_VER_4PORT,
178 FL_FW_VER_MASK = (FW_VER_NON_RAID | FW_VER_4PORT), 178 FL_FW_VER_MASK = (FW_VER_NON_RAID | FW_VER_4PORT),
179 FL_DAC = (1 << 16), 179 FL_DAC = (1 << 16),
180 FL_DYN_MAJOR = (1 << 17), 180 FL_DYN_MAJOR = (1 << 17),
181 }; 181 };
182 182
183 enum scatter_gather_types { 183 enum scatter_gather_types {
184 SGT_32BIT = 0, 184 SGT_32BIT = 0,
185 SGT_64BIT = 1, 185 SGT_64BIT = 1,
186 }; 186 };
187 187
188 enum host_states { 188 enum host_states {
189 HST_INVALID, /* invalid state; never used */ 189 HST_INVALID, /* invalid state; never used */
190 HST_ALLOC_BUF, /* setting up master SHM area */ 190 HST_ALLOC_BUF, /* setting up master SHM area */
191 HST_ERROR, /* we never leave here */ 191 HST_ERROR, /* we never leave here */
192 HST_PORT_SCAN, /* start dev scan */ 192 HST_PORT_SCAN, /* start dev scan */
193 HST_DEV_SCAN_START, /* start per-device probe */ 193 HST_DEV_SCAN_START, /* start per-device probe */
194 HST_DEV_SCAN, /* continue per-device probe */ 194 HST_DEV_SCAN, /* continue per-device probe */
195 HST_DEV_ACTIVATE, /* activate devices we found */ 195 HST_DEV_ACTIVATE, /* activate devices we found */
196 HST_PROBE_FINISHED, /* probe is complete */ 196 HST_PROBE_FINISHED, /* probe is complete */
197 HST_PROBE_START, /* initiate probe */ 197 HST_PROBE_START, /* initiate probe */
198 HST_SYNC_TIME, /* tell firmware what time it is */ 198 HST_SYNC_TIME, /* tell firmware what time it is */
199 HST_GET_FW_VER, /* get firmware version, adapter port cnt */ 199 HST_GET_FW_VER, /* get firmware version, adapter port cnt */
200 }; 200 };
201 201
202 #ifdef CARM_DEBUG 202 #ifdef CARM_DEBUG
203 static const char *state_name[] = { 203 static const char *state_name[] = {
204 "HST_INVALID", 204 "HST_INVALID",
205 "HST_ALLOC_BUF", 205 "HST_ALLOC_BUF",
206 "HST_ERROR", 206 "HST_ERROR",
207 "HST_PORT_SCAN", 207 "HST_PORT_SCAN",
208 "HST_DEV_SCAN_START", 208 "HST_DEV_SCAN_START",
209 "HST_DEV_SCAN", 209 "HST_DEV_SCAN",
210 "HST_DEV_ACTIVATE", 210 "HST_DEV_ACTIVATE",
211 "HST_PROBE_FINISHED", 211 "HST_PROBE_FINISHED",
212 "HST_PROBE_START", 212 "HST_PROBE_START",
213 "HST_SYNC_TIME", 213 "HST_SYNC_TIME",
214 "HST_GET_FW_VER", 214 "HST_GET_FW_VER",
215 }; 215 };
216 #endif 216 #endif
217 217
218 struct carm_port { 218 struct carm_port {
219 unsigned int port_no; 219 unsigned int port_no;
220 unsigned int n_queued; 220 unsigned int n_queued;
221 struct gendisk *disk; 221 struct gendisk *disk;
222 struct carm_host *host; 222 struct carm_host *host;
223 223
224 /* attached device characteristics */ 224 /* attached device characteristics */
225 u64 capacity; 225 u64 capacity;
226 char name[41]; 226 char name[41];
227 u16 dev_geom_head; 227 u16 dev_geom_head;
228 u16 dev_geom_sect; 228 u16 dev_geom_sect;
229 u16 dev_geom_cyl; 229 u16 dev_geom_cyl;
230 }; 230 };
231 231
232 struct carm_request { 232 struct carm_request {
233 unsigned int tag; 233 unsigned int tag;
234 int n_elem; 234 int n_elem;
235 unsigned int msg_type; 235 unsigned int msg_type;
236 unsigned int msg_subtype; 236 unsigned int msg_subtype;
237 unsigned int msg_bucket; 237 unsigned int msg_bucket;
238 struct request *rq; 238 struct request *rq;
239 struct carm_port *port; 239 struct carm_port *port;
240 struct scatterlist sg[CARM_MAX_REQ_SG]; 240 struct scatterlist sg[CARM_MAX_REQ_SG];
241 }; 241 };
242 242
243 struct carm_host { 243 struct carm_host {
244 unsigned long flags; 244 unsigned long flags;
245 void __iomem *mmio; 245 void __iomem *mmio;
246 void *shm; 246 void *shm;
247 dma_addr_t shm_dma; 247 dma_addr_t shm_dma;
248 248
249 int major; 249 int major;
250 int id; 250 int id;
251 char name[32]; 251 char name[32];
252 252
253 spinlock_t lock; 253 spinlock_t lock;
254 struct pci_dev *pdev; 254 struct pci_dev *pdev;
255 unsigned int state; 255 unsigned int state;
256 u32 fw_ver; 256 u32 fw_ver;
257 257
258 request_queue_t *oob_q; 258 request_queue_t *oob_q;
259 unsigned int n_oob; 259 unsigned int n_oob;
260 260
261 unsigned int hw_sg_used; 261 unsigned int hw_sg_used;
262 262
263 unsigned int resp_idx; 263 unsigned int resp_idx;
264 264
265 unsigned int wait_q_prod; 265 unsigned int wait_q_prod;
266 unsigned int wait_q_cons; 266 unsigned int wait_q_cons;
267 request_queue_t *wait_q[CARM_MAX_WAIT_Q]; 267 request_queue_t *wait_q[CARM_MAX_WAIT_Q];
268 268
269 unsigned int n_msgs; 269 unsigned int n_msgs;
270 u64 msg_alloc; 270 u64 msg_alloc;
271 struct carm_request req[CARM_MAX_REQ]; 271 struct carm_request req[CARM_MAX_REQ];
272 void *msg_base; 272 void *msg_base;
273 dma_addr_t msg_dma; 273 dma_addr_t msg_dma;
274 274
275 int cur_scan_dev; 275 int cur_scan_dev;
276 unsigned long dev_active; 276 unsigned long dev_active;
277 unsigned long dev_present; 277 unsigned long dev_present;
278 struct carm_port port[CARM_MAX_PORTS]; 278 struct carm_port port[CARM_MAX_PORTS];
279 279
280 struct work_struct fsm_task; 280 struct work_struct fsm_task;
281 281
282 struct semaphore probe_sem; 282 struct semaphore probe_sem;
283 }; 283 };
284 284
285 struct carm_response { 285 struct carm_response {
286 __le32 ret_handle; 286 __le32 ret_handle;
287 __le32 status; 287 __le32 status;
288 } __attribute__((packed)); 288 } __attribute__((packed));
289 289
290 struct carm_msg_sg { 290 struct carm_msg_sg {
291 __le32 start; 291 __le32 start;
292 __le32 len; 292 __le32 len;
293 } __attribute__((packed)); 293 } __attribute__((packed));
294 294
295 struct carm_msg_rw { 295 struct carm_msg_rw {
296 u8 type; 296 u8 type;
297 u8 id; 297 u8 id;
298 u8 sg_count; 298 u8 sg_count;
299 u8 sg_type; 299 u8 sg_type;
300 __le32 handle; 300 __le32 handle;
301 __le32 lba; 301 __le32 lba;
302 __le16 lba_count; 302 __le16 lba_count;
303 __le16 lba_high; 303 __le16 lba_high;
304 struct carm_msg_sg sg[32]; 304 struct carm_msg_sg sg[32];
305 } __attribute__((packed)); 305 } __attribute__((packed));
306 306
307 struct carm_msg_allocbuf { 307 struct carm_msg_allocbuf {
308 u8 type; 308 u8 type;
309 u8 subtype; 309 u8 subtype;
310 u8 n_sg; 310 u8 n_sg;
311 u8 sg_type; 311 u8 sg_type;
312 __le32 handle; 312 __le32 handle;
313 __le32 addr; 313 __le32 addr;
314 __le32 len; 314 __le32 len;
315 __le32 evt_pool; 315 __le32 evt_pool;
316 __le32 n_evt; 316 __le32 n_evt;
317 __le32 rbuf_pool; 317 __le32 rbuf_pool;
318 __le32 n_rbuf; 318 __le32 n_rbuf;
319 __le32 msg_pool; 319 __le32 msg_pool;
320 __le32 n_msg; 320 __le32 n_msg;
321 struct carm_msg_sg sg[8]; 321 struct carm_msg_sg sg[8];
322 } __attribute__((packed)); 322 } __attribute__((packed));
323 323
324 struct carm_msg_ioctl { 324 struct carm_msg_ioctl {
325 u8 type; 325 u8 type;
326 u8 subtype; 326 u8 subtype;
327 u8 array_id; 327 u8 array_id;
328 u8 reserved1; 328 u8 reserved1;
329 __le32 handle; 329 __le32 handle;
330 __le32 data_addr; 330 __le32 data_addr;
331 u32 reserved2; 331 u32 reserved2;
332 } __attribute__((packed)); 332 } __attribute__((packed));
333 333
334 struct carm_msg_sync_time { 334 struct carm_msg_sync_time {
335 u8 type; 335 u8 type;
336 u8 subtype; 336 u8 subtype;
337 u16 reserved1; 337 u16 reserved1;
338 __le32 handle; 338 __le32 handle;
339 u32 reserved2; 339 u32 reserved2;
340 __le32 timestamp; 340 __le32 timestamp;
341 } __attribute__((packed)); 341 } __attribute__((packed));
342 342
343 struct carm_msg_get_fw_ver { 343 struct carm_msg_get_fw_ver {
344 u8 type; 344 u8 type;
345 u8 subtype; 345 u8 subtype;
346 u16 reserved1; 346 u16 reserved1;
347 __le32 handle; 347 __le32 handle;
348 __le32 data_addr; 348 __le32 data_addr;
349 u32 reserved2; 349 u32 reserved2;
350 } __attribute__((packed)); 350 } __attribute__((packed));
351 351
352 struct carm_fw_ver { 352 struct carm_fw_ver {
353 __le32 version; 353 __le32 version;
354 u8 features; 354 u8 features;
355 u8 reserved1; 355 u8 reserved1;
356 u16 reserved2; 356 u16 reserved2;
357 } __attribute__((packed)); 357 } __attribute__((packed));
358 358
359 struct carm_array_info { 359 struct carm_array_info {
360 __le32 size; 360 __le32 size;
361 361
362 __le16 size_hi; 362 __le16 size_hi;
363 __le16 stripe_size; 363 __le16 stripe_size;
364 364
365 __le32 mode; 365 __le32 mode;
366 366
367 __le16 stripe_blk_sz; 367 __le16 stripe_blk_sz;
368 __le16 reserved1; 368 __le16 reserved1;
369 369
370 __le16 cyl; 370 __le16 cyl;
371 __le16 head; 371 __le16 head;
372 372
373 __le16 sect; 373 __le16 sect;
374 u8 array_id; 374 u8 array_id;
375 u8 reserved2; 375 u8 reserved2;
376 376
377 char name[40]; 377 char name[40];
378 378
379 __le32 array_status; 379 __le32 array_status;
380 380
381 /* device list continues beyond this point? */ 381 /* device list continues beyond this point? */
382 } __attribute__((packed)); 382 } __attribute__((packed));
383 383
384 static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent); 384 static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
385 static void carm_remove_one (struct pci_dev *pdev); 385 static void carm_remove_one (struct pci_dev *pdev);
386 static int carm_bdev_ioctl(struct inode *ino, struct file *fil, 386 static int carm_bdev_ioctl(struct inode *ino, struct file *fil,
387 unsigned int cmd, unsigned long arg); 387 unsigned int cmd, unsigned long arg);
388 388
389 static struct pci_device_id carm_pci_tbl[] = { 389 static struct pci_device_id carm_pci_tbl[] = {
390 { PCI_VENDOR_ID_PROMISE, 0x8000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, }, 390 { PCI_VENDOR_ID_PROMISE, 0x8000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, },
391 { PCI_VENDOR_ID_PROMISE, 0x8002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, }, 391 { PCI_VENDOR_ID_PROMISE, 0x8002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, },
392 { } /* terminate list */ 392 { } /* terminate list */
393 }; 393 };
394 MODULE_DEVICE_TABLE(pci, carm_pci_tbl); 394 MODULE_DEVICE_TABLE(pci, carm_pci_tbl);
395 395
396 static struct pci_driver carm_driver = { 396 static struct pci_driver carm_driver = {
397 .name = DRV_NAME, 397 .name = DRV_NAME,
398 .id_table = carm_pci_tbl, 398 .id_table = carm_pci_tbl,
399 .probe = carm_init_one, 399 .probe = carm_init_one,
400 .remove = carm_remove_one, 400 .remove = carm_remove_one,
401 }; 401 };
402 402
403 static struct block_device_operations carm_bd_ops = { 403 static struct block_device_operations carm_bd_ops = {
404 .owner = THIS_MODULE, 404 .owner = THIS_MODULE,
405 .ioctl = carm_bdev_ioctl, 405 .ioctl = carm_bdev_ioctl,
406 }; 406 };
407 407
408 static unsigned int carm_host_id; 408 static unsigned int carm_host_id;
409 static unsigned long carm_major_alloc; 409 static unsigned long carm_major_alloc;
410 410
411 411
412 412
413 static int carm_bdev_ioctl(struct inode *ino, struct file *fil, 413 static int carm_bdev_ioctl(struct inode *ino, struct file *fil,
414 unsigned int cmd, unsigned long arg) 414 unsigned int cmd, unsigned long arg)
415 { 415 {
416 void __user *usermem = (void __user *) arg; 416 void __user *usermem = (void __user *) arg;
417 struct carm_port *port = ino->i_bdev->bd_disk->private_data; 417 struct carm_port *port = ino->i_bdev->bd_disk->private_data;
418 struct hd_geometry geom; 418 struct hd_geometry geom;
419 419
420 switch (cmd) { 420 switch (cmd) {
421 case HDIO_GETGEO: 421 case HDIO_GETGEO:
422 if (!usermem) 422 if (!usermem)
423 return -EINVAL; 423 return -EINVAL;
424 424
425 geom.heads = (u8) port->dev_geom_head; 425 geom.heads = (u8) port->dev_geom_head;
426 geom.sectors = (u8) port->dev_geom_sect; 426 geom.sectors = (u8) port->dev_geom_sect;
427 geom.cylinders = port->dev_geom_cyl; 427 geom.cylinders = port->dev_geom_cyl;
428 geom.start = get_start_sect(ino->i_bdev); 428 geom.start = get_start_sect(ino->i_bdev);
429 429
430 if (copy_to_user(usermem, &geom, sizeof(geom))) 430 if (copy_to_user(usermem, &geom, sizeof(geom)))
431 return -EFAULT; 431 return -EFAULT;
432 return 0; 432 return 0;
433 433
434 default: 434 default:
435 break; 435 break;
436 } 436 }
437 437
438 return -EOPNOTSUPP; 438 return -EOPNOTSUPP;
439 } 439 }
440 440
441 static const u32 msg_sizes[] = { 32, 64, 128, CARM_MSG_SIZE }; 441 static const u32 msg_sizes[] = { 32, 64, 128, CARM_MSG_SIZE };
442 442
443 static inline int carm_lookup_bucket(u32 msg_size) 443 static inline int carm_lookup_bucket(u32 msg_size)
444 { 444 {
445 int i; 445 int i;
446 446
447 for (i = 0; i < ARRAY_SIZE(msg_sizes); i++) 447 for (i = 0; i < ARRAY_SIZE(msg_sizes); i++)
448 if (msg_size <= msg_sizes[i]) 448 if (msg_size <= msg_sizes[i])
449 return i; 449 return i;
450 450
451 return -ENOENT; 451 return -ENOENT;
452 } 452 }
453 453
454 static void carm_init_buckets(void __iomem *mmio) 454 static void carm_init_buckets(void __iomem *mmio)
455 { 455 {
456 unsigned int i; 456 unsigned int i;
457 457
458 for (i = 0; i < ARRAY_SIZE(msg_sizes); i++) 458 for (i = 0; i < ARRAY_SIZE(msg_sizes); i++)
459 writel(msg_sizes[i], mmio + CARM_CMS0 + (4 * i)); 459 writel(msg_sizes[i], mmio + CARM_CMS0 + (4 * i));
460 } 460 }
461 461
462 static inline void *carm_ref_msg(struct carm_host *host, 462 static inline void *carm_ref_msg(struct carm_host *host,
463 unsigned int msg_idx) 463 unsigned int msg_idx)
464 { 464 {
465 return host->msg_base + (msg_idx * CARM_MSG_SIZE); 465 return host->msg_base + (msg_idx * CARM_MSG_SIZE);
466 } 466 }
467 467
468 static inline dma_addr_t carm_ref_msg_dma(struct carm_host *host, 468 static inline dma_addr_t carm_ref_msg_dma(struct carm_host *host,
469 unsigned int msg_idx) 469 unsigned int msg_idx)
470 { 470 {
471 return host->msg_dma + (msg_idx * CARM_MSG_SIZE); 471 return host->msg_dma + (msg_idx * CARM_MSG_SIZE);
472 } 472 }
473 473
474 static int carm_send_msg(struct carm_host *host, 474 static int carm_send_msg(struct carm_host *host,
475 struct carm_request *crq) 475 struct carm_request *crq)
476 { 476 {
477 void __iomem *mmio = host->mmio; 477 void __iomem *mmio = host->mmio;
478 u32 msg = (u32) carm_ref_msg_dma(host, crq->tag); 478 u32 msg = (u32) carm_ref_msg_dma(host, crq->tag);
479 u32 cm_bucket = crq->msg_bucket; 479 u32 cm_bucket = crq->msg_bucket;
480 u32 tmp; 480 u32 tmp;
481 int rc = 0; 481 int rc = 0;
482 482
483 VPRINTK("ENTER\n"); 483 VPRINTK("ENTER\n");
484 484
485 tmp = readl(mmio + CARM_HMUC); 485 tmp = readl(mmio + CARM_HMUC);
486 if (tmp & CARM_Q_FULL) { 486 if (tmp & CARM_Q_FULL) {
487 #if 0 487 #if 0
488 tmp = readl(mmio + CARM_INT_MASK); 488 tmp = readl(mmio + CARM_INT_MASK);
489 tmp |= INT_Q_AVAILABLE; 489 tmp |= INT_Q_AVAILABLE;
490 writel(tmp, mmio + CARM_INT_MASK); 490 writel(tmp, mmio + CARM_INT_MASK);
491 readl(mmio + CARM_INT_MASK); /* flush */ 491 readl(mmio + CARM_INT_MASK); /* flush */
492 #endif 492 #endif
493 DPRINTK("host msg queue full\n"); 493 DPRINTK("host msg queue full\n");
494 rc = -EBUSY; 494 rc = -EBUSY;
495 } else { 495 } else {
496 writel(msg | (cm_bucket << 1), mmio + CARM_IHQP); 496 writel(msg | (cm_bucket << 1), mmio + CARM_IHQP);
497 readl(mmio + CARM_IHQP); /* flush */ 497 readl(mmio + CARM_IHQP); /* flush */
498 } 498 }
499 499
500 return rc; 500 return rc;
501 } 501 }
502 502
503 static struct carm_request *carm_get_request(struct carm_host *host) 503 static struct carm_request *carm_get_request(struct carm_host *host)
504 { 504 {
505 unsigned int i; 505 unsigned int i;
506 506
507 /* obey global hardware limit on S/G entries */ 507 /* obey global hardware limit on S/G entries */
508 if (host->hw_sg_used >= (CARM_MAX_HOST_SG - CARM_MAX_REQ_SG)) 508 if (host->hw_sg_used >= (CARM_MAX_HOST_SG - CARM_MAX_REQ_SG))
509 return NULL; 509 return NULL;
510 510
511 for (i = 0; i < CARM_MAX_Q; i++) 511 for (i = 0; i < CARM_MAX_Q; i++)
512 if ((host->msg_alloc & (1ULL << i)) == 0) { 512 if ((host->msg_alloc & (1ULL << i)) == 0) {
513 struct carm_request *crq = &host->req[i]; 513 struct carm_request *crq = &host->req[i];
514 crq->port = NULL; 514 crq->port = NULL;
515 crq->n_elem = 0; 515 crq->n_elem = 0;
516 516
517 host->msg_alloc |= (1ULL << i); 517 host->msg_alloc |= (1ULL << i);
518 host->n_msgs++; 518 host->n_msgs++;
519 519
520 assert(host->n_msgs <= CARM_MAX_REQ); 520 assert(host->n_msgs <= CARM_MAX_REQ);
521 return crq; 521 return crq;
522 } 522 }
523 523
524 DPRINTK("no request available, returning NULL\n"); 524 DPRINTK("no request available, returning NULL\n");
525 return NULL; 525 return NULL;
526 } 526 }
527 527
528 static int carm_put_request(struct carm_host *host, struct carm_request *crq) 528 static int carm_put_request(struct carm_host *host, struct carm_request *crq)
529 { 529 {
530 assert(crq->tag < CARM_MAX_Q); 530 assert(crq->tag < CARM_MAX_Q);
531 531
532 if (unlikely((host->msg_alloc & (1ULL << crq->tag)) == 0)) 532 if (unlikely((host->msg_alloc & (1ULL << crq->tag)) == 0))
533 return -EINVAL; /* tried to clear a tag that was not active */ 533 return -EINVAL; /* tried to clear a tag that was not active */
534 534
535 assert(host->hw_sg_used >= crq->n_elem); 535 assert(host->hw_sg_used >= crq->n_elem);
536 536
537 host->msg_alloc &= ~(1ULL << crq->tag); 537 host->msg_alloc &= ~(1ULL << crq->tag);
538 host->hw_sg_used -= crq->n_elem; 538 host->hw_sg_used -= crq->n_elem;
539 host->n_msgs--; 539 host->n_msgs--;
540 540
541 return 0; 541 return 0;
542 } 542 }
543 543
544 static struct carm_request *carm_get_special(struct carm_host *host) 544 static struct carm_request *carm_get_special(struct carm_host *host)
545 { 545 {
546 unsigned long flags; 546 unsigned long flags;
547 struct carm_request *crq = NULL; 547 struct carm_request *crq = NULL;
548 struct request *rq; 548 struct request *rq;
549 int tries = 5000; 549 int tries = 5000;
550 550
551 while (tries-- > 0) { 551 while (tries-- > 0) {
552 spin_lock_irqsave(&host->lock, flags); 552 spin_lock_irqsave(&host->lock, flags);
553 crq = carm_get_request(host); 553 crq = carm_get_request(host);
554 spin_unlock_irqrestore(&host->lock, flags); 554 spin_unlock_irqrestore(&host->lock, flags);
555 555
556 if (crq) 556 if (crq)
557 break; 557 break;
558 msleep(10); 558 msleep(10);
559 } 559 }
560 560
561 if (!crq) 561 if (!crq)
562 return NULL; 562 return NULL;
563 563
564 rq = blk_get_request(host->oob_q, WRITE /* bogus */, GFP_KERNEL); 564 rq = blk_get_request(host->oob_q, WRITE /* bogus */, GFP_KERNEL);
565 if (!rq) { 565 if (!rq) {
566 spin_lock_irqsave(&host->lock, flags); 566 spin_lock_irqsave(&host->lock, flags);
567 carm_put_request(host, crq); 567 carm_put_request(host, crq);
568 spin_unlock_irqrestore(&host->lock, flags); 568 spin_unlock_irqrestore(&host->lock, flags);
569 return NULL; 569 return NULL;
570 } 570 }
571 571
572 crq->rq = rq; 572 crq->rq = rq;
573 return crq; 573 return crq;
574 } 574 }
575 575
576 static int carm_array_info (struct carm_host *host, unsigned int array_idx) 576 static int carm_array_info (struct carm_host *host, unsigned int array_idx)
577 { 577 {
578 struct carm_msg_ioctl *ioc; 578 struct carm_msg_ioctl *ioc;
579 unsigned int idx; 579 unsigned int idx;
580 u32 msg_data; 580 u32 msg_data;
581 dma_addr_t msg_dma; 581 dma_addr_t msg_dma;
582 struct carm_request *crq; 582 struct carm_request *crq;
583 int rc; 583 int rc;
584 584
585 crq = carm_get_special(host); 585 crq = carm_get_special(host);
586 if (!crq) { 586 if (!crq) {
587 rc = -ENOMEM; 587 rc = -ENOMEM;
588 goto err_out; 588 goto err_out;
589 } 589 }
590 590
591 idx = crq->tag; 591 idx = crq->tag;
592 592
593 ioc = carm_ref_msg(host, idx); 593 ioc = carm_ref_msg(host, idx);
594 msg_dma = carm_ref_msg_dma(host, idx); 594 msg_dma = carm_ref_msg_dma(host, idx);
595 msg_data = (u32) (msg_dma + sizeof(struct carm_array_info)); 595 msg_data = (u32) (msg_dma + sizeof(struct carm_array_info));
596 596
597 crq->msg_type = CARM_MSG_ARRAY; 597 crq->msg_type = CARM_MSG_ARRAY;
598 crq->msg_subtype = CARM_ARRAY_INFO; 598 crq->msg_subtype = CARM_ARRAY_INFO;
599 rc = carm_lookup_bucket(sizeof(struct carm_msg_ioctl) + 599 rc = carm_lookup_bucket(sizeof(struct carm_msg_ioctl) +
600 sizeof(struct carm_array_info)); 600 sizeof(struct carm_array_info));
601 BUG_ON(rc < 0); 601 BUG_ON(rc < 0);
602 crq->msg_bucket = (u32) rc; 602 crq->msg_bucket = (u32) rc;
603 603
604 memset(ioc, 0, sizeof(*ioc)); 604 memset(ioc, 0, sizeof(*ioc));
605 ioc->type = CARM_MSG_ARRAY; 605 ioc->type = CARM_MSG_ARRAY;
606 ioc->subtype = CARM_ARRAY_INFO; 606 ioc->subtype = CARM_ARRAY_INFO;
607 ioc->array_id = (u8) array_idx; 607 ioc->array_id = (u8) array_idx;
608 ioc->handle = cpu_to_le32(TAG_ENCODE(idx)); 608 ioc->handle = cpu_to_le32(TAG_ENCODE(idx));
609 ioc->data_addr = cpu_to_le32(msg_data); 609 ioc->data_addr = cpu_to_le32(msg_data);
610 610
611 spin_lock_irq(&host->lock); 611 spin_lock_irq(&host->lock);
612 assert(host->state == HST_DEV_SCAN_START || 612 assert(host->state == HST_DEV_SCAN_START ||
613 host->state == HST_DEV_SCAN); 613 host->state == HST_DEV_SCAN);
614 spin_unlock_irq(&host->lock); 614 spin_unlock_irq(&host->lock);
615 615
616 DPRINTK("blk_insert_request, tag == %u\n", idx); 616 DPRINTK("blk_insert_request, tag == %u\n", idx);
617 blk_insert_request(host->oob_q, crq->rq, 1, crq, 0); 617 blk_insert_request(host->oob_q, crq->rq, 1, crq);
618 618
619 return 0; 619 return 0;
620 620
621 err_out: 621 err_out:
622 spin_lock_irq(&host->lock); 622 spin_lock_irq(&host->lock);
623 host->state = HST_ERROR; 623 host->state = HST_ERROR;
624 spin_unlock_irq(&host->lock); 624 spin_unlock_irq(&host->lock);
625 return rc; 625 return rc;
626 } 626 }
627 627
628 typedef unsigned int (*carm_sspc_t)(struct carm_host *, unsigned int, void *); 628 typedef unsigned int (*carm_sspc_t)(struct carm_host *, unsigned int, void *);
629 629
630 static int carm_send_special (struct carm_host *host, carm_sspc_t func) 630 static int carm_send_special (struct carm_host *host, carm_sspc_t func)
631 { 631 {
632 struct carm_request *crq; 632 struct carm_request *crq;
633 struct carm_msg_ioctl *ioc; 633 struct carm_msg_ioctl *ioc;
634 void *mem; 634 void *mem;
635 unsigned int idx, msg_size; 635 unsigned int idx, msg_size;
636 int rc; 636 int rc;
637 637
638 crq = carm_get_special(host); 638 crq = carm_get_special(host);
639 if (!crq) 639 if (!crq)
640 return -ENOMEM; 640 return -ENOMEM;
641 641
642 idx = crq->tag; 642 idx = crq->tag;
643 643
644 mem = carm_ref_msg(host, idx); 644 mem = carm_ref_msg(host, idx);
645 645
646 msg_size = func(host, idx, mem); 646 msg_size = func(host, idx, mem);
647 647
648 ioc = mem; 648 ioc = mem;
649 crq->msg_type = ioc->type; 649 crq->msg_type = ioc->type;
650 crq->msg_subtype = ioc->subtype; 650 crq->msg_subtype = ioc->subtype;
651 rc = carm_lookup_bucket(msg_size); 651 rc = carm_lookup_bucket(msg_size);
652 BUG_ON(rc < 0); 652 BUG_ON(rc < 0);
653 crq->msg_bucket = (u32) rc; 653 crq->msg_bucket = (u32) rc;
654 654
655 DPRINTK("blk_insert_request, tag == %u\n", idx); 655 DPRINTK("blk_insert_request, tag == %u\n", idx);
656 blk_insert_request(host->oob_q, crq->rq, 1, crq, 0); 656 blk_insert_request(host->oob_q, crq->rq, 1, crq);
657 657
658 return 0; 658 return 0;
659 } 659 }
660 660
661 static unsigned int carm_fill_sync_time(struct carm_host *host, 661 static unsigned int carm_fill_sync_time(struct carm_host *host,
662 unsigned int idx, void *mem) 662 unsigned int idx, void *mem)
663 { 663 {
664 struct timeval tv; 664 struct timeval tv;
665 struct carm_msg_sync_time *st = mem; 665 struct carm_msg_sync_time *st = mem;
666 666
667 do_gettimeofday(&tv); 667 do_gettimeofday(&tv);
668 668
669 memset(st, 0, sizeof(*st)); 669 memset(st, 0, sizeof(*st));
670 st->type = CARM_MSG_MISC; 670 st->type = CARM_MSG_MISC;
671 st->subtype = MISC_SET_TIME; 671 st->subtype = MISC_SET_TIME;
672 st->handle = cpu_to_le32(TAG_ENCODE(idx)); 672 st->handle = cpu_to_le32(TAG_ENCODE(idx));
673 st->timestamp = cpu_to_le32(tv.tv_sec); 673 st->timestamp = cpu_to_le32(tv.tv_sec);
674 674
675 return sizeof(struct carm_msg_sync_time); 675 return sizeof(struct carm_msg_sync_time);
676 } 676 }
677 677
678 static unsigned int carm_fill_alloc_buf(struct carm_host *host, 678 static unsigned int carm_fill_alloc_buf(struct carm_host *host,
679 unsigned int idx, void *mem) 679 unsigned int idx, void *mem)
680 { 680 {
681 struct carm_msg_allocbuf *ab = mem; 681 struct carm_msg_allocbuf *ab = mem;
682 682
683 memset(ab, 0, sizeof(*ab)); 683 memset(ab, 0, sizeof(*ab));
684 ab->type = CARM_MSG_MISC; 684 ab->type = CARM_MSG_MISC;
685 ab->subtype = MISC_ALLOC_MEM; 685 ab->subtype = MISC_ALLOC_MEM;
686 ab->handle = cpu_to_le32(TAG_ENCODE(idx)); 686 ab->handle = cpu_to_le32(TAG_ENCODE(idx));
687 ab->n_sg = 1; 687 ab->n_sg = 1;
688 ab->sg_type = SGT_32BIT; 688 ab->sg_type = SGT_32BIT;
689 ab->addr = cpu_to_le32(host->shm_dma + (PDC_SHM_SIZE >> 1)); 689 ab->addr = cpu_to_le32(host->shm_dma + (PDC_SHM_SIZE >> 1));
690 ab->len = cpu_to_le32(PDC_SHM_SIZE >> 1); 690 ab->len = cpu_to_le32(PDC_SHM_SIZE >> 1);
691 ab->evt_pool = cpu_to_le32(host->shm_dma + (16 * 1024)); 691 ab->evt_pool = cpu_to_le32(host->shm_dma + (16 * 1024));
692 ab->n_evt = cpu_to_le32(1024); 692 ab->n_evt = cpu_to_le32(1024);
693 ab->rbuf_pool = cpu_to_le32(host->shm_dma); 693 ab->rbuf_pool = cpu_to_le32(host->shm_dma);
694 ab->n_rbuf = cpu_to_le32(RMSG_Q_LEN); 694 ab->n_rbuf = cpu_to_le32(RMSG_Q_LEN);
695 ab->msg_pool = cpu_to_le32(host->shm_dma + RBUF_LEN); 695 ab->msg_pool = cpu_to_le32(host->shm_dma + RBUF_LEN);
696 ab->n_msg = cpu_to_le32(CARM_Q_LEN); 696 ab->n_msg = cpu_to_le32(CARM_Q_LEN);
697 ab->sg[0].start = cpu_to_le32(host->shm_dma + (PDC_SHM_SIZE >> 1)); 697 ab->sg[0].start = cpu_to_le32(host->shm_dma + (PDC_SHM_SIZE >> 1));
698 ab->sg[0].len = cpu_to_le32(65536); 698 ab->sg[0].len = cpu_to_le32(65536);
699 699
700 return sizeof(struct carm_msg_allocbuf); 700 return sizeof(struct carm_msg_allocbuf);
701 } 701 }
702 702
703 static unsigned int carm_fill_scan_channels(struct carm_host *host, 703 static unsigned int carm_fill_scan_channels(struct carm_host *host,
704 unsigned int idx, void *mem) 704 unsigned int idx, void *mem)
705 { 705 {
706 struct carm_msg_ioctl *ioc = mem; 706 struct carm_msg_ioctl *ioc = mem;
707 u32 msg_data = (u32) (carm_ref_msg_dma(host, idx) + 707 u32 msg_data = (u32) (carm_ref_msg_dma(host, idx) +
708 IOC_SCAN_CHAN_OFFSET); 708 IOC_SCAN_CHAN_OFFSET);
709 709
710 memset(ioc, 0, sizeof(*ioc)); 710 memset(ioc, 0, sizeof(*ioc));
711 ioc->type = CARM_MSG_IOCTL; 711 ioc->type = CARM_MSG_IOCTL;
712 ioc->subtype = CARM_IOC_SCAN_CHAN; 712 ioc->subtype = CARM_IOC_SCAN_CHAN;
713 ioc->handle = cpu_to_le32(TAG_ENCODE(idx)); 713 ioc->handle = cpu_to_le32(TAG_ENCODE(idx));
714 ioc->data_addr = cpu_to_le32(msg_data); 714 ioc->data_addr = cpu_to_le32(msg_data);
715 715
716 /* fill output data area with "no device" default values */ 716 /* fill output data area with "no device" default values */
717 mem += IOC_SCAN_CHAN_OFFSET; 717 mem += IOC_SCAN_CHAN_OFFSET;
718 memset(mem, IOC_SCAN_CHAN_NODEV, CARM_MAX_PORTS); 718 memset(mem, IOC_SCAN_CHAN_NODEV, CARM_MAX_PORTS);
719 719
720 return IOC_SCAN_CHAN_OFFSET + CARM_MAX_PORTS; 720 return IOC_SCAN_CHAN_OFFSET + CARM_MAX_PORTS;
721 } 721 }
722 722
723 static unsigned int carm_fill_get_fw_ver(struct carm_host *host, 723 static unsigned int carm_fill_get_fw_ver(struct carm_host *host,
724 unsigned int idx, void *mem) 724 unsigned int idx, void *mem)
725 { 725 {
726 struct carm_msg_get_fw_ver *ioc = mem; 726 struct carm_msg_get_fw_ver *ioc = mem;
727 u32 msg_data = (u32) (carm_ref_msg_dma(host, idx) + sizeof(*ioc)); 727 u32 msg_data = (u32) (carm_ref_msg_dma(host, idx) + sizeof(*ioc));
728 728
729 memset(ioc, 0, sizeof(*ioc)); 729 memset(ioc, 0, sizeof(*ioc));
730 ioc->type = CARM_MSG_MISC; 730 ioc->type = CARM_MSG_MISC;
731 ioc->subtype = MISC_GET_FW_VER; 731 ioc->subtype = MISC_GET_FW_VER;
732 ioc->handle = cpu_to_le32(TAG_ENCODE(idx)); 732 ioc->handle = cpu_to_le32(TAG_ENCODE(idx));
733 ioc->data_addr = cpu_to_le32(msg_data); 733 ioc->data_addr = cpu_to_le32(msg_data);
734 734
735 return sizeof(struct carm_msg_get_fw_ver) + 735 return sizeof(struct carm_msg_get_fw_ver) +
736 sizeof(struct carm_fw_ver); 736 sizeof(struct carm_fw_ver);
737 } 737 }
738 738
739 static inline void carm_end_request_queued(struct carm_host *host, 739 static inline void carm_end_request_queued(struct carm_host *host,
740 struct carm_request *crq, 740 struct carm_request *crq,
741 int uptodate) 741 int uptodate)
742 { 742 {
743 struct request *req = crq->rq; 743 struct request *req = crq->rq;
744 int rc; 744 int rc;
745 745
746 rc = end_that_request_first(req, uptodate, req->hard_nr_sectors); 746 rc = end_that_request_first(req, uptodate, req->hard_nr_sectors);
747 assert(rc == 0); 747 assert(rc == 0);
748 748
749 end_that_request_last(req); 749 end_that_request_last(req);
750 750
751 rc = carm_put_request(host, crq); 751 rc = carm_put_request(host, crq);
752 assert(rc == 0); 752 assert(rc == 0);
753 } 753 }
754 754
755 static inline void carm_push_q (struct carm_host *host, request_queue_t *q) 755 static inline void carm_push_q (struct carm_host *host, request_queue_t *q)
756 { 756 {
757 unsigned int idx = host->wait_q_prod % CARM_MAX_WAIT_Q; 757 unsigned int idx = host->wait_q_prod % CARM_MAX_WAIT_Q;
758 758
759 blk_stop_queue(q); 759 blk_stop_queue(q);
760 VPRINTK("STOPPED QUEUE %p\n", q); 760 VPRINTK("STOPPED QUEUE %p\n", q);
761 761
762 host->wait_q[idx] = q; 762 host->wait_q[idx] = q;
763 host->wait_q_prod++; 763 host->wait_q_prod++;
764 BUG_ON(host->wait_q_prod == host->wait_q_cons); /* overrun */ 764 BUG_ON(host->wait_q_prod == host->wait_q_cons); /* overrun */
765 } 765 }
766 766
767 static inline request_queue_t *carm_pop_q(struct carm_host *host) 767 static inline request_queue_t *carm_pop_q(struct carm_host *host)
768 { 768 {
769 unsigned int idx; 769 unsigned int idx;
770 770
771 if (host->wait_q_prod == host->wait_q_cons) 771 if (host->wait_q_prod == host->wait_q_cons)
772 return NULL; 772 return NULL;
773 773
774 idx = host->wait_q_cons % CARM_MAX_WAIT_Q; 774 idx = host->wait_q_cons % CARM_MAX_WAIT_Q;
775 host->wait_q_cons++; 775 host->wait_q_cons++;
776 776
777 return host->wait_q[idx]; 777 return host->wait_q[idx];
778 } 778 }
779 779
780 static inline void carm_round_robin(struct carm_host *host) 780 static inline void carm_round_robin(struct carm_host *host)
781 { 781 {
782 request_queue_t *q = carm_pop_q(host); 782 request_queue_t *q = carm_pop_q(host);
783 if (q) { 783 if (q) {
784 blk_start_queue(q); 784 blk_start_queue(q);
785 VPRINTK("STARTED QUEUE %p\n", q); 785 VPRINTK("STARTED QUEUE %p\n", q);
786 } 786 }
787 } 787 }
788 788
789 static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq, 789 static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq,
790 int is_ok) 790 int is_ok)
791 { 791 {
792 carm_end_request_queued(host, crq, is_ok); 792 carm_end_request_queued(host, crq, is_ok);
793 if (CARM_MAX_Q == 1) 793 if (CARM_MAX_Q == 1)
794 carm_round_robin(host); 794 carm_round_robin(host);
795 else if ((host->n_msgs <= CARM_MSG_LOW_WATER) && 795 else if ((host->n_msgs <= CARM_MSG_LOW_WATER) &&
796 (host->hw_sg_used <= CARM_SG_LOW_WATER)) { 796 (host->hw_sg_used <= CARM_SG_LOW_WATER)) {
797 carm_round_robin(host); 797 carm_round_robin(host);
798 } 798 }
799 } 799 }
800 800
801 static void carm_oob_rq_fn(request_queue_t *q) 801 static void carm_oob_rq_fn(request_queue_t *q)
802 { 802 {
803 struct carm_host *host = q->queuedata; 803 struct carm_host *host = q->queuedata;
804 struct carm_request *crq; 804 struct carm_request *crq;
805 struct request *rq; 805 struct request *rq;
806 int rc; 806 int rc;
807 807
808 while (1) { 808 while (1) {
809 DPRINTK("get req\n"); 809 DPRINTK("get req\n");
810 rq = elv_next_request(q); 810 rq = elv_next_request(q);
811 if (!rq) 811 if (!rq)
812 break; 812 break;
813 813
814 blkdev_dequeue_request(rq); 814 blkdev_dequeue_request(rq);
815 815
816 crq = rq->special; 816 crq = rq->special;
817 assert(crq != NULL); 817 assert(crq != NULL);
818 assert(crq->rq == rq); 818 assert(crq->rq == rq);
819 819
820 crq->n_elem = 0; 820 crq->n_elem = 0;
821 821
822 DPRINTK("send req\n"); 822 DPRINTK("send req\n");
823 rc = carm_send_msg(host, crq); 823 rc = carm_send_msg(host, crq);
824 if (rc) { 824 if (rc) {
825 blk_requeue_request(q, rq); 825 blk_requeue_request(q, rq);
826 carm_push_q(host, q); 826 carm_push_q(host, q);
827 return; /* call us again later, eventually */ 827 return; /* call us again later, eventually */
828 } 828 }
829 } 829 }
830 } 830 }
831 831
832 static void carm_rq_fn(request_queue_t *q) 832 static void carm_rq_fn(request_queue_t *q)
833 { 833 {
834 struct carm_port *port = q->queuedata; 834 struct carm_port *port = q->queuedata;
835 struct carm_host *host = port->host; 835 struct carm_host *host = port->host;
836 struct carm_msg_rw *msg; 836 struct carm_msg_rw *msg;
837 struct carm_request *crq; 837 struct carm_request *crq;
838 struct request *rq; 838 struct request *rq;
839 struct scatterlist *sg; 839 struct scatterlist *sg;
840 int writing = 0, pci_dir, i, n_elem, rc; 840 int writing = 0, pci_dir, i, n_elem, rc;
841 u32 tmp; 841 u32 tmp;
842 unsigned int msg_size; 842 unsigned int msg_size;
843 843
844 queue_one_request: 844 queue_one_request:
845 VPRINTK("get req\n"); 845 VPRINTK("get req\n");
846 rq = elv_next_request(q); 846 rq = elv_next_request(q);
847 if (!rq) 847 if (!rq)
848 return; 848 return;
849 849
850 crq = carm_get_request(host); 850 crq = carm_get_request(host);
851 if (!crq) { 851 if (!crq) {
852 carm_push_q(host, q); 852 carm_push_q(host, q);
853 return; /* call us again later, eventually */ 853 return; /* call us again later, eventually */
854 } 854 }
855 crq->rq = rq; 855 crq->rq = rq;
856 856
857 blkdev_dequeue_request(rq); 857 blkdev_dequeue_request(rq);
858 858
859 if (rq_data_dir(rq) == WRITE) { 859 if (rq_data_dir(rq) == WRITE) {
860 writing = 1; 860 writing = 1;
861 pci_dir = PCI_DMA_TODEVICE; 861 pci_dir = PCI_DMA_TODEVICE;
862 } else { 862 } else {
863 pci_dir = PCI_DMA_FROMDEVICE; 863 pci_dir = PCI_DMA_FROMDEVICE;
864 } 864 }
865 865
866 /* get scatterlist from block layer */ 866 /* get scatterlist from block layer */
867 sg = &crq->sg[0]; 867 sg = &crq->sg[0];
868 n_elem = blk_rq_map_sg(q, rq, sg); 868 n_elem = blk_rq_map_sg(q, rq, sg);
869 if (n_elem <= 0) { 869 if (n_elem <= 0) {
870 carm_end_rq(host, crq, 0); 870 carm_end_rq(host, crq, 0);
871 return; /* request with no s/g entries? */ 871 return; /* request with no s/g entries? */
872 } 872 }
873 873
874 /* map scatterlist to PCI bus addresses */ 874 /* map scatterlist to PCI bus addresses */
875 n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir); 875 n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir);
876 if (n_elem <= 0) { 876 if (n_elem <= 0) {
877 carm_end_rq(host, crq, 0); 877 carm_end_rq(host, crq, 0);
878 return; /* request with no s/g entries? */ 878 return; /* request with no s/g entries? */
879 } 879 }
880 crq->n_elem = n_elem; 880 crq->n_elem = n_elem;
881 crq->port = port; 881 crq->port = port;
882 host->hw_sg_used += n_elem; 882 host->hw_sg_used += n_elem;
883 883
884 /* 884 /*
885 * build read/write message 885 * build read/write message
886 */ 886 */
887 887
888 VPRINTK("build msg\n"); 888 VPRINTK("build msg\n");
889 msg = (struct carm_msg_rw *) carm_ref_msg(host, crq->tag); 889 msg = (struct carm_msg_rw *) carm_ref_msg(host, crq->tag);
890 890
891 if (writing) { 891 if (writing) {
892 msg->type = CARM_MSG_WRITE; 892 msg->type = CARM_MSG_WRITE;
893 crq->msg_type = CARM_MSG_WRITE; 893 crq->msg_type = CARM_MSG_WRITE;
894 } else { 894 } else {
895 msg->type = CARM_MSG_READ; 895 msg->type = CARM_MSG_READ;
896 crq->msg_type = CARM_MSG_READ; 896 crq->msg_type = CARM_MSG_READ;
897 } 897 }
898 898
899 msg->id = port->port_no; 899 msg->id = port->port_no;
900 msg->sg_count = n_elem; 900 msg->sg_count = n_elem;
901 msg->sg_type = SGT_32BIT; 901 msg->sg_type = SGT_32BIT;
902 msg->handle = cpu_to_le32(TAG_ENCODE(crq->tag)); 902 msg->handle = cpu_to_le32(TAG_ENCODE(crq->tag));
903 msg->lba = cpu_to_le32(rq->sector & 0xffffffff); 903 msg->lba = cpu_to_le32(rq->sector & 0xffffffff);
904 tmp = (rq->sector >> 16) >> 16; 904 tmp = (rq->sector >> 16) >> 16;
905 msg->lba_high = cpu_to_le16( (u16) tmp ); 905 msg->lba_high = cpu_to_le16( (u16) tmp );
906 msg->lba_count = cpu_to_le16(rq->nr_sectors); 906 msg->lba_count = cpu_to_le16(rq->nr_sectors);
907 907
908 msg_size = sizeof(struct carm_msg_rw) - sizeof(msg->sg); 908 msg_size = sizeof(struct carm_msg_rw) - sizeof(msg->sg);
909 for (i = 0; i < n_elem; i++) { 909 for (i = 0; i < n_elem; i++) {
910 struct carm_msg_sg *carm_sg = &msg->sg[i]; 910 struct carm_msg_sg *carm_sg = &msg->sg[i];
911 carm_sg->start = cpu_to_le32(sg_dma_address(&crq->sg[i])); 911 carm_sg->start = cpu_to_le32(sg_dma_address(&crq->sg[i]));
912 carm_sg->len = cpu_to_le32(sg_dma_len(&crq->sg[i])); 912 carm_sg->len = cpu_to_le32(sg_dma_len(&crq->sg[i]));
913 msg_size += sizeof(struct carm_msg_sg); 913 msg_size += sizeof(struct carm_msg_sg);
914 } 914 }
915 915
916 rc = carm_lookup_bucket(msg_size); 916 rc = carm_lookup_bucket(msg_size);
917 BUG_ON(rc < 0); 917 BUG_ON(rc < 0);
918 crq->msg_bucket = (u32) rc; 918 crq->msg_bucket = (u32) rc;
919 919
920 /* 920 /*
921 * queue read/write message to hardware 921 * queue read/write message to hardware
922 */ 922 */
923 923
924 VPRINTK("send msg, tag == %u\n", crq->tag); 924 VPRINTK("send msg, tag == %u\n", crq->tag);
925 rc = carm_send_msg(host, crq); 925 rc = carm_send_msg(host, crq);
926 if (rc) { 926 if (rc) {
927 carm_put_request(host, crq); 927 carm_put_request(host, crq);
928 blk_requeue_request(q, rq); 928 blk_requeue_request(q, rq);
929 carm_push_q(host, q); 929 carm_push_q(host, q);
930 return; /* call us again later, eventually */ 930 return; /* call us again later, eventually */
931 } 931 }
932 932
933 goto queue_one_request; 933 goto queue_one_request;
934 } 934 }
935 935
936 static void carm_handle_array_info(struct carm_host *host, 936 static void carm_handle_array_info(struct carm_host *host,
937 struct carm_request *crq, u8 *mem, 937 struct carm_request *crq, u8 *mem,
938 int is_ok) 938 int is_ok)
939 { 939 {
940 struct carm_port *port; 940 struct carm_port *port;
941 u8 *msg_data = mem + sizeof(struct carm_array_info); 941 u8 *msg_data = mem + sizeof(struct carm_array_info);
942 struct carm_array_info *desc = (struct carm_array_info *) msg_data; 942 struct carm_array_info *desc = (struct carm_array_info *) msg_data;
943 u64 lo, hi; 943 u64 lo, hi;
944 int cur_port; 944 int cur_port;
945 size_t slen; 945 size_t slen;
946 946
947 DPRINTK("ENTER\n"); 947 DPRINTK("ENTER\n");
948 948
949 carm_end_rq(host, crq, is_ok); 949 carm_end_rq(host, crq, is_ok);
950 950
951 if (!is_ok) 951 if (!is_ok)
952 goto out; 952 goto out;
953 if (le32_to_cpu(desc->array_status) & ARRAY_NO_EXIST) 953 if (le32_to_cpu(desc->array_status) & ARRAY_NO_EXIST)
954 goto out; 954 goto out;
955 955
956 cur_port = host->cur_scan_dev; 956 cur_port = host->cur_scan_dev;
957 957
958 /* should never occur */ 958 /* should never occur */
959 if ((cur_port < 0) || (cur_port >= CARM_MAX_PORTS)) { 959 if ((cur_port < 0) || (cur_port >= CARM_MAX_PORTS)) {
960 printk(KERN_ERR PFX "BUG: cur_scan_dev==%d, array_id==%d\n", 960 printk(KERN_ERR PFX "BUG: cur_scan_dev==%d, array_id==%d\n",
961 cur_port, (int) desc->array_id); 961 cur_port, (int) desc->array_id);
962 goto out; 962 goto out;
963 } 963 }
964 964
965 port = &host->port[cur_port]; 965 port = &host->port[cur_port];
966 966
967 lo = (u64) le32_to_cpu(desc->size); 967 lo = (u64) le32_to_cpu(desc->size);
968 hi = (u64) le16_to_cpu(desc->size_hi); 968 hi = (u64) le16_to_cpu(desc->size_hi);
969 969
970 port->capacity = lo | (hi << 32); 970 port->capacity = lo | (hi << 32);
971 port->dev_geom_head = le16_to_cpu(desc->head); 971 port->dev_geom_head = le16_to_cpu(desc->head);
972 port->dev_geom_sect = le16_to_cpu(desc->sect); 972 port->dev_geom_sect = le16_to_cpu(desc->sect);
973 port->dev_geom_cyl = le16_to_cpu(desc->cyl); 973 port->dev_geom_cyl = le16_to_cpu(desc->cyl);
974 974
975 host->dev_active |= (1 << cur_port); 975 host->dev_active |= (1 << cur_port);
976 976
977 strncpy(port->name, desc->name, sizeof(port->name)); 977 strncpy(port->name, desc->name, sizeof(port->name));
978 port->name[sizeof(port->name) - 1] = 0; 978 port->name[sizeof(port->name) - 1] = 0;
979 slen = strlen(port->name); 979 slen = strlen(port->name);
980 while (slen && (port->name[slen - 1] == ' ')) { 980 while (slen && (port->name[slen - 1] == ' ')) {
981 port->name[slen - 1] = 0; 981 port->name[slen - 1] = 0;
982 slen--; 982 slen--;
983 } 983 }
984 984
985 printk(KERN_INFO DRV_NAME "(%s): port %u device %Lu sectors\n", 985 printk(KERN_INFO DRV_NAME "(%s): port %u device %Lu sectors\n",
986 pci_name(host->pdev), port->port_no, 986 pci_name(host->pdev), port->port_no,
987 (unsigned long long) port->capacity); 987 (unsigned long long) port->capacity);
988 printk(KERN_INFO DRV_NAME "(%s): port %u device \"%s\"\n", 988 printk(KERN_INFO DRV_NAME "(%s): port %u device \"%s\"\n",
989 pci_name(host->pdev), port->port_no, port->name); 989 pci_name(host->pdev), port->port_no, port->name);
990 990
991 out: 991 out:
992 assert(host->state == HST_DEV_SCAN); 992 assert(host->state == HST_DEV_SCAN);
993 schedule_work(&host->fsm_task); 993 schedule_work(&host->fsm_task);
994 } 994 }
995 995
996 static void carm_handle_scan_chan(struct carm_host *host, 996 static void carm_handle_scan_chan(struct carm_host *host,
997 struct carm_request *crq, u8 *mem, 997 struct carm_request *crq, u8 *mem,
998 int is_ok) 998 int is_ok)
999 { 999 {
1000 u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET; 1000 u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET;
1001 unsigned int i, dev_count = 0; 1001 unsigned int i, dev_count = 0;
1002 int new_state = HST_DEV_SCAN_START; 1002 int new_state = HST_DEV_SCAN_START;
1003 1003
1004 DPRINTK("ENTER\n"); 1004 DPRINTK("ENTER\n");
1005 1005
1006 carm_end_rq(host, crq, is_ok); 1006 carm_end_rq(host, crq, is_ok);
1007 1007
1008 if (!is_ok) { 1008 if (!is_ok) {
1009 new_state = HST_ERROR; 1009 new_state = HST_ERROR;
1010 goto out; 1010 goto out;
1011 } 1011 }
1012 1012
1013 /* TODO: scan and support non-disk devices */ 1013 /* TODO: scan and support non-disk devices */
1014 for (i = 0; i < 8; i++) 1014 for (i = 0; i < 8; i++)
1015 if (msg_data[i] == 0) { /* direct-access device (disk) */ 1015 if (msg_data[i] == 0) { /* direct-access device (disk) */
1016 host->dev_present |= (1 << i); 1016 host->dev_present |= (1 << i);
1017 dev_count++; 1017 dev_count++;
1018 } 1018 }
1019 1019
1020 printk(KERN_INFO DRV_NAME "(%s): found %u interesting devices\n", 1020 printk(KERN_INFO DRV_NAME "(%s): found %u interesting devices\n",
1021 pci_name(host->pdev), dev_count); 1021 pci_name(host->pdev), dev_count);
1022 1022
1023 out: 1023 out:
1024 assert(host->state == HST_PORT_SCAN); 1024 assert(host->state == HST_PORT_SCAN);
1025 host->state = new_state; 1025 host->state = new_state;
1026 schedule_work(&host->fsm_task); 1026 schedule_work(&host->fsm_task);
1027 } 1027 }
1028 1028
1029 static void carm_handle_generic(struct carm_host *host, 1029 static void carm_handle_generic(struct carm_host *host,
1030 struct carm_request *crq, int is_ok, 1030 struct carm_request *crq, int is_ok,
1031 int cur_state, int next_state) 1031 int cur_state, int next_state)
1032 { 1032 {
1033 DPRINTK("ENTER\n"); 1033 DPRINTK("ENTER\n");
1034 1034
1035 carm_end_rq(host, crq, is_ok); 1035 carm_end_rq(host, crq, is_ok);
1036 1036
1037 assert(host->state == cur_state); 1037 assert(host->state == cur_state);
1038 if (is_ok) 1038 if (is_ok)
1039 host->state = next_state; 1039 host->state = next_state;
1040 else 1040 else
1041 host->state = HST_ERROR; 1041 host->state = HST_ERROR;
1042 schedule_work(&host->fsm_task); 1042 schedule_work(&host->fsm_task);
1043 } 1043 }
1044 1044
1045 static inline void carm_handle_rw(struct carm_host *host, 1045 static inline void carm_handle_rw(struct carm_host *host,
1046 struct carm_request *crq, int is_ok) 1046 struct carm_request *crq, int is_ok)
1047 { 1047 {
1048 int pci_dir; 1048 int pci_dir;
1049 1049
1050 VPRINTK("ENTER\n"); 1050 VPRINTK("ENTER\n");
1051 1051
1052 if (rq_data_dir(crq->rq) == WRITE) 1052 if (rq_data_dir(crq->rq) == WRITE)
1053 pci_dir = PCI_DMA_TODEVICE; 1053 pci_dir = PCI_DMA_TODEVICE;
1054 else 1054 else
1055 pci_dir = PCI_DMA_FROMDEVICE; 1055 pci_dir = PCI_DMA_FROMDEVICE;
1056 1056
1057 pci_unmap_sg(host->pdev, &crq->sg[0], crq->n_elem, pci_dir); 1057 pci_unmap_sg(host->pdev, &crq->sg[0], crq->n_elem, pci_dir);
1058 1058
1059 carm_end_rq(host, crq, is_ok); 1059 carm_end_rq(host, crq, is_ok);
1060 } 1060 }
1061 1061
1062 static inline void carm_handle_resp(struct carm_host *host, 1062 static inline void carm_handle_resp(struct carm_host *host,
1063 __le32 ret_handle_le, u32 status) 1063 __le32 ret_handle_le, u32 status)
1064 { 1064 {
1065 u32 handle = le32_to_cpu(ret_handle_le); 1065 u32 handle = le32_to_cpu(ret_handle_le);
1066 unsigned int msg_idx; 1066 unsigned int msg_idx;
1067 struct carm_request *crq; 1067 struct carm_request *crq;
1068 int is_ok = (status == RMSG_OK); 1068 int is_ok = (status == RMSG_OK);
1069 u8 *mem; 1069 u8 *mem;
1070 1070
1071 VPRINTK("ENTER, handle == 0x%x\n", handle); 1071 VPRINTK("ENTER, handle == 0x%x\n", handle);
1072 1072
1073 if (unlikely(!TAG_VALID(handle))) { 1073 if (unlikely(!TAG_VALID(handle))) {
1074 printk(KERN_ERR DRV_NAME "(%s): BUG: invalid tag 0x%x\n", 1074 printk(KERN_ERR DRV_NAME "(%s): BUG: invalid tag 0x%x\n",
1075 pci_name(host->pdev), handle); 1075 pci_name(host->pdev), handle);
1076 return; 1076 return;
1077 } 1077 }
1078 1078
1079 msg_idx = TAG_DECODE(handle); 1079 msg_idx = TAG_DECODE(handle);
1080 VPRINTK("tag == %u\n", msg_idx); 1080 VPRINTK("tag == %u\n", msg_idx);
1081 1081
1082 crq = &host->req[msg_idx]; 1082 crq = &host->req[msg_idx];
1083 1083
1084 /* fast path */ 1084 /* fast path */
1085 if (likely(crq->msg_type == CARM_MSG_READ || 1085 if (likely(crq->msg_type == CARM_MSG_READ ||
1086 crq->msg_type == CARM_MSG_WRITE)) { 1086 crq->msg_type == CARM_MSG_WRITE)) {
1087 carm_handle_rw(host, crq, is_ok); 1087 carm_handle_rw(host, crq, is_ok);
1088 return; 1088 return;
1089 } 1089 }
1090 1090
1091 mem = carm_ref_msg(host, msg_idx); 1091 mem = carm_ref_msg(host, msg_idx);
1092 1092
1093 switch (crq->msg_type) { 1093 switch (crq->msg_type) {
1094 case CARM_MSG_IOCTL: { 1094 case CARM_MSG_IOCTL: {
1095 switch (crq->msg_subtype) { 1095 switch (crq->msg_subtype) {
1096 case CARM_IOC_SCAN_CHAN: 1096 case CARM_IOC_SCAN_CHAN:
1097 carm_handle_scan_chan(host, crq, mem, is_ok); 1097 carm_handle_scan_chan(host, crq, mem, is_ok);
1098 break; 1098 break;
1099 default: 1099 default:
1100 /* unknown / invalid response */ 1100 /* unknown / invalid response */
1101 goto err_out; 1101 goto err_out;
1102 } 1102 }
1103 break; 1103 break;
1104 } 1104 }
1105 1105
1106 case CARM_MSG_MISC: { 1106 case CARM_MSG_MISC: {
1107 switch (crq->msg_subtype) { 1107 switch (crq->msg_subtype) {
1108 case MISC_ALLOC_MEM: 1108 case MISC_ALLOC_MEM:
1109 carm_handle_generic(host, crq, is_ok, 1109 carm_handle_generic(host, crq, is_ok,
1110 HST_ALLOC_BUF, HST_SYNC_TIME); 1110 HST_ALLOC_BUF, HST_SYNC_TIME);
1111 break; 1111 break;
1112 case MISC_SET_TIME: 1112 case MISC_SET_TIME:
1113 carm_handle_generic(host, crq, is_ok, 1113 carm_handle_generic(host, crq, is_ok,
1114 HST_SYNC_TIME, HST_GET_FW_VER); 1114 HST_SYNC_TIME, HST_GET_FW_VER);
1115 break; 1115 break;
1116 case MISC_GET_FW_VER: { 1116 case MISC_GET_FW_VER: {
1117 struct carm_fw_ver *ver = (struct carm_fw_ver *) 1117 struct carm_fw_ver *ver = (struct carm_fw_ver *)
1118 mem + sizeof(struct carm_msg_get_fw_ver); 1118 mem + sizeof(struct carm_msg_get_fw_ver);
1119 if (is_ok) { 1119 if (is_ok) {
1120 host->fw_ver = le32_to_cpu(ver->version); 1120 host->fw_ver = le32_to_cpu(ver->version);
1121 host->flags |= (ver->features & FL_FW_VER_MASK); 1121 host->flags |= (ver->features & FL_FW_VER_MASK);
1122 } 1122 }
1123 carm_handle_generic(host, crq, is_ok, 1123 carm_handle_generic(host, crq, is_ok,
1124 HST_GET_FW_VER, HST_PORT_SCAN); 1124 HST_GET_FW_VER, HST_PORT_SCAN);
1125 break; 1125 break;
1126 } 1126 }
1127 default: 1127 default:
1128 /* unknown / invalid response */ 1128 /* unknown / invalid response */
1129 goto err_out; 1129 goto err_out;
1130 } 1130 }
1131 break; 1131 break;
1132 } 1132 }
1133 1133
1134 case CARM_MSG_ARRAY: { 1134 case CARM_MSG_ARRAY: {
1135 switch (crq->msg_subtype) { 1135 switch (crq->msg_subtype) {
1136 case CARM_ARRAY_INFO: 1136 case CARM_ARRAY_INFO:
1137 carm_handle_array_info(host, crq, mem, is_ok); 1137 carm_handle_array_info(host, crq, mem, is_ok);
1138 break; 1138 break;
1139 default: 1139 default:
1140 /* unknown / invalid response */ 1140 /* unknown / invalid response */
1141 goto err_out; 1141 goto err_out;
1142 } 1142 }
1143 break; 1143 break;
1144 } 1144 }
1145 1145
1146 default: 1146 default:
1147 /* unknown / invalid response */ 1147 /* unknown / invalid response */
1148 goto err_out; 1148 goto err_out;
1149 } 1149 }
1150 1150
1151 return; 1151 return;
1152 1152
1153 err_out: 1153 err_out:
1154 printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n", 1154 printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n",
1155 pci_name(host->pdev), crq->msg_type, crq->msg_subtype); 1155 pci_name(host->pdev), crq->msg_type, crq->msg_subtype);
1156 carm_end_rq(host, crq, 0); 1156 carm_end_rq(host, crq, 0);
1157 } 1157 }
1158 1158
1159 static inline void carm_handle_responses(struct carm_host *host) 1159 static inline void carm_handle_responses(struct carm_host *host)
1160 { 1160 {
1161 void __iomem *mmio = host->mmio; 1161 void __iomem *mmio = host->mmio;
1162 struct carm_response *resp = (struct carm_response *) host->shm; 1162 struct carm_response *resp = (struct carm_response *) host->shm;
1163 unsigned int work = 0; 1163 unsigned int work = 0;
1164 unsigned int idx = host->resp_idx % RMSG_Q_LEN; 1164 unsigned int idx = host->resp_idx % RMSG_Q_LEN;
1165 1165
1166 while (1) { 1166 while (1) {
1167 u32 status = le32_to_cpu(resp[idx].status); 1167 u32 status = le32_to_cpu(resp[idx].status);
1168 1168
1169 if (status == 0xffffffff) { 1169 if (status == 0xffffffff) {
1170 VPRINTK("ending response on index %u\n", idx); 1170 VPRINTK("ending response on index %u\n", idx);
1171 writel(idx << 3, mmio + CARM_RESP_IDX); 1171 writel(idx << 3, mmio + CARM_RESP_IDX);
1172 break; 1172 break;
1173 } 1173 }
1174 1174
1175 /* response to a message we sent */ 1175 /* response to a message we sent */
1176 else if ((status & (1 << 31)) == 0) { 1176 else if ((status & (1 << 31)) == 0) {
1177 VPRINTK("handling msg response on index %u\n", idx); 1177 VPRINTK("handling msg response on index %u\n", idx);
1178 carm_handle_resp(host, resp[idx].ret_handle, status); 1178 carm_handle_resp(host, resp[idx].ret_handle, status);
1179 resp[idx].status = cpu_to_le32(0xffffffff); 1179 resp[idx].status = cpu_to_le32(0xffffffff);
1180 } 1180 }
1181 1181
1182 /* asynchronous events the hardware throws our way */ 1182 /* asynchronous events the hardware throws our way */
1183 else if ((status & 0xff000000) == (1 << 31)) { 1183 else if ((status & 0xff000000) == (1 << 31)) {
1184 u8 *evt_type_ptr = (u8 *) &resp[idx]; 1184 u8 *evt_type_ptr = (u8 *) &resp[idx];
1185 u8 evt_type = *evt_type_ptr; 1185 u8 evt_type = *evt_type_ptr;
1186 printk(KERN_WARNING DRV_NAME "(%s): unhandled event type %d\n", 1186 printk(KERN_WARNING DRV_NAME "(%s): unhandled event type %d\n",
1187 pci_name(host->pdev), (int) evt_type); 1187 pci_name(host->pdev), (int) evt_type);
1188 resp[idx].status = cpu_to_le32(0xffffffff); 1188 resp[idx].status = cpu_to_le32(0xffffffff);
1189 } 1189 }
1190 1190
1191 idx = NEXT_RESP(idx); 1191 idx = NEXT_RESP(idx);
1192 work++; 1192 work++;
1193 } 1193 }
1194 1194
1195 VPRINTK("EXIT, work==%u\n", work); 1195 VPRINTK("EXIT, work==%u\n", work);
1196 host->resp_idx += work; 1196 host->resp_idx += work;
1197 } 1197 }
1198 1198
1199 static irqreturn_t carm_interrupt(int irq, void *__host, struct pt_regs *regs) 1199 static irqreturn_t carm_interrupt(int irq, void *__host, struct pt_regs *regs)
1200 { 1200 {
1201 struct carm_host *host = __host; 1201 struct carm_host *host = __host;
1202 void __iomem *mmio; 1202 void __iomem *mmio;
1203 u32 mask; 1203 u32 mask;
1204 int handled = 0; 1204 int handled = 0;
1205 unsigned long flags; 1205 unsigned long flags;
1206 1206
1207 if (!host) { 1207 if (!host) {
1208 VPRINTK("no host\n"); 1208 VPRINTK("no host\n");
1209 return IRQ_NONE; 1209 return IRQ_NONE;
1210 } 1210 }
1211 1211
1212 spin_lock_irqsave(&host->lock, flags); 1212 spin_lock_irqsave(&host->lock, flags);
1213 1213
1214 mmio = host->mmio; 1214 mmio = host->mmio;
1215 1215
1216 /* reading should also clear interrupts */ 1216 /* reading should also clear interrupts */
1217 mask = readl(mmio + CARM_INT_STAT); 1217 mask = readl(mmio + CARM_INT_STAT);
1218 1218
1219 if (mask == 0 || mask == 0xffffffff) { 1219 if (mask == 0 || mask == 0xffffffff) {
1220 VPRINTK("no work, mask == 0x%x\n", mask); 1220 VPRINTK("no work, mask == 0x%x\n", mask);
1221 goto out; 1221 goto out;
1222 } 1222 }
1223 1223
1224 if (mask & INT_ACK_MASK) 1224 if (mask & INT_ACK_MASK)
1225 writel(mask, mmio + CARM_INT_STAT); 1225 writel(mask, mmio + CARM_INT_STAT);
1226 1226
1227 if (unlikely(host->state == HST_INVALID)) { 1227 if (unlikely(host->state == HST_INVALID)) {
1228 VPRINTK("not initialized yet, mask = 0x%x\n", mask); 1228 VPRINTK("not initialized yet, mask = 0x%x\n", mask);
1229 goto out; 1229 goto out;
1230 } 1230 }
1231 1231
1232 if (mask & CARM_HAVE_RESP) { 1232 if (mask & CARM_HAVE_RESP) {
1233 handled = 1; 1233 handled = 1;
1234 carm_handle_responses(host); 1234 carm_handle_responses(host);
1235 } 1235 }
1236 1236
1237 out: 1237 out:
1238 spin_unlock_irqrestore(&host->lock, flags); 1238 spin_unlock_irqrestore(&host->lock, flags);
1239 VPRINTK("EXIT\n"); 1239 VPRINTK("EXIT\n");
1240 return IRQ_RETVAL(handled); 1240 return IRQ_RETVAL(handled);
1241 } 1241 }
1242 1242
1243 static void carm_fsm_task (void *_data) 1243 static void carm_fsm_task (void *_data)
1244 { 1244 {
1245 struct carm_host *host = _data; 1245 struct carm_host *host = _data;
1246 unsigned long flags; 1246 unsigned long flags;
1247 unsigned int state; 1247 unsigned int state;
1248 int rc, i, next_dev; 1248 int rc, i, next_dev;
1249 int reschedule = 0; 1249 int reschedule = 0;
1250 int new_state = HST_INVALID; 1250 int new_state = HST_INVALID;
1251 1251
1252 spin_lock_irqsave(&host->lock, flags); 1252 spin_lock_irqsave(&host->lock, flags);
1253 state = host->state; 1253 state = host->state;
1254 spin_unlock_irqrestore(&host->lock, flags); 1254 spin_unlock_irqrestore(&host->lock, flags);
1255 1255
1256 DPRINTK("ENTER, state == %s\n", state_name[state]); 1256 DPRINTK("ENTER, state == %s\n", state_name[state]);
1257 1257
1258 switch (state) { 1258 switch (state) {
1259 case HST_PROBE_START: 1259 case HST_PROBE_START:
1260 new_state = HST_ALLOC_BUF; 1260 new_state = HST_ALLOC_BUF;
1261 reschedule = 1; 1261 reschedule = 1;
1262 break; 1262 break;
1263 1263
1264 case HST_ALLOC_BUF: 1264 case HST_ALLOC_BUF:
1265 rc = carm_send_special(host, carm_fill_alloc_buf); 1265 rc = carm_send_special(host, carm_fill_alloc_buf);
1266 if (rc) { 1266 if (rc) {
1267 new_state = HST_ERROR; 1267 new_state = HST_ERROR;
1268 reschedule = 1; 1268 reschedule = 1;
1269 } 1269 }
1270 break; 1270 break;
1271 1271
1272 case HST_SYNC_TIME: 1272 case HST_SYNC_TIME:
1273 rc = carm_send_special(host, carm_fill_sync_time); 1273 rc = carm_send_special(host, carm_fill_sync_time);
1274 if (rc) { 1274 if (rc) {
1275 new_state = HST_ERROR; 1275 new_state = HST_ERROR;
1276 reschedule = 1; 1276 reschedule = 1;
1277 } 1277 }
1278 break; 1278 break;
1279 1279
1280 case HST_GET_FW_VER: 1280 case HST_GET_FW_VER:
1281 rc = carm_send_special(host, carm_fill_get_fw_ver); 1281 rc = carm_send_special(host, carm_fill_get_fw_ver);
1282 if (rc) { 1282 if (rc) {
1283 new_state = HST_ERROR; 1283 new_state = HST_ERROR;
1284 reschedule = 1; 1284 reschedule = 1;
1285 } 1285 }
1286 break; 1286 break;
1287 1287
1288 case HST_PORT_SCAN: 1288 case HST_PORT_SCAN:
1289 rc = carm_send_special(host, carm_fill_scan_channels); 1289 rc = carm_send_special(host, carm_fill_scan_channels);
1290 if (rc) { 1290 if (rc) {
1291 new_state = HST_ERROR; 1291 new_state = HST_ERROR;
1292 reschedule = 1; 1292 reschedule = 1;
1293 } 1293 }
1294 break; 1294 break;
1295 1295
1296 case HST_DEV_SCAN_START: 1296 case HST_DEV_SCAN_START:
1297 host->cur_scan_dev = -1; 1297 host->cur_scan_dev = -1;
1298 new_state = HST_DEV_SCAN; 1298 new_state = HST_DEV_SCAN;
1299 reschedule = 1; 1299 reschedule = 1;
1300 break; 1300 break;
1301 1301
1302 case HST_DEV_SCAN: 1302 case HST_DEV_SCAN:
1303 next_dev = -1; 1303 next_dev = -1;
1304 for (i = host->cur_scan_dev + 1; i < CARM_MAX_PORTS; i++) 1304 for (i = host->cur_scan_dev + 1; i < CARM_MAX_PORTS; i++)
1305 if (host->dev_present & (1 << i)) { 1305 if (host->dev_present & (1 << i)) {
1306 next_dev = i; 1306 next_dev = i;
1307 break; 1307 break;
1308 } 1308 }
1309 1309
1310 if (next_dev >= 0) { 1310 if (next_dev >= 0) {
1311 host->cur_scan_dev = next_dev; 1311 host->cur_scan_dev = next_dev;
1312 rc = carm_array_info(host, next_dev); 1312 rc = carm_array_info(host, next_dev);
1313 if (rc) { 1313 if (rc) {
1314 new_state = HST_ERROR; 1314 new_state = HST_ERROR;
1315 reschedule = 1; 1315 reschedule = 1;
1316 } 1316 }
1317 } else { 1317 } else {
1318 new_state = HST_DEV_ACTIVATE; 1318 new_state = HST_DEV_ACTIVATE;
1319 reschedule = 1; 1319 reschedule = 1;
1320 } 1320 }
1321 break; 1321 break;
1322 1322
1323 case HST_DEV_ACTIVATE: { 1323 case HST_DEV_ACTIVATE: {
1324 int activated = 0; 1324 int activated = 0;
1325 for (i = 0; i < CARM_MAX_PORTS; i++) 1325 for (i = 0; i < CARM_MAX_PORTS; i++)
1326 if (host->dev_active & (1 << i)) { 1326 if (host->dev_active & (1 << i)) {
1327 struct carm_port *port = &host->port[i]; 1327 struct carm_port *port = &host->port[i];
1328 struct gendisk *disk = port->disk; 1328 struct gendisk *disk = port->disk;
1329 1329
1330 set_capacity(disk, port->capacity); 1330 set_capacity(disk, port->capacity);
1331 add_disk(disk); 1331 add_disk(disk);
1332 activated++; 1332 activated++;
1333 } 1333 }
1334 1334
1335 printk(KERN_INFO DRV_NAME "(%s): %d ports activated\n", 1335 printk(KERN_INFO DRV_NAME "(%s): %d ports activated\n",
1336 pci_name(host->pdev), activated); 1336 pci_name(host->pdev), activated);
1337 1337
1338 new_state = HST_PROBE_FINISHED; 1338 new_state = HST_PROBE_FINISHED;
1339 reschedule = 1; 1339 reschedule = 1;
1340 break; 1340 break;
1341 } 1341 }
1342 1342
1343 case HST_PROBE_FINISHED: 1343 case HST_PROBE_FINISHED:
1344 up(&host->probe_sem); 1344 up(&host->probe_sem);
1345 break; 1345 break;
1346 1346
1347 case HST_ERROR: 1347 case HST_ERROR:
1348 /* FIXME: TODO */ 1348 /* FIXME: TODO */
1349 break; 1349 break;
1350 1350
1351 default: 1351 default:
1352 /* should never occur */ 1352 /* should never occur */
1353 printk(KERN_ERR PFX "BUG: unknown state %d\n", state); 1353 printk(KERN_ERR PFX "BUG: unknown state %d\n", state);
1354 assert(0); 1354 assert(0);
1355 break; 1355 break;
1356 } 1356 }
1357 1357
1358 if (new_state != HST_INVALID) { 1358 if (new_state != HST_INVALID) {
1359 spin_lock_irqsave(&host->lock, flags); 1359 spin_lock_irqsave(&host->lock, flags);
1360 host->state = new_state; 1360 host->state = new_state;
1361 spin_unlock_irqrestore(&host->lock, flags); 1361 spin_unlock_irqrestore(&host->lock, flags);
1362 } 1362 }
1363 if (reschedule) 1363 if (reschedule)
1364 schedule_work(&host->fsm_task); 1364 schedule_work(&host->fsm_task);
1365 } 1365 }
1366 1366
1367 static int carm_init_wait(void __iomem *mmio, u32 bits, unsigned int test_bit) 1367 static int carm_init_wait(void __iomem *mmio, u32 bits, unsigned int test_bit)
1368 { 1368 {
1369 unsigned int i; 1369 unsigned int i;
1370 1370
1371 for (i = 0; i < 50000; i++) { 1371 for (i = 0; i < 50000; i++) {
1372 u32 tmp = readl(mmio + CARM_LMUC); 1372 u32 tmp = readl(mmio + CARM_LMUC);
1373 udelay(100); 1373 udelay(100);
1374 1374
1375 if (test_bit) { 1375 if (test_bit) {
1376 if ((tmp & bits) == bits) 1376 if ((tmp & bits) == bits)
1377 return 0; 1377 return 0;
1378 } else { 1378 } else {
1379 if ((tmp & bits) == 0) 1379 if ((tmp & bits) == 0)
1380 return 0; 1380 return 0;
1381 } 1381 }
1382 1382
1383 cond_resched(); 1383 cond_resched();
1384 } 1384 }
1385 1385
1386 printk(KERN_ERR PFX "carm_init_wait timeout, bits == 0x%x, test_bit == %s\n", 1386 printk(KERN_ERR PFX "carm_init_wait timeout, bits == 0x%x, test_bit == %s\n",
1387 bits, test_bit ? "yes" : "no"); 1387 bits, test_bit ? "yes" : "no");
1388 return -EBUSY; 1388 return -EBUSY;
1389 } 1389 }
1390 1390
1391 static void carm_init_responses(struct carm_host *host) 1391 static void carm_init_responses(struct carm_host *host)
1392 { 1392 {
1393 void __iomem *mmio = host->mmio; 1393 void __iomem *mmio = host->mmio;
1394 unsigned int i; 1394 unsigned int i;
1395 struct carm_response *resp = (struct carm_response *) host->shm; 1395 struct carm_response *resp = (struct carm_response *) host->shm;
1396 1396
1397 for (i = 0; i < RMSG_Q_LEN; i++) 1397 for (i = 0; i < RMSG_Q_LEN; i++)
1398 resp[i].status = cpu_to_le32(0xffffffff); 1398 resp[i].status = cpu_to_le32(0xffffffff);
1399 1399
1400 writel(0, mmio + CARM_RESP_IDX); 1400 writel(0, mmio + CARM_RESP_IDX);
1401 } 1401 }
1402 1402
1403 static int carm_init_host(struct carm_host *host) 1403 static int carm_init_host(struct carm_host *host)
1404 { 1404 {
1405 void __iomem *mmio = host->mmio; 1405 void __iomem *mmio = host->mmio;
1406 u32 tmp; 1406 u32 tmp;
1407 u8 tmp8; 1407 u8 tmp8;
1408 int rc; 1408 int rc;
1409 1409
1410 DPRINTK("ENTER\n"); 1410 DPRINTK("ENTER\n");
1411 1411
1412 writel(0, mmio + CARM_INT_MASK); 1412 writel(0, mmio + CARM_INT_MASK);
1413 1413
1414 tmp8 = readb(mmio + CARM_INITC); 1414 tmp8 = readb(mmio + CARM_INITC);
1415 if (tmp8 & 0x01) { 1415 if (tmp8 & 0x01) {
1416 tmp8 &= ~0x01; 1416 tmp8 &= ~0x01;
1417 writeb(tmp8, mmio + CARM_INITC); 1417 writeb(tmp8, mmio + CARM_INITC);
1418 readb(mmio + CARM_INITC); /* flush */ 1418 readb(mmio + CARM_INITC); /* flush */
1419 1419
1420 DPRINTK("snooze...\n"); 1420 DPRINTK("snooze...\n");
1421 msleep(5000); 1421 msleep(5000);
1422 } 1422 }
1423 1423
1424 tmp = readl(mmio + CARM_HMUC); 1424 tmp = readl(mmio + CARM_HMUC);
1425 if (tmp & CARM_CME) { 1425 if (tmp & CARM_CME) {
1426 DPRINTK("CME bit present, waiting\n"); 1426 DPRINTK("CME bit present, waiting\n");
1427 rc = carm_init_wait(mmio, CARM_CME, 1); 1427 rc = carm_init_wait(mmio, CARM_CME, 1);
1428 if (rc) { 1428 if (rc) {
1429 DPRINTK("EXIT, carm_init_wait 1 failed\n"); 1429 DPRINTK("EXIT, carm_init_wait 1 failed\n");
1430 return rc; 1430 return rc;
1431 } 1431 }
1432 } 1432 }
1433 if (tmp & CARM_RME) { 1433 if (tmp & CARM_RME) {
1434 DPRINTK("RME bit present, waiting\n"); 1434 DPRINTK("RME bit present, waiting\n");
1435 rc = carm_init_wait(mmio, CARM_RME, 1); 1435 rc = carm_init_wait(mmio, CARM_RME, 1);
1436 if (rc) { 1436 if (rc) {
1437 DPRINTK("EXIT, carm_init_wait 2 failed\n"); 1437 DPRINTK("EXIT, carm_init_wait 2 failed\n");
1438 return rc; 1438 return rc;
1439 } 1439 }
1440 } 1440 }
1441 1441
1442 tmp &= ~(CARM_RME | CARM_CME); 1442 tmp &= ~(CARM_RME | CARM_CME);
1443 writel(tmp, mmio + CARM_HMUC); 1443 writel(tmp, mmio + CARM_HMUC);
1444 readl(mmio + CARM_HMUC); /* flush */ 1444 readl(mmio + CARM_HMUC); /* flush */
1445 1445
1446 rc = carm_init_wait(mmio, CARM_RME | CARM_CME, 0); 1446 rc = carm_init_wait(mmio, CARM_RME | CARM_CME, 0);
1447 if (rc) { 1447 if (rc) {
1448 DPRINTK("EXIT, carm_init_wait 3 failed\n"); 1448 DPRINTK("EXIT, carm_init_wait 3 failed\n");
1449 return rc; 1449 return rc;
1450 } 1450 }
1451 1451
1452 carm_init_buckets(mmio); 1452 carm_init_buckets(mmio);
1453 1453
1454 writel(host->shm_dma & 0xffffffff, mmio + RBUF_ADDR_LO); 1454 writel(host->shm_dma & 0xffffffff, mmio + RBUF_ADDR_LO);
1455 writel((host->shm_dma >> 16) >> 16, mmio + RBUF_ADDR_HI); 1455 writel((host->shm_dma >> 16) >> 16, mmio + RBUF_ADDR_HI);
1456 writel(RBUF_LEN, mmio + RBUF_BYTE_SZ); 1456 writel(RBUF_LEN, mmio + RBUF_BYTE_SZ);
1457 1457
1458 tmp = readl(mmio + CARM_HMUC); 1458 tmp = readl(mmio + CARM_HMUC);
1459 tmp |= (CARM_RME | CARM_CME | CARM_WZBC); 1459 tmp |= (CARM_RME | CARM_CME | CARM_WZBC);
1460 writel(tmp, mmio + CARM_HMUC); 1460 writel(tmp, mmio + CARM_HMUC);
1461 readl(mmio + CARM_HMUC); /* flush */ 1461 readl(mmio + CARM_HMUC); /* flush */
1462 1462
1463 rc = carm_init_wait(mmio, CARM_RME | CARM_CME, 1); 1463 rc = carm_init_wait(mmio, CARM_RME | CARM_CME, 1);
1464 if (rc) { 1464 if (rc) {
1465 DPRINTK("EXIT, carm_init_wait 4 failed\n"); 1465 DPRINTK("EXIT, carm_init_wait 4 failed\n");
1466 return rc; 1466 return rc;
1467 } 1467 }
1468 1468
1469 writel(0, mmio + CARM_HMPHA); 1469 writel(0, mmio + CARM_HMPHA);
1470 writel(INT_DEF_MASK, mmio + CARM_INT_MASK); 1470 writel(INT_DEF_MASK, mmio + CARM_INT_MASK);
1471 1471
1472 carm_init_responses(host); 1472 carm_init_responses(host);
1473 1473
1474 /* start initialization, probing state machine */ 1474 /* start initialization, probing state machine */
1475 spin_lock_irq(&host->lock); 1475 spin_lock_irq(&host->lock);
1476 assert(host->state == HST_INVALID); 1476 assert(host->state == HST_INVALID);
1477 host->state = HST_PROBE_START; 1477 host->state = HST_PROBE_START;
1478 spin_unlock_irq(&host->lock); 1478 spin_unlock_irq(&host->lock);
1479 schedule_work(&host->fsm_task); 1479 schedule_work(&host->fsm_task);
1480 1480
1481 DPRINTK("EXIT\n"); 1481 DPRINTK("EXIT\n");
1482 return 0; 1482 return 0;
1483 } 1483 }
1484 1484
1485 static int carm_init_disks(struct carm_host *host) 1485 static int carm_init_disks(struct carm_host *host)
1486 { 1486 {
1487 unsigned int i; 1487 unsigned int i;
1488 int rc = 0; 1488 int rc = 0;
1489 1489
1490 for (i = 0; i < CARM_MAX_PORTS; i++) { 1490 for (i = 0; i < CARM_MAX_PORTS; i++) {
1491 struct gendisk *disk; 1491 struct gendisk *disk;
1492 request_queue_t *q; 1492 request_queue_t *q;
1493 struct carm_port *port; 1493 struct carm_port *port;
1494 1494
1495 port = &host->port[i]; 1495 port = &host->port[i];
1496 port->host = host; 1496 port->host = host;
1497 port->port_no = i; 1497 port->port_no = i;
1498 1498
1499 disk = alloc_disk(CARM_MINORS_PER_MAJOR); 1499 disk = alloc_disk(CARM_MINORS_PER_MAJOR);
1500 if (!disk) { 1500 if (!disk) {
1501 rc = -ENOMEM; 1501 rc = -ENOMEM;
1502 break; 1502 break;
1503 } 1503 }
1504 1504
1505 port->disk = disk; 1505 port->disk = disk;
1506 sprintf(disk->disk_name, DRV_NAME "/%u", 1506 sprintf(disk->disk_name, DRV_NAME "/%u",
1507 (unsigned int) (host->id * CARM_MAX_PORTS) + i); 1507 (unsigned int) (host->id * CARM_MAX_PORTS) + i);
1508 sprintf(disk->devfs_name, DRV_NAME "/%u_%u", host->id, i); 1508 sprintf(disk->devfs_name, DRV_NAME "/%u_%u", host->id, i);
1509 disk->major = host->major; 1509 disk->major = host->major;
1510 disk->first_minor = i * CARM_MINORS_PER_MAJOR; 1510 disk->first_minor = i * CARM_MINORS_PER_MAJOR;
1511 disk->fops = &carm_bd_ops; 1511 disk->fops = &carm_bd_ops;
1512 disk->private_data = port; 1512 disk->private_data = port;
1513 1513
1514 q = blk_init_queue(carm_rq_fn, &host->lock); 1514 q = blk_init_queue(carm_rq_fn, &host->lock);
1515 if (!q) { 1515 if (!q) {
1516 rc = -ENOMEM; 1516 rc = -ENOMEM;
1517 break; 1517 break;
1518 } 1518 }
1519 disk->queue = q; 1519 disk->queue = q;
1520 blk_queue_max_hw_segments(q, CARM_MAX_REQ_SG); 1520 blk_queue_max_hw_segments(q, CARM_MAX_REQ_SG);
1521 blk_queue_max_phys_segments(q, CARM_MAX_REQ_SG); 1521 blk_queue_max_phys_segments(q, CARM_MAX_REQ_SG);
1522 blk_queue_segment_boundary(q, CARM_SG_BOUNDARY); 1522 blk_queue_segment_boundary(q, CARM_SG_BOUNDARY);
1523 1523
1524 q->queuedata = port; 1524 q->queuedata = port;
1525 } 1525 }
1526 1526
1527 return rc; 1527 return rc;
1528 } 1528 }
1529 1529
1530 static void carm_free_disks(struct carm_host *host) 1530 static void carm_free_disks(struct carm_host *host)
1531 { 1531 {
1532 unsigned int i; 1532 unsigned int i;
1533 1533
1534 for (i = 0; i < CARM_MAX_PORTS; i++) { 1534 for (i = 0; i < CARM_MAX_PORTS; i++) {
1535 struct gendisk *disk = host->port[i].disk; 1535 struct gendisk *disk = host->port[i].disk;
1536 if (disk) { 1536 if (disk) {
1537 request_queue_t *q = disk->queue; 1537 request_queue_t *q = disk->queue;
1538 1538
1539 if (disk->flags & GENHD_FL_UP) 1539 if (disk->flags & GENHD_FL_UP)
1540 del_gendisk(disk); 1540 del_gendisk(disk);
1541 if (q) 1541 if (q)
1542 blk_cleanup_queue(q); 1542 blk_cleanup_queue(q);
1543 put_disk(disk); 1543 put_disk(disk);
1544 } 1544 }
1545 } 1545 }
1546 } 1546 }
1547 1547
1548 static int carm_init_shm(struct carm_host *host) 1548 static int carm_init_shm(struct carm_host *host)
1549 { 1549 {
1550 host->shm = pci_alloc_consistent(host->pdev, CARM_SHM_SIZE, 1550 host->shm = pci_alloc_consistent(host->pdev, CARM_SHM_SIZE,
1551 &host->shm_dma); 1551 &host->shm_dma);
1552 if (!host->shm) 1552 if (!host->shm)
1553 return -ENOMEM; 1553 return -ENOMEM;
1554 1554
1555 host->msg_base = host->shm + RBUF_LEN; 1555 host->msg_base = host->shm + RBUF_LEN;
1556 host->msg_dma = host->shm_dma + RBUF_LEN; 1556 host->msg_dma = host->shm_dma + RBUF_LEN;
1557 1557
1558 memset(host->shm, 0xff, RBUF_LEN); 1558 memset(host->shm, 0xff, RBUF_LEN);
1559 memset(host->msg_base, 0, PDC_SHM_SIZE - RBUF_LEN); 1559 memset(host->msg_base, 0, PDC_SHM_SIZE - RBUF_LEN);
1560 1560
1561 return 0; 1561 return 0;
1562 } 1562 }
1563 1563
1564 static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) 1564 static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
1565 { 1565 {
1566 static unsigned int printed_version; 1566 static unsigned int printed_version;
1567 struct carm_host *host; 1567 struct carm_host *host;
1568 unsigned int pci_dac; 1568 unsigned int pci_dac;
1569 int rc; 1569 int rc;
1570 request_queue_t *q; 1570 request_queue_t *q;
1571 unsigned int i; 1571 unsigned int i;
1572 1572
1573 if (!printed_version++) 1573 if (!printed_version++)
1574 printk(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n"); 1574 printk(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n");
1575 1575
1576 rc = pci_enable_device(pdev); 1576 rc = pci_enable_device(pdev);
1577 if (rc) 1577 if (rc)
1578 return rc; 1578 return rc;
1579 1579
1580 rc = pci_request_regions(pdev, DRV_NAME); 1580 rc = pci_request_regions(pdev, DRV_NAME);
1581 if (rc) 1581 if (rc)
1582 goto err_out; 1582 goto err_out;
1583 1583
1584 #if IF_64BIT_DMA_IS_POSSIBLE /* grrrr... */ 1584 #if IF_64BIT_DMA_IS_POSSIBLE /* grrrr... */
1585 rc = pci_set_dma_mask(pdev, 0xffffffffffffffffULL); 1585 rc = pci_set_dma_mask(pdev, 0xffffffffffffffffULL);
1586 if (!rc) { 1586 if (!rc) {
1587 rc = pci_set_consistent_dma_mask(pdev, 0xffffffffffffffffULL); 1587 rc = pci_set_consistent_dma_mask(pdev, 0xffffffffffffffffULL);
1588 if (rc) { 1588 if (rc) {
1589 printk(KERN_ERR DRV_NAME "(%s): consistent DMA mask failure\n", 1589 printk(KERN_ERR DRV_NAME "(%s): consistent DMA mask failure\n",
1590 pci_name(pdev)); 1590 pci_name(pdev));
1591 goto err_out_regions; 1591 goto err_out_regions;
1592 } 1592 }
1593 pci_dac = 1; 1593 pci_dac = 1;
1594 } else { 1594 } else {
1595 #endif 1595 #endif
1596 rc = pci_set_dma_mask(pdev, 0xffffffffULL); 1596 rc = pci_set_dma_mask(pdev, 0xffffffffULL);
1597 if (rc) { 1597 if (rc) {
1598 printk(KERN_ERR DRV_NAME "(%s): DMA mask failure\n", 1598 printk(KERN_ERR DRV_NAME "(%s): DMA mask failure\n",
1599 pci_name(pdev)); 1599 pci_name(pdev));
1600 goto err_out_regions; 1600 goto err_out_regions;
1601 } 1601 }
1602 pci_dac = 0; 1602 pci_dac = 0;
1603 #if IF_64BIT_DMA_IS_POSSIBLE /* grrrr... */ 1603 #if IF_64BIT_DMA_IS_POSSIBLE /* grrrr... */
1604 } 1604 }
1605 #endif 1605 #endif
1606 1606
1607 host = kmalloc(sizeof(*host), GFP_KERNEL); 1607 host = kmalloc(sizeof(*host), GFP_KERNEL);
1608 if (!host) { 1608 if (!host) {
1609 printk(KERN_ERR DRV_NAME "(%s): memory alloc failure\n", 1609 printk(KERN_ERR DRV_NAME "(%s): memory alloc failure\n",
1610 pci_name(pdev)); 1610 pci_name(pdev));
1611 rc = -ENOMEM; 1611 rc = -ENOMEM;
1612 goto err_out_regions; 1612 goto err_out_regions;
1613 } 1613 }
1614 1614
1615 memset(host, 0, sizeof(*host)); 1615 memset(host, 0, sizeof(*host));
1616 host->pdev = pdev; 1616 host->pdev = pdev;
1617 host->flags = pci_dac ? FL_DAC : 0; 1617 host->flags = pci_dac ? FL_DAC : 0;
1618 spin_lock_init(&host->lock); 1618 spin_lock_init(&host->lock);
1619 INIT_WORK(&host->fsm_task, carm_fsm_task, host); 1619 INIT_WORK(&host->fsm_task, carm_fsm_task, host);
1620 init_MUTEX_LOCKED(&host->probe_sem); 1620 init_MUTEX_LOCKED(&host->probe_sem);
1621 1621
1622 for (i = 0; i < ARRAY_SIZE(host->req); i++) 1622 for (i = 0; i < ARRAY_SIZE(host->req); i++)
1623 host->req[i].tag = i; 1623 host->req[i].tag = i;
1624 1624
1625 host->mmio = ioremap(pci_resource_start(pdev, 0), 1625 host->mmio = ioremap(pci_resource_start(pdev, 0),
1626 pci_resource_len(pdev, 0)); 1626 pci_resource_len(pdev, 0));
1627 if (!host->mmio) { 1627 if (!host->mmio) {
1628 printk(KERN_ERR DRV_NAME "(%s): MMIO alloc failure\n", 1628 printk(KERN_ERR DRV_NAME "(%s): MMIO alloc failure\n",
1629 pci_name(pdev)); 1629 pci_name(pdev));
1630 rc = -ENOMEM; 1630 rc = -ENOMEM;
1631 goto err_out_kfree; 1631 goto err_out_kfree;
1632 } 1632 }
1633 1633
1634 rc = carm_init_shm(host); 1634 rc = carm_init_shm(host);
1635 if (rc) { 1635 if (rc) {
1636 printk(KERN_ERR DRV_NAME "(%s): DMA SHM alloc failure\n", 1636 printk(KERN_ERR DRV_NAME "(%s): DMA SHM alloc failure\n",
1637 pci_name(pdev)); 1637 pci_name(pdev));
1638 goto err_out_iounmap; 1638 goto err_out_iounmap;
1639 } 1639 }
1640 1640
1641 q = blk_init_queue(carm_oob_rq_fn, &host->lock); 1641 q = blk_init_queue(carm_oob_rq_fn, &host->lock);
1642 if (!q) { 1642 if (!q) {
1643 printk(KERN_ERR DRV_NAME "(%s): OOB queue alloc failure\n", 1643 printk(KERN_ERR DRV_NAME "(%s): OOB queue alloc failure\n",
1644 pci_name(pdev)); 1644 pci_name(pdev));
1645 rc = -ENOMEM; 1645 rc = -ENOMEM;
1646 goto err_out_pci_free; 1646 goto err_out_pci_free;
1647 } 1647 }
1648 host->oob_q = q; 1648 host->oob_q = q;
1649 q->queuedata = host; 1649 q->queuedata = host;
1650 1650
1651 /* 1651 /*
1652 * Figure out which major to use: 160, 161, or dynamic 1652 * Figure out which major to use: 160, 161, or dynamic
1653 */ 1653 */
1654 if (!test_and_set_bit(0, &carm_major_alloc)) 1654 if (!test_and_set_bit(0, &carm_major_alloc))
1655 host->major = 160; 1655 host->major = 160;
1656 else if (!test_and_set_bit(1, &carm_major_alloc)) 1656 else if (!test_and_set_bit(1, &carm_major_alloc))
1657 host->major = 161; 1657 host->major = 161;
1658 else 1658 else
1659 host->flags |= FL_DYN_MAJOR; 1659 host->flags |= FL_DYN_MAJOR;
1660 1660
1661 host->id = carm_host_id; 1661 host->id = carm_host_id;
1662 sprintf(host->name, DRV_NAME "%d", carm_host_id); 1662 sprintf(host->name, DRV_NAME "%d", carm_host_id);
1663 1663
1664 rc = register_blkdev(host->major, host->name); 1664 rc = register_blkdev(host->major, host->name);
1665 if (rc < 0) 1665 if (rc < 0)
1666 goto err_out_free_majors; 1666 goto err_out_free_majors;
1667 if (host->flags & FL_DYN_MAJOR) 1667 if (host->flags & FL_DYN_MAJOR)
1668 host->major = rc; 1668 host->major = rc;
1669 1669
1670 devfs_mk_dir(DRV_NAME); 1670 devfs_mk_dir(DRV_NAME);
1671 1671
1672 rc = carm_init_disks(host); 1672 rc = carm_init_disks(host);
1673 if (rc) 1673 if (rc)
1674 goto err_out_blkdev_disks; 1674 goto err_out_blkdev_disks;
1675 1675
1676 pci_set_master(pdev); 1676 pci_set_master(pdev);
1677 1677
1678 rc = request_irq(pdev->irq, carm_interrupt, SA_SHIRQ, DRV_NAME, host); 1678 rc = request_irq(pdev->irq, carm_interrupt, SA_SHIRQ, DRV_NAME, host);
1679 if (rc) { 1679 if (rc) {
1680 printk(KERN_ERR DRV_NAME "(%s): irq alloc failure\n", 1680 printk(KERN_ERR DRV_NAME "(%s): irq alloc failure\n",
1681 pci_name(pdev)); 1681 pci_name(pdev));
1682 goto err_out_blkdev_disks; 1682 goto err_out_blkdev_disks;
1683 } 1683 }
1684 1684
1685 rc = carm_init_host(host); 1685 rc = carm_init_host(host);
1686 if (rc) 1686 if (rc)
1687 goto err_out_free_irq; 1687 goto err_out_free_irq;
1688 1688
1689 DPRINTK("waiting for probe_sem\n"); 1689 DPRINTK("waiting for probe_sem\n");
1690 down(&host->probe_sem); 1690 down(&host->probe_sem);
1691 1691
1692 printk(KERN_INFO "%s: pci %s, ports %d, io %lx, irq %u, major %d\n", 1692 printk(KERN_INFO "%s: pci %s, ports %d, io %lx, irq %u, major %d\n",
1693 host->name, pci_name(pdev), (int) CARM_MAX_PORTS, 1693 host->name, pci_name(pdev), (int) CARM_MAX_PORTS,
1694 pci_resource_start(pdev, 0), pdev->irq, host->major); 1694 pci_resource_start(pdev, 0), pdev->irq, host->major);
1695 1695
1696 carm_host_id++; 1696 carm_host_id++;
1697 pci_set_drvdata(pdev, host); 1697 pci_set_drvdata(pdev, host);
1698 return 0; 1698 return 0;
1699 1699
1700 err_out_free_irq: 1700 err_out_free_irq:
1701 free_irq(pdev->irq, host); 1701 free_irq(pdev->irq, host);
1702 err_out_blkdev_disks: 1702 err_out_blkdev_disks:
1703 carm_free_disks(host); 1703 carm_free_disks(host);
1704 unregister_blkdev(host->major, host->name); 1704 unregister_blkdev(host->major, host->name);
1705 err_out_free_majors: 1705 err_out_free_majors:
1706 if (host->major == 160) 1706 if (host->major == 160)
1707 clear_bit(0, &carm_major_alloc); 1707 clear_bit(0, &carm_major_alloc);
1708 else if (host->major == 161) 1708 else if (host->major == 161)
1709 clear_bit(1, &carm_major_alloc); 1709 clear_bit(1, &carm_major_alloc);
1710 blk_cleanup_queue(host->oob_q); 1710 blk_cleanup_queue(host->oob_q);
1711 err_out_pci_free: 1711 err_out_pci_free:
1712 pci_free_consistent(pdev, CARM_SHM_SIZE, host->shm, host->shm_dma); 1712 pci_free_consistent(pdev, CARM_SHM_SIZE, host->shm, host->shm_dma);
1713 err_out_iounmap: 1713 err_out_iounmap:
1714 iounmap(host->mmio); 1714 iounmap(host->mmio);
1715 err_out_kfree: 1715 err_out_kfree:
1716 kfree(host); 1716 kfree(host);
1717 err_out_regions: 1717 err_out_regions:
1718 pci_release_regions(pdev); 1718 pci_release_regions(pdev);
1719 err_out: 1719 err_out:
1720 pci_disable_device(pdev); 1720 pci_disable_device(pdev);
1721 return rc; 1721 return rc;
1722 } 1722 }
1723 1723
1724 static void carm_remove_one (struct pci_dev *pdev) 1724 static void carm_remove_one (struct pci_dev *pdev)
1725 { 1725 {
1726 struct carm_host *host = pci_get_drvdata(pdev); 1726 struct carm_host *host = pci_get_drvdata(pdev);
1727 1727
1728 if (!host) { 1728 if (!host) {
1729 printk(KERN_ERR PFX "BUG: no host data for PCI(%s)\n", 1729 printk(KERN_ERR PFX "BUG: no host data for PCI(%s)\n",
1730 pci_name(pdev)); 1730 pci_name(pdev));
1731 return; 1731 return;
1732 } 1732 }
1733 1733
1734 free_irq(pdev->irq, host); 1734 free_irq(pdev->irq, host);
1735 carm_free_disks(host); 1735 carm_free_disks(host);
1736 devfs_remove(DRV_NAME); 1736 devfs_remove(DRV_NAME);
1737 unregister_blkdev(host->major, host->name); 1737 unregister_blkdev(host->major, host->name);
1738 if (host->major == 160) 1738 if (host->major == 160)
1739 clear_bit(0, &carm_major_alloc); 1739 clear_bit(0, &carm_major_alloc);
1740 else if (host->major == 161) 1740 else if (host->major == 161)
1741 clear_bit(1, &carm_major_alloc); 1741 clear_bit(1, &carm_major_alloc);
1742 blk_cleanup_queue(host->oob_q); 1742 blk_cleanup_queue(host->oob_q);
1743 pci_free_consistent(pdev, CARM_SHM_SIZE, host->shm, host->shm_dma); 1743 pci_free_consistent(pdev, CARM_SHM_SIZE, host->shm, host->shm_dma);
1744 iounmap(host->mmio); 1744 iounmap(host->mmio);
1745 kfree(host); 1745 kfree(host);
1746 pci_release_regions(pdev); 1746 pci_release_regions(pdev);
1747 pci_disable_device(pdev); 1747 pci_disable_device(pdev);
1748 pci_set_drvdata(pdev, NULL); 1748 pci_set_drvdata(pdev, NULL);
1749 } 1749 }
1750 1750
1751 static int __init carm_init(void) 1751 static int __init carm_init(void)
1752 { 1752 {
1753 return pci_module_init(&carm_driver); 1753 return pci_module_init(&carm_driver);
1754 } 1754 }
1755 1755
1756 static void __exit carm_exit(void) 1756 static void __exit carm_exit(void)
1757 { 1757 {
1758 pci_unregister_driver(&carm_driver); 1758 pci_unregister_driver(&carm_driver);
1759 } 1759 }
1760 1760
1761 module_init(carm_init); 1761 module_init(carm_init);
1762 module_exit(carm_exit); 1762 module_exit(carm_exit);
1763 1763
1764 1764
1765 1765
drivers/scsi/scsi_lib.c
1 /* 1 /*
2 * scsi_lib.c Copyright (C) 1999 Eric Youngdale 2 * scsi_lib.c Copyright (C) 1999 Eric Youngdale
3 * 3 *
4 * SCSI queueing library. 4 * SCSI queueing library.
5 * Initial versions: Eric Youngdale (eric@andante.org). 5 * Initial versions: Eric Youngdale (eric@andante.org).
6 * Based upon conversations with large numbers 6 * Based upon conversations with large numbers
7 * of people at Linux Expo. 7 * of people at Linux Expo.
8 */ 8 */
9 9
10 #include <linux/bio.h> 10 #include <linux/bio.h>
11 #include <linux/blkdev.h> 11 #include <linux/blkdev.h>
12 #include <linux/completion.h> 12 #include <linux/completion.h>
13 #include <linux/kernel.h> 13 #include <linux/kernel.h>
14 #include <linux/mempool.h> 14 #include <linux/mempool.h>
15 #include <linux/slab.h> 15 #include <linux/slab.h>
16 #include <linux/init.h> 16 #include <linux/init.h>
17 #include <linux/pci.h> 17 #include <linux/pci.h>
18 #include <linux/delay.h> 18 #include <linux/delay.h>
19 19
20 #include <scsi/scsi.h> 20 #include <scsi/scsi.h>
21 #include <scsi/scsi_dbg.h> 21 #include <scsi/scsi_dbg.h>
22 #include <scsi/scsi_device.h> 22 #include <scsi/scsi_device.h>
23 #include <scsi/scsi_driver.h> 23 #include <scsi/scsi_driver.h>
24 #include <scsi/scsi_eh.h> 24 #include <scsi/scsi_eh.h>
25 #include <scsi/scsi_host.h> 25 #include <scsi/scsi_host.h>
26 #include <scsi/scsi_request.h> 26 #include <scsi/scsi_request.h>
27 27
28 #include "scsi_priv.h" 28 #include "scsi_priv.h"
29 #include "scsi_logging.h" 29 #include "scsi_logging.h"
30 30
31 31
32 #define SG_MEMPOOL_NR (sizeof(scsi_sg_pools)/sizeof(struct scsi_host_sg_pool)) 32 #define SG_MEMPOOL_NR (sizeof(scsi_sg_pools)/sizeof(struct scsi_host_sg_pool))
33 #define SG_MEMPOOL_SIZE 32 33 #define SG_MEMPOOL_SIZE 32
34 34
35 struct scsi_host_sg_pool { 35 struct scsi_host_sg_pool {
36 size_t size; 36 size_t size;
37 char *name; 37 char *name;
38 kmem_cache_t *slab; 38 kmem_cache_t *slab;
39 mempool_t *pool; 39 mempool_t *pool;
40 }; 40 };
41 41
42 #if (SCSI_MAX_PHYS_SEGMENTS < 32) 42 #if (SCSI_MAX_PHYS_SEGMENTS < 32)
43 #error SCSI_MAX_PHYS_SEGMENTS is too small 43 #error SCSI_MAX_PHYS_SEGMENTS is too small
44 #endif 44 #endif
45 45
46 #define SP(x) { x, "sgpool-" #x } 46 #define SP(x) { x, "sgpool-" #x }
47 struct scsi_host_sg_pool scsi_sg_pools[] = { 47 struct scsi_host_sg_pool scsi_sg_pools[] = {
48 SP(8), 48 SP(8),
49 SP(16), 49 SP(16),
50 SP(32), 50 SP(32),
51 #if (SCSI_MAX_PHYS_SEGMENTS > 32) 51 #if (SCSI_MAX_PHYS_SEGMENTS > 32)
52 SP(64), 52 SP(64),
53 #if (SCSI_MAX_PHYS_SEGMENTS > 64) 53 #if (SCSI_MAX_PHYS_SEGMENTS > 64)
54 SP(128), 54 SP(128),
55 #if (SCSI_MAX_PHYS_SEGMENTS > 128) 55 #if (SCSI_MAX_PHYS_SEGMENTS > 128)
56 SP(256), 56 SP(256),
57 #if (SCSI_MAX_PHYS_SEGMENTS > 256) 57 #if (SCSI_MAX_PHYS_SEGMENTS > 256)
58 #error SCSI_MAX_PHYS_SEGMENTS is too large 58 #error SCSI_MAX_PHYS_SEGMENTS is too large
59 #endif 59 #endif
60 #endif 60 #endif
61 #endif 61 #endif
62 #endif 62 #endif
63 }; 63 };
64 #undef SP 64 #undef SP
65 65
66 66
67 /* 67 /*
68 * Function: scsi_insert_special_req() 68 * Function: scsi_insert_special_req()
69 * 69 *
70 * Purpose: Insert pre-formed request into request queue. 70 * Purpose: Insert pre-formed request into request queue.
71 * 71 *
72 * Arguments: sreq - request that is ready to be queued. 72 * Arguments: sreq - request that is ready to be queued.
73 * at_head - boolean. True if we should insert at head 73 * at_head - boolean. True if we should insert at head
74 * of queue, false if we should insert at tail. 74 * of queue, false if we should insert at tail.
75 * 75 *
76 * Lock status: Assumed that lock is not held upon entry. 76 * Lock status: Assumed that lock is not held upon entry.
77 * 77 *
78 * Returns: Nothing 78 * Returns: Nothing
79 * 79 *
80 * Notes: This function is called from character device and from 80 * Notes: This function is called from character device and from
81 * ioctl types of functions where the caller knows exactly 81 * ioctl types of functions where the caller knows exactly
82 * what SCSI command needs to be issued. The idea is that 82 * what SCSI command needs to be issued. The idea is that
83 * we merely inject the command into the queue (at the head 83 * we merely inject the command into the queue (at the head
84 * for now), and then call the queue request function to actually 84 * for now), and then call the queue request function to actually
85 * process it. 85 * process it.
86 */ 86 */
87 int scsi_insert_special_req(struct scsi_request *sreq, int at_head) 87 int scsi_insert_special_req(struct scsi_request *sreq, int at_head)
88 { 88 {
89 /* 89 /*
90 * Because users of this function are apt to reuse requests with no 90 * Because users of this function are apt to reuse requests with no
91 * modification, we have to sanitise the request flags here 91 * modification, we have to sanitise the request flags here
92 */ 92 */
93 sreq->sr_request->flags &= ~REQ_DONTPREP; 93 sreq->sr_request->flags &= ~REQ_DONTPREP;
94 blk_insert_request(sreq->sr_device->request_queue, sreq->sr_request, 94 blk_insert_request(sreq->sr_device->request_queue, sreq->sr_request,
95 at_head, sreq, 0); 95 at_head, sreq);
96 return 0; 96 return 0;
97 } 97 }
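
The only change visible in this hunk is the blk_insert_request() call inside scsi_insert_special_req(), which loses its trailing argument and is now passed four parameters. For clarity, the call site on either side of the change, reproduced from the hunk above (the rest of the function is untouched):

	/* before this patch: a fifth, flag-style argument was passed */
	blk_insert_request(sreq->sr_device->request_queue, sreq->sr_request,
			   at_head, sreq, 0);

	/* after this patch: queue, request, insertion position and data only */
	blk_insert_request(sreq->sr_device->request_queue, sreq->sr_request,
			   at_head, sreq);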
98 98
99 /* 99 /*
100 * Function: scsi_queue_insert() 100 * Function: scsi_queue_insert()
101 * 101 *
102 * Purpose: Insert a command in the midlevel queue. 102 * Purpose: Insert a command in the midlevel queue.
103 * 103 *
104 * Arguments: cmd - command that we are adding to queue. 104 * Arguments: cmd - command that we are adding to queue.
105 * reason - why we are inserting command to queue. 105 * reason - why we are inserting command to queue.
106 * 106 *
107 * Lock status: Assumed that lock is not held upon entry. 107 * Lock status: Assumed that lock is not held upon entry.
108 * 108 *
109 * Returns: Nothing. 109 * Returns: Nothing.
110 * 110 *
111 * Notes: We do this for one of two cases. Either the host is busy 111 * Notes: We do this for one of two cases. Either the host is busy
112 * and it cannot accept any more commands for the time being, 112 * and it cannot accept any more commands for the time being,
113 * or the device returned QUEUE_FULL and can accept no more 113 * or the device returned QUEUE_FULL and can accept no more
114 * commands. 114 * commands.
115 * Notes: This could be called either from an interrupt context or a 115 * Notes: This could be called either from an interrupt context or a
116 * normal process context. 116 * normal process context.
117 */ 117 */
118 int scsi_queue_insert(struct scsi_cmnd *cmd, int reason) 118 int scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
119 { 119 {
120 struct Scsi_Host *host = cmd->device->host; 120 struct Scsi_Host *host = cmd->device->host;
121 struct scsi_device *device = cmd->device; 121 struct scsi_device *device = cmd->device;
122 122
123 SCSI_LOG_MLQUEUE(1, 123 SCSI_LOG_MLQUEUE(1,
124 printk("Inserting command %p into mlqueue\n", cmd)); 124 printk("Inserting command %p into mlqueue\n", cmd));
125 125
126 /* 126 /*
127 * We are inserting the command into the ml queue. First, we 127 * We are inserting the command into the ml queue. First, we
128 * cancel the timer, so it doesn't time out. 128 * cancel the timer, so it doesn't time out.
129 */ 129 */
130 scsi_delete_timer(cmd); 130 scsi_delete_timer(cmd);
131 131
132 /* 132 /*
133 * Next, set the appropriate busy bit for the device/host. 133 * Next, set the appropriate busy bit for the device/host.
134 * 134 *
135 * If the host/device isn't busy, assume that something actually 135 * If the host/device isn't busy, assume that something actually
136 * completed, and that we should be able to queue a command now. 136 * completed, and that we should be able to queue a command now.
137 * 137 *
138 * Note that the prior mid-layer assumption that any host could 138 * Note that the prior mid-layer assumption that any host could
139 * always queue at least one command is now broken. The mid-layer 139 * always queue at least one command is now broken. The mid-layer
140 * will implement a user specifiable stall (see 140 * will implement a user specifiable stall (see
141 * scsi_host.max_host_blocked and scsi_device.max_device_blocked) 141 * scsi_host.max_host_blocked and scsi_device.max_device_blocked)
142 * if a command is requeued with no other commands outstanding 142 * if a command is requeued with no other commands outstanding
143 * either for the device or for the host. 143 * either for the device or for the host.
144 */ 144 */
145 if (reason == SCSI_MLQUEUE_HOST_BUSY) 145 if (reason == SCSI_MLQUEUE_HOST_BUSY)
146 host->host_blocked = host->max_host_blocked; 146 host->host_blocked = host->max_host_blocked;
147 else if (reason == SCSI_MLQUEUE_DEVICE_BUSY) 147 else if (reason == SCSI_MLQUEUE_DEVICE_BUSY)
148 device->device_blocked = device->max_device_blocked; 148 device->device_blocked = device->max_device_blocked;
149 149
150 /* 150 /*
151 * Register the fact that we own the thing for now. 151 * Register the fact that we own the thing for now.
152 */ 152 */
153 cmd->state = SCSI_STATE_MLQUEUE; 153 cmd->state = SCSI_STATE_MLQUEUE;
154 cmd->owner = SCSI_OWNER_MIDLEVEL; 154 cmd->owner = SCSI_OWNER_MIDLEVEL;
155 155
156 /* 156 /*
157 * Decrement the counters, since these commands are no longer 157 * Decrement the counters, since these commands are no longer
158 * active on the host/device. 158 * active on the host/device.
159 */ 159 */
160 scsi_device_unbusy(device); 160 scsi_device_unbusy(device);
161 161
162 /* 162 /*
163 * Insert this command at the head of the queue for its device. 163 * Insert this command at the head of the queue for its device.
164 * It will go before all other commands that are already in the queue. 164 * It will go before all other commands that are already in the queue.
165 * 165 *
166 * NOTE: there is magic here about the way the queue is plugged if 166 * NOTE: there is magic here about the way the queue is plugged if
167 * we have no outstanding commands. 167 * we have no outstanding commands.
168 * 168 *
169 * Although this *doesn't* plug the queue, it does call the request 169 * Although this *doesn't* plug the queue, it does call the request
170 * function. The SCSI request function detects the blocked condition 170 * function. The SCSI request function detects the blocked condition
171 * and plugs the queue appropriately. 171 * and plugs the queue appropriately.
172 */ 172 */
173 blk_insert_request(device->request_queue, cmd->request, 1, cmd, 1); 173 blk_insert_request(device->request_queue, cmd->request, 1, cmd, 1);
174 return 0; 174 return 0;
175 } 175 }
176 176
177 /* 177 /*
178 * Function: scsi_do_req 178 * Function: scsi_do_req
179 * 179 *
180 * Purpose: Queue a SCSI request 180 * Purpose: Queue a SCSI request
181 * 181 *
182 * Arguments: sreq - command descriptor. 182 * Arguments: sreq - command descriptor.
183 * cmnd - actual SCSI command to be performed. 183 * cmnd - actual SCSI command to be performed.
184 * buffer - data buffer. 184 * buffer - data buffer.
185 * bufflen - size of data buffer. 185 * bufflen - size of data buffer.
186 * done - completion function to be run. 186 * done - completion function to be run.
187 * timeout - how long to let it run before timeout. 187 * timeout - how long to let it run before timeout.
188 * retries - number of retries we allow. 188 * retries - number of retries we allow.
189 * 189 *
190 * Lock status: No locks held upon entry. 190 * Lock status: No locks held upon entry.
191 * 191 *
192 * Returns: Nothing. 192 * Returns: Nothing.
193 * 193 *
194 * Notes: This function is only used for queueing requests for things 194 * Notes: This function is only used for queueing requests for things
195 * like ioctls and character device requests - this is because 195 * like ioctls and character device requests - this is because
196 * we essentially just inject a request into the queue for the 196 * we essentially just inject a request into the queue for the
197 * device. 197 * device.
198 * 198 *
199 * In order to support the scsi_device_quiesce function, we 199 * In order to support the scsi_device_quiesce function, we
200 * now inject requests on the *head* of the device queue 200 * now inject requests on the *head* of the device queue
201 * rather than the tail. 201 * rather than the tail.
202 */ 202 */
203 void scsi_do_req(struct scsi_request *sreq, const void *cmnd, 203 void scsi_do_req(struct scsi_request *sreq, const void *cmnd,
204 void *buffer, unsigned bufflen, 204 void *buffer, unsigned bufflen,
205 void (*done)(struct scsi_cmnd *), 205 void (*done)(struct scsi_cmnd *),
206 int timeout, int retries) 206 int timeout, int retries)
207 { 207 {
208 /* 208 /*
209 * If the upper level driver is reusing these things, then 209 * If the upper level driver is reusing these things, then
210 * we should release the low-level block now. Another one will 210 * we should release the low-level block now. Another one will
211 * be allocated later when this request is getting queued. 211 * be allocated later when this request is getting queued.
212 */ 212 */
213 __scsi_release_request(sreq); 213 __scsi_release_request(sreq);
214 214
215 /* 215 /*
216 * Our own function scsi_done (which marks the host as not busy, 216 * Our own function scsi_done (which marks the host as not busy,
217 * disables the timeout counter, etc) will be called by us or by the 217 * disables the timeout counter, etc) will be called by us or by the
218 * scsi_hosts[host].queuecommand() function needs to also call 218 * scsi_hosts[host].queuecommand() function needs to also call
219 * the completion function for the high level driver. 219 * the completion function for the high level driver.
220 */ 220 */
221 memcpy(sreq->sr_cmnd, cmnd, sizeof(sreq->sr_cmnd)); 221 memcpy(sreq->sr_cmnd, cmnd, sizeof(sreq->sr_cmnd));
222 sreq->sr_bufflen = bufflen; 222 sreq->sr_bufflen = bufflen;
223 sreq->sr_buffer = buffer; 223 sreq->sr_buffer = buffer;
224 sreq->sr_allowed = retries; 224 sreq->sr_allowed = retries;
225 sreq->sr_done = done; 225 sreq->sr_done = done;
226 sreq->sr_timeout_per_command = timeout; 226 sreq->sr_timeout_per_command = timeout;
227 227
228 if (sreq->sr_cmd_len == 0) 228 if (sreq->sr_cmd_len == 0)
229 sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]); 229 sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]);
230 230
231 /* 231 /*
232 * head injection *required* here otherwise quiesce won't work 232 * head injection *required* here otherwise quiesce won't work
233 */ 233 */
234 scsi_insert_special_req(sreq, 1); 234 scsi_insert_special_req(sreq, 1);
235 } 235 }
236 EXPORT_SYMBOL(scsi_do_req); 236 EXPORT_SYMBOL(scsi_do_req);
237 237
238 static void scsi_wait_done(struct scsi_cmnd *cmd) 238 static void scsi_wait_done(struct scsi_cmnd *cmd)
239 { 239 {
240 struct request *req = cmd->request; 240 struct request *req = cmd->request;
241 struct request_queue *q = cmd->device->request_queue; 241 struct request_queue *q = cmd->device->request_queue;
242 unsigned long flags; 242 unsigned long flags;
243 243
244 req->rq_status = RQ_SCSI_DONE; /* Busy, but indicate request done */ 244 req->rq_status = RQ_SCSI_DONE; /* Busy, but indicate request done */
245 245
246 spin_lock_irqsave(q->queue_lock, flags); 246 spin_lock_irqsave(q->queue_lock, flags);
247 if (blk_rq_tagged(req)) 247 if (blk_rq_tagged(req))
248 blk_queue_end_tag(q, req); 248 blk_queue_end_tag(q, req);
249 spin_unlock_irqrestore(q->queue_lock, flags); 249 spin_unlock_irqrestore(q->queue_lock, flags);
250 250
251 if (req->waiting) 251 if (req->waiting)
252 complete(req->waiting); 252 complete(req->waiting);
253 } 253 }
254 254
255 /* This is the end routine we get to if a command was never attached 255 /* This is the end routine we get to if a command was never attached
256 * to the request. Simply complete the request without changing 256 * to the request. Simply complete the request without changing
257 * rq_status; this will cause a DRIVER_ERROR. */ 257 * rq_status; this will cause a DRIVER_ERROR. */
258 static void scsi_wait_req_end_io(struct request *req) 258 static void scsi_wait_req_end_io(struct request *req)
259 { 259 {
260 BUG_ON(!req->waiting); 260 BUG_ON(!req->waiting);
261 261
262 complete(req->waiting); 262 complete(req->waiting);
263 } 263 }
264 264
265 void scsi_wait_req(struct scsi_request *sreq, const void *cmnd, void *buffer, 265 void scsi_wait_req(struct scsi_request *sreq, const void *cmnd, void *buffer,
266 unsigned bufflen, int timeout, int retries) 266 unsigned bufflen, int timeout, int retries)
267 { 267 {
268 DECLARE_COMPLETION(wait); 268 DECLARE_COMPLETION(wait);
269 269
270 sreq->sr_request->waiting = &wait; 270 sreq->sr_request->waiting = &wait;
271 sreq->sr_request->rq_status = RQ_SCSI_BUSY; 271 sreq->sr_request->rq_status = RQ_SCSI_BUSY;
272 sreq->sr_request->end_io = scsi_wait_req_end_io; 272 sreq->sr_request->end_io = scsi_wait_req_end_io;
273 scsi_do_req(sreq, cmnd, buffer, bufflen, scsi_wait_done, 273 scsi_do_req(sreq, cmnd, buffer, bufflen, scsi_wait_done,
274 timeout, retries); 274 timeout, retries);
275 wait_for_completion(&wait); 275 wait_for_completion(&wait);
276 sreq->sr_request->waiting = NULL; 276 sreq->sr_request->waiting = NULL;
277 if (sreq->sr_request->rq_status != RQ_SCSI_DONE) 277 if (sreq->sr_request->rq_status != RQ_SCSI_DONE)
278 sreq->sr_result |= (DRIVER_ERROR << 24); 278 sreq->sr_result |= (DRIVER_ERROR << 24);
279 279
280 __scsi_release_request(sreq); 280 __scsi_release_request(sreq);
281 } 281 }
282 EXPORT_SYMBOL(scsi_wait_req); 282 EXPORT_SYMBOL(scsi_wait_req);
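
scsi_wait_req() wraps scsi_do_req() with a completion so the caller can sleep until the command finishes, and DRIVER_ERROR is folded into sr_result when the request never got a command attached. Because scsi_do_req() copies sizeof(sreq->sr_cmnd) bytes of CDB, callers should supply a buffer of at least MAX_COMMAND_SIZE. A minimal usage sketch, assuming sreq already points to an allocated struct scsi_request for the target device (allocation and release are outside this hunk, and the timeout/retry values are arbitrary):

	unsigned char cdb[MAX_COMMAND_SIZE];

	memset(cdb, 0, sizeof(cdb));
	cdb[0] = TEST_UNIT_READY;		/* 6-byte CDB, no data transfer */

	sreq->sr_data_direction = DMA_NONE;
	scsi_wait_req(sreq, cdb, NULL, 0, 30 * HZ, 3);

	if (sreq->sr_result)			/* DRIVER_ERROR << 24 is set here if the
						 * command never reached the low-level driver */
		printk(KERN_WARNING "TEST UNIT READY failed: 0x%x\n",
		       sreq->sr_result);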
283 283
284 /* 284 /*
285 * Function: scsi_init_cmd_errh() 285 * Function: scsi_init_cmd_errh()
286 * 286 *
287 * Purpose: Initialize cmd fields related to error handling. 287 * Purpose: Initialize cmd fields related to error handling.
288 * 288 *
289 * Arguments: cmd - command that is ready to be queued. 289 * Arguments: cmd - command that is ready to be queued.
290 * 290 *
291 * Returns: Nothing 291 * Returns: Nothing
292 * 292 *
293 * Notes: This function has the job of initializing a number of 293 * Notes: This function has the job of initializing a number of
294 * fields related to error handling. Typically this will 294 * fields related to error handling. Typically this will
295 * be called once for each command, as required. 295 * be called once for each command, as required.
296 */ 296 */
297 static int scsi_init_cmd_errh(struct scsi_cmnd *cmd) 297 static int scsi_init_cmd_errh(struct scsi_cmnd *cmd)
298 { 298 {
299 cmd->owner = SCSI_OWNER_MIDLEVEL; 299 cmd->owner = SCSI_OWNER_MIDLEVEL;
300 cmd->serial_number = 0; 300 cmd->serial_number = 0;
301 cmd->abort_reason = 0; 301 cmd->abort_reason = 0;
302 302
303 memset(cmd->sense_buffer, 0, sizeof cmd->sense_buffer); 303 memset(cmd->sense_buffer, 0, sizeof cmd->sense_buffer);
304 304
305 if (cmd->cmd_len == 0) 305 if (cmd->cmd_len == 0)
306 cmd->cmd_len = COMMAND_SIZE(cmd->cmnd[0]); 306 cmd->cmd_len = COMMAND_SIZE(cmd->cmnd[0]);
307 307
308 /* 308 /*
309 * We need saved copies of a number of fields - this is because 309 * We need saved copies of a number of fields - this is because
310 * error handling may need to overwrite these with different values 310 * error handling may need to overwrite these with different values
311 * to run different commands, and once error handling is complete, 311 * to run different commands, and once error handling is complete,
312 * we will need to restore these values prior to running the actual 312 * we will need to restore these values prior to running the actual
313 * command. 313 * command.
314 */ 314 */
315 cmd->old_use_sg = cmd->use_sg; 315 cmd->old_use_sg = cmd->use_sg;
316 cmd->old_cmd_len = cmd->cmd_len; 316 cmd->old_cmd_len = cmd->cmd_len;
317 cmd->sc_old_data_direction = cmd->sc_data_direction; 317 cmd->sc_old_data_direction = cmd->sc_data_direction;
318 cmd->old_underflow = cmd->underflow; 318 cmd->old_underflow = cmd->underflow;
319 memcpy(cmd->data_cmnd, cmd->cmnd, sizeof(cmd->cmnd)); 319 memcpy(cmd->data_cmnd, cmd->cmnd, sizeof(cmd->cmnd));
320 cmd->buffer = cmd->request_buffer; 320 cmd->buffer = cmd->request_buffer;
321 cmd->bufflen = cmd->request_bufflen; 321 cmd->bufflen = cmd->request_bufflen;
322 cmd->abort_reason = 0; 322 cmd->abort_reason = 0;
323 323
324 return 1; 324 return 1;
325 } 325 }
326 326
327 /* 327 /*
328 * Function: scsi_setup_cmd_retry() 328 * Function: scsi_setup_cmd_retry()
329 * 329 *
330 * Purpose: Restore the command state for a retry 330 * Purpose: Restore the command state for a retry
331 * 331 *
332 * Arguments: cmd - command to be restored 332 * Arguments: cmd - command to be restored
333 * 333 *
334 * Returns: Nothing 334 * Returns: Nothing
335 * 335 *
336 * Notes: Immediately prior to retrying a command, we need 336 * Notes: Immediately prior to retrying a command, we need
337 * to restore certain fields that we saved above. 337 * to restore certain fields that we saved above.
338 */ 338 */
339 void scsi_setup_cmd_retry(struct scsi_cmnd *cmd) 339 void scsi_setup_cmd_retry(struct scsi_cmnd *cmd)
340 { 340 {
341 memcpy(cmd->cmnd, cmd->data_cmnd, sizeof(cmd->data_cmnd)); 341 memcpy(cmd->cmnd, cmd->data_cmnd, sizeof(cmd->data_cmnd));
342 cmd->request_buffer = cmd->buffer; 342 cmd->request_buffer = cmd->buffer;
343 cmd->request_bufflen = cmd->bufflen; 343 cmd->request_bufflen = cmd->bufflen;
344 cmd->use_sg = cmd->old_use_sg; 344 cmd->use_sg = cmd->old_use_sg;
345 cmd->cmd_len = cmd->old_cmd_len; 345 cmd->cmd_len = cmd->old_cmd_len;
346 cmd->sc_data_direction = cmd->sc_old_data_direction; 346 cmd->sc_data_direction = cmd->sc_old_data_direction;
347 cmd->underflow = cmd->old_underflow; 347 cmd->underflow = cmd->old_underflow;
348 } 348 }
349 349
350 void scsi_device_unbusy(struct scsi_device *sdev) 350 void scsi_device_unbusy(struct scsi_device *sdev)
351 { 351 {
352 struct Scsi_Host *shost = sdev->host; 352 struct Scsi_Host *shost = sdev->host;
353 unsigned long flags; 353 unsigned long flags;
354 354
355 spin_lock_irqsave(shost->host_lock, flags); 355 spin_lock_irqsave(shost->host_lock, flags);
356 shost->host_busy--; 356 shost->host_busy--;
357 if (unlikely(test_bit(SHOST_RECOVERY, &shost->shost_state) && 357 if (unlikely(test_bit(SHOST_RECOVERY, &shost->shost_state) &&
358 shost->host_failed)) 358 shost->host_failed))
359 scsi_eh_wakeup(shost); 359 scsi_eh_wakeup(shost);
360 spin_unlock(shost->host_lock); 360 spin_unlock(shost->host_lock);
361 spin_lock(sdev->request_queue->queue_lock); 361 spin_lock(sdev->request_queue->queue_lock);
362 sdev->device_busy--; 362 sdev->device_busy--;
363 spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags); 363 spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
364 } 364 }
365 365
366 /* 366 /*
367 * Called for single_lun devices on IO completion. Clear starget_sdev_user, 367 * Called for single_lun devices on IO completion. Clear starget_sdev_user,
368 * and call blk_run_queue for all the scsi_devices on the target - 368 * and call blk_run_queue for all the scsi_devices on the target -
369 * including current_sdev first. 369 * including current_sdev first.
370 * 370 *
371 * Called with *no* scsi locks held. 371 * Called with *no* scsi locks held.
372 */ 372 */
373 static void scsi_single_lun_run(struct scsi_device *current_sdev) 373 static void scsi_single_lun_run(struct scsi_device *current_sdev)
374 { 374 {
375 struct Scsi_Host *shost = current_sdev->host; 375 struct Scsi_Host *shost = current_sdev->host;
376 struct scsi_device *sdev, *tmp; 376 struct scsi_device *sdev, *tmp;
377 struct scsi_target *starget = scsi_target(current_sdev); 377 struct scsi_target *starget = scsi_target(current_sdev);
378 unsigned long flags; 378 unsigned long flags;
379 379
380 spin_lock_irqsave(shost->host_lock, flags); 380 spin_lock_irqsave(shost->host_lock, flags);
381 starget->starget_sdev_user = NULL; 381 starget->starget_sdev_user = NULL;
382 spin_unlock_irqrestore(shost->host_lock, flags); 382 spin_unlock_irqrestore(shost->host_lock, flags);
383 383
384 /* 384 /*
385 * Call blk_run_queue for all LUNs on the target, starting with 385 * Call blk_run_queue for all LUNs on the target, starting with
386 * current_sdev. We race with others (to set starget_sdev_user), 386 * current_sdev. We race with others (to set starget_sdev_user),
387 * but in most cases, we will be first. Ideally, each LU on the 387 * but in most cases, we will be first. Ideally, each LU on the
388 * target would get some limited time or requests on the target. 388 * target would get some limited time or requests on the target.
389 */ 389 */
390 blk_run_queue(current_sdev->request_queue); 390 blk_run_queue(current_sdev->request_queue);
391 391
392 spin_lock_irqsave(shost->host_lock, flags); 392 spin_lock_irqsave(shost->host_lock, flags);
393 if (starget->starget_sdev_user) 393 if (starget->starget_sdev_user)
394 goto out; 394 goto out;
395 list_for_each_entry_safe(sdev, tmp, &starget->devices, 395 list_for_each_entry_safe(sdev, tmp, &starget->devices,
396 same_target_siblings) { 396 same_target_siblings) {
397 if (sdev == current_sdev) 397 if (sdev == current_sdev)
398 continue; 398 continue;
399 if (scsi_device_get(sdev)) 399 if (scsi_device_get(sdev))
400 continue; 400 continue;
401 401
402 spin_unlock_irqrestore(shost->host_lock, flags); 402 spin_unlock_irqrestore(shost->host_lock, flags);
403 blk_run_queue(sdev->request_queue); 403 blk_run_queue(sdev->request_queue);
404 spin_lock_irqsave(shost->host_lock, flags); 404 spin_lock_irqsave(shost->host_lock, flags);
405 405
406 scsi_device_put(sdev); 406 scsi_device_put(sdev);
407 } 407 }
408 out: 408 out:
409 spin_unlock_irqrestore(shost->host_lock, flags); 409 spin_unlock_irqrestore(shost->host_lock, flags);
410 } 410 }
411 411
412 /* 412 /*
413 * Function: scsi_run_queue() 413 * Function: scsi_run_queue()
414 * 414 *
415 * Purpose: Select a proper request queue to serve next 415 * Purpose: Select a proper request queue to serve next
416 * 416 *
417 * Arguments: q - last request's queue 417 * Arguments: q - last request's queue
418 * 418 *
419 * Returns: Nothing 419 * Returns: Nothing
420 * 420 *
421 * Notes: The previous command was completely finished, start 421 * Notes: The previous command was completely finished, start
422 * a new one if possible. 422 * a new one if possible.
423 */ 423 */
424 static void scsi_run_queue(struct request_queue *q) 424 static void scsi_run_queue(struct request_queue *q)
425 { 425 {
426 struct scsi_device *sdev = q->queuedata; 426 struct scsi_device *sdev = q->queuedata;
427 struct Scsi_Host *shost = sdev->host; 427 struct Scsi_Host *shost = sdev->host;
428 unsigned long flags; 428 unsigned long flags;
429 429
430 if (sdev->single_lun) 430 if (sdev->single_lun)
431 scsi_single_lun_run(sdev); 431 scsi_single_lun_run(sdev);
432 432
433 spin_lock_irqsave(shost->host_lock, flags); 433 spin_lock_irqsave(shost->host_lock, flags);
434 while (!list_empty(&shost->starved_list) && 434 while (!list_empty(&shost->starved_list) &&
435 !shost->host_blocked && !shost->host_self_blocked && 435 !shost->host_blocked && !shost->host_self_blocked &&
436 !((shost->can_queue > 0) && 436 !((shost->can_queue > 0) &&
437 (shost->host_busy >= shost->can_queue))) { 437 (shost->host_busy >= shost->can_queue))) {
438 /* 438 /*
439 * As long as shost is accepting commands and we have 439 * As long as shost is accepting commands and we have
440 * starved queues, call blk_run_queue. scsi_request_fn 440 * starved queues, call blk_run_queue. scsi_request_fn
441 * drops the queue_lock and can add us back to the 441 * drops the queue_lock and can add us back to the
442 * starved_list. 442 * starved_list.
443 * 443 *
444 * host_lock protects the starved_list and starved_entry. 444 * host_lock protects the starved_list and starved_entry.
445 * scsi_request_fn must get the host_lock before checking 445 * scsi_request_fn must get the host_lock before checking
446 * or modifying starved_list or starved_entry. 446 * or modifying starved_list or starved_entry.
447 */ 447 */
448 sdev = list_entry(shost->starved_list.next, 448 sdev = list_entry(shost->starved_list.next,
449 struct scsi_device, starved_entry); 449 struct scsi_device, starved_entry);
450 list_del_init(&sdev->starved_entry); 450 list_del_init(&sdev->starved_entry);
451 spin_unlock_irqrestore(shost->host_lock, flags); 451 spin_unlock_irqrestore(shost->host_lock, flags);
452 452
453 blk_run_queue(sdev->request_queue); 453 blk_run_queue(sdev->request_queue);
454 454
455 spin_lock_irqsave(shost->host_lock, flags); 455 spin_lock_irqsave(shost->host_lock, flags);
456 if (unlikely(!list_empty(&sdev->starved_entry))) 456 if (unlikely(!list_empty(&sdev->starved_entry)))
457 /* 457 /*
458 * sdev lost a race, and was put back on the 458 * sdev lost a race, and was put back on the
459 * starved list. This is unlikely but without this 459 * starved list. This is unlikely but without this
460 * in theory we could loop forever. 460 * in theory we could loop forever.
461 */ 461 */
462 break; 462 break;
463 } 463 }
464 spin_unlock_irqrestore(shost->host_lock, flags); 464 spin_unlock_irqrestore(shost->host_lock, flags);
465 465
466 blk_run_queue(q); 466 blk_run_queue(q);
467 } 467 }
468 468
469 /* 469 /*
470 * Function: scsi_requeue_command() 470 * Function: scsi_requeue_command()
471 * 471 *
472 * Purpose: Handle post-processing of completed commands. 472 * Purpose: Handle post-processing of completed commands.
473 * 473 *
474 * Arguments: q - queue to operate on 474 * Arguments: q - queue to operate on
475 * cmd - command that may need to be requeued. 475 * cmd - command that may need to be requeued.
476 * 476 *
477 * Returns: Nothing 477 * Returns: Nothing
478 * 478 *
479 * Notes: After command completion, there may be blocks left 479 * Notes: After command completion, there may be blocks left
480 * over which weren't finished by the previous command; 480 * over which weren't finished by the previous command;
481 * this can be for a number of reasons - the main one is 481 * this can be for a number of reasons - the main one is
482 * I/O errors in the middle of the request, in which case 482 * I/O errors in the middle of the request, in which case
483 * we need to request the blocks that come after the bad 483 * we need to request the blocks that come after the bad
484 * sector. 484 * sector.
485 */ 485 */
486 static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd) 486 static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd)
487 { 487 {
488 cmd->request->flags &= ~REQ_DONTPREP; 488 cmd->request->flags &= ~REQ_DONTPREP;
489 blk_insert_request(q, cmd->request, 1, cmd, 1); 489 blk_insert_request(q, cmd->request, 1, cmd, 1);
490 490
491 scsi_run_queue(q); 491 scsi_run_queue(q);
492 } 492 }
493 493
494 void scsi_next_command(struct scsi_cmnd *cmd) 494 void scsi_next_command(struct scsi_cmnd *cmd)
495 { 495 {
496 struct request_queue *q = cmd->device->request_queue; 496 struct request_queue *q = cmd->device->request_queue;
497 497
498 scsi_put_command(cmd); 498 scsi_put_command(cmd);
499 scsi_run_queue(q); 499 scsi_run_queue(q);
500 } 500 }
501 501
502 void scsi_run_host_queues(struct Scsi_Host *shost) 502 void scsi_run_host_queues(struct Scsi_Host *shost)
503 { 503 {
504 struct scsi_device *sdev; 504 struct scsi_device *sdev;
505 505
506 shost_for_each_device(sdev, shost) 506 shost_for_each_device(sdev, shost)
507 scsi_run_queue(sdev->request_queue); 507 scsi_run_queue(sdev->request_queue);
508 } 508 }
509 509
510 /* 510 /*
511 * Function: scsi_end_request() 511 * Function: scsi_end_request()
512 * 512 *
513 * Purpose: Post-processing of completed commands (usually invoked at end 513 * Purpose: Post-processing of completed commands (usually invoked at end
514 * of upper level post-processing and scsi_io_completion). 514 * of upper level post-processing and scsi_io_completion).
515 * 515 *
516 * Arguments: cmd - command that is complete. 516 * Arguments: cmd - command that is complete.
517 * uptodate - 1 if I/O indicates success, <= 0 for I/O error. 517 * uptodate - 1 if I/O indicates success, <= 0 for I/O error.
518 * bytes - number of bytes of completed I/O 518 * bytes - number of bytes of completed I/O
519 * requeue - indicates whether we should requeue leftovers. 519 * requeue - indicates whether we should requeue leftovers.
520 * 520 *
521 * Lock status: Assumed that lock is not held upon entry. 521 * Lock status: Assumed that lock is not held upon entry.
522 * 522 *
523 * Returns: cmd if requeue done or required, NULL otherwise 523 * Returns: cmd if requeue done or required, NULL otherwise
524 * 524 *
525 * Notes: This is called for block device requests in order to 525 * Notes: This is called for block device requests in order to
526 * mark some number of sectors as complete. 526 * mark some number of sectors as complete.
527 * 527 *
528 * We are guaranteeing that the request queue will be goosed 528 * We are guaranteeing that the request queue will be goosed
529 * at some point during this call. 529 * at some point during this call.
530 */ 530 */
531 static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate, 531 static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate,
532 int bytes, int requeue) 532 int bytes, int requeue)
533 { 533 {
534 request_queue_t *q = cmd->device->request_queue; 534 request_queue_t *q = cmd->device->request_queue;
535 struct request *req = cmd->request; 535 struct request *req = cmd->request;
536 unsigned long flags; 536 unsigned long flags;
537 537
538 /* 538 /*
539 * If there are blocks left over at the end, set up the command 539 * If there are blocks left over at the end, set up the command
540 * to queue the remainder of them. 540 * to queue the remainder of them.
541 */ 541 */
542 if (end_that_request_chunk(req, uptodate, bytes)) { 542 if (end_that_request_chunk(req, uptodate, bytes)) {
543 int leftover = (req->hard_nr_sectors << 9); 543 int leftover = (req->hard_nr_sectors << 9);
544 544
545 if (blk_pc_request(req)) 545 if (blk_pc_request(req))
546 leftover = req->data_len; 546 leftover = req->data_len;
547 547
548 /* kill remainder if no retries */ 548 /* kill remainder if no retries */
549 if (!uptodate && blk_noretry_request(req)) 549 if (!uptodate && blk_noretry_request(req))
550 end_that_request_chunk(req, 0, leftover); 550 end_that_request_chunk(req, 0, leftover);
551 else { 551 else {
552 if (requeue) 552 if (requeue)
553 /* 553 /*
554 * Bleah. Leftovers again. Stick the 554 * Bleah. Leftovers again. Stick the
555 * leftovers in the front of the 555 * leftovers in the front of the
556 * queue, and goose the queue again. 556 * queue, and goose the queue again.
557 */ 557 */
558 scsi_requeue_command(q, cmd); 558 scsi_requeue_command(q, cmd);
559 559
560 return cmd; 560 return cmd;
561 } 561 }
562 } 562 }
563 563
564 add_disk_randomness(req->rq_disk); 564 add_disk_randomness(req->rq_disk);
565 565
566 spin_lock_irqsave(q->queue_lock, flags); 566 spin_lock_irqsave(q->queue_lock, flags);
567 if (blk_rq_tagged(req)) 567 if (blk_rq_tagged(req))
568 blk_queue_end_tag(q, req); 568 blk_queue_end_tag(q, req);
569 end_that_request_last(req); 569 end_that_request_last(req);
570 spin_unlock_irqrestore(q->queue_lock, flags); 570 spin_unlock_irqrestore(q->queue_lock, flags);
571 571
572 /* 572 /*
573 * This will goose the queue request function at the end, so we don't 573 * This will goose the queue request function at the end, so we don't
574 * need to worry about launching another command. 574 * need to worry about launching another command.
575 */ 575 */
576 scsi_next_command(cmd); 576 scsi_next_command(cmd);
577 return NULL; 577 return NULL;
578 } 578 }
579 579
580 static struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, int gfp_mask) 580 static struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, int gfp_mask)
581 { 581 {
582 struct scsi_host_sg_pool *sgp; 582 struct scsi_host_sg_pool *sgp;
583 struct scatterlist *sgl; 583 struct scatterlist *sgl;
584 584
585 BUG_ON(!cmd->use_sg); 585 BUG_ON(!cmd->use_sg);
586 586
587 switch (cmd->use_sg) { 587 switch (cmd->use_sg) {
588 case 1 ... 8: 588 case 1 ... 8:
589 cmd->sglist_len = 0; 589 cmd->sglist_len = 0;
590 break; 590 break;
591 case 9 ... 16: 591 case 9 ... 16:
592 cmd->sglist_len = 1; 592 cmd->sglist_len = 1;
593 break; 593 break;
594 case 17 ... 32: 594 case 17 ... 32:
595 cmd->sglist_len = 2; 595 cmd->sglist_len = 2;
596 break; 596 break;
597 #if (SCSI_MAX_PHYS_SEGMENTS > 32) 597 #if (SCSI_MAX_PHYS_SEGMENTS > 32)
598 case 33 ... 64: 598 case 33 ... 64:
599 cmd->sglist_len = 3; 599 cmd->sglist_len = 3;
600 break; 600 break;
601 #if (SCSI_MAX_PHYS_SEGMENTS > 64) 601 #if (SCSI_MAX_PHYS_SEGMENTS > 64)
602 case 65 ... 128: 602 case 65 ... 128:
603 cmd->sglist_len = 4; 603 cmd->sglist_len = 4;
604 break; 604 break;
605 #if (SCSI_MAX_PHYS_SEGMENTS > 128) 605 #if (SCSI_MAX_PHYS_SEGMENTS > 128)
606 case 129 ... 256: 606 case 129 ... 256:
607 cmd->sglist_len = 5; 607 cmd->sglist_len = 5;
608 break; 608 break;
609 #endif 609 #endif
610 #endif 610 #endif
611 #endif 611 #endif
612 default: 612 default:
613 return NULL; 613 return NULL;
614 } 614 }
615 615
616 sgp = scsi_sg_pools + cmd->sglist_len; 616 sgp = scsi_sg_pools + cmd->sglist_len;
617 sgl = mempool_alloc(sgp->pool, gfp_mask); 617 sgl = mempool_alloc(sgp->pool, gfp_mask);
618 if (sgl) 618 if (sgl)
619 memset(sgl, 0, sgp->size); 619 memset(sgl, 0, sgp->size);
620 return sgl; 620 return sgl;
621 } 621 }
622 622
623 static void scsi_free_sgtable(struct scatterlist *sgl, int index) 623 static void scsi_free_sgtable(struct scatterlist *sgl, int index)
624 { 624 {
625 struct scsi_host_sg_pool *sgp; 625 struct scsi_host_sg_pool *sgp;
626 626
627 BUG_ON(index > SG_MEMPOOL_NR); 627 BUG_ON(index > SG_MEMPOOL_NR);
628 628
629 sgp = scsi_sg_pools + index; 629 sgp = scsi_sg_pools + index;
630 mempool_free(sgl, sgp->pool); 630 mempool_free(sgl, sgp->pool);
631 } 631 }
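
Read together, the SP() table near the top of the file and the switch in scsi_alloc_sgtable() map a command's use_sg count to the smallest sgpool that can hold it; the chosen index is remembered in cmd->sglist_len so scsi_free_sgtable() hands the table back to the same pool. A hedged restatement of that mapping as a standalone helper (illustration only, not part of this file; it assumes all six pools are configured, i.e. SCSI_MAX_PHYS_SEGMENTS > 128):

	/* illustration: mirrors the ranges in scsi_alloc_sgtable() */
	static const unsigned int sgpool_limits[] = { 8, 16, 32, 64, 128, 256 };

	static int sgpool_index(unsigned int use_sg)	/* value kept in sglist_len */
	{
		unsigned int i;

		for (i = 0; i < ARRAY_SIZE(sgpool_limits); i++)
			if (use_sg <= sgpool_limits[i])
				return i;	/* e.g. use_sg == 40 -> 3 -> "sgpool-64" */
		return -1;			/* exceeds SCSI_MAX_PHYS_SEGMENTS */
	}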
632 632
633 /* 633 /*
634 * Function: scsi_release_buffers() 634 * Function: scsi_release_buffers()
635 * 635 *
636 * Purpose: Completion processing for block device I/O requests. 636 * Purpose: Completion processing for block device I/O requests.
637 * 637 *
638 * Arguments: cmd - command that we are bailing. 638 * Arguments: cmd - command that we are bailing.
639 * 639 *
640 * Lock status: Assumed that no lock is held upon entry. 640 * Lock status: Assumed that no lock is held upon entry.
641 * 641 *
642 * Returns: Nothing 642 * Returns: Nothing
643 * 643 *
644 * Notes: In the event that an upper level driver rejects a 644 * Notes: In the event that an upper level driver rejects a
645 * command, we must release resources allocated during 645 * command, we must release resources allocated during
646 * the __init_io() function. Primarily this would involve 646 * the __init_io() function. Primarily this would involve
647 * the scatter-gather table, and potentially any bounce 647 * the scatter-gather table, and potentially any bounce
648 * buffers. 648 * buffers.
649 */ 649 */
650 static void scsi_release_buffers(struct scsi_cmnd *cmd) 650 static void scsi_release_buffers(struct scsi_cmnd *cmd)
651 { 651 {
652 struct request *req = cmd->request; 652 struct request *req = cmd->request;
653 653
654 /* 654 /*
655 * Free up any indirection buffers we allocated for DMA purposes. 655 * Free up any indirection buffers we allocated for DMA purposes.
656 */ 656 */
657 if (cmd->use_sg) 657 if (cmd->use_sg)
658 scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len); 658 scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len);
659 else if (cmd->request_buffer != req->buffer) 659 else if (cmd->request_buffer != req->buffer)
660 kfree(cmd->request_buffer); 660 kfree(cmd->request_buffer);
661 661
662 /* 662 /*
663 * Zero these out. They now point to freed memory, and it is 663 * Zero these out. They now point to freed memory, and it is
664 * dangerous to hang onto the pointers. 664 * dangerous to hang onto the pointers.
665 */ 665 */
666 cmd->buffer = NULL; 666 cmd->buffer = NULL;
667 cmd->bufflen = 0; 667 cmd->bufflen = 0;
668 cmd->request_buffer = NULL; 668 cmd->request_buffer = NULL;
669 cmd->request_bufflen = 0; 669 cmd->request_bufflen = 0;
670 } 670 }
671 671
672 /* 672 /*
673 * Function: scsi_io_completion() 673 * Function: scsi_io_completion()
674 * 674 *
675 * Purpose: Completion processing for block device I/O requests. 675 * Purpose: Completion processing for block device I/O requests.
676 * 676 *
677 * Arguments: cmd - command that is finished. 677 * Arguments: cmd - command that is finished.
678 * 678 *
679 * Lock status: Assumed that no lock is held upon entry. 679 * Lock status: Assumed that no lock is held upon entry.
680 * 680 *
681 * Returns: Nothing 681 * Returns: Nothing
682 * 682 *
683 * Notes: This function is matched in terms of capabilities to 683 * Notes: This function is matched in terms of capabilities to
684 * the function that created the scatter-gather list. 684 * the function that created the scatter-gather list.
685 * In other words, if there are no bounce buffers 685 * In other words, if there are no bounce buffers
686 * (the normal case for most drivers), we don't need 686 * (the normal case for most drivers), we don't need
687 * the logic to deal with cleaning up afterwards. 687 * the logic to deal with cleaning up afterwards.
688 * 688 *
689 * We must do one of several things here: 689 * We must do one of several things here:
690 * 690 *
691 * a) Call scsi_end_request. This will finish off the 691 * a) Call scsi_end_request. This will finish off the
692 * specified number of sectors. If we are done, the 692 * specified number of sectors. If we are done, the
693 * command block will be released, and the queue 693 * command block will be released, and the queue
694 * function will be goosed. If we are not done, then 694 * function will be goosed. If we are not done, then
695 * scsi_end_request will directly goose the queue. 695 * scsi_end_request will directly goose the queue.
696 * 696 *
697 * b) We can just use scsi_requeue_command() here. This would 697 * b) We can just use scsi_requeue_command() here. This would
698 * be used if we just wanted to retry, for example. 698 * be used if we just wanted to retry, for example.
699 */ 699 */
700 void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes, 700 void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes,
701 unsigned int block_bytes) 701 unsigned int block_bytes)
702 { 702 {
703 int result = cmd->result; 703 int result = cmd->result;
704 int this_count = cmd->bufflen; 704 int this_count = cmd->bufflen;
705 request_queue_t *q = cmd->device->request_queue; 705 request_queue_t *q = cmd->device->request_queue;
706 struct request *req = cmd->request; 706 struct request *req = cmd->request;
707 int clear_errors = 1; 707 int clear_errors = 1;
708 struct scsi_sense_hdr sshdr; 708 struct scsi_sense_hdr sshdr;
709 int sense_valid = 0; 709 int sense_valid = 0;
710 int sense_deferred = 0; 710 int sense_deferred = 0;
711 711
712 if (blk_complete_barrier_rq(q, req, good_bytes >> 9)) 712 if (blk_complete_barrier_rq(q, req, good_bytes >> 9))
713 return; 713 return;
714 714
715 /* 715 /*
716 * Free up any indirection buffers we allocated for DMA purposes. 716 * Free up any indirection buffers we allocated for DMA purposes.
717 * For the case of a READ, we need to copy the data out of the 717 * For the case of a READ, we need to copy the data out of the
718 * bounce buffer and into the real buffer. 718 * bounce buffer and into the real buffer.
719 */ 719 */
720 if (cmd->use_sg) 720 if (cmd->use_sg)
721 scsi_free_sgtable(cmd->buffer, cmd->sglist_len); 721 scsi_free_sgtable(cmd->buffer, cmd->sglist_len);
722 else if (cmd->buffer != req->buffer) { 722 else if (cmd->buffer != req->buffer) {
723 if (rq_data_dir(req) == READ) { 723 if (rq_data_dir(req) == READ) {
724 unsigned long flags; 724 unsigned long flags;
725 char *to = bio_kmap_irq(req->bio, &flags); 725 char *to = bio_kmap_irq(req->bio, &flags);
726 memcpy(to, cmd->buffer, cmd->bufflen); 726 memcpy(to, cmd->buffer, cmd->bufflen);
727 bio_kunmap_irq(to, &flags); 727 bio_kunmap_irq(to, &flags);
728 } 728 }
729 kfree(cmd->buffer); 729 kfree(cmd->buffer);
730 } 730 }
731 731
732 if (result) { 732 if (result) {
733 sense_valid = scsi_command_normalize_sense(cmd, &sshdr); 733 sense_valid = scsi_command_normalize_sense(cmd, &sshdr);
734 if (sense_valid) 734 if (sense_valid)
735 sense_deferred = scsi_sense_is_deferred(&sshdr); 735 sense_deferred = scsi_sense_is_deferred(&sshdr);
736 } 736 }
737 if (blk_pc_request(req)) { /* SG_IO ioctl from block level */ 737 if (blk_pc_request(req)) { /* SG_IO ioctl from block level */
738 req->errors = result; 738 req->errors = result;
739 if (result) { 739 if (result) {
740 clear_errors = 0; 740 clear_errors = 0;
741 if (sense_valid && req->sense) { 741 if (sense_valid && req->sense) {
742 /* 742 /*
743 * SG_IO wants current and deferred errors 743 * SG_IO wants current and deferred errors
744 */ 744 */
745 int len = 8 + cmd->sense_buffer[7]; 745 int len = 8 + cmd->sense_buffer[7];
746 746
747 if (len > SCSI_SENSE_BUFFERSIZE) 747 if (len > SCSI_SENSE_BUFFERSIZE)
748 len = SCSI_SENSE_BUFFERSIZE; 748 len = SCSI_SENSE_BUFFERSIZE;
749 memcpy(req->sense, cmd->sense_buffer, len); 749 memcpy(req->sense, cmd->sense_buffer, len);
750 req->sense_len = len; 750 req->sense_len = len;
751 } 751 }
752 } else 752 } else
753 req->data_len = cmd->resid; 753 req->data_len = cmd->resid;
754 } 754 }
755 755
756 /* 756 /*
757 * Zero these out. They now point to freed memory, and it is 757 * Zero these out. They now point to freed memory, and it is
758 * dangerous to hang onto the pointers. 758 * dangerous to hang onto the pointers.
759 */ 759 */
760 cmd->buffer = NULL; 760 cmd->buffer = NULL;
761 cmd->bufflen = 0; 761 cmd->bufflen = 0;
762 cmd->request_buffer = NULL; 762 cmd->request_buffer = NULL;
763 cmd->request_bufflen = 0; 763 cmd->request_bufflen = 0;
764 764
765 /* 765 /*
766 * Next deal with any sectors which we were able to correctly 766 * Next deal with any sectors which we were able to correctly
767 * handle. 767 * handle.
768 */ 768 */
769 if (good_bytes >= 0) { 769 if (good_bytes >= 0) {
770 SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d bytes done.\n", 770 SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d bytes done.\n",
771 req->nr_sectors, good_bytes)); 771 req->nr_sectors, good_bytes));
772 SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n", cmd->use_sg)); 772 SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n", cmd->use_sg));
773 773
774 if (clear_errors) 774 if (clear_errors)
775 req->errors = 0; 775 req->errors = 0;
776 /* 776 /*
777 * If multiple sectors are requested in one buffer, then 777 * If multiple sectors are requested in one buffer, then
778 * they will have been finished off by the first command. 778 * they will have been finished off by the first command.
779 * If not, then we have a multi-buffer command. 779 * If not, then we have a multi-buffer command.
780 * 780 *
781 * If block_bytes != 0, it means we had a medium error 781 * If block_bytes != 0, it means we had a medium error
782 * of some sort, and that we want to mark some number of 782 * of some sort, and that we want to mark some number of
783 * sectors as not uptodate. Thus we want to inhibit 783 * sectors as not uptodate. Thus we want to inhibit
784 * requeueing right here - we will requeue down below 784 * requeueing right here - we will requeue down below
785 * when we handle the bad sectors. 785 * when we handle the bad sectors.
786 */ 786 */
787 cmd = scsi_end_request(cmd, 1, good_bytes, result == 0); 787 cmd = scsi_end_request(cmd, 1, good_bytes, result == 0);
788 788
789 /* 789 /*
790 * If the command completed without error, then either finish off the 790 * If the command completed without error, then either finish off the
791 * rest of the command, or start a new one. 791 * rest of the command, or start a new one.
792 */ 792 */
793 if (result == 0 || cmd == NULL ) { 793 if (result == 0 || cmd == NULL ) {
794 return; 794 return;
795 } 795 }
796 } 796 }
797 /* 797 /*
798 * Now, if we were good little boys and girls, Santa left us a request 798 * Now, if we were good little boys and girls, Santa left us a request
799 * sense buffer. We can extract information from this, so we 799 * sense buffer. We can extract information from this, so we
800 * can choose a block to remap, etc. 800 * can choose a block to remap, etc.
801 */ 801 */
802 if (sense_valid && !sense_deferred) { 802 if (sense_valid && !sense_deferred) {
803 switch (sshdr.sense_key) { 803 switch (sshdr.sense_key) {
804 case UNIT_ATTENTION: 804 case UNIT_ATTENTION:
805 if (cmd->device->removable) { 805 if (cmd->device->removable) {
806 /* detected disc change. set a bit 806 /* detected disc change. set a bit
807 * and quietly refuse further access. 807 * and quietly refuse further access.
808 */ 808 */
809 cmd->device->changed = 1; 809 cmd->device->changed = 1;
810 cmd = scsi_end_request(cmd, 0, 810 cmd = scsi_end_request(cmd, 0,
811 this_count, 1); 811 this_count, 1);
812 return; 812 return;
813 } else { 813 } else {
814 /* 814 /*
815 * Must have been a power glitch, or a 815 * Must have been a power glitch, or a
816 * bus reset. Could not have been a 816 * bus reset. Could not have been a
817 * media change, so we just retry the 817 * media change, so we just retry the
818 * request and see what happens. 818 * request and see what happens.
819 */ 819 */
820 scsi_requeue_command(q, cmd); 820 scsi_requeue_command(q, cmd);
821 return; 821 return;
822 } 822 }
823 break; 823 break;
824 case ILLEGAL_REQUEST: 824 case ILLEGAL_REQUEST:
825 /* 825 /*
826 * If we had an ILLEGAL REQUEST returned, then we may 826 * If we had an ILLEGAL REQUEST returned, then we may
827 * have performed an unsupported command. The only 827 * have performed an unsupported command. The only
828 * thing this should be would be a ten byte read where 828 * thing this should be would be a ten byte read where
829 * only a six byte read was supported. Also, on a 829 * only a six byte read was supported. Also, on a
830 * system where READ CAPACITY failed, we may have read 830 * system where READ CAPACITY failed, we may have read
831 * past the end of the disk. 831 * past the end of the disk.
832 */ 832 */
833 if (cmd->device->use_10_for_rw && 833 if (cmd->device->use_10_for_rw &&
834 (cmd->cmnd[0] == READ_10 || 834 (cmd->cmnd[0] == READ_10 ||
835 cmd->cmnd[0] == WRITE_10)) { 835 cmd->cmnd[0] == WRITE_10)) {
836 cmd->device->use_10_for_rw = 0; 836 cmd->device->use_10_for_rw = 0;
837 /* 837 /*
838 * This will cause a retry with a 6-byte 838 * This will cause a retry with a 6-byte
839 * command. 839 * command.
840 */ 840 */
841 scsi_requeue_command(q, cmd); 841 scsi_requeue_command(q, cmd);
842 result = 0; 842 result = 0;
843 } else { 843 } else {
844 cmd = scsi_end_request(cmd, 0, this_count, 1); 844 cmd = scsi_end_request(cmd, 0, this_count, 1);
845 return; 845 return;
846 } 846 }
847 break; 847 break;
848 case NOT_READY: 848 case NOT_READY:
849 /* 849 /*
850 * If the device is in the process of becoming ready, 850 * If the device is in the process of becoming ready,
851 * retry. 851 * retry.
852 */ 852 */
853 if (sshdr.asc == 0x04 && sshdr.ascq == 0x01) { 853 if (sshdr.asc == 0x04 && sshdr.ascq == 0x01) {
854 scsi_requeue_command(q, cmd); 854 scsi_requeue_command(q, cmd);
855 return; 855 return;
856 } 856 }
857 printk(KERN_INFO "Device %s not ready.\n", 857 printk(KERN_INFO "Device %s not ready.\n",
858 req->rq_disk ? req->rq_disk->disk_name : ""); 858 req->rq_disk ? req->rq_disk->disk_name : "");
859 cmd = scsi_end_request(cmd, 0, this_count, 1); 859 cmd = scsi_end_request(cmd, 0, this_count, 1);
860 return; 860 return;
861 case VOLUME_OVERFLOW: 861 case VOLUME_OVERFLOW:
862 printk(KERN_INFO "Volume overflow <%d %d %d %d> CDB: ", 862 printk(KERN_INFO "Volume overflow <%d %d %d %d> CDB: ",
863 cmd->device->host->host_no, 863 cmd->device->host->host_no,
864 (int)cmd->device->channel, 864 (int)cmd->device->channel,
865 (int)cmd->device->id, (int)cmd->device->lun); 865 (int)cmd->device->id, (int)cmd->device->lun);
866 __scsi_print_command(cmd->data_cmnd); 866 __scsi_print_command(cmd->data_cmnd);
867 scsi_print_sense("", cmd); 867 scsi_print_sense("", cmd);
868 cmd = scsi_end_request(cmd, 0, block_bytes, 1); 868 cmd = scsi_end_request(cmd, 0, block_bytes, 1);
869 return; 869 return;
870 default: 870 default:
871 break; 871 break;
872 } 872 }
873 } /* driver byte != 0 */ 873 } /* driver byte != 0 */
874 if (host_byte(result) == DID_RESET) { 874 if (host_byte(result) == DID_RESET) {
875 /* 875 /*
876 * Third party bus reset or reset for error 876 * Third party bus reset or reset for error
877 * recovery reasons. Just retry the request 877 * recovery reasons. Just retry the request
878 * and see what happens. 878 * and see what happens.
879 */ 879 */
880 scsi_requeue_command(q, cmd); 880 scsi_requeue_command(q, cmd);
881 return; 881 return;
882 } 882 }
883 if (result) { 883 if (result) {
884 printk(KERN_INFO "SCSI error : <%d %d %d %d> return code " 884 printk(KERN_INFO "SCSI error : <%d %d %d %d> return code "
885 "= 0x%x\n", cmd->device->host->host_no, 885 "= 0x%x\n", cmd->device->host->host_no,
886 cmd->device->channel, 886 cmd->device->channel,
887 cmd->device->id, 887 cmd->device->id,
888 cmd->device->lun, result); 888 cmd->device->lun, result);
889 889
890 if (driver_byte(result) & DRIVER_SENSE) 890 if (driver_byte(result) & DRIVER_SENSE)
891 scsi_print_sense("", cmd); 891 scsi_print_sense("", cmd);
892 /* 892 /*
893 * Mark a single buffer as not uptodate. Queue the remainder. 893 * Mark a single buffer as not uptodate. Queue the remainder.
894 * We sometimes get this cruft in the event that a medium error 894 * We sometimes get this cruft in the event that a medium error
895 * isn't properly reported. 895 * isn't properly reported.
896 */ 896 */
897 block_bytes = req->hard_cur_sectors << 9; 897 block_bytes = req->hard_cur_sectors << 9;
898 if (!block_bytes) 898 if (!block_bytes)
899 block_bytes = req->data_len; 899 block_bytes = req->data_len;
900 cmd = scsi_end_request(cmd, 0, block_bytes, 1); 900 cmd = scsi_end_request(cmd, 0, block_bytes, 1);
901 } 901 }
902 } 902 }
903 EXPORT_SYMBOL(scsi_io_completion); 903 EXPORT_SYMBOL(scsi_io_completion);
904 904
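For the SG_IO path above, the number of sense bytes copied back into the request is 8 plus the additional-sense-length byte of the fixed-format sense data, capped at the midlayer's sense buffer size. A minimal user-space sketch of that calculation (plain C; SENSE_BUFFERSIZE here is only a stand-in for the kernel's SCSI_SENSE_BUFFERSIZE):

#include <stdio.h>
#include <string.h>

#define SENSE_BUFFERSIZE 96   /* assumed bound, stands in for SCSI_SENSE_BUFFERSIZE */

static size_t sense_copy_len(const unsigned char *sense)
{
	size_t len = 8 + sense[7];            /* 8-byte header + additional sense length */

	if (len > SENSE_BUFFERSIZE)           /* never copy more than the buffer holds */
		len = SENSE_BUFFERSIZE;
	return len;
}

int main(void)
{
	/* fixed-format sense: MEDIUM ERROR, 10 additional bytes */
	unsigned char sense[SENSE_BUFFERSIZE] = { 0x70, 0, 0x03, 0, 0, 0, 0, 10 };
	unsigned char copy[SENSE_BUFFERSIZE];
	size_t len = sense_copy_len(sense);

	memcpy(copy, sense, len);
	printf("copying %zu sense bytes\n", len);   /* prints 18 */
	return 0;
}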
905 /* 905 /*
906 * Function: scsi_init_io() 906 * Function: scsi_init_io()
907 * 907 *
908 * Purpose: SCSI I/O initialize function. 908 * Purpose: SCSI I/O initialize function.
909 * 909 *
910 * Arguments: cmd - Command descriptor we wish to initialize 910 * Arguments: cmd - Command descriptor we wish to initialize
911 * 911 *
912 * Returns: 0 on success 912 * Returns: 0 on success
913 * BLKPREP_DEFER if the failure is retryable 913 * BLKPREP_DEFER if the failure is retryable
914 * BLKPREP_KILL if the failure is fatal 914 * BLKPREP_KILL if the failure is fatal
915 */ 915 */
916 static int scsi_init_io(struct scsi_cmnd *cmd) 916 static int scsi_init_io(struct scsi_cmnd *cmd)
917 { 917 {
918 struct request *req = cmd->request; 918 struct request *req = cmd->request;
919 struct scatterlist *sgpnt; 919 struct scatterlist *sgpnt;
920 int count; 920 int count;
921 921
922 /* 922 /*
923 * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer 923 * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer
924 */ 924 */
925 if ((req->flags & REQ_BLOCK_PC) && !req->bio) { 925 if ((req->flags & REQ_BLOCK_PC) && !req->bio) {
926 cmd->request_bufflen = req->data_len; 926 cmd->request_bufflen = req->data_len;
927 cmd->request_buffer = req->data; 927 cmd->request_buffer = req->data;
928 req->buffer = req->data; 928 req->buffer = req->data;
929 cmd->use_sg = 0; 929 cmd->use_sg = 0;
930 return 0; 930 return 0;
931 } 931 }
932 932
933 /* 933 /*
934 * we used to not use scatter-gather for single segment request, 934 * we used to not use scatter-gather for single segment request,
935 * but now we do (it makes highmem I/O easier to support without 935 * but now we do (it makes highmem I/O easier to support without
936 * kmapping pages) 936 * kmapping pages)
937 */ 937 */
938 cmd->use_sg = req->nr_phys_segments; 938 cmd->use_sg = req->nr_phys_segments;
939 939
940 /* 940 /*
941 * if sg table allocation fails, requeue request later. 941 * if sg table allocation fails, requeue request later.
942 */ 942 */
943 sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC); 943 sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC);
944 if (unlikely(!sgpnt)) 944 if (unlikely(!sgpnt))
945 return BLKPREP_DEFER; 945 return BLKPREP_DEFER;
946 946
947 cmd->request_buffer = (char *) sgpnt; 947 cmd->request_buffer = (char *) sgpnt;
948 cmd->request_bufflen = req->nr_sectors << 9; 948 cmd->request_bufflen = req->nr_sectors << 9;
949 if (blk_pc_request(req)) 949 if (blk_pc_request(req))
950 cmd->request_bufflen = req->data_len; 950 cmd->request_bufflen = req->data_len;
951 req->buffer = NULL; 951 req->buffer = NULL;
952 952
953 /* 953 /*
954 * Next, walk the list, and fill in the addresses and sizes of 954 * Next, walk the list, and fill in the addresses and sizes of
955 * each segment. 955 * each segment.
956 */ 956 */
957 count = blk_rq_map_sg(req->q, req, cmd->request_buffer); 957 count = blk_rq_map_sg(req->q, req, cmd->request_buffer);
958 958
959 /* 959 /*
960 * mapped well, send it off 960 * mapped well, send it off
961 */ 961 */
962 if (likely(count <= cmd->use_sg)) { 962 if (likely(count <= cmd->use_sg)) {
963 cmd->use_sg = count; 963 cmd->use_sg = count;
964 return 0; 964 return 0;
965 } 965 }
966 966
967 printk(KERN_ERR "Incorrect number of segments after building list\n"); 967 printk(KERN_ERR "Incorrect number of segments after building list\n");
968 printk(KERN_ERR "counted %d, received %d\n", count, cmd->use_sg); 968 printk(KERN_ERR "counted %d, received %d\n", count, cmd->use_sg);
969 printk(KERN_ERR "req nr_sec %lu, cur_nr_sec %u\n", req->nr_sectors, 969 printk(KERN_ERR "req nr_sec %lu, cur_nr_sec %u\n", req->nr_sectors,
970 req->current_nr_sectors); 970 req->current_nr_sectors);
971 971
972 /* release the command and kill it */ 972 /* release the command and kill it */
973 scsi_release_buffers(cmd); 973 scsi_release_buffers(cmd);
974 scsi_put_command(cmd); 974 scsi_put_command(cmd);
975 return BLKPREP_KILL; 975 return BLKPREP_KILL;
976 } 976 }
977 977
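scsi_init_io() sizes the transfer from the request itself: file-system requests use the sector count (nr_sectors << 9, i.e. 512-byte sectors), while packet commands (REQ_BLOCK_PC) carry an explicit byte count in data_len. A small stand-alone illustration of that sizing rule, using hypothetical stand-in types rather than kernel structures:

#include <stdio.h>

/* Assumed stand-ins for the request fields consulted by scsi_init_io(). */
struct fake_request {
	int           is_block_pc;   /* REQ_BLOCK_PC-style packet command? */
	unsigned long nr_sectors;    /* 512-byte sectors, fs requests      */
	unsigned int  data_len;      /* explicit byte count, packet cmds   */
};

static unsigned int transfer_bytes(const struct fake_request *req)
{
	/* fs requests: sectors are 512 bytes at this layer */
	unsigned int bytes = req->nr_sectors << 9;

	/* packet commands carry their own byte count */
	if (req->is_block_pc)
		bytes = req->data_len;
	return bytes;
}

int main(void)
{
	struct fake_request rw = { 0, 8, 0 };     /* 8 sectors -> 4096 bytes      */
	struct fake_request pc = { 1, 0, 512 };   /* packet cmd with 512-byte buf */

	printf("%u %u\n", transfer_bytes(&rw), transfer_bytes(&pc));
	return 0;
}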
978 static int scsi_prepare_flush_fn(request_queue_t *q, struct request *rq) 978 static int scsi_prepare_flush_fn(request_queue_t *q, struct request *rq)
979 { 979 {
980 struct scsi_device *sdev = q->queuedata; 980 struct scsi_device *sdev = q->queuedata;
981 struct scsi_driver *drv; 981 struct scsi_driver *drv;
982 982
983 if (sdev->sdev_state == SDEV_RUNNING) { 983 if (sdev->sdev_state == SDEV_RUNNING) {
984 drv = *(struct scsi_driver **) rq->rq_disk->private_data; 984 drv = *(struct scsi_driver **) rq->rq_disk->private_data;
985 985
986 if (drv->prepare_flush) 986 if (drv->prepare_flush)
987 return drv->prepare_flush(q, rq); 987 return drv->prepare_flush(q, rq);
988 } 988 }
989 989
990 return 0; 990 return 0;
991 } 991 }
992 992
993 static void scsi_end_flush_fn(request_queue_t *q, struct request *rq) 993 static void scsi_end_flush_fn(request_queue_t *q, struct request *rq)
994 { 994 {
995 struct scsi_device *sdev = q->queuedata; 995 struct scsi_device *sdev = q->queuedata;
996 struct request *flush_rq = rq->end_io_data; 996 struct request *flush_rq = rq->end_io_data;
997 struct scsi_driver *drv; 997 struct scsi_driver *drv;
998 998
999 if (flush_rq->errors) { 999 if (flush_rq->errors) {
1000 printk("scsi: barrier error, disabling flush support\n"); 1000 printk("scsi: barrier error, disabling flush support\n");
1001 blk_queue_ordered(q, QUEUE_ORDERED_NONE); 1001 blk_queue_ordered(q, QUEUE_ORDERED_NONE);
1002 } 1002 }
1003 1003
1004 if (sdev->sdev_state == SDEV_RUNNING) { 1004 if (sdev->sdev_state == SDEV_RUNNING) {
1005 drv = *(struct scsi_driver **) rq->rq_disk->private_data; 1005 drv = *(struct scsi_driver **) rq->rq_disk->private_data;
1006 drv->end_flush(q, rq); 1006 drv->end_flush(q, rq);
1007 } 1007 }
1008 } 1008 }
1009 1009
1010 static int scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk, 1010 static int scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk,
1011 sector_t *error_sector) 1011 sector_t *error_sector)
1012 { 1012 {
1013 struct scsi_device *sdev = q->queuedata; 1013 struct scsi_device *sdev = q->queuedata;
1014 struct scsi_driver *drv; 1014 struct scsi_driver *drv;
1015 1015
1016 if (sdev->sdev_state != SDEV_RUNNING) 1016 if (sdev->sdev_state != SDEV_RUNNING)
1017 return -ENXIO; 1017 return -ENXIO;
1018 1018
1019 drv = *(struct scsi_driver **) disk->private_data; 1019 drv = *(struct scsi_driver **) disk->private_data;
1020 if (drv->issue_flush) 1020 if (drv->issue_flush)
1021 return drv->issue_flush(&sdev->sdev_gendev, error_sector); 1021 return drv->issue_flush(&sdev->sdev_gendev, error_sector);
1022 1022
1023 return -EOPNOTSUPP; 1023 return -EOPNOTSUPP;
1024 } 1024 }
1025 1025
1026 static int scsi_prep_fn(struct request_queue *q, struct request *req) 1026 static int scsi_prep_fn(struct request_queue *q, struct request *req)
1027 { 1027 {
1028 struct scsi_device *sdev = q->queuedata; 1028 struct scsi_device *sdev = q->queuedata;
1029 struct scsi_cmnd *cmd; 1029 struct scsi_cmnd *cmd;
1030 int specials_only = 0; 1030 int specials_only = 0;
1031 1031
1032 /* 1032 /*
1033 * Just check to see if the device is online. If it isn't, we 1033 * Just check to see if the device is online. If it isn't, we
1034 * refuse to process any commands. The device must be brought 1034 * refuse to process any commands. The device must be brought
1035 * online before trying any recovery commands 1035 * online before trying any recovery commands
1036 */ 1036 */
1037 if (unlikely(!scsi_device_online(sdev))) { 1037 if (unlikely(!scsi_device_online(sdev))) {
1038 printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to offline device\n", 1038 printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to offline device\n",
1039 sdev->host->host_no, sdev->id, sdev->lun); 1039 sdev->host->host_no, sdev->id, sdev->lun);
1040 return BLKPREP_KILL; 1040 return BLKPREP_KILL;
1041 } 1041 }
1042 if (unlikely(sdev->sdev_state != SDEV_RUNNING)) { 1042 if (unlikely(sdev->sdev_state != SDEV_RUNNING)) {
1043 /* OK, we're not in a running state don't prep 1043 /* OK, we're not in a running state don't prep
1044 * user commands */ 1044 * user commands */
1045 if (sdev->sdev_state == SDEV_DEL) { 1045 if (sdev->sdev_state == SDEV_DEL) {
1046 /* Device is fully deleted, no commands 1046 /* Device is fully deleted, no commands
1047 * at all allowed down */ 1047 * at all allowed down */
1048 printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to dead device\n", 1048 printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to dead device\n",
1049 sdev->host->host_no, sdev->id, sdev->lun); 1049 sdev->host->host_no, sdev->id, sdev->lun);
1050 return BLKPREP_KILL; 1050 return BLKPREP_KILL;
1051 } 1051 }
1052 /* OK, we only allow special commands (i.e. not 1052 /* OK, we only allow special commands (i.e. not
1053 * user initiated ones */ 1053 * user initiated ones */
1054 specials_only = sdev->sdev_state; 1054 specials_only = sdev->sdev_state;
1055 } 1055 }
1056 1056
1057 /* 1057 /*
1058 * Find the actual device driver associated with this command. 1058 * Find the actual device driver associated with this command.
1059 * The SPECIAL requests are things like character device or 1059 * The SPECIAL requests are things like character device or
1060 * ioctls, which did not originate from ll_rw_blk. Note that 1060 * ioctls, which did not originate from ll_rw_blk. Note that
1061 * the special field is also used to indicate the cmd for 1061 * the special field is also used to indicate the cmd for
1062 * the remainder of a partially fulfilled request that can 1062 * the remainder of a partially fulfilled request that can
1063 * come up when there is a medium error. We have to treat 1063 * come up when there is a medium error. We have to treat
1064 * these two cases differently. We differentiate by looking 1064 * these two cases differently. We differentiate by looking
1065 * at request->cmd, as this tells us the real story. 1065 * at request->cmd, as this tells us the real story.
1066 */ 1066 */
1067 if (req->flags & REQ_SPECIAL) { 1067 if (req->flags & REQ_SPECIAL) {
1068 struct scsi_request *sreq = req->special; 1068 struct scsi_request *sreq = req->special;
1069 1069
1070 if (sreq->sr_magic == SCSI_REQ_MAGIC) { 1070 if (sreq->sr_magic == SCSI_REQ_MAGIC) {
1071 cmd = scsi_get_command(sreq->sr_device, GFP_ATOMIC); 1071 cmd = scsi_get_command(sreq->sr_device, GFP_ATOMIC);
1072 if (unlikely(!cmd)) 1072 if (unlikely(!cmd))
1073 goto defer; 1073 goto defer;
1074 scsi_init_cmd_from_req(cmd, sreq); 1074 scsi_init_cmd_from_req(cmd, sreq);
1075 } else 1075 } else
1076 cmd = req->special; 1076 cmd = req->special;
1077 } else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) { 1077 } else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
1078 1078
1079 if(unlikely(specials_only)) { 1079 if(unlikely(specials_only)) {
1080 if(specials_only == SDEV_QUIESCE || 1080 if(specials_only == SDEV_QUIESCE ||
1081 specials_only == SDEV_BLOCK) 1081 specials_only == SDEV_BLOCK)
1082 return BLKPREP_DEFER; 1082 return BLKPREP_DEFER;
1083 1083
1084 printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to device being removed\n", 1084 printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to device being removed\n",
1085 sdev->host->host_no, sdev->id, sdev->lun); 1085 sdev->host->host_no, sdev->id, sdev->lun);
1086 return BLKPREP_KILL; 1086 return BLKPREP_KILL;
1087 } 1087 }
1088 1088
1089 1089
1090 /* 1090 /*
1091 * Now try and find a command block that we can use. 1091 * Now try and find a command block that we can use.
1092 */ 1092 */
1093 if (!req->special) { 1093 if (!req->special) {
1094 cmd = scsi_get_command(sdev, GFP_ATOMIC); 1094 cmd = scsi_get_command(sdev, GFP_ATOMIC);
1095 if (unlikely(!cmd)) 1095 if (unlikely(!cmd))
1096 goto defer; 1096 goto defer;
1097 } else 1097 } else
1098 cmd = req->special; 1098 cmd = req->special;
1099 1099
1100 /* pull a tag out of the request if we have one */ 1100 /* pull a tag out of the request if we have one */
1101 cmd->tag = req->tag; 1101 cmd->tag = req->tag;
1102 } else { 1102 } else {
1103 blk_dump_rq_flags(req, "SCSI bad req"); 1103 blk_dump_rq_flags(req, "SCSI bad req");
1104 return BLKPREP_KILL; 1104 return BLKPREP_KILL;
1105 } 1105 }
1106 1106
1107 /* note the overloading of req->special. When the tag 1107 /* note the overloading of req->special. When the tag
1108 * is active it always means cmd. If the tag goes 1108 * is active it always means cmd. If the tag goes
1109 * back for re-queueing, it may be reset */ 1109 * back for re-queueing, it may be reset */
1110 req->special = cmd; 1110 req->special = cmd;
1111 cmd->request = req; 1111 cmd->request = req;
1112 1112
1113 /* 1113 /*
1114 * FIXME: drop the lock here because the functions below 1114 * FIXME: drop the lock here because the functions below
1115 * expect to be called without the queue lock held. Also, 1115 * expect to be called without the queue lock held. Also,
1116 * previously, we dequeued the request before dropping the 1116 * previously, we dequeued the request before dropping the
1117 * lock. We hope REQ_STARTED prevents anything untoward from 1117 * lock. We hope REQ_STARTED prevents anything untoward from
1118 * happening now. 1118 * happening now.
1119 */ 1119 */
1120 if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) { 1120 if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
1121 struct scsi_driver *drv; 1121 struct scsi_driver *drv;
1122 int ret; 1122 int ret;
1123 1123
1124 /* 1124 /*
1125 * This will do a couple of things: 1125 * This will do a couple of things:
1126 * 1) Fill in the actual SCSI command. 1126 * 1) Fill in the actual SCSI command.
1127 * 2) Fill in any other upper-level specific fields 1127 * 2) Fill in any other upper-level specific fields
1128 * (timeout). 1128 * (timeout).
1129 * 1129 *
1130 * If this returns 0, it means that the request failed 1130 * If this returns 0, it means that the request failed
1131 * (reading past end of disk, reading offline device, 1131 * (reading past end of disk, reading offline device,
1132 * etc). This won't actually talk to the device, but 1132 * etc). This won't actually talk to the device, but
1133 * some kinds of consistency checking may cause the 1133 * some kinds of consistency checking may cause the
1134 * request to be rejected immediately. 1134 * request to be rejected immediately.
1135 */ 1135 */
1136 1136
1137 /* 1137 /*
1138 * This sets up the scatter-gather table (allocating if 1138 * This sets up the scatter-gather table (allocating if
1139 * required). 1139 * required).
1140 */ 1140 */
1141 ret = scsi_init_io(cmd); 1141 ret = scsi_init_io(cmd);
1142 if (ret) /* BLKPREP_KILL return also releases the command */ 1142 if (ret) /* BLKPREP_KILL return also releases the command */
1143 return ret; 1143 return ret;
1144 1144
1145 /* 1145 /*
1146 * Initialize the actual SCSI command for this request. 1146 * Initialize the actual SCSI command for this request.
1147 */ 1147 */
1148 drv = *(struct scsi_driver **)req->rq_disk->private_data; 1148 drv = *(struct scsi_driver **)req->rq_disk->private_data;
1149 if (unlikely(!drv->init_command(cmd))) { 1149 if (unlikely(!drv->init_command(cmd))) {
1150 scsi_release_buffers(cmd); 1150 scsi_release_buffers(cmd);
1151 scsi_put_command(cmd); 1151 scsi_put_command(cmd);
1152 return BLKPREP_KILL; 1152 return BLKPREP_KILL;
1153 } 1153 }
1154 } 1154 }
1155 1155
1156 /* 1156 /*
1157 * The request is now prepped, no need to come back here 1157 * The request is now prepped, no need to come back here
1158 */ 1158 */
1159 req->flags |= REQ_DONTPREP; 1159 req->flags |= REQ_DONTPREP;
1160 return BLKPREP_OK; 1160 return BLKPREP_OK;
1161 1161
1162 defer: 1162 defer:
1163 /* If we defer, the elv_next_request() returns NULL, but the 1163 /* If we defer, the elv_next_request() returns NULL, but the
1164 * queue must be restarted, so we plug here if no returning 1164 * queue must be restarted, so we plug here if no returning
1165 * command will automatically do that. */ 1165 * command will automatically do that. */
1166 if (sdev->device_busy == 0) 1166 if (sdev->device_busy == 0)
1167 blk_plug_device(q); 1167 blk_plug_device(q);
1168 return BLKPREP_DEFER; 1168 return BLKPREP_DEFER;
1169 } 1169 }
1170 1170
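The prep function's return value drives what the block layer does with the request: BLKPREP_OK means it is ready to dispatch and REQ_DONTPREP keeps it from being prepped again, BLKPREP_DEFER means leave it queued and retry once resources free up (plugging the queue if nothing is in flight so it gets restarted), and BLKPREP_KILL means fail it. A toy model of that contract, with illustrative names and values rather than the kernel's definitions:

#include <stdio.h>

/* Toy model (user space) of the three prep outcomes used above; the
 * names and numeric values are illustrative, not the kernel's. */
enum prep_result { PREP_OK, PREP_KILL, PREP_DEFER };

/* A caller in the style of the request function: OK means dispatch,
 * DEFER means leave it on the queue and run the queue again later,
 * KILL means complete the request with an error. */
static const char *handle_prep(enum prep_result r)
{
	switch (r) {
	case PREP_OK:    return "dispatch to the driver";
	case PREP_DEFER: return "leave queued, restart the queue later";
	case PREP_KILL:  return "complete with an error";
	}
	return "unreachable";
}

int main(void)
{
	printf("%s\n", handle_prep(PREP_DEFER));
	return 0;
}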
1171 /* 1171 /*
1172 * scsi_dev_queue_ready: if we can send requests to sdev, return 1 else 1172 * scsi_dev_queue_ready: if we can send requests to sdev, return 1 else
1173 * return 0. 1173 * return 0.
1174 * 1174 *
1175 * Called with the queue_lock held. 1175 * Called with the queue_lock held.
1176 */ 1176 */
1177 static inline int scsi_dev_queue_ready(struct request_queue *q, 1177 static inline int scsi_dev_queue_ready(struct request_queue *q,
1178 struct scsi_device *sdev) 1178 struct scsi_device *sdev)
1179 { 1179 {
1180 if (sdev->device_busy >= sdev->queue_depth) 1180 if (sdev->device_busy >= sdev->queue_depth)
1181 return 0; 1181 return 0;
1182 if (sdev->device_busy == 0 && sdev->device_blocked) { 1182 if (sdev->device_busy == 0 && sdev->device_blocked) {
1183 /* 1183 /*
1184 * unblock after device_blocked iterates to zero 1184 * unblock after device_blocked iterates to zero
1185 */ 1185 */
1186 if (--sdev->device_blocked == 0) { 1186 if (--sdev->device_blocked == 0) {
1187 SCSI_LOG_MLQUEUE(3, 1187 SCSI_LOG_MLQUEUE(3,
1188 printk("scsi%d (%d:%d) unblocking device at" 1188 printk("scsi%d (%d:%d) unblocking device at"
1189 " zero depth\n", sdev->host->host_no, 1189 " zero depth\n", sdev->host->host_no,
1190 sdev->id, sdev->lun)); 1190 sdev->id, sdev->lun));
1191 } else { 1191 } else {
1192 blk_plug_device(q); 1192 blk_plug_device(q);
1193 return 0; 1193 return 0;
1194 } 1194 }
1195 } 1195 }
1196 if (sdev->device_blocked) 1196 if (sdev->device_blocked)
1197 return 0; 1197 return 0;
1198 1198
1199 return 1; 1199 return 1;
1200 } 1200 }
1201 1201
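The gate above combines two counters: a device is skipped while it already has queue_depth commands outstanding, and a blocked device only becomes eligible again after device_blocked idle passes have counted the block down to zero. A minimal user-space rendering of the same logic:

#include <stdio.h>

/* Stand-ins for the two counters scsi_dev_queue_ready() consults. */
struct fake_dev {
	int device_busy;      /* commands currently outstanding   */
	int queue_depth;      /* maximum allowed outstanding      */
	int device_blocked;   /* countdown before retrying at all */
};

/* 1 = may issue another command now, 0 = skip this device for now. */
static int dev_queue_ready(struct fake_dev *d)
{
	if (d->device_busy >= d->queue_depth)
		return 0;
	if (d->device_busy == 0 && d->device_blocked) {
		/* only unblock once the countdown reaches zero */
		if (--d->device_blocked != 0)
			return 0;
	}
	if (d->device_blocked)
		return 0;
	return 1;
}

int main(void)
{
	struct fake_dev d = { 0, 4, 3 };

	/* the first two idle passes only decrement the countdown */
	for (int i = 0; i < 3; i++)
		printf("pass %d: ready=%d\n", i, dev_queue_ready(&d));
	return 0;
}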
1202 /* 1202 /*
1203 * scsi_host_queue_ready: if we can send requests to shost, return 1 else 1203 * scsi_host_queue_ready: if we can send requests to shost, return 1 else
1204 * return 0. We must end up running the queue again whenever 0 is 1204 * return 0. We must end up running the queue again whenever 0 is
1205 * returned, else IO can hang. 1205 * returned, else IO can hang.
1206 * 1206 *
1207 * Called with host_lock held. 1207 * Called with host_lock held.
1208 */ 1208 */
1209 static inline int scsi_host_queue_ready(struct request_queue *q, 1209 static inline int scsi_host_queue_ready(struct request_queue *q,
1210 struct Scsi_Host *shost, 1210 struct Scsi_Host *shost,
1211 struct scsi_device *sdev) 1211 struct scsi_device *sdev)
1212 { 1212 {
1213 if (test_bit(SHOST_RECOVERY, &shost->shost_state)) 1213 if (test_bit(SHOST_RECOVERY, &shost->shost_state))
1214 return 0; 1214 return 0;
1215 if (shost->host_busy == 0 && shost->host_blocked) { 1215 if (shost->host_busy == 0 && shost->host_blocked) {
1216 /* 1216 /*
1217 * unblock after host_blocked iterates to zero 1217 * unblock after host_blocked iterates to zero
1218 */ 1218 */
1219 if (--shost->host_blocked == 0) { 1219 if (--shost->host_blocked == 0) {
1220 SCSI_LOG_MLQUEUE(3, 1220 SCSI_LOG_MLQUEUE(3,
1221 printk("scsi%d unblocking host at zero depth\n", 1221 printk("scsi%d unblocking host at zero depth\n",
1222 shost->host_no)); 1222 shost->host_no));
1223 } else { 1223 } else {
1224 blk_plug_device(q); 1224 blk_plug_device(q);
1225 return 0; 1225 return 0;
1226 } 1226 }
1227 } 1227 }
1228 if ((shost->can_queue > 0 && shost->host_busy >= shost->can_queue) || 1228 if ((shost->can_queue > 0 && shost->host_busy >= shost->can_queue) ||
1229 shost->host_blocked || shost->host_self_blocked) { 1229 shost->host_blocked || shost->host_self_blocked) {
1230 if (list_empty(&sdev->starved_entry)) 1230 if (list_empty(&sdev->starved_entry))
1231 list_add_tail(&sdev->starved_entry, &shost->starved_list); 1231 list_add_tail(&sdev->starved_entry, &shost->starved_list);
1232 return 0; 1232 return 0;
1233 } 1233 }
1234 1234
1235 /* We're OK to process the command, so we can't be starved */ 1235 /* We're OK to process the command, so we can't be starved */
1236 if (!list_empty(&sdev->starved_entry)) 1236 if (!list_empty(&sdev->starved_entry))
1237 list_del_init(&sdev->starved_entry); 1237 list_del_init(&sdev->starved_entry);
1238 1238
1239 return 1; 1239 return 1;
1240 } 1240 }
1241 1241
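When the host itself has no capacity, the device is remembered on a starved list so its queue gets run again once a command completes; when it does get through, it is taken back off the list. A rough sketch of that bookkeeping, using a plain flag array instead of the kernel's linked list:

#include <stdio.h>
#include <string.h>

#define MAXDEV 4

/* Toy "starved list": devices that found the host busy and must be
 * revisited once capacity frees up (the kernel uses a linked list). */
static int starved[MAXDEV];

static int host_queue_ready(int host_busy, int can_queue, int devidx)
{
	if (can_queue > 0 && host_busy >= can_queue) {
		starved[devidx] = 1;     /* remember to run this queue later */
		return 0;
	}
	starved[devidx] = 0;             /* we got through, so not starved */
	return 1;
}

int main(void)
{
	memset(starved, 0, sizeof(starved));
	printf("ready=%d\n", host_queue_ready(2, 2, 1));   /* host full */
	printf("ready=%d\n", host_queue_ready(1, 2, 1));   /* has room  */
	printf("dev1 starved=%d\n", starved[1]);
	return 0;
}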
1242 /* 1242 /*
1243 * Kill requests for a dead device 1243 * Kill requests for a dead device
1244 */ 1244 */
1245 static void scsi_kill_requests(request_queue_t *q) 1245 static void scsi_kill_requests(request_queue_t *q)
1246 { 1246 {
1247 struct request *req; 1247 struct request *req;
1248 1248
1249 while ((req = elv_next_request(q)) != NULL) { 1249 while ((req = elv_next_request(q)) != NULL) {
1250 blkdev_dequeue_request(req); 1250 blkdev_dequeue_request(req);
1251 req->flags |= REQ_QUIET; 1251 req->flags |= REQ_QUIET;
1252 while (end_that_request_first(req, 0, req->nr_sectors)) 1252 while (end_that_request_first(req, 0, req->nr_sectors))
1253 ; 1253 ;
1254 end_that_request_last(req); 1254 end_that_request_last(req);
1255 } 1255 }
1256 } 1256 }
1257 1257
1258 /* 1258 /*
1259 * Function: scsi_request_fn() 1259 * Function: scsi_request_fn()
1260 * 1260 *
1261 * Purpose: Main strategy routine for SCSI. 1261 * Purpose: Main strategy routine for SCSI.
1262 * 1262 *
1263 * Arguments: q - Pointer to actual queue. 1263 * Arguments: q - Pointer to actual queue.
1264 * 1264 *
1265 * Returns: Nothing 1265 * Returns: Nothing
1266 * 1266 *
1267 * Lock status: IO request lock assumed to be held when called. 1267 * Lock status: IO request lock assumed to be held when called.
1268 */ 1268 */
1269 static void scsi_request_fn(struct request_queue *q) 1269 static void scsi_request_fn(struct request_queue *q)
1270 { 1270 {
1271 struct scsi_device *sdev = q->queuedata; 1271 struct scsi_device *sdev = q->queuedata;
1272 struct Scsi_Host *shost; 1272 struct Scsi_Host *shost;
1273 struct scsi_cmnd *cmd; 1273 struct scsi_cmnd *cmd;
1274 struct request *req; 1274 struct request *req;
1275 1275
1276 if (!sdev) { 1276 if (!sdev) {
1277 printk("scsi: killing requests for dead queue\n"); 1277 printk("scsi: killing requests for dead queue\n");
1278 scsi_kill_requests(q); 1278 scsi_kill_requests(q);
1279 return; 1279 return;
1280 } 1280 }
1281 1281
1282 if(!get_device(&sdev->sdev_gendev)) 1282 if(!get_device(&sdev->sdev_gendev))
1283 /* We must be tearing the block queue down already */ 1283 /* We must be tearing the block queue down already */
1284 return; 1284 return;
1285 1285
1286 /* 1286 /*
1287 * To start with, we keep looping until the queue is empty, or until 1287 * To start with, we keep looping until the queue is empty, or until
1288 * the host is no longer able to accept any more requests. 1288 * the host is no longer able to accept any more requests.
1289 */ 1289 */
1290 shost = sdev->host; 1290 shost = sdev->host;
1291 while (!blk_queue_plugged(q)) { 1291 while (!blk_queue_plugged(q)) {
1292 int rtn; 1292 int rtn;
1293 /* 1293 /*
1294 * get next queueable request. We do this early to make sure 1294 * get next queueable request. We do this early to make sure
1295 * that the request is fully prepared even if we cannot 1295 * that the request is fully prepared even if we cannot
1296 * accept it. 1296 * accept it.
1297 */ 1297 */
1298 req = elv_next_request(q); 1298 req = elv_next_request(q);
1299 if (!req || !scsi_dev_queue_ready(q, sdev)) 1299 if (!req || !scsi_dev_queue_ready(q, sdev))
1300 break; 1300 break;
1301 1301
1302 if (unlikely(!scsi_device_online(sdev))) { 1302 if (unlikely(!scsi_device_online(sdev))) {
1303 printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to offline device\n", 1303 printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to offline device\n",
1304 sdev->host->host_no, sdev->id, sdev->lun); 1304 sdev->host->host_no, sdev->id, sdev->lun);
1305 blkdev_dequeue_request(req); 1305 blkdev_dequeue_request(req);
1306 req->flags |= REQ_QUIET; 1306 req->flags |= REQ_QUIET;
1307 while (end_that_request_first(req, 0, req->nr_sectors)) 1307 while (end_that_request_first(req, 0, req->nr_sectors))
1308 ; 1308 ;
1309 end_that_request_last(req); 1309 end_that_request_last(req);
1310 continue; 1310 continue;
1311 } 1311 }
1312 1312
1313 1313
1314 /* 1314 /*
1315 * Remove the request from the request list. 1315 * Remove the request from the request list.
1316 */ 1316 */
1317 if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req))) 1317 if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
1318 blkdev_dequeue_request(req); 1318 blkdev_dequeue_request(req);
1319 sdev->device_busy++; 1319 sdev->device_busy++;
1320 1320
1321 spin_unlock(q->queue_lock); 1321 spin_unlock(q->queue_lock);
1322 spin_lock(shost->host_lock); 1322 spin_lock(shost->host_lock);
1323 1323
1324 if (!scsi_host_queue_ready(q, shost, sdev)) 1324 if (!scsi_host_queue_ready(q, shost, sdev))
1325 goto not_ready; 1325 goto not_ready;
1326 if (sdev->single_lun) { 1326 if (sdev->single_lun) {
1327 if (scsi_target(sdev)->starget_sdev_user && 1327 if (scsi_target(sdev)->starget_sdev_user &&
1328 scsi_target(sdev)->starget_sdev_user != sdev) 1328 scsi_target(sdev)->starget_sdev_user != sdev)
1329 goto not_ready; 1329 goto not_ready;
1330 scsi_target(sdev)->starget_sdev_user = sdev; 1330 scsi_target(sdev)->starget_sdev_user = sdev;
1331 } 1331 }
1332 shost->host_busy++; 1332 shost->host_busy++;
1333 1333
1334 /* 1334 /*
1335 * XXX(hch): This is rather suboptimal, scsi_dispatch_cmd will 1335 * XXX(hch): This is rather suboptimal, scsi_dispatch_cmd will
1336 * take the lock again. 1336 * take the lock again.
1337 */ 1337 */
1338 spin_unlock_irq(shost->host_lock); 1338 spin_unlock_irq(shost->host_lock);
1339 1339
1340 cmd = req->special; 1340 cmd = req->special;
1341 if (unlikely(cmd == NULL)) { 1341 if (unlikely(cmd == NULL)) {
1342 printk(KERN_CRIT "impossible request in %s.\n" 1342 printk(KERN_CRIT "impossible request in %s.\n"
1343 "please mail a stack trace to " 1343 "please mail a stack trace to "
1344 "linux-scsi@vger.kernel.org", 1344 "linux-scsi@vger.kernel.org",
1345 __FUNCTION__); 1345 __FUNCTION__);
1346 BUG(); 1346 BUG();
1347 } 1347 }
1348 1348
1349 /* 1349 /*
1350 * Finally, initialize any error handling parameters, and set up 1350 * Finally, initialize any error handling parameters, and set up
1351 * the timers for timeouts. 1351 * the timers for timeouts.
1352 */ 1352 */
1353 scsi_init_cmd_errh(cmd); 1353 scsi_init_cmd_errh(cmd);
1354 1354
1355 /* 1355 /*
1356 * Dispatch the command to the low-level driver. 1356 * Dispatch the command to the low-level driver.
1357 */ 1357 */
1358 rtn = scsi_dispatch_cmd(cmd); 1358 rtn = scsi_dispatch_cmd(cmd);
1359 spin_lock_irq(q->queue_lock); 1359 spin_lock_irq(q->queue_lock);
1360 if(rtn) { 1360 if(rtn) {
1361 /* we're refusing the command; because of 1361 /* we're refusing the command; because of
1362 * the way locks get dropped, we need to 1362 * the way locks get dropped, we need to
1363 * check here if plugging is required */ 1363 * check here if plugging is required */
1364 if(sdev->device_busy == 0) 1364 if(sdev->device_busy == 0)
1365 blk_plug_device(q); 1365 blk_plug_device(q);
1366 1366
1367 break; 1367 break;
1368 } 1368 }
1369 } 1369 }
1370 1370
1371 goto out; 1371 goto out;
1372 1372
1373 not_ready: 1373 not_ready:
1374 spin_unlock_irq(shost->host_lock); 1374 spin_unlock_irq(shost->host_lock);
1375 1375
1376 /* 1376 /*
1377 * lock q, handle tag, requeue req, and decrement device_busy. We 1377 * lock q, handle tag, requeue req, and decrement device_busy. We
1378 * must return with queue_lock held. 1378 * must return with queue_lock held.
1379 * 1379 *
1380 * Decrementing device_busy without checking it is OK, as all such 1380 * Decrementing device_busy without checking it is OK, as all such
1381 * cases (host limits or settings) should run the queue at some 1381 * cases (host limits or settings) should run the queue at some
1382 * later time. 1382 * later time.
1383 */ 1383 */
1384 spin_lock_irq(q->queue_lock); 1384 spin_lock_irq(q->queue_lock);
1385 blk_requeue_request(q, req); 1385 blk_requeue_request(q, req);
1386 sdev->device_busy--; 1386 sdev->device_busy--;
1387 if(sdev->device_busy == 0) 1387 if(sdev->device_busy == 0)
1388 blk_plug_device(q); 1388 blk_plug_device(q);
1389 out: 1389 out:
1390 /* must be careful here...if we trigger the ->remove() function 1390 /* must be careful here...if we trigger the ->remove() function
1391 * we cannot be holding the q lock */ 1391 * we cannot be holding the q lock */
1392 spin_unlock_irq(q->queue_lock); 1392 spin_unlock_irq(q->queue_lock);
1393 put_device(&sdev->sdev_gendev); 1393 put_device(&sdev->sdev_gendev);
1394 spin_lock_irq(q->queue_lock); 1394 spin_lock_irq(q->queue_lock);
1395 } 1395 }
1396 1396
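scsi_request_fn() is the per-queue strategy loop: take the next prepared request, stop when the device or host cannot accept more work, account the command as busy, and hand it to the low-level driver; a refused command is put back and the queue is plugged so it gets retried later. A compressed, lock-free user-space sketch of that shape (all names hypothetical):

#include <stdio.h>

#define NREQ 5

struct fake_queue { int next, count; };          /* pending requests  */
struct fake_dev   { int busy, depth; };          /* per-device limits */

static int dev_ready(const struct fake_dev *d)   /* cf. scsi_dev_queue_ready */
{
	return d->busy < d->depth;
}

static int dispatch(int req)                     /* pretend low-level driver */
{
	printf("dispatched request %d\n", req);
	return 0;                                /* 0 = accepted */
}

static void request_fn(struct fake_queue *q, struct fake_dev *d)
{
	while (q->next < q->count) {
		if (!dev_ready(d))               /* device saturated: stop,  */
			break;                   /* completion restarts us   */
		int req = q->next++;             /* "dequeue" the request    */
		d->busy++;
		if (dispatch(req)) {             /* driver refused: put it   */
			q->next--;               /* back and try again later */
			d->busy--;
			break;
		}
	}
}

int main(void)
{
	struct fake_queue q = { 0, NREQ };
	struct fake_dev   d = { 0, 2 };

	request_fn(&q, &d);                      /* dispatches 2, then stops */
	return 0;
}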
1397 u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost) 1397 u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
1398 { 1398 {
1399 struct device *host_dev; 1399 struct device *host_dev;
1400 u64 bounce_limit = 0xffffffff; 1400 u64 bounce_limit = 0xffffffff;
1401 1401
1402 if (shost->unchecked_isa_dma) 1402 if (shost->unchecked_isa_dma)
1403 return BLK_BOUNCE_ISA; 1403 return BLK_BOUNCE_ISA;
1404 /* 1404 /*
1405 * Platforms with virtual-DMA translation 1405 * Platforms with virtual-DMA translation
1406 * hardware have no practical limit. 1406 * hardware have no practical limit.
1407 */ 1407 */
1408 if (!PCI_DMA_BUS_IS_PHYS) 1408 if (!PCI_DMA_BUS_IS_PHYS)
1409 return BLK_BOUNCE_ANY; 1409 return BLK_BOUNCE_ANY;
1410 1410
1411 host_dev = scsi_get_device(shost); 1411 host_dev = scsi_get_device(shost);
1412 if (host_dev && host_dev->dma_mask) 1412 if (host_dev && host_dev->dma_mask)
1413 bounce_limit = *host_dev->dma_mask; 1413 bounce_limit = *host_dev->dma_mask;
1414 1414
1415 return bounce_limit; 1415 return bounce_limit;
1416 } 1416 }
1417 EXPORT_SYMBOL(scsi_calculate_bounce_limit); 1417 EXPORT_SYMBOL(scsi_calculate_bounce_limit);
1418 1418
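The bounce limit defaults to 0xffffffff, so pages above 4GB get bounced, and is widened to the host device's DMA mask when one is set; ISA-DMA-only hosts are pinned to the ISA window, and platforms with virtual DMA translation need no bouncing at all. A sketch of that decision with illustrative constants (the kernel's BLK_BOUNCE_* values are per-architecture, these are just for the example):

#include <stdio.h>
#include <stdint.h>

#define EX_BOUNCE_ISA  ((uint64_t)0x00ffffff)   /* 16MB ISA DMA window  */
#define EX_BOUNCE_ANY  ((uint64_t)-1)           /* no bouncing required */

static uint64_t bounce_limit(int isa_only, int has_iommu,
			     const uint64_t *dma_mask)
{
	if (isa_only)
		return EX_BOUNCE_ISA;
	if (has_iommu)                  /* virtual DMA: no practical limit */
		return EX_BOUNCE_ANY;

	/* default: anything above 4GB gets bounced... */
	uint64_t limit = 0xffffffffULL;

	/* ...unless the host device advertises a wider DMA mask */
	if (dma_mask)
		limit = *dma_mask;
	return limit;
}

int main(void)
{
	uint64_t mask64 = 0xffffffffffffffffULL;

	printf("%llx\n", (unsigned long long)bounce_limit(0, 0, &mask64));
	return 0;
}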
1419 struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) 1419 struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
1420 { 1420 {
1421 struct Scsi_Host *shost = sdev->host; 1421 struct Scsi_Host *shost = sdev->host;
1422 struct request_queue *q; 1422 struct request_queue *q;
1423 1423
1424 q = blk_init_queue(scsi_request_fn, NULL); 1424 q = blk_init_queue(scsi_request_fn, NULL);
1425 if (!q) 1425 if (!q)
1426 return NULL; 1426 return NULL;
1427 1427
1428 blk_queue_prep_rq(q, scsi_prep_fn); 1428 blk_queue_prep_rq(q, scsi_prep_fn);
1429 1429
1430 blk_queue_max_hw_segments(q, shost->sg_tablesize); 1430 blk_queue_max_hw_segments(q, shost->sg_tablesize);
1431 blk_queue_max_phys_segments(q, SCSI_MAX_PHYS_SEGMENTS); 1431 blk_queue_max_phys_segments(q, SCSI_MAX_PHYS_SEGMENTS);
1432 blk_queue_max_sectors(q, shost->max_sectors); 1432 blk_queue_max_sectors(q, shost->max_sectors);
1433 blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost)); 1433 blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
1434 blk_queue_segment_boundary(q, shost->dma_boundary); 1434 blk_queue_segment_boundary(q, shost->dma_boundary);
1435 blk_queue_issue_flush_fn(q, scsi_issue_flush_fn); 1435 blk_queue_issue_flush_fn(q, scsi_issue_flush_fn);
1436 1436
1437 /* 1437 /*
1438 * ordered tags are superior to flush ordering 1438 * ordered tags are superior to flush ordering
1439 */ 1439 */
1440 if (shost->ordered_tag) 1440 if (shost->ordered_tag)
1441 blk_queue_ordered(q, QUEUE_ORDERED_TAG); 1441 blk_queue_ordered(q, QUEUE_ORDERED_TAG);
1442 else if (shost->ordered_flush) { 1442 else if (shost->ordered_flush) {
1443 blk_queue_ordered(q, QUEUE_ORDERED_FLUSH); 1443 blk_queue_ordered(q, QUEUE_ORDERED_FLUSH);
1444 q->prepare_flush_fn = scsi_prepare_flush_fn; 1444 q->prepare_flush_fn = scsi_prepare_flush_fn;
1445 q->end_flush_fn = scsi_end_flush_fn; 1445 q->end_flush_fn = scsi_end_flush_fn;
1446 } 1446 }
1447 1447
1448 if (!shost->use_clustering) 1448 if (!shost->use_clustering)
1449 clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); 1449 clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
1450 return q; 1450 return q;
1451 } 1451 }
1452 1452
1453 void scsi_free_queue(struct request_queue *q) 1453 void scsi_free_queue(struct request_queue *q)
1454 { 1454 {
1455 blk_cleanup_queue(q); 1455 blk_cleanup_queue(q);
1456 } 1456 }
1457 1457
1458 /* 1458 /*
1459 * Function: scsi_block_requests() 1459 * Function: scsi_block_requests()
1460 * 1460 *
1461 * Purpose: Utility function used by low-level drivers to prevent further 1461 * Purpose: Utility function used by low-level drivers to prevent further
1462 * commands from being queued to the device. 1462 * commands from being queued to the device.
1463 * 1463 *
1464 * Arguments: shost - Host in question 1464 * Arguments: shost - Host in question
1465 * 1465 *
1466 * Returns: Nothing 1466 * Returns: Nothing
1467 * 1467 *
1468 * Lock status: No locks are assumed held. 1468 * Lock status: No locks are assumed held.
1469 * 1469 *
1470 * Notes: There is no timer nor any other means by which the requests 1470 * Notes: There is no timer nor any other means by which the requests
1471 * get unblocked other than the low-level driver calling 1471 * get unblocked other than the low-level driver calling
1472 * scsi_unblock_requests(). 1472 * scsi_unblock_requests().
1473 */ 1473 */
1474 void scsi_block_requests(struct Scsi_Host *shost) 1474 void scsi_block_requests(struct Scsi_Host *shost)
1475 { 1475 {
1476 shost->host_self_blocked = 1; 1476 shost->host_self_blocked = 1;
1477 } 1477 }
1478 EXPORT_SYMBOL(scsi_block_requests); 1478 EXPORT_SYMBOL(scsi_block_requests);
1479 1479
1480 /* 1480 /*
1481 * Function: scsi_unblock_requests() 1481 * Function: scsi_unblock_requests()
1482 * 1482 *
1483 * Purpose: Utility function used by low-level drivers to allow further 1483 * Purpose: Utility function used by low-level drivers to allow further
1484 * commands from being queued to the device. 1484 * commands from being queued to the device.
1485 * 1485 *
1486 * Arguments: shost - Host in question 1486 * Arguments: shost - Host in question
1487 * 1487 *
1488 * Returns: Nothing 1488 * Returns: Nothing
1489 * 1489 *
1490 * Lock status: No locks are assumed held. 1490 * Lock status: No locks are assumed held.
1491 * 1491 *
1492 * Notes: There is no timer nor any other means by which the requests 1492 * Notes: There is no timer nor any other means by which the requests
1493 * get unblocked other than the low-level driver calling 1493 * get unblocked other than the low-level driver calling
1494 * scsi_unblock_requests(). 1494 * scsi_unblock_requests().
1495 * 1495 *
1496 * This is done as an API function so that changes to the 1496 * This is done as an API function so that changes to the
1497 * internals of the scsi mid-layer won't require wholesale 1497 * internals of the scsi mid-layer won't require wholesale
1498 * changes to drivers that use this feature. 1498 * changes to drivers that use this feature.
1499 */ 1499 */
1500 void scsi_unblock_requests(struct Scsi_Host *shost) 1500 void scsi_unblock_requests(struct Scsi_Host *shost)
1501 { 1501 {
1502 shost->host_self_blocked = 0; 1502 shost->host_self_blocked = 0;
1503 scsi_run_host_queues(shost); 1503 scsi_run_host_queues(shost);
1504 } 1504 }
1505 EXPORT_SYMBOL(scsi_unblock_requests); 1505 EXPORT_SYMBOL(scsi_unblock_requests);
1506 1506
1507 int __init scsi_init_queue(void) 1507 int __init scsi_init_queue(void)
1508 { 1508 {
1509 int i; 1509 int i;
1510 1510
1511 for (i = 0; i < SG_MEMPOOL_NR; i++) { 1511 for (i = 0; i < SG_MEMPOOL_NR; i++) {
1512 struct scsi_host_sg_pool *sgp = scsi_sg_pools + i; 1512 struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
1513 int size = sgp->size * sizeof(struct scatterlist); 1513 int size = sgp->size * sizeof(struct scatterlist);
1514 1514
1515 sgp->slab = kmem_cache_create(sgp->name, size, 0, 1515 sgp->slab = kmem_cache_create(sgp->name, size, 0,
1516 SLAB_HWCACHE_ALIGN, NULL, NULL); 1516 SLAB_HWCACHE_ALIGN, NULL, NULL);
1517 if (!sgp->slab) { 1517 if (!sgp->slab) {
1518 printk(KERN_ERR "SCSI: can't init sg slab %s\n", 1518 printk(KERN_ERR "SCSI: can't init sg slab %s\n",
1519 sgp->name); 1519 sgp->name);
1520 } 1520 }
1521 1521
1522 sgp->pool = mempool_create(SG_MEMPOOL_SIZE, 1522 sgp->pool = mempool_create(SG_MEMPOOL_SIZE,
1523 mempool_alloc_slab, mempool_free_slab, 1523 mempool_alloc_slab, mempool_free_slab,
1524 sgp->slab); 1524 sgp->slab);
1525 if (!sgp->pool) { 1525 if (!sgp->pool) {
1526 printk(KERN_ERR "SCSI: can't init sg mempool %s\n", 1526 printk(KERN_ERR "SCSI: can't init sg mempool %s\n",
1527 sgp->name); 1527 sgp->name);
1528 } 1528 }
1529 } 1529 }
1530 1530
1531 return 0; 1531 return 0;
1532 } 1532 }
1533 1533
1534 void scsi_exit_queue(void) 1534 void scsi_exit_queue(void)
1535 { 1535 {
1536 int i; 1536 int i;
1537 1537
1538 for (i = 0; i < SG_MEMPOOL_NR; i++) { 1538 for (i = 0; i < SG_MEMPOOL_NR; i++) {
1539 struct scsi_host_sg_pool *sgp = scsi_sg_pools + i; 1539 struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
1540 mempool_destroy(sgp->pool); 1540 mempool_destroy(sgp->pool);
1541 kmem_cache_destroy(sgp->slab); 1541 kmem_cache_destroy(sgp->slab);
1542 } 1542 }
1543 } 1543 }
1544 /** 1544 /**
1545 * __scsi_mode_sense - issue a mode sense, falling back from 10 to 1545 * __scsi_mode_sense - issue a mode sense, falling back from 10 to
1546 * six bytes if necessary. 1546 * six bytes if necessary.
1547 * @sreq: SCSI request to fill in with the MODE_SENSE 1547 * @sreq: SCSI request to fill in with the MODE_SENSE
1548 * @dbd: set if mode sense will allow block descriptors to be returned 1548 * @dbd: set if mode sense will allow block descriptors to be returned
1549 * @modepage: mode page being requested 1549 * @modepage: mode page being requested
1550 * @buffer: request buffer (may not be smaller than eight bytes) 1550 * @buffer: request buffer (may not be smaller than eight bytes)
1551 * @len: length of request buffer. 1551 * @len: length of request buffer.
1552 * @timeout: command timeout 1552 * @timeout: command timeout
1553 * @retries: number of retries before failing 1553 * @retries: number of retries before failing
1554 * @data: returns a structure abstracting the mode header data 1554 * @data: returns a structure abstracting the mode header data
1555 * 1555 *
1556 * Returns zero if unsuccessful, or the header offset (either 4 1556 * Returns zero if unsuccessful, or the header offset (either 4
1557 * or 8 depending on whether a six or ten byte command was 1557 * or 8 depending on whether a six or ten byte command was
1558 * issued) if successful. 1558 * issued) if successful.
1559 **/ 1559 **/
1560 int 1560 int
1561 __scsi_mode_sense(struct scsi_request *sreq, int dbd, int modepage, 1561 __scsi_mode_sense(struct scsi_request *sreq, int dbd, int modepage,
1562 unsigned char *buffer, int len, int timeout, int retries, 1562 unsigned char *buffer, int len, int timeout, int retries,
1563 struct scsi_mode_data *data) { 1563 struct scsi_mode_data *data) {
1564 unsigned char cmd[12]; 1564 unsigned char cmd[12];
1565 int use_10_for_ms; 1565 int use_10_for_ms;
1566 int header_length; 1566 int header_length;
1567 1567
1568 memset(data, 0, sizeof(*data)); 1568 memset(data, 0, sizeof(*data));
1569 memset(&cmd[0], 0, 12); 1569 memset(&cmd[0], 0, 12);
1570 cmd[1] = dbd & 0x18; /* allows DBD and LLBA bits */ 1570 cmd[1] = dbd & 0x18; /* allows DBD and LLBA bits */
1571 cmd[2] = modepage; 1571 cmd[2] = modepage;
1572 1572
1573 retry: 1573 retry:
1574 use_10_for_ms = sreq->sr_device->use_10_for_ms; 1574 use_10_for_ms = sreq->sr_device->use_10_for_ms;
1575 1575
1576 if (use_10_for_ms) { 1576 if (use_10_for_ms) {
1577 if (len < 8) 1577 if (len < 8)
1578 len = 8; 1578 len = 8;
1579 1579
1580 cmd[0] = MODE_SENSE_10; 1580 cmd[0] = MODE_SENSE_10;
1581 cmd[8] = len; 1581 cmd[8] = len;
1582 header_length = 8; 1582 header_length = 8;
1583 } else { 1583 } else {
1584 if (len < 4) 1584 if (len < 4)
1585 len = 4; 1585 len = 4;
1586 1586
1587 cmd[0] = MODE_SENSE; 1587 cmd[0] = MODE_SENSE;
1588 cmd[4] = len; 1588 cmd[4] = len;
1589 header_length = 4; 1589 header_length = 4;
1590 } 1590 }
1591 1591
1592 sreq->sr_cmd_len = 0; 1592 sreq->sr_cmd_len = 0;
1593 memset(sreq->sr_sense_buffer, 0, sizeof(sreq->sr_sense_buffer)); 1593 memset(sreq->sr_sense_buffer, 0, sizeof(sreq->sr_sense_buffer));
1594 sreq->sr_data_direction = DMA_FROM_DEVICE; 1594 sreq->sr_data_direction = DMA_FROM_DEVICE;
1595 1595
1596 memset(buffer, 0, len); 1596 memset(buffer, 0, len);
1597 1597
1598 scsi_wait_req(sreq, cmd, buffer, len, timeout, retries); 1598 scsi_wait_req(sreq, cmd, buffer, len, timeout, retries);
1599 1599
1600 /* This code looks awful: what it's doing is making sure an 1600 /* This code looks awful: what it's doing is making sure an
1601 * ILLEGAL REQUEST sense return identifies the actual command 1601 * ILLEGAL REQUEST sense return identifies the actual command
1602 * byte as the problem. MODE_SENSE commands can return 1602 * byte as the problem. MODE_SENSE commands can return
1603 * ILLEGAL REQUEST if the code page isn't supported */ 1603 * ILLEGAL REQUEST if the code page isn't supported */
1604 1604
1605 if (use_10_for_ms && !scsi_status_is_good(sreq->sr_result) && 1605 if (use_10_for_ms && !scsi_status_is_good(sreq->sr_result) &&
1606 (driver_byte(sreq->sr_result) & DRIVER_SENSE)) { 1606 (driver_byte(sreq->sr_result) & DRIVER_SENSE)) {
1607 struct scsi_sense_hdr sshdr; 1607 struct scsi_sense_hdr sshdr;
1608 1608
1609 if (scsi_request_normalize_sense(sreq, &sshdr)) { 1609 if (scsi_request_normalize_sense(sreq, &sshdr)) {
1610 if ((sshdr.sense_key == ILLEGAL_REQUEST) && 1610 if ((sshdr.sense_key == ILLEGAL_REQUEST) &&
1611 (sshdr.asc == 0x20) && (sshdr.ascq == 0)) { 1611 (sshdr.asc == 0x20) && (sshdr.ascq == 0)) {
1612 /* 1612 /*
1613 * Invalid command operation code 1613 * Invalid command operation code
1614 */ 1614 */
1615 sreq->sr_device->use_10_for_ms = 0; 1615 sreq->sr_device->use_10_for_ms = 0;
1616 goto retry; 1616 goto retry;
1617 } 1617 }
1618 } 1618 }
1619 } 1619 }
1620 1620
1621 if(scsi_status_is_good(sreq->sr_result)) { 1621 if(scsi_status_is_good(sreq->sr_result)) {
1622 data->header_length = header_length; 1622 data->header_length = header_length;
1623 if(use_10_for_ms) { 1623 if(use_10_for_ms) {
1624 data->length = buffer[0]*256 + buffer[1] + 2; 1624 data->length = buffer[0]*256 + buffer[1] + 2;
1625 data->medium_type = buffer[2]; 1625 data->medium_type = buffer[2];
1626 data->device_specific = buffer[3]; 1626 data->device_specific = buffer[3];
1627 data->longlba = buffer[4] & 0x01; 1627 data->longlba = buffer[4] & 0x01;
1628 data->block_descriptor_length = buffer[6]*256 1628 data->block_descriptor_length = buffer[6]*256
1629 + buffer[7]; 1629 + buffer[7];
1630 } else { 1630 } else {
1631 data->length = buffer[0] + 1; 1631 data->length = buffer[0] + 1;
1632 data->medium_type = buffer[1]; 1632 data->medium_type = buffer[1];
1633 data->device_specific = buffer[2]; 1633 data->device_specific = buffer[2];
1634 data->block_descriptor_length = buffer[3]; 1634 data->block_descriptor_length = buffer[3];
1635 } 1635 }
1636 } 1636 }
1637 1637
1638 return sreq->sr_result; 1638 return sreq->sr_result;
1639 } 1639 }
1640 EXPORT_SYMBOL(__scsi_mode_sense); 1640 EXPORT_SYMBOL(__scsi_mode_sense);
1641 1641
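The header parsing at the end of __scsi_mode_sense() follows the SPC layout: MODE SENSE(10) returns an 8-byte parameter header whose mode-data-length field (bytes 0-1) excludes itself, hence the +2, while MODE SENSE(6) has a 4-byte header with a one-byte length field, hence the +1. A self-contained decoder for those two header layouts:

#include <stdio.h>

/* Parsed fields, mirroring what __scsi_mode_sense() fills in. */
struct mode_data {
	int length;
	int medium_type;
	int device_specific;
	int longlba;
	int block_descriptor_length;
	int header_length;
};

/* Decode a MODE SENSE parameter header from a raw buffer.
 * ten_byte selects the MODE SENSE(10) layout (8-byte header)
 * versus the MODE SENSE(6) layout (4-byte header). */
static void parse_mode_header(const unsigned char *b, int ten_byte,
			      struct mode_data *d)
{
	if (ten_byte) {
		d->header_length = 8;
		d->length = b[0] * 256 + b[1] + 2;  /* length field excludes itself */
		d->medium_type = b[2];
		d->device_specific = b[3];
		d->longlba = b[4] & 0x01;
		d->block_descriptor_length = b[6] * 256 + b[7];
	} else {
		d->header_length = 4;
		d->length = b[0] + 1;
		d->medium_type = b[1];
		d->device_specific = b[2];
		d->block_descriptor_length = b[3];
		d->longlba = 0;
	}
}

int main(void)
{
	unsigned char hdr10[8] = { 0x00, 0x46, 0, 0, 0, 0, 0x00, 0x08 };
	struct mode_data d;

	parse_mode_header(hdr10, 1, &d);
	printf("total %d bytes, %d byte(s) of block descriptors\n",
	       d.length, d.block_descriptor_length);
	return 0;
}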
1642 /** 1642 /**
1643 * scsi_mode_sense - issue a mode sense, falling back from 10 to 1643 * scsi_mode_sense - issue a mode sense, falling back from 10 to
1644 * six bytes if necessary. 1644 * six bytes if necessary.
1645 * @sdev: scsi device to send command to. 1645 * @sdev: scsi device to send command to.
1646 * @dbd: set if mode sense will disable block descriptors in the return 1646 * @dbd: set if mode sense will disable block descriptors in the return
1647 * @modepage: mode page being requested 1647 * @modepage: mode page being requested
1648 * @buffer: request buffer (may not be smaller than eight bytes) 1648 * @buffer: request buffer (may not be smaller than eight bytes)
1649 * @len: length of request buffer. 1649 * @len: length of request buffer.
1650 * @timeout: command timeout 1650 * @timeout: command timeout
1651 * @retries: number of retries before failing 1651 * @retries: number of retries before failing
1652 * 1652 *
1653 * Returns zero if unsuccessful, or the header offset (either 4 1653 * Returns zero if unsuccessful, or the header offset (either 4
1654 * or 8 depending on whether a six or ten byte command was 1654 * or 8 depending on whether a six or ten byte command was
1655 * issued) if successful. 1655 * issued) if successful.
1656 **/ 1656 **/
1657 int 1657 int
1658 scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, 1658 scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage,
1659 unsigned char *buffer, int len, int timeout, int retries, 1659 unsigned char *buffer, int len, int timeout, int retries,
1660 struct scsi_mode_data *data) 1660 struct scsi_mode_data *data)
1661 { 1661 {
1662 struct scsi_request *sreq = scsi_allocate_request(sdev, GFP_KERNEL); 1662 struct scsi_request *sreq = scsi_allocate_request(sdev, GFP_KERNEL);
1663 int ret; 1663 int ret;
1664 1664
1665 if (!sreq) 1665 if (!sreq)
1666 return -1; 1666 return -1;
1667 1667
1668 ret = __scsi_mode_sense(sreq, dbd, modepage, buffer, len, 1668 ret = __scsi_mode_sense(sreq, dbd, modepage, buffer, len,
1669 timeout, retries, data); 1669 timeout, retries, data);
1670 1670
1671 scsi_release_request(sreq); 1671 scsi_release_request(sreq);
1672 1672
1673 return ret; 1673 return ret;
1674 } 1674 }
1675 EXPORT_SYMBOL(scsi_mode_sense); 1675 EXPORT_SYMBOL(scsi_mode_sense);
1676 1676
1677 int 1677 int
1678 scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries) 1678 scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries)
1679 { 1679 {
1680 struct scsi_request *sreq; 1680 struct scsi_request *sreq;
1681 char cmd[] = { 1681 char cmd[] = {
1682 TEST_UNIT_READY, 0, 0, 0, 0, 0, 1682 TEST_UNIT_READY, 0, 0, 0, 0, 0,
1683 }; 1683 };
1684 int result; 1684 int result;
1685 1685
1686 sreq = scsi_allocate_request(sdev, GFP_KERNEL); 1686 sreq = scsi_allocate_request(sdev, GFP_KERNEL);
1687 if (!sreq) 1687 if (!sreq)
1688 return -ENOMEM; 1688 return -ENOMEM;
1689 1689
1690 sreq->sr_data_direction = DMA_NONE; 1690 sreq->sr_data_direction = DMA_NONE;
1691 scsi_wait_req(sreq, cmd, NULL, 0, timeout, retries); 1691 scsi_wait_req(sreq, cmd, NULL, 0, timeout, retries);
1692 1692
1693 if ((driver_byte(sreq->sr_result) & DRIVER_SENSE) && sdev->removable) { 1693 if ((driver_byte(sreq->sr_result) & DRIVER_SENSE) && sdev->removable) {
1694 struct scsi_sense_hdr sshdr; 1694 struct scsi_sense_hdr sshdr;
1695 1695
1696 if ((scsi_request_normalize_sense(sreq, &sshdr)) && 1696 if ((scsi_request_normalize_sense(sreq, &sshdr)) &&
1697 ((sshdr.sense_key == UNIT_ATTENTION) || 1697 ((sshdr.sense_key == UNIT_ATTENTION) ||
1698 (sshdr.sense_key == NOT_READY))) { 1698 (sshdr.sense_key == NOT_READY))) {
1699 sdev->changed = 1; 1699 sdev->changed = 1;
1700 sreq->sr_result = 0; 1700 sreq->sr_result = 0;
1701 } 1701 }
1702 } 1702 }
1703 result = sreq->sr_result; 1703 result = sreq->sr_result;
1704 scsi_release_request(sreq); 1704 scsi_release_request(sreq);
1705 return result; 1705 return result;
1706 } 1706 }
1707 EXPORT_SYMBOL(scsi_test_unit_ready); 1707 EXPORT_SYMBOL(scsi_test_unit_ready);
1708 1708
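For removable media, a TEST UNIT READY that comes back with UNIT ATTENTION or NOT READY sense is not treated as a failure: the device is flagged as changed and the result is cleared so callers see success. A simplified user-space sketch of that post-processing (it skips the DRIVER_SENSE check done in the real code; the sense-key values are the standard SPC ones):

#include <stdio.h>

#define SK_NOT_READY       0x02
#define SK_UNIT_ATTENTION  0x06

struct fake_dev {
	int removable;
	int changed;
};

/* Mirror of the post-processing above: on removable media, a
 * UNIT ATTENTION / NOT READY answer to TEST UNIT READY just means
 * the medium may have changed, so record that and report success. */
static int tur_result(struct fake_dev *dev, int result, int sense_key)
{
	if (result && dev->removable &&
	    (sense_key == SK_UNIT_ATTENTION || sense_key == SK_NOT_READY)) {
		dev->changed = 1;
		result = 0;
	}
	return result;
}

int main(void)
{
	struct fake_dev cd = { 1, 0 };
	int r = tur_result(&cd, 0x02, SK_UNIT_ATTENTION);

	printf("result=%d changed=%d\n", r, cd.changed);
	return 0;
}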
1709 /** 1709 /**
1710 * scsi_device_set_state - Take the given device through the device 1710 * scsi_device_set_state - Take the given device through the device
1711 * state model. 1711 * state model.
1712 * @sdev: scsi device to change the state of. 1712 * @sdev: scsi device to change the state of.
1713 * @state: state to change to. 1713 * @state: state to change to.
1714 * 1714 *
1715 * Returns zero if unsuccessful or an error if the requested 1715 * Returns zero if unsuccessful or an error if the requested
1716 * transition is illegal. 1716 * transition is illegal.
1717 **/ 1717 **/
1718 int 1718 int
1719 scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state) 1719 scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
1720 { 1720 {
1721 enum scsi_device_state oldstate = sdev->sdev_state; 1721 enum scsi_device_state oldstate = sdev->sdev_state;
1722 1722
1723 if (state == oldstate) 1723 if (state == oldstate)
1724 return 0; 1724 return 0;
1725 1725
1726 switch (state) { 1726 switch (state) {
1727 case SDEV_CREATED: 1727 case SDEV_CREATED:
1728 /* There are no legal states that come back to 1728 /* There are no legal states that come back to
1729 * created. This is the manually initialised start 1729 * created. This is the manually initialised start
1730 * state */ 1730 * state */
1731 goto illegal; 1731 goto illegal;
1732 1732
1733 case SDEV_RUNNING: 1733 case SDEV_RUNNING:
1734 switch (oldstate) { 1734 switch (oldstate) {
1735 case SDEV_CREATED: 1735 case SDEV_CREATED:
1736 case SDEV_OFFLINE: 1736 case SDEV_OFFLINE:
1737 case SDEV_QUIESCE: 1737 case SDEV_QUIESCE:
1738 case SDEV_BLOCK: 1738 case SDEV_BLOCK:
1739 break; 1739 break;
1740 default: 1740 default:
1741 goto illegal; 1741 goto illegal;
1742 } 1742 }
1743 break; 1743 break;
1744 1744
1745 case SDEV_QUIESCE: 1745 case SDEV_QUIESCE:
1746 switch (oldstate) { 1746 switch (oldstate) {
1747 case SDEV_RUNNING: 1747 case SDEV_RUNNING:
1748 case SDEV_OFFLINE: 1748 case SDEV_OFFLINE:
1749 break; 1749 break;
1750 default: 1750 default:
1751 goto illegal; 1751 goto illegal;
1752 } 1752 }
1753 break; 1753 break;
1754 1754
1755 case SDEV_OFFLINE: 1755 case SDEV_OFFLINE:
1756 switch (oldstate) { 1756 switch (oldstate) {
1757 case SDEV_CREATED: 1757 case SDEV_CREATED:
1758 case SDEV_RUNNING: 1758 case SDEV_RUNNING:
1759 case SDEV_QUIESCE: 1759 case SDEV_QUIESCE:
1760 case SDEV_BLOCK: 1760 case SDEV_BLOCK:
1761 break; 1761 break;
1762 default: 1762 default:
1763 goto illegal; 1763 goto illegal;
1764 } 1764 }
1765 break; 1765 break;
1766 1766
1767 case SDEV_BLOCK: 1767 case SDEV_BLOCK:
1768 switch (oldstate) { 1768 switch (oldstate) {
1769 case SDEV_CREATED: 1769 case SDEV_CREATED:
1770 case SDEV_RUNNING: 1770 case SDEV_RUNNING:
1771 break; 1771 break;
1772 default: 1772 default:
1773 goto illegal; 1773 goto illegal;
1774 } 1774 }
1775 break; 1775 break;
1776 1776
1777 case SDEV_CANCEL: 1777 case SDEV_CANCEL:
1778 switch (oldstate) { 1778 switch (oldstate) {
1779 case SDEV_CREATED: 1779 case SDEV_CREATED:
1780 case SDEV_RUNNING: 1780 case SDEV_RUNNING:
1781 case SDEV_OFFLINE: 1781 case SDEV_OFFLINE:
1782 case SDEV_BLOCK: 1782 case SDEV_BLOCK:
1783 break; 1783 break;
1784 default: 1784 default:
1785 goto illegal; 1785 goto illegal;
1786 } 1786 }
1787 break; 1787 break;
1788 1788
1789 case SDEV_DEL: 1789 case SDEV_DEL:
1790 switch (oldstate) { 1790 switch (oldstate) {
1791 case SDEV_CANCEL: 1791 case SDEV_CANCEL:
1792 break; 1792 break;
1793 default: 1793 default:
1794 goto illegal; 1794 goto illegal;
1795 } 1795 }
1796 break; 1796 break;
1797 1797
1798 } 1798 }
1799 sdev->sdev_state = state; 1799 sdev->sdev_state = state;
1800 return 0; 1800 return 0;
1801 1801
1802 illegal: 1802 illegal:
1803 SCSI_LOG_ERROR_RECOVERY(1, 1803 SCSI_LOG_ERROR_RECOVERY(1,
1804 dev_printk(KERN_ERR, &sdev->sdev_gendev, 1804 dev_printk(KERN_ERR, &sdev->sdev_gendev,
1805 "Illegal state transition %s->%s\n", 1805 "Illegal state transition %s->%s\n",
1806 scsi_device_state_name(oldstate), 1806 scsi_device_state_name(oldstate),
1807 scsi_device_state_name(state)) 1807 scsi_device_state_name(state))
1808 ); 1808 );
1809 return -EINVAL; 1809 return -EINVAL;
1810 } 1810 }
1811 EXPORT_SYMBOL(scsi_device_set_state); 1811 EXPORT_SYMBOL(scsi_device_set_state);
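For illustration, a sketch of how a caller reacts to the transition table above; an illegal transition is logged by scsi_device_set_state() itself and reported as -EINVAL (for example, nothing may leave SDEV_DEL):

	if (scsi_device_set_state(sdev, SDEV_RUNNING)) {
		/* -EINVAL: the transition was rejected, the device keeps
		 * its old state */
		return;
	}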
1812 1812
1813 /** 1813 /**
1814 * scsi_device_quiesce - Block user issued commands. 1814 * scsi_device_quiesce - Block user issued commands.
1815 * @sdev: scsi device to quiesce. 1815 * @sdev: scsi device to quiesce.
1816 * 1816 *
1817 * This works by trying to transition to the SDEV_QUIESCE state 1817 * This works by trying to transition to the SDEV_QUIESCE state
1818 * (which must be a legal transition). When the device is in this 1818 * (which must be a legal transition). When the device is in this
1819 * state, only special requests will be accepted, all others will 1819 * state, only special requests will be accepted, all others will
1820 * be deferred. Since special requests may also be requeued requests, 1820 * be deferred. Since special requests may also be requeued requests,
1821 * a successful return doesn't guarantee the device will be 1821 * a successful return doesn't guarantee the device will be
1822 * totally quiescent. 1822 * totally quiescent.
1823 * 1823 *
1824 * Must be called with user context, may sleep. 1824 * Must be called with user context, may sleep.
1825 * 1825 *
1826 * Returns zero if successful or an error if not. 1826 * Returns zero if successful or an error if not.
1827 **/ 1827 **/
1828 int 1828 int
1829 scsi_device_quiesce(struct scsi_device *sdev) 1829 scsi_device_quiesce(struct scsi_device *sdev)
1830 { 1830 {
1831 int err = scsi_device_set_state(sdev, SDEV_QUIESCE); 1831 int err = scsi_device_set_state(sdev, SDEV_QUIESCE);
1832 if (err) 1832 if (err)
1833 return err; 1833 return err;
1834 1834
1835 scsi_run_queue(sdev->request_queue); 1835 scsi_run_queue(sdev->request_queue);
1836 while (sdev->device_busy) { 1836 while (sdev->device_busy) {
1837 msleep_interruptible(200); 1837 msleep_interruptible(200);
1838 scsi_run_queue(sdev->request_queue); 1838 scsi_run_queue(sdev->request_queue);
1839 } 1839 }
1840 return 0; 1840 return 0;
1841 } 1841 }
1842 EXPORT_SYMBOL(scsi_device_quiesce); 1842 EXPORT_SYMBOL(scsi_device_quiesce);
1843 1843
1844 /** 1844 /**
1845 * scsi_device_resume - Restart user issued commands to a quiesced device. 1845 * scsi_device_resume - Restart user issued commands to a quiesced device.
1846 * @sdev: scsi device to resume. 1846 * @sdev: scsi device to resume.
1847 * 1847 *
1848 * Moves the device from quiesced back to running and restarts the 1848 * Moves the device from quiesced back to running and restarts the
1849 * queues. 1849 * queues.
1850 * 1850 *
1851 * Must be called with user context, may sleep. 1851 * Must be called with user context, may sleep.
1852 **/ 1852 **/
1853 void 1853 void
1854 scsi_device_resume(struct scsi_device *sdev) 1854 scsi_device_resume(struct scsi_device *sdev)
1855 { 1855 {
1856 if (scsi_device_set_state(sdev, SDEV_RUNNING)) 1856 if (scsi_device_set_state(sdev, SDEV_RUNNING))
1857 return; 1857 return;
1858 scsi_run_queue(sdev->request_queue); 1858 scsi_run_queue(sdev->request_queue);
1859 } 1859 }
1860 EXPORT_SYMBOL(scsi_device_resume); 1860 EXPORT_SYMBOL(scsi_device_resume);
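scsi_device_quiesce() and scsi_device_resume() are meant to bracket a phase in which only special requests may reach the device (domain validation is the classic user). A hedged sketch of that pairing:

	if (scsi_device_quiesce(sdev))
		return;			/* could not enter SDEV_QUIESCE */

	/* ... issue internal/special requests only; user I/O is deferred ... */

	scsi_device_resume(sdev);	/* back to SDEV_RUNNING, queue restarted */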
1861 1861
1862 static void 1862 static void
1863 device_quiesce_fn(struct scsi_device *sdev, void *data) 1863 device_quiesce_fn(struct scsi_device *sdev, void *data)
1864 { 1864 {
1865 scsi_device_quiesce(sdev); 1865 scsi_device_quiesce(sdev);
1866 } 1866 }
1867 1867
1868 void 1868 void
1869 scsi_target_quiesce(struct scsi_target *starget) 1869 scsi_target_quiesce(struct scsi_target *starget)
1870 { 1870 {
1871 starget_for_each_device(starget, NULL, device_quiesce_fn); 1871 starget_for_each_device(starget, NULL, device_quiesce_fn);
1872 } 1872 }
1873 EXPORT_SYMBOL(scsi_target_quiesce); 1873 EXPORT_SYMBOL(scsi_target_quiesce);
1874 1874
1875 static void 1875 static void
1876 device_resume_fn(struct scsi_device *sdev, void *data) 1876 device_resume_fn(struct scsi_device *sdev, void *data)
1877 { 1877 {
1878 scsi_device_resume(sdev); 1878 scsi_device_resume(sdev);
1879 } 1879 }
1880 1880
1881 void 1881 void
1882 scsi_target_resume(struct scsi_target *starget) 1882 scsi_target_resume(struct scsi_target *starget)
1883 { 1883 {
1884 starget_for_each_device(starget, NULL, device_resume_fn); 1884 starget_for_each_device(starget, NULL, device_resume_fn);
1885 } 1885 }
1886 EXPORT_SYMBOL(scsi_target_resume); 1886 EXPORT_SYMBOL(scsi_target_resume);
1887 1887
1888 /** 1888 /**
1889 * scsi_internal_device_block - internal function to put a device 1889 * scsi_internal_device_block - internal function to put a device
1890 * temporarily into the SDEV_BLOCK state 1890 * temporarily into the SDEV_BLOCK state
1891 * @sdev: device to block 1891 * @sdev: device to block
1892 * 1892 *
1893 * Block request made by scsi lld's to temporarily stop all 1893 * Block request made by scsi lld's to temporarily stop all
1894 * scsi commands on the specified device. Called from interrupt 1894 * scsi commands on the specified device. Called from interrupt
1895 * or normal process context. 1895 * or normal process context.
1896 * 1896 *
1897 * Returns zero if successful or error if not 1897 * Returns zero if successful or error if not
1898 * 1898 *
1899 * Notes: 1899 * Notes:
1900 * This routine transitions the device to the SDEV_BLOCK state 1900 * This routine transitions the device to the SDEV_BLOCK state
1901 * (which must be a legal transition). When the device is in this 1901 * (which must be a legal transition). When the device is in this
1902 * state, all commands are deferred until the scsi lld reenables 1902 * state, all commands are deferred until the scsi lld reenables
1903 * the device with scsi_device_unblock or device_block_tmo fires. 1903 * the device with scsi_device_unblock or device_block_tmo fires.
1904 * This routine assumes the host_lock is held on entry. 1904 * This routine assumes the host_lock is held on entry.
1905 **/ 1905 **/
1906 int 1906 int
1907 scsi_internal_device_block(struct scsi_device *sdev) 1907 scsi_internal_device_block(struct scsi_device *sdev)
1908 { 1908 {
1909 request_queue_t *q = sdev->request_queue; 1909 request_queue_t *q = sdev->request_queue;
1910 unsigned long flags; 1910 unsigned long flags;
1911 int err = 0; 1911 int err = 0;
1912 1912
1913 err = scsi_device_set_state(sdev, SDEV_BLOCK); 1913 err = scsi_device_set_state(sdev, SDEV_BLOCK);
1914 if (err) 1914 if (err)
1915 return err; 1915 return err;
1916 1916
1917 /* 1917 /*
1918 * The device has transitioned to SDEV_BLOCK. Stop the 1918 * The device has transitioned to SDEV_BLOCK. Stop the
1919 * block layer from calling the midlayer with this device's 1919 * block layer from calling the midlayer with this device's
1920 * request queue. 1920 * request queue.
1921 */ 1921 */
1922 spin_lock_irqsave(q->queue_lock, flags); 1922 spin_lock_irqsave(q->queue_lock, flags);
1923 blk_stop_queue(q); 1923 blk_stop_queue(q);
1924 spin_unlock_irqrestore(q->queue_lock, flags); 1924 spin_unlock_irqrestore(q->queue_lock, flags);
1925 1925
1926 return 0; 1926 return 0;
1927 } 1927 }
1928 EXPORT_SYMBOL_GPL(scsi_internal_device_block); 1928 EXPORT_SYMBOL_GPL(scsi_internal_device_block);
1929 1929
1930 /** 1930 /**
1931 * scsi_internal_device_unblock - resume a device after a block request 1931 * scsi_internal_device_unblock - resume a device after a block request
1932 * @sdev: device to resume 1932 * @sdev: device to resume
1933 * 1933 *
1934 * Called by scsi lld's or the midlayer to restart the device queue 1934 * Called by scsi lld's or the midlayer to restart the device queue
1935 * for the previously suspended scsi device. Called from interrupt or 1935 * for the previously suspended scsi device. Called from interrupt or
1936 * normal process context. 1936 * normal process context.
1937 * 1937 *
1938 * Returns zero if successful or error if not. 1938 * Returns zero if successful or error if not.
1939 * 1939 *
1940 * Notes: 1940 * Notes:
1941 * This routine transitions the device to the SDEV_RUNNING state 1941 * This routine transitions the device to the SDEV_RUNNING state
1942 * (which must be a legal transition) allowing the midlayer to 1942 * (which must be a legal transition) allowing the midlayer to
1943 * goose the queue for this device. This routine assumes the 1943 * goose the queue for this device. This routine assumes the
1944 * host_lock is held upon entry. 1944 * host_lock is held upon entry.
1945 **/ 1945 **/
1946 int 1946 int
1947 scsi_internal_device_unblock(struct scsi_device *sdev) 1947 scsi_internal_device_unblock(struct scsi_device *sdev)
1948 { 1948 {
1949 request_queue_t *q = sdev->request_queue; 1949 request_queue_t *q = sdev->request_queue;
1950 int err; 1950 int err;
1951 unsigned long flags; 1951 unsigned long flags;
1952 1952
1953 /* 1953 /*
1954 * Try to transition the scsi device to SDEV_RUNNING 1954 * Try to transition the scsi device to SDEV_RUNNING
1955 * and goose the device queue if successful. 1955 * and goose the device queue if successful.
1956 */ 1956 */
1957 err = scsi_device_set_state(sdev, SDEV_RUNNING); 1957 err = scsi_device_set_state(sdev, SDEV_RUNNING);
1958 if (err) 1958 if (err)
1959 return err; 1959 return err;
1960 1960
1961 spin_lock_irqsave(q->queue_lock, flags); 1961 spin_lock_irqsave(q->queue_lock, flags);
1962 blk_start_queue(q); 1962 blk_start_queue(q);
1963 spin_unlock_irqrestore(q->queue_lock, flags); 1963 spin_unlock_irqrestore(q->queue_lock, flags);
1964 1964
1965 return 0; 1965 return 0;
1966 } 1966 }
1967 EXPORT_SYMBOL_GPL(scsi_internal_device_unblock); 1967 EXPORT_SYMBOL_GPL(scsi_internal_device_unblock);
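Following the locking note in the kernel-doc above (host_lock held on entry), a hedged sketch of how an LLD might fence off one device around an adapter-internal recovery step; the recovery itself is a placeholder:

	struct Scsi_Host *shost = sdev->host;
	unsigned long flags;

	spin_lock_irqsave(shost->host_lock, flags);
	scsi_internal_device_block(sdev);	/* SDEV_BLOCK, queue stopped */
	spin_unlock_irqrestore(shost->host_lock, flags);

	/* ... adapter-internal recovery runs here, commands are deferred ... */

	spin_lock_irqsave(shost->host_lock, flags);
	scsi_internal_device_unblock(sdev);	/* SDEV_RUNNING, queue restarted */
	spin_unlock_irqrestore(shost->host_lock, flags);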
1968 1968
1969 static void 1969 static void
1970 device_block(struct scsi_device *sdev, void *data) 1970 device_block(struct scsi_device *sdev, void *data)
1971 { 1971 {
1972 scsi_internal_device_block(sdev); 1972 scsi_internal_device_block(sdev);
1973 } 1973 }
1974 1974
1975 static int 1975 static int
1976 target_block(struct device *dev, void *data) 1976 target_block(struct device *dev, void *data)
1977 { 1977 {
1978 if (scsi_is_target_device(dev)) 1978 if (scsi_is_target_device(dev))
1979 starget_for_each_device(to_scsi_target(dev), NULL, 1979 starget_for_each_device(to_scsi_target(dev), NULL,
1980 device_block); 1980 device_block);
1981 return 0; 1981 return 0;
1982 } 1982 }
1983 1983
1984 void 1984 void
1985 scsi_target_block(struct device *dev) 1985 scsi_target_block(struct device *dev)
1986 { 1986 {
1987 if (scsi_is_target_device(dev)) 1987 if (scsi_is_target_device(dev))
1988 starget_for_each_device(to_scsi_target(dev), NULL, 1988 starget_for_each_device(to_scsi_target(dev), NULL,
1989 device_block); 1989 device_block);
1990 else 1990 else
1991 device_for_each_child(dev, NULL, target_block); 1991 device_for_each_child(dev, NULL, target_block);
1992 } 1992 }
1993 EXPORT_SYMBOL_GPL(scsi_target_block); 1993 EXPORT_SYMBOL_GPL(scsi_target_block);
1994 1994
1995 static void 1995 static void
1996 device_unblock(struct scsi_device *sdev, void *data) 1996 device_unblock(struct scsi_device *sdev, void *data)
1997 { 1997 {
1998 scsi_internal_device_unblock(sdev); 1998 scsi_internal_device_unblock(sdev);
1999 } 1999 }
2000 2000
2001 static int 2001 static int
2002 target_unblock(struct device *dev, void *data) 2002 target_unblock(struct device *dev, void *data)
2003 { 2003 {
2004 if (scsi_is_target_device(dev)) 2004 if (scsi_is_target_device(dev))
2005 starget_for_each_device(to_scsi_target(dev), NULL, 2005 starget_for_each_device(to_scsi_target(dev), NULL,
2006 device_unblock); 2006 device_unblock);
2007 return 0; 2007 return 0;
2008 } 2008 }
2009 2009
2010 void 2010 void
2011 scsi_target_unblock(struct device *dev) 2011 scsi_target_unblock(struct device *dev)
2012 { 2012 {
2013 if (scsi_is_target_device(dev)) 2013 if (scsi_is_target_device(dev))
2014 starget_for_each_device(to_scsi_target(dev), NULL, 2014 starget_for_each_device(to_scsi_target(dev), NULL,
2015 device_unblock); 2015 device_unblock);
2016 else 2016 else
2017 device_for_each_child(dev, NULL, target_unblock); 2017 device_for_each_child(dev, NULL, target_unblock);
2018 } 2018 }
2019 EXPORT_SYMBOL_GPL(scsi_target_unblock); 2019 EXPORT_SYMBOL_GPL(scsi_target_unblock);
2020 2020
include/linux/blkdev.h
1 #ifndef _LINUX_BLKDEV_H 1 #ifndef _LINUX_BLKDEV_H
2 #define _LINUX_BLKDEV_H 2 #define _LINUX_BLKDEV_H
3 3
4 #include <linux/config.h> 4 #include <linux/config.h>
5 #include <linux/major.h> 5 #include <linux/major.h>
6 #include <linux/genhd.h> 6 #include <linux/genhd.h>
7 #include <linux/list.h> 7 #include <linux/list.h>
8 #include <linux/timer.h> 8 #include <linux/timer.h>
9 #include <linux/workqueue.h> 9 #include <linux/workqueue.h>
10 #include <linux/pagemap.h> 10 #include <linux/pagemap.h>
11 #include <linux/backing-dev.h> 11 #include <linux/backing-dev.h>
12 #include <linux/wait.h> 12 #include <linux/wait.h>
13 #include <linux/mempool.h> 13 #include <linux/mempool.h>
14 #include <linux/bio.h> 14 #include <linux/bio.h>
15 #include <linux/module.h> 15 #include <linux/module.h>
16 #include <linux/stringify.h> 16 #include <linux/stringify.h>
17 17
18 #include <asm/scatterlist.h> 18 #include <asm/scatterlist.h>
19 19
20 struct request_queue; 20 struct request_queue;
21 typedef struct request_queue request_queue_t; 21 typedef struct request_queue request_queue_t;
22 struct elevator_queue; 22 struct elevator_queue;
23 typedef struct elevator_queue elevator_t; 23 typedef struct elevator_queue elevator_t;
24 struct request_pm_state; 24 struct request_pm_state;
25 25
26 #define BLKDEV_MIN_RQ 4 26 #define BLKDEV_MIN_RQ 4
27 #define BLKDEV_MAX_RQ 128 /* Default maximum */ 27 #define BLKDEV_MAX_RQ 128 /* Default maximum */
28 28
29 /* 29 /*
30 * This is the per-process anticipatory I/O scheduler state. 30 * This is the per-process anticipatory I/O scheduler state.
31 */ 31 */
32 struct as_io_context { 32 struct as_io_context {
33 spinlock_t lock; 33 spinlock_t lock;
34 34
35 void (*dtor)(struct as_io_context *aic); /* destructor */ 35 void (*dtor)(struct as_io_context *aic); /* destructor */
36 void (*exit)(struct as_io_context *aic); /* called on task exit */ 36 void (*exit)(struct as_io_context *aic); /* called on task exit */
37 37
38 unsigned long state; 38 unsigned long state;
39 atomic_t nr_queued; /* queued reads & sync writes */ 39 atomic_t nr_queued; /* queued reads & sync writes */
40 atomic_t nr_dispatched; /* number of requests gone to the drivers */ 40 atomic_t nr_dispatched; /* number of requests gone to the drivers */
41 41
42 /* IO History tracking */ 42 /* IO History tracking */
43 /* Thinktime */ 43 /* Thinktime */
44 unsigned long last_end_request; 44 unsigned long last_end_request;
45 unsigned long ttime_total; 45 unsigned long ttime_total;
46 unsigned long ttime_samples; 46 unsigned long ttime_samples;
47 unsigned long ttime_mean; 47 unsigned long ttime_mean;
48 /* Layout pattern */ 48 /* Layout pattern */
49 unsigned int seek_samples; 49 unsigned int seek_samples;
50 sector_t last_request_pos; 50 sector_t last_request_pos;
51 u64 seek_total; 51 u64 seek_total;
52 sector_t seek_mean; 52 sector_t seek_mean;
53 }; 53 };
54 54
55 struct cfq_queue; 55 struct cfq_queue;
56 struct cfq_io_context { 56 struct cfq_io_context {
57 void (*dtor)(struct cfq_io_context *); 57 void (*dtor)(struct cfq_io_context *);
58 void (*exit)(struct cfq_io_context *); 58 void (*exit)(struct cfq_io_context *);
59 59
60 struct io_context *ioc; 60 struct io_context *ioc;
61 61
62 /* 62 /*
63 * circular list of cfq_io_contexts belonging to a process io context 63 * circular list of cfq_io_contexts belonging to a process io context
64 */ 64 */
65 struct list_head list; 65 struct list_head list;
66 struct cfq_queue *cfqq; 66 struct cfq_queue *cfqq;
67 }; 67 };
68 68
69 /* 69 /*
70 * This is the per-process I/O subsystem state. It is refcounted and 70 * This is the per-process I/O subsystem state. It is refcounted and
71 * kmalloc'ed. Currently all fields are modified in process io context 71 * kmalloc'ed. Currently all fields are modified in process io context
72 * (apart from the atomic refcount), so require no locking. 72 * (apart from the atomic refcount), so require no locking.
73 */ 73 */
74 struct io_context { 74 struct io_context {
75 atomic_t refcount; 75 atomic_t refcount;
76 pid_t pid; 76 pid_t pid;
77 77
78 /* 78 /*
79 * For request batching 79 * For request batching
80 */ 80 */
81 unsigned long last_waited; /* Time last woken after wait for request */ 81 unsigned long last_waited; /* Time last woken after wait for request */
82 int nr_batch_requests; /* Number of requests left in the batch */ 82 int nr_batch_requests; /* Number of requests left in the batch */
83 83
84 spinlock_t lock; 84 spinlock_t lock;
85 85
86 struct as_io_context *aic; 86 struct as_io_context *aic;
87 struct cfq_io_context *cic; 87 struct cfq_io_context *cic;
88 }; 88 };
89 89
90 void put_io_context(struct io_context *ioc); 90 void put_io_context(struct io_context *ioc);
91 void exit_io_context(void); 91 void exit_io_context(void);
92 struct io_context *get_io_context(int gfp_flags); 92 struct io_context *get_io_context(int gfp_flags);
93 void copy_io_context(struct io_context **pdst, struct io_context **psrc); 93 void copy_io_context(struct io_context **pdst, struct io_context **psrc);
94 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); 94 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2);
95 95
96 struct request; 96 struct request;
97 typedef void (rq_end_io_fn)(struct request *); 97 typedef void (rq_end_io_fn)(struct request *);
98 98
99 struct request_list { 99 struct request_list {
100 int count[2]; 100 int count[2];
101 int starved[2]; 101 int starved[2];
102 mempool_t *rq_pool; 102 mempool_t *rq_pool;
103 wait_queue_head_t wait[2]; 103 wait_queue_head_t wait[2];
104 wait_queue_head_t drain; 104 wait_queue_head_t drain;
105 }; 105 };
106 106
107 #define BLK_MAX_CDB 16 107 #define BLK_MAX_CDB 16
108 108
109 /* 109 /*
110 * try to put the fields that are referenced together in the same cacheline 110 * try to put the fields that are referenced together in the same cacheline
111 */ 111 */
112 struct request { 112 struct request {
113 struct list_head queuelist; /* looking for ->queue? you must _not_ 113 struct list_head queuelist; /* looking for ->queue? you must _not_
114 * access it directly, use 114 * access it directly, use
115 * blkdev_dequeue_request! */ 115 * blkdev_dequeue_request! */
116 unsigned long flags; /* see REQ_ bits below */ 116 unsigned long flags; /* see REQ_ bits below */
117 117
118 /* Maintain bio traversal state for part by part I/O submission. 118 /* Maintain bio traversal state for part by part I/O submission.
119 * hard_* are block layer internals, no driver should touch them! 119 * hard_* are block layer internals, no driver should touch them!
120 */ 120 */
121 121
122 sector_t sector; /* next sector to submit */ 122 sector_t sector; /* next sector to submit */
123 unsigned long nr_sectors; /* no. of sectors left to submit */ 123 unsigned long nr_sectors; /* no. of sectors left to submit */
124 /* no. of sectors left to submit in the current segment */ 124 /* no. of sectors left to submit in the current segment */
125 unsigned int current_nr_sectors; 125 unsigned int current_nr_sectors;
126 126
127 sector_t hard_sector; /* next sector to complete */ 127 sector_t hard_sector; /* next sector to complete */
128 unsigned long hard_nr_sectors; /* no. of sectors left to complete */ 128 unsigned long hard_nr_sectors; /* no. of sectors left to complete */
129 /* no. of sectors left to complete in the current segment */ 129 /* no. of sectors left to complete in the current segment */
130 unsigned int hard_cur_sectors; 130 unsigned int hard_cur_sectors;
131 131
132 struct bio *bio; 132 struct bio *bio;
133 struct bio *biotail; 133 struct bio *biotail;
134 134
135 void *elevator_private; 135 void *elevator_private;
136 136
137 int rq_status; /* should split this into a few status bits */ 137 int rq_status; /* should split this into a few status bits */
138 struct gendisk *rq_disk; 138 struct gendisk *rq_disk;
139 int errors; 139 int errors;
140 unsigned long start_time; 140 unsigned long start_time;
141 141
142 /* Number of scatter-gather DMA addr+len pairs after 142 /* Number of scatter-gather DMA addr+len pairs after
143 * physical address coalescing is performed. 143 * physical address coalescing is performed.
144 */ 144 */
145 unsigned short nr_phys_segments; 145 unsigned short nr_phys_segments;
146 146
147 /* Number of scatter-gather addr+len pairs after 147 /* Number of scatter-gather addr+len pairs after
148 * physical and DMA remapping hardware coalescing is performed. 148 * physical and DMA remapping hardware coalescing is performed.
149 * This is the number of scatter-gather entries the driver 149 * This is the number of scatter-gather entries the driver
150 * will actually have to deal with after DMA mapping is done. 150 * will actually have to deal with after DMA mapping is done.
151 */ 151 */
152 unsigned short nr_hw_segments; 152 unsigned short nr_hw_segments;
153 153
154 int tag; 154 int tag;
155 char *buffer; 155 char *buffer;
156 156
157 int ref_count; 157 int ref_count;
158 request_queue_t *q; 158 request_queue_t *q;
159 struct request_list *rl; 159 struct request_list *rl;
160 160
161 struct completion *waiting; 161 struct completion *waiting;
162 void *special; 162 void *special;
163 163
164 /* 164 /*
165 * when request is used as a packet command carrier 165 * when request is used as a packet command carrier
166 */ 166 */
167 unsigned int cmd_len; 167 unsigned int cmd_len;
168 unsigned char cmd[BLK_MAX_CDB]; 168 unsigned char cmd[BLK_MAX_CDB];
169 169
170 unsigned int data_len; 170 unsigned int data_len;
171 void *data; 171 void *data;
172 172
173 unsigned int sense_len; 173 unsigned int sense_len;
174 void *sense; 174 void *sense;
175 175
176 unsigned int timeout; 176 unsigned int timeout;
177 177
178 /* 178 /*
179 * For Power Management requests 179 * For Power Management requests
180 */ 180 */
181 struct request_pm_state *pm; 181 struct request_pm_state *pm;
182 182
183 /* 183 /*
184 * completion callback. end_io_data should be folded in with waiting 184 * completion callback. end_io_data should be folded in with waiting
185 */ 185 */
186 rq_end_io_fn *end_io; 186 rq_end_io_fn *end_io;
187 void *end_io_data; 187 void *end_io_data;
188 }; 188 };
189 189
190 /* 190 /*
191 * first three bits match BIO_RW* bits, important 191 * first three bits match BIO_RW* bits, important
192 */ 192 */
193 enum rq_flag_bits { 193 enum rq_flag_bits {
194 __REQ_RW, /* not set, read. set, write */ 194 __REQ_RW, /* not set, read. set, write */
195 __REQ_FAILFAST, /* no low level driver retries */ 195 __REQ_FAILFAST, /* no low level driver retries */
196 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ 196 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
197 __REQ_HARDBARRIER, /* may not be passed by drive either */ 197 __REQ_HARDBARRIER, /* may not be passed by drive either */
198 __REQ_CMD, /* is a regular fs rw request */ 198 __REQ_CMD, /* is a regular fs rw request */
199 __REQ_NOMERGE, /* don't touch this for merging */ 199 __REQ_NOMERGE, /* don't touch this for merging */
200 __REQ_STARTED, /* drive already may have started this one */ 200 __REQ_STARTED, /* drive already may have started this one */
201 __REQ_DONTPREP, /* don't call prep for this one */ 201 __REQ_DONTPREP, /* don't call prep for this one */
202 __REQ_QUEUED, /* uses queueing */ 202 __REQ_QUEUED, /* uses queueing */
203 /* 203 /*
204 * for ATA/ATAPI devices 204 * for ATA/ATAPI devices
205 */ 205 */
206 __REQ_PC, /* packet command (special) */ 206 __REQ_PC, /* packet command (special) */
207 __REQ_BLOCK_PC, /* queued down pc from block layer */ 207 __REQ_BLOCK_PC, /* queued down pc from block layer */
208 __REQ_SENSE, /* sense retrieval */ 208 __REQ_SENSE, /* sense retrieval */
209 209
210 __REQ_FAILED, /* set if the request failed */ 210 __REQ_FAILED, /* set if the request failed */
211 __REQ_QUIET, /* don't worry about errors */ 211 __REQ_QUIET, /* don't worry about errors */
212 __REQ_SPECIAL, /* driver supplied command */ 212 __REQ_SPECIAL, /* driver supplied command */
213 __REQ_DRIVE_CMD, 213 __REQ_DRIVE_CMD,
214 __REQ_DRIVE_TASK, 214 __REQ_DRIVE_TASK,
215 __REQ_DRIVE_TASKFILE, 215 __REQ_DRIVE_TASKFILE,
216 __REQ_PREEMPT, /* set for "ide_preempt" requests */ 216 __REQ_PREEMPT, /* set for "ide_preempt" requests */
217 __REQ_PM_SUSPEND, /* suspend request */ 217 __REQ_PM_SUSPEND, /* suspend request */
218 __REQ_PM_RESUME, /* resume request */ 218 __REQ_PM_RESUME, /* resume request */
219 __REQ_PM_SHUTDOWN, /* shutdown request */ 219 __REQ_PM_SHUTDOWN, /* shutdown request */
220 __REQ_BAR_PREFLUSH, /* barrier pre-flush done */ 220 __REQ_BAR_PREFLUSH, /* barrier pre-flush done */
221 __REQ_BAR_POSTFLUSH, /* barrier post-flush */ 221 __REQ_BAR_POSTFLUSH, /* barrier post-flush */
222 __REQ_BAR_FLUSH, /* rq is the flush request */ 222 __REQ_BAR_FLUSH, /* rq is the flush request */
223 __REQ_NR_BITS, /* stops here */ 223 __REQ_NR_BITS, /* stops here */
224 }; 224 };
225 225
226 #define REQ_RW (1 << __REQ_RW) 226 #define REQ_RW (1 << __REQ_RW)
227 #define REQ_FAILFAST (1 << __REQ_FAILFAST) 227 #define REQ_FAILFAST (1 << __REQ_FAILFAST)
228 #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) 228 #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER)
229 #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) 229 #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER)
230 #define REQ_CMD (1 << __REQ_CMD) 230 #define REQ_CMD (1 << __REQ_CMD)
231 #define REQ_NOMERGE (1 << __REQ_NOMERGE) 231 #define REQ_NOMERGE (1 << __REQ_NOMERGE)
232 #define REQ_STARTED (1 << __REQ_STARTED) 232 #define REQ_STARTED (1 << __REQ_STARTED)
233 #define REQ_DONTPREP (1 << __REQ_DONTPREP) 233 #define REQ_DONTPREP (1 << __REQ_DONTPREP)
234 #define REQ_QUEUED (1 << __REQ_QUEUED) 234 #define REQ_QUEUED (1 << __REQ_QUEUED)
235 #define REQ_PC (1 << __REQ_PC) 235 #define REQ_PC (1 << __REQ_PC)
236 #define REQ_BLOCK_PC (1 << __REQ_BLOCK_PC) 236 #define REQ_BLOCK_PC (1 << __REQ_BLOCK_PC)
237 #define REQ_SENSE (1 << __REQ_SENSE) 237 #define REQ_SENSE (1 << __REQ_SENSE)
238 #define REQ_FAILED (1 << __REQ_FAILED) 238 #define REQ_FAILED (1 << __REQ_FAILED)
239 #define REQ_QUIET (1 << __REQ_QUIET) 239 #define REQ_QUIET (1 << __REQ_QUIET)
240 #define REQ_SPECIAL (1 << __REQ_SPECIAL) 240 #define REQ_SPECIAL (1 << __REQ_SPECIAL)
241 #define REQ_DRIVE_CMD (1 << __REQ_DRIVE_CMD) 241 #define REQ_DRIVE_CMD (1 << __REQ_DRIVE_CMD)
242 #define REQ_DRIVE_TASK (1 << __REQ_DRIVE_TASK) 242 #define REQ_DRIVE_TASK (1 << __REQ_DRIVE_TASK)
243 #define REQ_DRIVE_TASKFILE (1 << __REQ_DRIVE_TASKFILE) 243 #define REQ_DRIVE_TASKFILE (1 << __REQ_DRIVE_TASKFILE)
244 #define REQ_PREEMPT (1 << __REQ_PREEMPT) 244 #define REQ_PREEMPT (1 << __REQ_PREEMPT)
245 #define REQ_PM_SUSPEND (1 << __REQ_PM_SUSPEND) 245 #define REQ_PM_SUSPEND (1 << __REQ_PM_SUSPEND)
246 #define REQ_PM_RESUME (1 << __REQ_PM_RESUME) 246 #define REQ_PM_RESUME (1 << __REQ_PM_RESUME)
247 #define REQ_PM_SHUTDOWN (1 << __REQ_PM_SHUTDOWN) 247 #define REQ_PM_SHUTDOWN (1 << __REQ_PM_SHUTDOWN)
248 #define REQ_BAR_PREFLUSH (1 << __REQ_BAR_PREFLUSH) 248 #define REQ_BAR_PREFLUSH (1 << __REQ_BAR_PREFLUSH)
249 #define REQ_BAR_POSTFLUSH (1 << __REQ_BAR_POSTFLUSH) 249 #define REQ_BAR_POSTFLUSH (1 << __REQ_BAR_POSTFLUSH)
250 #define REQ_BAR_FLUSH (1 << __REQ_BAR_FLUSH) 250 #define REQ_BAR_FLUSH (1 << __REQ_BAR_FLUSH)
251 251
252 /* 252 /*
253 * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME 253 * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME
254 * requests. Some step values could eventually be made generic. 254 * requests. Some step values could eventually be made generic.
255 */ 255 */
256 struct request_pm_state 256 struct request_pm_state
257 { 257 {
258 /* PM state machine step value, currently driver specific */ 258 /* PM state machine step value, currently driver specific */
259 int pm_step; 259 int pm_step;
260 /* requested PM state value (S1, S2, S3, S4, ...) */ 260 /* requested PM state value (S1, S2, S3, S4, ...) */
261 u32 pm_state; 261 u32 pm_state;
262 void* data; /* for driver use */ 262 void* data; /* for driver use */
263 }; 263 };
264 264
265 #include <linux/elevator.h> 265 #include <linux/elevator.h>
266 266
267 typedef int (merge_request_fn) (request_queue_t *, struct request *, 267 typedef int (merge_request_fn) (request_queue_t *, struct request *,
268 struct bio *); 268 struct bio *);
269 typedef int (merge_requests_fn) (request_queue_t *, struct request *, 269 typedef int (merge_requests_fn) (request_queue_t *, struct request *,
270 struct request *); 270 struct request *);
271 typedef void (request_fn_proc) (request_queue_t *q); 271 typedef void (request_fn_proc) (request_queue_t *q);
272 typedef int (make_request_fn) (request_queue_t *q, struct bio *bio); 272 typedef int (make_request_fn) (request_queue_t *q, struct bio *bio);
273 typedef int (prep_rq_fn) (request_queue_t *, struct request *); 273 typedef int (prep_rq_fn) (request_queue_t *, struct request *);
274 typedef void (unplug_fn) (request_queue_t *); 274 typedef void (unplug_fn) (request_queue_t *);
275 275
276 struct bio_vec; 276 struct bio_vec;
277 typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *); 277 typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *);
278 typedef void (activity_fn) (void *data, int rw); 278 typedef void (activity_fn) (void *data, int rw);
279 typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *); 279 typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *);
280 typedef int (prepare_flush_fn) (request_queue_t *, struct request *); 280 typedef int (prepare_flush_fn) (request_queue_t *, struct request *);
281 typedef void (end_flush_fn) (request_queue_t *, struct request *); 281 typedef void (end_flush_fn) (request_queue_t *, struct request *);
282 282
283 enum blk_queue_state { 283 enum blk_queue_state {
284 Queue_down, 284 Queue_down,
285 Queue_up, 285 Queue_up,
286 }; 286 };
287 287
288 #define BLK_TAGS_PER_LONG (sizeof(unsigned long) * 8) 288 #define BLK_TAGS_PER_LONG (sizeof(unsigned long) * 8)
289 #define BLK_TAGS_MASK (BLK_TAGS_PER_LONG - 1) 289 #define BLK_TAGS_MASK (BLK_TAGS_PER_LONG - 1)
290 290
291 struct blk_queue_tag { 291 struct blk_queue_tag {
292 struct request **tag_index; /* map of busy tags */ 292 struct request **tag_index; /* map of busy tags */
293 unsigned long *tag_map; /* bit map of free/busy tags */ 293 unsigned long *tag_map; /* bit map of free/busy tags */
294 struct list_head busy_list; /* fifo list of busy tags */ 294 struct list_head busy_list; /* fifo list of busy tags */
295 int busy; /* current depth */ 295 int busy; /* current depth */
296 int max_depth; /* what we will send to device */ 296 int max_depth; /* what we will send to device */
297 int real_max_depth; /* what the array can hold */ 297 int real_max_depth; /* what the array can hold */
298 atomic_t refcnt; /* map can be shared */ 298 atomic_t refcnt; /* map can be shared */
299 }; 299 };
300 300
301 struct request_queue 301 struct request_queue
302 { 302 {
303 /* 303 /*
304 * Together with queue_head for cacheline sharing 304 * Together with queue_head for cacheline sharing
305 */ 305 */
306 struct list_head queue_head; 306 struct list_head queue_head;
307 struct request *last_merge; 307 struct request *last_merge;
308 elevator_t *elevator; 308 elevator_t *elevator;
309 309
310 /* 310 /*
311 * the queue request freelist, one for reads and one for writes 311 * the queue request freelist, one for reads and one for writes
312 */ 312 */
313 struct request_list rq; 313 struct request_list rq;
314 314
315 request_fn_proc *request_fn; 315 request_fn_proc *request_fn;
316 merge_request_fn *back_merge_fn; 316 merge_request_fn *back_merge_fn;
317 merge_request_fn *front_merge_fn; 317 merge_request_fn *front_merge_fn;
318 merge_requests_fn *merge_requests_fn; 318 merge_requests_fn *merge_requests_fn;
319 make_request_fn *make_request_fn; 319 make_request_fn *make_request_fn;
320 prep_rq_fn *prep_rq_fn; 320 prep_rq_fn *prep_rq_fn;
321 unplug_fn *unplug_fn; 321 unplug_fn *unplug_fn;
322 merge_bvec_fn *merge_bvec_fn; 322 merge_bvec_fn *merge_bvec_fn;
323 activity_fn *activity_fn; 323 activity_fn *activity_fn;
324 issue_flush_fn *issue_flush_fn; 324 issue_flush_fn *issue_flush_fn;
325 prepare_flush_fn *prepare_flush_fn; 325 prepare_flush_fn *prepare_flush_fn;
326 end_flush_fn *end_flush_fn; 326 end_flush_fn *end_flush_fn;
327 327
328 /* 328 /*
329 * Auto-unplugging state 329 * Auto-unplugging state
330 */ 330 */
331 struct timer_list unplug_timer; 331 struct timer_list unplug_timer;
332 int unplug_thresh; /* After this many requests */ 332 int unplug_thresh; /* After this many requests */
333 unsigned long unplug_delay; /* After this many jiffies */ 333 unsigned long unplug_delay; /* After this many jiffies */
334 struct work_struct unplug_work; 334 struct work_struct unplug_work;
335 335
336 struct backing_dev_info backing_dev_info; 336 struct backing_dev_info backing_dev_info;
337 337
338 /* 338 /*
339 * The queue owner gets to use this for whatever they like. 339 * The queue owner gets to use this for whatever they like.
340 * ll_rw_blk doesn't touch it. 340 * ll_rw_blk doesn't touch it.
341 */ 341 */
342 void *queuedata; 342 void *queuedata;
343 343
344 void *activity_data; 344 void *activity_data;
345 345
346 /* 346 /*
347 * queue needs bounce pages for pages above this limit 347 * queue needs bounce pages for pages above this limit
348 */ 348 */
349 unsigned long bounce_pfn; 349 unsigned long bounce_pfn;
350 unsigned int bounce_gfp; 350 unsigned int bounce_gfp;
351 351
352 /* 352 /*
353 * various queue flags, see QUEUE_* below 353 * various queue flags, see QUEUE_* below
354 */ 354 */
355 unsigned long queue_flags; 355 unsigned long queue_flags;
356 356
357 /* 357 /*
358 * protects queue structures from reentrancy. ->__queue_lock should 358 * protects queue structures from reentrancy. ->__queue_lock should
359 * _never_ be used directly, it is queue private. always use 359 * _never_ be used directly, it is queue private. always use
360 * ->queue_lock. 360 * ->queue_lock.
361 */ 361 */
362 spinlock_t __queue_lock; 362 spinlock_t __queue_lock;
363 spinlock_t *queue_lock; 363 spinlock_t *queue_lock;
364 364
365 /* 365 /*
366 * queue kobject 366 * queue kobject
367 */ 367 */
368 struct kobject kobj; 368 struct kobject kobj;
369 369
370 /* 370 /*
371 * queue settings 371 * queue settings
372 */ 372 */
373 unsigned long nr_requests; /* Max # of requests */ 373 unsigned long nr_requests; /* Max # of requests */
374 unsigned int nr_congestion_on; 374 unsigned int nr_congestion_on;
375 unsigned int nr_congestion_off; 375 unsigned int nr_congestion_off;
376 unsigned int nr_batching; 376 unsigned int nr_batching;
377 377
378 unsigned short max_sectors; 378 unsigned short max_sectors;
379 unsigned short max_hw_sectors; 379 unsigned short max_hw_sectors;
380 unsigned short max_phys_segments; 380 unsigned short max_phys_segments;
381 unsigned short max_hw_segments; 381 unsigned short max_hw_segments;
382 unsigned short hardsect_size; 382 unsigned short hardsect_size;
383 unsigned int max_segment_size; 383 unsigned int max_segment_size;
384 384
385 unsigned long seg_boundary_mask; 385 unsigned long seg_boundary_mask;
386 unsigned int dma_alignment; 386 unsigned int dma_alignment;
387 387
388 struct blk_queue_tag *queue_tags; 388 struct blk_queue_tag *queue_tags;
389 389
390 atomic_t refcnt; 390 atomic_t refcnt;
391 391
392 unsigned int in_flight; 392 unsigned int in_flight;
393 393
394 /* 394 /*
395 * sg stuff 395 * sg stuff
396 */ 396 */
397 unsigned int sg_timeout; 397 unsigned int sg_timeout;
398 unsigned int sg_reserved_size; 398 unsigned int sg_reserved_size;
399 399
400 struct list_head drain_list; 400 struct list_head drain_list;
401 401
402 /* 402 /*
403 * reserved for flush operations 403 * reserved for flush operations
404 */ 404 */
405 struct request *flush_rq; 405 struct request *flush_rq;
406 unsigned char ordered; 406 unsigned char ordered;
407 }; 407 };
408 408
409 enum { 409 enum {
410 QUEUE_ORDERED_NONE, 410 QUEUE_ORDERED_NONE,
411 QUEUE_ORDERED_TAG, 411 QUEUE_ORDERED_TAG,
412 QUEUE_ORDERED_FLUSH, 412 QUEUE_ORDERED_FLUSH,
413 }; 413 };
414 414
415 #define RQ_INACTIVE (-1) 415 #define RQ_INACTIVE (-1)
416 #define RQ_ACTIVE 1 416 #define RQ_ACTIVE 1
417 #define RQ_SCSI_BUSY 0xffff 417 #define RQ_SCSI_BUSY 0xffff
418 #define RQ_SCSI_DONE 0xfffe 418 #define RQ_SCSI_DONE 0xfffe
419 #define RQ_SCSI_DISCONNECTING 0xffe0 419 #define RQ_SCSI_DISCONNECTING 0xffe0
420 420
421 #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ 421 #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */
422 #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ 422 #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
423 #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ 423 #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */
424 #define QUEUE_FLAG_READFULL 3 /* read queue has been filled */ 424 #define QUEUE_FLAG_READFULL 3 /* read queue has been filled */
425 #define QUEUE_FLAG_WRITEFULL 4 /* write queue has been filled */ 425 #define QUEUE_FLAG_WRITEFULL 4 /* write queue has been filled */
426 #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ 426 #define QUEUE_FLAG_DEAD 5 /* queue being torn down */
427 #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ 427 #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */
428 #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ 428 #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */
429 #define QUEUE_FLAG_DRAIN 8 /* draining queue for sched switch */ 429 #define QUEUE_FLAG_DRAIN 8 /* draining queue for sched switch */
430 #define QUEUE_FLAG_FLUSH 9 /* doing barrier flush sequence */ 430 #define QUEUE_FLAG_FLUSH 9 /* doing barrier flush sequence */
431 431
432 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) 432 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
433 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) 433 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
434 #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) 434 #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
435 #define blk_queue_flushing(q) test_bit(QUEUE_FLAG_FLUSH, &(q)->queue_flags) 435 #define blk_queue_flushing(q) test_bit(QUEUE_FLAG_FLUSH, &(q)->queue_flags)
436 436
437 #define blk_fs_request(rq) ((rq)->flags & REQ_CMD) 437 #define blk_fs_request(rq) ((rq)->flags & REQ_CMD)
438 #define blk_pc_request(rq) ((rq)->flags & REQ_BLOCK_PC) 438 #define blk_pc_request(rq) ((rq)->flags & REQ_BLOCK_PC)
439 #define blk_noretry_request(rq) ((rq)->flags & REQ_FAILFAST) 439 #define blk_noretry_request(rq) ((rq)->flags & REQ_FAILFAST)
440 #define blk_rq_started(rq) ((rq)->flags & REQ_STARTED) 440 #define blk_rq_started(rq) ((rq)->flags & REQ_STARTED)
441 441
442 #define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq)) 442 #define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq))
443 443
444 #define blk_pm_suspend_request(rq) ((rq)->flags & REQ_PM_SUSPEND) 444 #define blk_pm_suspend_request(rq) ((rq)->flags & REQ_PM_SUSPEND)
445 #define blk_pm_resume_request(rq) ((rq)->flags & REQ_PM_RESUME) 445 #define blk_pm_resume_request(rq) ((rq)->flags & REQ_PM_RESUME)
446 #define blk_pm_request(rq) \ 446 #define blk_pm_request(rq) \
447 ((rq)->flags & (REQ_PM_SUSPEND | REQ_PM_RESUME)) 447 ((rq)->flags & (REQ_PM_SUSPEND | REQ_PM_RESUME))
448 448
449 #define blk_barrier_rq(rq) ((rq)->flags & REQ_HARDBARRIER) 449 #define blk_barrier_rq(rq) ((rq)->flags & REQ_HARDBARRIER)
450 #define blk_barrier_preflush(rq) ((rq)->flags & REQ_BAR_PREFLUSH) 450 #define blk_barrier_preflush(rq) ((rq)->flags & REQ_BAR_PREFLUSH)
451 #define blk_barrier_postflush(rq) ((rq)->flags & REQ_BAR_POSTFLUSH) 451 #define blk_barrier_postflush(rq) ((rq)->flags & REQ_BAR_POSTFLUSH)
452 452
453 #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) 453 #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
454 454
455 #define rq_data_dir(rq) ((rq)->flags & 1) 455 #define rq_data_dir(rq) ((rq)->flags & 1)
456 456
457 static inline int blk_queue_full(struct request_queue *q, int rw) 457 static inline int blk_queue_full(struct request_queue *q, int rw)
458 { 458 {
459 if (rw == READ) 459 if (rw == READ)
460 return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags); 460 return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
461 return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); 461 return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
462 } 462 }
463 463
464 static inline void blk_set_queue_full(struct request_queue *q, int rw) 464 static inline void blk_set_queue_full(struct request_queue *q, int rw)
465 { 465 {
466 if (rw == READ) 466 if (rw == READ)
467 set_bit(QUEUE_FLAG_READFULL, &q->queue_flags); 467 set_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
468 else 468 else
469 set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); 469 set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
470 } 470 }
471 471
472 static inline void blk_clear_queue_full(struct request_queue *q, int rw) 472 static inline void blk_clear_queue_full(struct request_queue *q, int rw)
473 { 473 {
474 if (rw == READ) 474 if (rw == READ)
475 clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags); 475 clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
476 else 476 else
477 clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); 477 clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
478 } 478 }
479 479
480 480
481 /* 481 /*
482 * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may 482 * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may
483 * it already be started by driver. 483 * it already be started by driver.
484 */ 484 */
485 #define RQ_NOMERGE_FLAGS \ 485 #define RQ_NOMERGE_FLAGS \
486 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) 486 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
487 #define rq_mergeable(rq) \ 487 #define rq_mergeable(rq) \
488 (!((rq)->flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) 488 (!((rq)->flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
489 489
490 /* 490 /*
491 * noop, requests are automagically marked as active/inactive by I/O 491 * noop, requests are automagically marked as active/inactive by I/O
492 * scheduler -- see elv_next_request 492 * scheduler -- see elv_next_request
493 */ 493 */
494 #define blk_queue_headactive(q, head_active) 494 #define blk_queue_headactive(q, head_active)
495 495
496 /* 496 /*
497 * q->prep_rq_fn return values 497 * q->prep_rq_fn return values
498 */ 498 */
499 #define BLKPREP_OK 0 /* serve it */ 499 #define BLKPREP_OK 0 /* serve it */
500 #define BLKPREP_KILL 1 /* fatal error, kill */ 500 #define BLKPREP_KILL 1 /* fatal error, kill */
501 #define BLKPREP_DEFER 2 /* leave on queue */ 501 #define BLKPREP_DEFER 2 /* leave on queue */
502 502
503 extern unsigned long blk_max_low_pfn, blk_max_pfn; 503 extern unsigned long blk_max_low_pfn, blk_max_pfn;
504 504
505 /* 505 /*
506 * standard bounce addresses: 506 * standard bounce addresses:
507 * 507 *
508 * BLK_BOUNCE_HIGH : bounce all highmem pages 508 * BLK_BOUNCE_HIGH : bounce all highmem pages
509 * BLK_BOUNCE_ANY : don't bounce anything 509 * BLK_BOUNCE_ANY : don't bounce anything
510 * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary 510 * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary
511 */ 511 */
512 #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) 512 #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT)
513 #define BLK_BOUNCE_ANY ((u64)blk_max_pfn << PAGE_SHIFT) 513 #define BLK_BOUNCE_ANY ((u64)blk_max_pfn << PAGE_SHIFT)
514 #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD) 514 #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD)
515 515
516 #ifdef CONFIG_MMU 516 #ifdef CONFIG_MMU
517 extern int init_emergency_isa_pool(void); 517 extern int init_emergency_isa_pool(void);
518 extern void blk_queue_bounce(request_queue_t *q, struct bio **bio); 518 extern void blk_queue_bounce(request_queue_t *q, struct bio **bio);
519 #else 519 #else
520 static inline int init_emergency_isa_pool(void) 520 static inline int init_emergency_isa_pool(void)
521 { 521 {
522 return 0; 522 return 0;
523 } 523 }
524 static inline void blk_queue_bounce(request_queue_t *q, struct bio **bio) 524 static inline void blk_queue_bounce(request_queue_t *q, struct bio **bio)
525 { 525 {
526 } 526 }
527 #endif /* CONFIG_MMU */ 527 #endif /* CONFIG_MMU */
528 528
529 #define rq_for_each_bio(_bio, rq) \ 529 #define rq_for_each_bio(_bio, rq) \
530 if ((rq->bio)) \ 530 if ((rq->bio)) \
531 for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) 531 for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
532 532
533 struct sec_size { 533 struct sec_size {
534 unsigned block_size; 534 unsigned block_size;
535 unsigned block_size_bits; 535 unsigned block_size_bits;
536 }; 536 };
537 537
538 extern int blk_register_queue(struct gendisk *disk); 538 extern int blk_register_queue(struct gendisk *disk);
539 extern void blk_unregister_queue(struct gendisk *disk); 539 extern void blk_unregister_queue(struct gendisk *disk);
540 extern void register_disk(struct gendisk *dev); 540 extern void register_disk(struct gendisk *dev);
541 extern void generic_make_request(struct bio *bio); 541 extern void generic_make_request(struct bio *bio);
542 extern void blk_put_request(struct request *); 542 extern void blk_put_request(struct request *);
543 extern void blk_end_sync_rq(struct request *rq); 543 extern void blk_end_sync_rq(struct request *rq);
544 extern void blk_attempt_remerge(request_queue_t *, struct request *); 544 extern void blk_attempt_remerge(request_queue_t *, struct request *);
545 extern void __blk_attempt_remerge(request_queue_t *, struct request *); 545 extern void __blk_attempt_remerge(request_queue_t *, struct request *);
546 extern struct request *blk_get_request(request_queue_t *, int, int); 546 extern struct request *blk_get_request(request_queue_t *, int, int);
547 extern void blk_insert_request(request_queue_t *, struct request *, int, void *, int); 547 extern void blk_insert_request(request_queue_t *, struct request *, int, void *);
548 extern void blk_requeue_request(request_queue_t *, struct request *); 548 extern void blk_requeue_request(request_queue_t *, struct request *);
549 extern void blk_plug_device(request_queue_t *); 549 extern void blk_plug_device(request_queue_t *);
550 extern int blk_remove_plug(request_queue_t *); 550 extern int blk_remove_plug(request_queue_t *);
551 extern void blk_recount_segments(request_queue_t *, struct bio *); 551 extern void blk_recount_segments(request_queue_t *, struct bio *);
552 extern int blk_phys_contig_segment(request_queue_t *q, struct bio *, struct bio *); 552 extern int blk_phys_contig_segment(request_queue_t *q, struct bio *, struct bio *);
553 extern int blk_hw_contig_segment(request_queue_t *q, struct bio *, struct bio *); 553 extern int blk_hw_contig_segment(request_queue_t *q, struct bio *, struct bio *);
554 extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *); 554 extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *);
555 extern void blk_start_queue(request_queue_t *q); 555 extern void blk_start_queue(request_queue_t *q);
556 extern void blk_stop_queue(request_queue_t *q); 556 extern void blk_stop_queue(request_queue_t *q);
557 extern void blk_sync_queue(struct request_queue *q); 557 extern void blk_sync_queue(struct request_queue *q);
558 extern void __blk_stop_queue(request_queue_t *q); 558 extern void __blk_stop_queue(request_queue_t *q);
559 extern void blk_run_queue(request_queue_t *); 559 extern void blk_run_queue(request_queue_t *);
560 extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); 560 extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
561 extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int); 561 extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int);
562 extern int blk_rq_unmap_user(struct request *, struct bio *, unsigned int); 562 extern int blk_rq_unmap_user(struct request *, struct bio *, unsigned int);
563 extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *); 563 extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *);
564 564
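blk_insert_request() above now takes four arguments; the old fifth argument is what this diff removes. A hedged sketch of a caller using the new form (the function name is illustrative, and @data is the driver-private pointer the block layer hands back through rq->special):

static void queue_special_command(request_queue_t *q, void *data)
{
	/* __GFP_WAIT: may sleep until a free request is available */
	struct request *rq = blk_get_request(q, READ, __GFP_WAIT);

	if (!rq)
		return;

	/* at_head = 1 puts the request at the front of the queue;
	 * @data is handed to the driver through rq->special */
	blk_insert_request(q, rq, 1, data);
}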
565 static inline request_queue_t *bdev_get_queue(struct block_device *bdev) 565 static inline request_queue_t *bdev_get_queue(struct block_device *bdev)
566 { 566 {
567 return bdev->bd_disk->queue; 567 return bdev->bd_disk->queue;
568 } 568 }
569 569
570 static inline void blk_run_backing_dev(struct backing_dev_info *bdi, 570 static inline void blk_run_backing_dev(struct backing_dev_info *bdi,
571 struct page *page) 571 struct page *page)
572 { 572 {
573 if (bdi && bdi->unplug_io_fn) 573 if (bdi && bdi->unplug_io_fn)
574 bdi->unplug_io_fn(bdi, page); 574 bdi->unplug_io_fn(bdi, page);
575 } 575 }
576 576
577 static inline void blk_run_address_space(struct address_space *mapping) 577 static inline void blk_run_address_space(struct address_space *mapping)
578 { 578 {
579 if (mapping) 579 if (mapping)
580 blk_run_backing_dev(mapping->backing_dev_info, NULL); 580 blk_run_backing_dev(mapping->backing_dev_info, NULL);
581 } 581 }
582 582
583 /* 583 /*
584 * end_request() and friends. Must be called with the request queue spinlock 584 * end_request() and friends. Must be called with the request queue spinlock
585 * acquired. All functions called within end_request() _must_be_ atomic. 585 * acquired. All functions called within end_request() _must_be_ atomic.
586 * 586 *
587 * Several drivers define their own end_request and call 587 * Several drivers define their own end_request and call
588 * end_that_request_first() and end_that_request_last() 588 * end_that_request_first() and end_that_request_last()
589 * for parts of the original function. This prevents 589 * for parts of the original function. This prevents
590 * code duplication in drivers. 590 * code duplication in drivers.
591 */ 591 */
592 extern int end_that_request_first(struct request *, int, int); 592 extern int end_that_request_first(struct request *, int, int);
593 extern int end_that_request_chunk(struct request *, int, int); 593 extern int end_that_request_chunk(struct request *, int, int);
594 extern void end_that_request_last(struct request *); 594 extern void end_that_request_last(struct request *);
595 extern void end_request(struct request *req, int uptodate); 595 extern void end_request(struct request *req, int uptodate);
596 596
597 /* 597 /*
598 * end_that_request_first/chunk() takes an uptodate argument. we account 598 * end_that_request_first/chunk() takes an uptodate argument. we account
599 * any value <= 0 as an io error. 0 means -EIO for compatibility reasons, 599 * any value <= 0 as an io error. 0 means -EIO for compatibility reasons,
600 * any other < 0 value is the direct error type. An uptodate value of 600 * any other < 0 value is the direct error type. An uptodate value of
601 * 1 indicates successful io completion 601 * 1 indicates successful io completion
602 */ 602 */
603 #define end_io_error(uptodate) (unlikely((uptodate) <= 0)) 603 #define end_io_error(uptodate) (unlikely((uptodate) <= 0))
604 604
605 static inline void blkdev_dequeue_request(struct request *req) 605 static inline void blkdev_dequeue_request(struct request *req)
606 { 606 {
607 BUG_ON(list_empty(&req->queuelist)); 607 BUG_ON(list_empty(&req->queuelist));
608 608
609 list_del_init(&req->queuelist); 609 list_del_init(&req->queuelist);
610 610
611 if (req->rl) 611 if (req->rl)
612 elv_remove_request(req->q, req); 612 elv_remove_request(req->q, req);
613 } 613 }
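blkdev_dequeue_request() is normally paired with elv_next_request() inside a driver's request_fn; a hedged, minimal sketch of that shape, with the hardware hand-off left as a placeholder:

static void example_request_fn(request_queue_t *q)
{
	struct request *rq;

	/* the block layer calls request_fn with q->queue_lock held */
	while ((rq = elv_next_request(q)) != NULL) {
		blkdev_dequeue_request(rq);	/* take it off the queue */
		/* ... start the transfer; completion later goes through
		 * end_that_request_first()/end_that_request_last() ... */
	}
}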
614 614
615 /* 615 /*
616 * Access functions for manipulating queue properties 616 * Access functions for manipulating queue properties
617 */ 617 */
618 extern request_queue_t *blk_init_queue(request_fn_proc *, spinlock_t *); 618 extern request_queue_t *blk_init_queue(request_fn_proc *, spinlock_t *);
619 extern void blk_cleanup_queue(request_queue_t *); 619 extern void blk_cleanup_queue(request_queue_t *);
620 extern void blk_queue_make_request(request_queue_t *, make_request_fn *); 620 extern void blk_queue_make_request(request_queue_t *, make_request_fn *);
621 extern void blk_queue_bounce_limit(request_queue_t *, u64); 621 extern void blk_queue_bounce_limit(request_queue_t *, u64);
622 extern void blk_queue_max_sectors(request_queue_t *, unsigned short); 622 extern void blk_queue_max_sectors(request_queue_t *, unsigned short);
623 extern void blk_queue_max_phys_segments(request_queue_t *, unsigned short); 623 extern void blk_queue_max_phys_segments(request_queue_t *, unsigned short);
624 extern void blk_queue_max_hw_segments(request_queue_t *, unsigned short); 624 extern void blk_queue_max_hw_segments(request_queue_t *, unsigned short);
625 extern void blk_queue_max_segment_size(request_queue_t *, unsigned int); 625 extern void blk_queue_max_segment_size(request_queue_t *, unsigned int);
626 extern void blk_queue_hardsect_size(request_queue_t *, unsigned short); 626 extern void blk_queue_hardsect_size(request_queue_t *, unsigned short);
627 extern void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b); 627 extern void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b);
628 extern void blk_queue_segment_boundary(request_queue_t *, unsigned long); 628 extern void blk_queue_segment_boundary(request_queue_t *, unsigned long);
629 extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn); 629 extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn);
630 extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *); 630 extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *);
631 extern void blk_queue_dma_alignment(request_queue_t *, int); 631 extern void blk_queue_dma_alignment(request_queue_t *, int);
632 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); 632 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
633 extern void blk_queue_ordered(request_queue_t *, int); 633 extern void blk_queue_ordered(request_queue_t *, int);
634 extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *); 634 extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *);
635 extern int blkdev_scsi_issue_flush_fn(request_queue_t *, struct gendisk *, sector_t *); 635 extern int blkdev_scsi_issue_flush_fn(request_queue_t *, struct gendisk *, sector_t *);
636 extern struct request *blk_start_pre_flush(request_queue_t *,struct request *); 636 extern struct request *blk_start_pre_flush(request_queue_t *,struct request *);
637 extern int blk_complete_barrier_rq(request_queue_t *, struct request *, int); 637 extern int blk_complete_barrier_rq(request_queue_t *, struct request *, int);
638 extern int blk_complete_barrier_rq_locked(request_queue_t *, struct request *, int); 638 extern int blk_complete_barrier_rq_locked(request_queue_t *, struct request *, int);
639 639
640 extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *); 640 extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *);
641 extern void blk_dump_rq_flags(struct request *, char *); 641 extern void blk_dump_rq_flags(struct request *, char *);
642 extern void generic_unplug_device(request_queue_t *); 642 extern void generic_unplug_device(request_queue_t *);
643 extern void __generic_unplug_device(request_queue_t *); 643 extern void __generic_unplug_device(request_queue_t *);
644 extern long nr_blockdev_pages(void); 644 extern long nr_blockdev_pages(void);
645 extern void blk_wait_queue_drained(request_queue_t *, int); 645 extern void blk_wait_queue_drained(request_queue_t *, int);
646 extern void blk_finish_queue_drain(request_queue_t *); 646 extern void blk_finish_queue_drain(request_queue_t *);
647 647
648 int blk_get_queue(request_queue_t *); 648 int blk_get_queue(request_queue_t *);
649 request_queue_t *blk_alloc_queue(int); 649 request_queue_t *blk_alloc_queue(int);
650 #define blk_put_queue(q) blk_cleanup_queue((q)) 650 #define blk_put_queue(q) blk_cleanup_queue((q))
651 651
652 /* 652 /*
653 * tag stuff 653 * tag stuff
654 */ 654 */
655 #define blk_queue_tag_depth(q) ((q)->queue_tags->busy) 655 #define blk_queue_tag_depth(q) ((q)->queue_tags->busy)
656 #define blk_queue_tag_queue(q) ((q)->queue_tags->busy < (q)->queue_tags->max_depth) 656 #define blk_queue_tag_queue(q) ((q)->queue_tags->busy < (q)->queue_tags->max_depth)
657 #define blk_rq_tagged(rq) ((rq)->flags & REQ_QUEUED) 657 #define blk_rq_tagged(rq) ((rq)->flags & REQ_QUEUED)
658 extern int blk_queue_start_tag(request_queue_t *, struct request *); 658 extern int blk_queue_start_tag(request_queue_t *, struct request *);
659 extern struct request *blk_queue_find_tag(request_queue_t *, int); 659 extern struct request *blk_queue_find_tag(request_queue_t *, int);
660 extern void blk_queue_end_tag(request_queue_t *, struct request *); 660 extern void blk_queue_end_tag(request_queue_t *, struct request *);
661 extern int blk_queue_init_tags(request_queue_t *, int, struct blk_queue_tag *); 661 extern int blk_queue_init_tags(request_queue_t *, int, struct blk_queue_tag *);
662 extern void blk_queue_free_tags(request_queue_t *); 662 extern void blk_queue_free_tags(request_queue_t *);
663 extern int blk_queue_resize_tags(request_queue_t *, int); 663 extern int blk_queue_resize_tags(request_queue_t *, int);
664 extern void blk_queue_invalidate_tags(request_queue_t *); 664 extern void blk_queue_invalidate_tags(request_queue_t *);
665 extern long blk_congestion_wait(int rw, long timeout); 665 extern long blk_congestion_wait(int rw, long timeout);
666 666
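
Editor's note: for the tagging helpers above, the conventional flow is blk_queue_init_tags() at setup, blk_queue_start_tag() in the driver's request_fn (on success it also takes the request off the queue), and blk_queue_end_tag() on completion before the request is ended. A hedged sketch of the request_fn side, with my_request_fn, my_issue_to_hw and MY_TAG_DEPTH as illustrative names:

	/* Illustrative fragment of a hypothetical TCQ-capable driver.  Setup
	 * is assumed to have done: blk_queue_init_tags(q, MY_TAG_DEPTH, NULL);
	 * The completion path is expected to call blk_queue_end_tag(q, rq)
	 * before the request is finished. */
	static void my_request_fn(request_queue_t *q)
	{
		struct request *rq;

		while ((rq = elv_next_request(q)) != NULL) {
			/* non-zero return: no free tag, leave rq queued and
			 * retry when a tag is released */
			if (blk_queue_start_tag(q, rq))
				break;

			/* rq->tag now identifies the command to the hardware;
			 * blk_queue_start_tag() already dequeued rq for us */
			my_issue_to_hw(rq);
		}
	}
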
667 extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *); 667 extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *);
668 extern int blkdev_issue_flush(struct block_device *, sector_t *); 668 extern int blkdev_issue_flush(struct block_device *, sector_t *);
669 669
670 #define MAX_PHYS_SEGMENTS 128 670 #define MAX_PHYS_SEGMENTS 128
671 #define MAX_HW_SEGMENTS 128 671 #define MAX_HW_SEGMENTS 128
672 #define MAX_SECTORS 255 672 #define MAX_SECTORS 255
673 673
674 #define MAX_SEGMENT_SIZE 65536 674 #define MAX_SEGMENT_SIZE 65536
675 675
676 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) 676 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
677 677
678 extern void drive_stat_acct(struct request *, int, int); 678 extern void drive_stat_acct(struct request *, int, int);
679 679
680 static inline int queue_hardsect_size(request_queue_t *q) 680 static inline int queue_hardsect_size(request_queue_t *q)
681 { 681 {
682 int retval = 512; 682 int retval = 512;
683 683
684 if (q && q->hardsect_size) 684 if (q && q->hardsect_size)
685 retval = q->hardsect_size; 685 retval = q->hardsect_size;
686 686
687 return retval; 687 return retval;
688 } 688 }
689 689
690 static inline int bdev_hardsect_size(struct block_device *bdev) 690 static inline int bdev_hardsect_size(struct block_device *bdev)
691 { 691 {
692 return queue_hardsect_size(bdev_get_queue(bdev)); 692 return queue_hardsect_size(bdev_get_queue(bdev));
693 } 693 }
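
Editor's note: queue_hardsect_size() falls back to 512 bytes when the driver never called blk_queue_hardsect_size(). A typical consumer is a block size sanity check of the kind sketched below (my_check_blocksize is illustrative):

	/* Sketch: a software block size must be at least the device's
	 * hardware sector size (512 unless the driver set it larger). */
	static int my_check_blocksize(struct block_device *bdev, int blocksize)
	{
		if (blocksize < bdev_hardsect_size(bdev))
			return -EINVAL;
		return 0;
	}
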
694 694
695 static inline int queue_dma_alignment(request_queue_t *q) 695 static inline int queue_dma_alignment(request_queue_t *q)
696 { 696 {
697 int retval = 511; 697 int retval = 511;
698 698
699 if (q && q->dma_alignment) 699 if (q && q->dma_alignment)
700 retval = q->dma_alignment; 700 retval = q->dma_alignment;
701 701
702 return retval; 702 return retval;
703 } 703 }
704 704
705 static inline int bdev_dma_aligment(struct block_device *bdev) 705 static inline int bdev_dma_aligment(struct block_device *bdev)
706 { 706 {
707 return queue_dma_alignment(bdev_get_queue(bdev)); 707 return queue_dma_alignment(bdev_get_queue(bdev));
708 } 708 }
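
Editor's note: queue_dma_alignment() returns an alignment mask, not a byte count; the default of 511 means buffers must be 512-byte aligned unless the driver relaxed it with blk_queue_dma_alignment(). A hedged sketch of the usual check (names illustrative):

	/* Sketch: a non-zero result means the address/length pair violates
	 * the queue's DMA alignment mask and needs bouncing or rejection. */
	static unsigned long my_dma_misaligned(request_queue_t *q,
					       unsigned long addr, unsigned int len)
	{
		return (addr | len) & queue_dma_alignment(q);
	}
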
709 709
710 #define blk_finished_io(nsects) do { } while (0) 710 #define blk_finished_io(nsects) do { } while (0)
711 #define blk_started_io(nsects) do { } while (0) 711 #define blk_started_io(nsects) do { } while (0)
712 712
713 /* assumes size > 256 */ 713 /* assumes size > 256 */
714 static inline unsigned int blksize_bits(unsigned int size) 714 static inline unsigned int blksize_bits(unsigned int size)
715 { 715 {
716 unsigned int bits = 8; 716 unsigned int bits = 8;
717 do { 717 do {
718 bits++; 718 bits++;
719 size >>= 1; 719 size >>= 1;
720 } while (size > 256); 720 } while (size > 256);
721 return bits; 721 return bits;
722 } 722 }
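
Editor's note: blksize_bits() is simply log2 of the block size, valid under the stated assumption that size is larger than 256: for instance, blksize_bits(512) is 9, blksize_bits(1024) is 10, and blksize_bits(4096) is 12.
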
723 723
724 extern inline unsigned int block_size(struct block_device *bdev) 724 extern inline unsigned int block_size(struct block_device *bdev)
725 { 725 {
726 return bdev->bd_block_size; 726 return bdev->bd_block_size;
727 } 727 }
728 728
729 typedef struct {struct page *v;} Sector; 729 typedef struct {struct page *v;} Sector;
730 730
731 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); 731 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
732 732
733 static inline void put_dev_sector(Sector p) 733 static inline void put_dev_sector(Sector p)
734 { 734 {
735 page_cache_release(p.v); 735 page_cache_release(p.v);
736 } 736 }
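
Editor's note: the Sector cookie exists so read_dev_sector() can hand back both the mapped data and the page that must eventually be released. The partition-parsing style of use is roughly as sketched below (my_peek_sector is illustrative):

	/* Sketch: read one sector from a block device and release the backing
	 * page when done (error handling kept minimal on purpose). */
	static int my_peek_sector(struct block_device *bdev, sector_t n)
	{
		Sector sect;
		unsigned char *data = read_dev_sector(bdev, n, &sect);

		if (!data)
			return -EIO;
		/* ... inspect the sector's bytes here ... */
		put_dev_sector(sect);
		return 0;
	}
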
737 737
738 struct work_struct; 738 struct work_struct;
739 int kblockd_schedule_work(struct work_struct *work); 739 int kblockd_schedule_work(struct work_struct *work);
740 void kblockd_flush(void); 740 void kblockd_flush(void);
741 741
742 #ifdef CONFIG_LBD 742 #ifdef CONFIG_LBD
743 # include <asm/div64.h> 743 # include <asm/div64.h>
744 # define sector_div(a, b) do_div(a, b) 744 # define sector_div(a, b) do_div(a, b)
745 #else 745 #else
746 # define sector_div(n, b)( \ 746 # define sector_div(n, b)( \
747 { \ 747 { \
748 int _res; \ 748 int _res; \
749 _res = (n) % (b); \ 749 _res = (n) % (b); \
750 (n) /= (b); \ 750 (n) /= (b); \
751 _res; \ 751 _res; \
752 } \ 752 } \
753 ) 753 )
754 #endif 754 #endif
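
Editor's note: both variants of sector_div() share one contract: the first argument is divided in place and the macro evaluates to the remainder, which is what keeps 64-bit sector_t arithmetic usable on 32-bit machines when CONFIG_LBD is set. A small sketch (chunk_sectors is an illustrative parameter):

	/* Sketch: split an absolute sector into a chunk-relative offset.
	 * After sector_div(), the local 'sector' holds the quotient and the
	 * return value is the remainder. */
	static unsigned int my_chunk_offset(sector_t sector, unsigned int chunk_sectors)
	{
		return sector_div(sector, chunk_sectors);
	}
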
755 755
756 #define MODULE_ALIAS_BLOCKDEV(major,minor) \ 756 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
757 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) 757 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
758 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ 758 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
759 MODULE_ALIAS("block-major-" __stringify(major) "-*") 759 MODULE_ALIAS("block-major-" __stringify(major) "-*")
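
Editor's note: these helpers just stringify the major/minor numbers into the "block-major-*" alias namespace that userspace module loading matches against. For example, a driver owning major 8 (the number is illustrative here) would typically declare:

	/* Expands to MODULE_ALIAS("block-major-8-*"), letting the module be
	 * loaded on demand for any minor of major 8. */
	MODULE_ALIAS_BLOCKDEV_MAJOR(8);
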
760 760
761 761
762 #endif 762 #endif
763 763