Commit 16008d641670571ff4cd750b416c7caf2d89f467

Authored by Linus Torvalds

Merge branch 'for-3.3/drivers' of git://git.kernel.dk/linux-block

* 'for-3.3/drivers' of git://git.kernel.dk/linux-block:
  mtip32xx: do rebuild monitoring asynchronously
  xen-blkfront: Use kcalloc instead of kzalloc to allocate array
  mtip32xx: uninitialized variable in mtip_quiesce_io()
  mtip32xx: updates based on feedback
  xen-blkback: convert hole punching to discard request on loop devices
  xen/blkback: Move processing of BLKIF_OP_DISCARD from dispatch_rw_block_io
  xen/blk[front|back]: Enhance discard support with secure erasing support.
  xen/blk[front|back]: Squash blkif_request_rw and blkif_request_discard together
  mtip32xx: update to new ->make_request() API
  mtip32xx: add module.h include to avoid conflict with moduleh tree
  mtip32xx: mark a few more items static
  mtip32xx: ensure that all local functions are static
  mtip32xx: cleanup compat ioctl handling
  mtip32xx: fix warnings/errors on 32-bit compiles
  block: Add driver for Micron RealSSD pcie flash cards

Showing 11 changed files

drivers/block/Kconfig
... ... @@ -116,6 +116,8 @@
116 116  
117 117 source "drivers/block/paride/Kconfig"
118 118  
  119 +source "drivers/block/mtip32xx/Kconfig"
  120 +
119 121 config BLK_CPQ_DA
120 122 tristate "Compaq SMART2 support"
121 123 depends on PCI && VIRT_TO_BUS
drivers/block/Makefile
... ... @@ -39,6 +39,7 @@
39 39 obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
40 40 obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
41 41 obj-$(CONFIG_BLK_DEV_RBD) += rbd.o
  42 +obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/
42 43  
43 44 swim_mod-y := swim.o swim_asm.o
drivers/block/mtip32xx/Kconfig
  1 +#
  2 +# mtip32xx device driver configuration
  3 +#
  4 +
  5 +config BLK_DEV_PCIESSD_MTIP32XX
  6 + tristate "Block Device Driver for Micron PCIe SSDs"
  7 + depends on HOTPLUG_PCI_PCIE
  8 + help
  9 + This enables the block driver for Micron PCIe SSDs.
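
As a usage sketch (the symbol names come from the hunks above; choosing to build as a module is an assumption, not something the commit specifies), a kernel .config enabling the new driver would contain:

    CONFIG_HOTPLUG_PCI_PCIE=y
    CONFIG_BLK_DEV_PCIESSD_MTIP32XX=m
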
drivers/block/mtip32xx/Makefile
  1 +#
  2 +# Makefile for the block device driver for Micron PCIe SSDs
  3 +#
  4 +
  5 +obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx.o
drivers/block/mtip32xx/mtip32xx.c
  1 +/*
  2 + * Driver for the Micron P320 SSD
  3 + * Copyright (C) 2011 Micron Technology, Inc.
  4 + *
  5 + * Portions of this code were derived from works subjected to the
  6 + * following copyright:
  7 + * Copyright (C) 2009 Integrated Device Technology, Inc.
  8 + *
  9 + * This program is free software; you can redistribute it and/or modify
  10 + * it under the terms of the GNU General Public License as published by
  11 + * the Free Software Foundation; either version 2 of the License, or
  12 + * (at your option) any later version.
  13 + *
  14 + * This program is distributed in the hope that it will be useful,
  15 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17 + * GNU General Public License for more details.
  18 + *
  19 + */
  20 +
  21 +#include <linux/pci.h>
  22 +#include <linux/interrupt.h>
  23 +#include <linux/ata.h>
  24 +#include <linux/delay.h>
  25 +#include <linux/hdreg.h>
  26 +#include <linux/uaccess.h>
  27 +#include <linux/random.h>
  28 +#include <linux/smp.h>
  29 +#include <linux/compat.h>
  30 +#include <linux/fs.h>
  31 +#include <linux/module.h>
  32 +#include <linux/genhd.h>
  33 +#include <linux/blkdev.h>
  34 +#include <linux/bio.h>
  35 +#include <linux/dma-mapping.h>
  36 +#include <linux/idr.h>
  37 +#include <linux/kthread.h>
  38 +#include <../drivers/ata/ahci.h>
  39 +#include "mtip32xx.h"
  40 +
  41 +#define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32)
  42 +#define HW_CMD_TBL_SZ (AHCI_CMD_TBL_HDR_SZ + (MTIP_MAX_SG * 16))
  43 +#define HW_CMD_TBL_AR_SZ (HW_CMD_TBL_SZ * MTIP_MAX_COMMAND_SLOTS)
  44 +#define HW_PORT_PRIV_DMA_SZ \
  45 + (HW_CMD_SLOT_SZ + HW_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ)
  46 +
  47 +#define HOST_HSORG 0xFC
  48 +#define HSORG_DISABLE_SLOTGRP_INTR (1<<24)
  49 +#define HSORG_DISABLE_SLOTGRP_PXIS (1<<16)
  50 +#define HSORG_HWREV 0xFF00
  51 +#define HSORG_STYLE 0x8
  52 +#define HSORG_SLOTGROUPS 0x7
  53 +
  54 +#define PORT_COMMAND_ISSUE 0x38
  55 +#define PORT_SDBV 0x7C
  56 +
  57 +#define PORT_OFFSET 0x100
  58 +#define PORT_MEM_SIZE 0x80
  59 +
  60 +#define PORT_IRQ_ERR \
  61 + (PORT_IRQ_HBUS_ERR | PORT_IRQ_IF_ERR | PORT_IRQ_CONNECT | \
  62 + PORT_IRQ_PHYRDY | PORT_IRQ_UNK_FIS | PORT_IRQ_BAD_PMP | \
  63 + PORT_IRQ_TF_ERR | PORT_IRQ_HBUS_DATA_ERR | PORT_IRQ_IF_NONFATAL | \
  64 + PORT_IRQ_OVERFLOW)
  65 +#define PORT_IRQ_LEGACY \
  66 + (PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS)
  67 +#define PORT_IRQ_HANDLED \
  68 + (PORT_IRQ_SDB_FIS | PORT_IRQ_LEGACY | \
  69 + PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR | \
  70 + PORT_IRQ_CONNECT | PORT_IRQ_PHYRDY)
  71 +#define DEF_PORT_IRQ \
  72 + (PORT_IRQ_ERR | PORT_IRQ_LEGACY | PORT_IRQ_SDB_FIS)
  73 +
  74 +/* product numbers */
  75 +#define MTIP_PRODUCT_UNKNOWN 0x00
  76 +#define MTIP_PRODUCT_ASICFPGA 0x11
  77 +
  78 +/* Device instance number, incremented each time a device is probed. */
  79 +static int instance;
  80 +
  81 +/*
  82 + * Global variable used to hold the major block device number
  83 + * allocated in mtip_init().
  84 + */
  85 +static int mtip_major;
  86 +
  87 +static DEFINE_SPINLOCK(rssd_index_lock);
  88 +static DEFINE_IDA(rssd_index_ida);
  89 +
  90 +static int mtip_block_initialize(struct driver_data *dd);
  91 +
  92 +#ifdef CONFIG_COMPAT
  93 +struct mtip_compat_ide_task_request_s {
  94 + __u8 io_ports[8];
  95 + __u8 hob_ports[8];
  96 + ide_reg_valid_t out_flags;
  97 + ide_reg_valid_t in_flags;
  98 + int data_phase;
  99 + int req_cmd;
  100 + compat_ulong_t out_size;
  101 + compat_ulong_t in_size;
  102 +};
  103 +#endif
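
An aside on the compat struct above: it mirrors ide_task_request_t but pins out_size/in_size to compat_ulong_t, since unsigned long is 4 bytes for 32-bit userspace but 8 bytes in a 64-bit kernel, so the two ABIs disagree on the struct layout. A minimal standalone sketch of that divergence (hypothetical struct names, not part of the commit):

    #include <stdio.h>
    #include <stdint.h>

    struct native_sizes { unsigned long out_size, in_size; };
    struct compat_sizes { uint32_t out_size, in_size; }; /* compat_ulong_t analogue */

    int main(void)
    {
            /* On x86-64 this prints "native: 16, compat: 8" */
            printf("native: %zu, compat: %zu\n",
                   sizeof(struct native_sizes), sizeof(struct compat_sizes));
            return 0;
    }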
  104 +
  105 +/*
  106 + * This function is called when a card is removed from the
  107 + * system (surprise removal); it reads the vendor ID from the
  108 + * configuration space to confirm the device is gone
  109 + *
  110 + * @pdev Pointer to the pci_dev structure.
  111 + *
  112 + * return value
  113 + * true if device removed, else false
  114 + */
  115 +static bool mtip_check_surprise_removal(struct pci_dev *pdev)
  116 +{
  117 + u16 vendor_id = 0;
  118 +
  119 + /* Read the vendorID from the configuration space */
  120 + pci_read_config_word(pdev, 0x00, &vendor_id);
  121 + if (vendor_id == 0xFFFF)
  122 + return true; /* device removed */
  123 +
  124 + return false; /* device present */
  125 +}
  126 +
  127 +/*
  127 + * This function is called to clean up the pending commands in the
  128 + * command slots during the surprise removal of a device, returning
  129 + * an error to the upper layer.
  131 + *
  132 + * @dd Pointer to the DRIVER_DATA structure.
  133 + *
  134 + * return value
  135 + * None
  136 + */
  137 +static void mtip_command_cleanup(struct driver_data *dd)
  138 +{
  139 + int group = 0, commandslot = 0, commandindex = 0;
  140 + struct mtip_cmd *command;
  141 + struct mtip_port *port = dd->port;
  142 +
  143 + for (group = 0; group < 4; group++) {
  144 + for (commandslot = 0; commandslot < 32; commandslot++) {
  145 + if (!(port->allocated[group] & (1 << commandslot)))
  146 + continue;
  147 +
  148 + commandindex = group << 5 | commandslot;
  149 + command = &port->commands[commandindex];
  150 +
  151 + if (atomic_read(&command->active)
  152 + && (command->async_callback)) {
  153 + command->async_callback(command->async_data,
  154 + -ENODEV);
  155 + command->async_callback = NULL;
  156 + command->async_data = NULL;
  157 + }
  158 +
  159 + dma_unmap_sg(&port->dd->pdev->dev,
  160 + command->sg,
  161 + command->scatter_ents,
  162 + command->direction);
  163 + }
  164 + }
  165 +
  166 + up(&port->cmd_slot);
  167 +
  168 + atomic_set(&dd->drv_cleanup_done, true);
  169 +}
  170 +
  171 +/*
  172 + * Obtain an empty command slot.
  173 + *
  174 + * This function needs to be reentrant since it could be called
  175 + * at the same time on multiple CPUs. The allocation of the
  176 + * command slot must be atomic.
  177 + *
  178 + * @port Pointer to the port data structure.
  179 + *
  180 + * return value
  181 + * >= 0 Index of command slot obtained.
  182 + * -1 No command slots available.
  183 + */
  184 +static int get_slot(struct mtip_port *port)
  185 +{
  186 + int slot, i;
  187 + unsigned int num_command_slots = port->dd->slot_groups * 32;
  188 +
  189 + /*
  190 + * Try 10 times, because there is a small race here.
  191 + * That's OK, because it's still cheaper than a lock.
  192 + *
  193 + * Race: since this section is not protected by a lock, the same
  194 + * bit could be chosen by different process contexts running on
  195 + * different processors. So instead of a costly lock, we retry
  196 + * in a loop.
  197 + */
  198 + for (i = 0; i < 10; i++) {
  199 + slot = find_next_zero_bit(port->allocated,
  200 + num_command_slots, 1);
  201 + if ((slot < num_command_slots) &&
  202 + (!test_and_set_bit(slot, port->allocated)))
  203 + return slot;
  204 + }
  205 + dev_warn(&port->dd->pdev->dev, "Failed to get a tag.\n");
  206 +
  207 + if (mtip_check_surprise_removal(port->dd->pdev)) {
  208 + /* Device not present, clean outstanding commands */
  209 + mtip_command_cleanup(port->dd);
  210 + }
  211 + return -1;
  212 +}
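
The scan-then-claim idiom in get_slot() can be exercised on its own. Below is a minimal userspace sketch of the same pattern (hypothetical names; C11 atomics stand in for the kernel's find_next_zero_bit()/test_and_set_bit()):

    #include <stdatomic.h>
    #include <stdio.h>

    #define NUM_SLOTS 32

    static atomic_uint slots; /* bit i set => slot i allocated */

    static int get_slot_sketch(void)
    {
            /* Retry a few times: the scan and the claim can race, but
             * losing just means scanning again, which is still cheaper
             * than taking a lock on every allocation. */
            for (int attempt = 0; attempt < 10; attempt++) {
                    unsigned int cur = atomic_load(&slots);
                    for (int bit = 1; bit < NUM_SLOTS; bit++) { /* bit 0 reserved */
                            if (cur & (1u << bit))
                                    continue;
                            /* Atomic claim: only the winner sees the bit
                             * still clear in the returned old value. */
                            unsigned int old = atomic_fetch_or(&slots, 1u << bit);
                            if (!(old & (1u << bit)))
                                    return bit;
                            cur = old;
                    }
            }
            return -1; /* no free slot */
    }

    int main(void)
    {
            printf("got slot %d\n", get_slot_sketch());
            return 0;
    }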
  213 +
  214 +/*
  215 + * Release a command slot.
  216 + *
  217 + * @port Pointer to the port data structure.
  218 + * @tag Tag of command to release
  219 + *
  220 + * return value
  221 + * None
  222 + */
  223 +static inline void release_slot(struct mtip_port *port, int tag)
  224 +{
  225 + smp_mb__before_clear_bit();
  226 + clear_bit(tag, port->allocated);
  227 + smp_mb__after_clear_bit();
  228 +}
  229 +
  230 +/*
  231 + * Reset the HBA (without sleeping)
  232 + *
  233 + * Just like hba_reset, except does not call sleep, so can be
  234 + * run from interrupt/tasklet context.
  235 + *
  236 + * @dd Pointer to the driver data structure.
  237 + *
  238 + * return value
  239 + * 0 The reset was successful.
  240 + * -1 The HBA Reset bit did not clear.
  241 + */
  242 +static int hba_reset_nosleep(struct driver_data *dd)
  243 +{
  244 + unsigned long timeout;
  245 +
  246 + /* Chip quirk: quiesce any chip function */
  247 + mdelay(10);
  248 +
  249 + /* Set the reset bit */
  250 + writel(HOST_RESET, dd->mmio + HOST_CTL);
  251 +
  252 + /* Flush */
  253 + readl(dd->mmio + HOST_CTL);
  254 +
  255 + /*
  256 + * Wait 10ms then spin for up to 1 second
  257 + * waiting for reset acknowledgement
  258 + */
  259 + timeout = jiffies + msecs_to_jiffies(1000);
  260 + mdelay(10);
  261 + while ((readl(dd->mmio + HOST_CTL) & HOST_RESET)
  262 + && time_before(jiffies, timeout))
  263 + mdelay(1);
  264 +
  265 + if (readl(dd->mmio + HOST_CTL) & HOST_RESET)
  266 + return -1;
  267 +
  268 + return 0;
  269 +}
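
Note the readl() immediately after each writel() above: MMIO writes can be posted, so the read-back forces the write to reach the device before the delay starts. The handshake in isolation (a sketch against a hypothetical mapped register block; the offset and bit value are assumptions based on ahci.h):

    #include <stdint.h>

    #define HOST_CTL 0x04 /* global HBA control register offset */
    #define HOST_RESET (1u << 0) /* HBA reset bit */

    /* mmio points at the mapped HBA register block */
    static int hba_reset_sketch(volatile uint32_t *mmio)
    {
            mmio[HOST_CTL / 4] = HOST_RESET; /* set the reset bit */
            (void)mmio[HOST_CTL / 4]; /* read back to flush the posted write */
            /* ...delay, then poll until the hardware clears the bit... */
            return (mmio[HOST_CTL / 4] & HOST_RESET) ? -1 : 0;
    }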
  270 +
  271 +/*
  272 + * Issue a command to the hardware.
  273 + *
  274 + * Set the appropriate bit in the s_active and Command Issue hardware
  275 + * registers, causing hardware command processing to begin.
  276 + *
  277 + * @port Pointer to the port structure.
  278 + * @tag The tag of the command to be issued.
  279 + *
  280 + * return value
  281 + * None
  282 + */
  283 +static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag)
  284 +{
  285 + unsigned long flags = 0;
  286 +
  287 + atomic_set(&port->commands[tag].active, 1);
  288 +
  289 + spin_lock_irqsave(&port->cmd_issue_lock, flags);
  290 +
  291 + writel((1 << MTIP_TAG_BIT(tag)),
  292 + port->s_active[MTIP_TAG_INDEX(tag)]);
  293 + writel((1 << MTIP_TAG_BIT(tag)),
  294 + port->cmd_issue[MTIP_TAG_INDEX(tag)]);
  295 +
  296 + spin_unlock_irqrestore(&port->cmd_issue_lock, flags);
  297 +}
  298 +
  299 +/*
  300 + * Enable/disable the reception of FIS
  301 + *
  302 + * @port Pointer to the port data structure
  303 + * @enable 1 to enable, 0 to disable
  304 + *
  305 + * return value
  306 + * Previous state: 1 enabled, 0 disabled
  307 + */
  308 +static int mtip_enable_fis(struct mtip_port *port, int enable)
  309 +{
  310 + u32 tmp;
  311 +
  312 + /* enable FIS reception */
  313 + tmp = readl(port->mmio + PORT_CMD);
  314 + if (enable)
  315 + writel(tmp | PORT_CMD_FIS_RX, port->mmio + PORT_CMD);
  316 + else
  317 + writel(tmp & ~PORT_CMD_FIS_RX, port->mmio + PORT_CMD);
  318 +
  319 + /* Flush */
  320 + readl(port->mmio + PORT_CMD);
  321 +
  322 + return (((tmp & PORT_CMD_FIS_RX) == PORT_CMD_FIS_RX));
  323 +}
  324 +
  325 +/*
  326 + * Enable/disable the DMA engine
  327 + *
  328 + * @port Pointer to the port data structure
  329 + * @enable 1 to enable, 0 to disable
  330 + *
  331 + * return value
  332 + * Previous state: 1 enabled, 0 disabled.
  333 + */
  334 +static int mtip_enable_engine(struct mtip_port *port, int enable)
  335 +{
  336 + u32 tmp;
  337 +
  338 + /* enable/disable the DMA engine */
  339 + tmp = readl(port->mmio + PORT_CMD);
  340 + if (enable)
  341 + writel(tmp | PORT_CMD_START, port->mmio + PORT_CMD);
  342 + else
  343 + writel(tmp & ~PORT_CMD_START, port->mmio + PORT_CMD);
  344 +
  345 + readl(port->mmio + PORT_CMD);
  346 + return (((tmp & PORT_CMD_START) == PORT_CMD_START));
  347 +}
  348 +
  349 +/*
  350 + * Enables the port DMA engine and FIS reception.
  351 + *
  352 + * return value
  353 + * None
  354 + */
  355 +static inline void mtip_start_port(struct mtip_port *port)
  356 +{
  357 + /* Enable FIS reception */
  358 + mtip_enable_fis(port, 1);
  359 +
  360 + /* Enable the DMA engine */
  361 + mtip_enable_engine(port, 1);
  362 +}
  363 +
  364 +/*
  365 + * Deinitialize a port by disabling port interrupts, the DMA engine,
  366 + * and FIS reception.
  367 + *
  368 + * @port Pointer to the port structure
  369 + *
  370 + * return value
  371 + * None
  372 + */
  373 +static inline void mtip_deinit_port(struct mtip_port *port)
  374 +{
  375 + /* Disable interrupts on this port */
  376 + writel(0, port->mmio + PORT_IRQ_MASK);
  377 +
  378 + /* Disable the DMA engine */
  379 + mtip_enable_engine(port, 0);
  380 +
  381 + /* Disable FIS reception */
  382 + mtip_enable_fis(port, 0);
  383 +}
  384 +
  385 +/*
  386 + * Initialize a port.
  387 + *
  388 + * This function deinitializes the port by calling mtip_deinit_port() and
  389 + * then initializes it by setting the command header and RX FIS addresses,
  390 + * clearing the SError register and any pending port interrupts before
  391 + * re-enabling the default set of port interrupts.
  392 + *
  393 + * @port Pointer to the port structure.
  394 + *
  395 + * return value
  396 + * None
  397 + */
  398 +static void mtip_init_port(struct mtip_port *port)
  399 +{
  400 + int i;
  401 + mtip_deinit_port(port);
  402 +
  403 + /* Program the command list base and FIS base addresses */
  404 + if (readl(port->dd->mmio + HOST_CAP) & HOST_CAP_64) {
  405 + writel((port->command_list_dma >> 16) >> 16,
  406 + port->mmio + PORT_LST_ADDR_HI);
  407 + writel((port->rxfis_dma >> 16) >> 16,
  408 + port->mmio + PORT_FIS_ADDR_HI);
  409 + }
  410 +
  411 + writel(port->command_list_dma & 0xFFFFFFFF,
  412 + port->mmio + PORT_LST_ADDR);
  413 + writel(port->rxfis_dma & 0xFFFFFFFF, port->mmio + PORT_FIS_ADDR);
  414 +
  415 + /* Clear SError */
  416 + writel(readl(port->mmio + PORT_SCR_ERR), port->mmio + PORT_SCR_ERR);
  417 +
  418 + /* reset the completed registers.*/
  419 + for (i = 0; i < port->dd->slot_groups; i++)
  420 + writel(0xFFFFFFFF, port->completed[i]);
  421 +
  422 + /* Clear any pending interrupts for this port */
  423 + writel(readl(port->mmio + PORT_IRQ_STAT), port->mmio + PORT_IRQ_STAT);
  424 +
  425 + /* Enable port interrupts */
  426 + writel(DEF_PORT_IRQ, port->mmio + PORT_IRQ_MASK);
  427 +}
  428 +
  429 +/*
  430 + * Restart a port
  431 + *
  432 + * @port Pointer to the port data structure.
  433 + *
  434 + * return value
  435 + * None
  436 + */
  437 +static void mtip_restart_port(struct mtip_port *port)
  438 +{
  439 + unsigned long timeout;
  440 +
  441 + /* Disable the DMA engine */
  442 + mtip_enable_engine(port, 0);
  443 +
  444 + /* Chip quirk: wait up to 500ms for PxCMD.CR == 0 */
  445 + timeout = jiffies + msecs_to_jiffies(500);
  446 + while ((readl(port->mmio + PORT_CMD) & PORT_CMD_LIST_ON)
  447 + && time_before(jiffies, timeout))
  448 + ;
  449 +
  450 + /*
  451 + * Chip quirk: escalate to hba reset if
  452 + * PxCMD.CR not clear after 500 ms
  453 + */
  454 + if (readl(port->mmio + PORT_CMD) & PORT_CMD_LIST_ON) {
  455 + dev_warn(&port->dd->pdev->dev,
  456 + "PxCMD.CR not clear, escalating reset\n");
  457 +
  458 + if (hba_reset_nosleep(port->dd))
  459 + dev_err(&port->dd->pdev->dev,
  460 + "HBA reset escalation failed.\n");
  461 +
  462 + /* 30 ms delay before com reset to quiesce chip */
  463 + mdelay(30);
  464 + }
  465 +
  466 + dev_warn(&port->dd->pdev->dev, "Issuing COM reset\n");
  467 +
  468 + /* Set PxSCTL.DET */
  469 + writel(readl(port->mmio + PORT_SCR_CTL) |
  470 + 1, port->mmio + PORT_SCR_CTL);
  471 + readl(port->mmio + PORT_SCR_CTL);
  472 +
  473 + /* Wait 1 ms to quiesce chip function */
  474 + timeout = jiffies + msecs_to_jiffies(1);
  475 + while (time_before(jiffies, timeout))
  476 + ;
  477 +
  478 + /* Clear PxSCTL.DET */
  479 + writel(readl(port->mmio + PORT_SCR_CTL) & ~1,
  480 + port->mmio + PORT_SCR_CTL);
  481 + readl(port->mmio + PORT_SCR_CTL);
  482 +
  483 + /* Wait 500 ms for bit 0 of PORT_SCR_STAT to be set */
  484 + timeout = jiffies + msecs_to_jiffies(500);
  485 + while (((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0)
  486 + && time_before(jiffies, timeout))
  487 + ;
  488 +
  489 + if ((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0)
  490 + dev_warn(&port->dd->pdev->dev,
  491 + "COM reset failed\n");
  492 +
  493 + /* Clear SError, the PxSERR.DIAG.x should be set so clear it */
  494 + writel(readl(port->mmio + PORT_SCR_ERR), port->mmio + PORT_SCR_ERR);
  495 +
  496 + /* Enable the DMA engine */
  497 + mtip_enable_engine(port, 1);
  498 +}
  499 +
  500 +/*
  501 + * Called periodically to see if any read/write commands are
  502 + * taking too long to complete.
  503 + *
  504 + * @data Pointer to the PORT data structure.
  505 + *
  506 + * return value
  507 + * None
  508 + */
  509 +static void mtip_timeout_function(unsigned long int data)
  510 +{
  511 + struct mtip_port *port = (struct mtip_port *) data;
  512 + struct host_to_dev_fis *fis;
  513 + struct mtip_cmd *command;
  514 + int tag, cmdto_cnt = 0;
  515 + unsigned int bit, group;
  516 + unsigned int num_command_slots;
  517 +
  518 + if (unlikely(!port))
  519 + return;
  520 + num_command_slots = port->dd->slot_groups * 32;
  521 + if (atomic_read(&port->dd->resumeflag) == true) {
  522 + mod_timer(&port->cmd_timer,
  523 + jiffies + msecs_to_jiffies(30000));
  524 + return;
  525 + }
  526 +
  527 + for (tag = 0; tag < num_command_slots; tag++) {
  528 + /*
  529 + * Skip internal command slot as it has
  530 + * its own timeout mechanism
  531 + */
  532 + if (tag == MTIP_TAG_INTERNAL)
  533 + continue;
  534 +
  535 + if (atomic_read(&port->commands[tag].active) &&
  536 + (time_after(jiffies, port->commands[tag].comp_time))) {
  537 + group = tag >> 5;
  538 + bit = tag & 0x1F;
  539 +
  540 + command = &port->commands[tag];
  541 + fis = (struct host_to_dev_fis *) command->command;
  542 +
  543 + dev_warn(&port->dd->pdev->dev,
  544 + "Timeout for command tag %d\n", tag);
  545 +
  546 + cmdto_cnt++;
  547 + if (cmdto_cnt == 1)
  548 + set_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
  549 +
  550 + /*
  551 + * Clear the completed bit. This should prevent
  552 + * any interrupt handlers from trying to retire
  553 + * the command.
  554 + */
  555 + writel(1 << bit, port->completed[group]);
  556 +
  557 + /* Call the async completion callback. */
  558 + if (likely(command->async_callback))
  559 + command->async_callback(command->async_data,
  560 + -EIO);
  561 + command->async_callback = NULL;
  562 + command->comp_func = NULL;
  563 +
  564 + /* Unmap the DMA scatter list entries */
  565 + dma_unmap_sg(&port->dd->pdev->dev,
  566 + command->sg,
  567 + command->scatter_ents,
  568 + command->direction);
  569 +
  570 + /*
  571 + * Clear the allocated bit and active tag for the
  572 + * command.
  573 + */
  574 + atomic_set(&port->commands[tag].active, 0);
  575 + release_slot(port, tag);
  576 +
  577 + up(&port->cmd_slot);
  578 + }
  579 + }
  580 +
  581 + if (cmdto_cnt) {
  582 + dev_warn(&port->dd->pdev->dev,
  583 + "%d commands timed out: restarting port",
  584 + cmdto_cnt);
  585 + mtip_restart_port(port);
  586 + clear_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
  587 + wake_up_interruptible(&port->svc_wait);
  588 + }
  589 +
  590 + /* Restart the timer */
  591 + mod_timer(&port->cmd_timer,
  592 + jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
  593 +}
  594 +
  595 +/*
  596 + * IO completion function.
  597 + *
  598 + * This completion function is called by the driver ISR when a
  599 + * command that was issued by the kernel completes. It first calls the
  600 + * asynchronous completion function which normally calls back into the block
  601 + * layer passing the asynchronous callback data, then unmaps the
  602 + * scatter list associated with the completed command, and finally
  603 + * clears the allocated bit associated with the completed command.
  604 + *
  605 + * @port Pointer to the port data structure.
  606 + * @tag Tag of the command.
  607 + * @data Pointer to driver_data.
  608 + * @status Completion status.
  609 + *
  610 + * return value
  611 + * None
  612 + */
  613 +static void mtip_async_complete(struct mtip_port *port,
  614 + int tag,
  615 + void *data,
  616 + int status)
  617 +{
  618 + struct mtip_cmd *command;
  619 + struct driver_data *dd = data;
  620 + int cb_status = status ? -EIO : 0;
  621 +
  622 + if (unlikely(!dd) || unlikely(!port))
  623 + return;
  624 +
  625 + command = &port->commands[tag];
  626 +
  627 + if (unlikely(status == PORT_IRQ_TF_ERR)) {
  628 + dev_warn(&port->dd->pdev->dev,
  629 + "Command tag %d failed due to TFE\n", tag);
  630 + }
  631 +
  632 + /* Upper layer callback */
  633 + if (likely(command->async_callback))
  634 + command->async_callback(command->async_data, cb_status);
  635 +
  636 + command->async_callback = NULL;
  637 + command->comp_func = NULL;
  638 +
  639 + /* Unmap the DMA scatter list entries */
  640 + dma_unmap_sg(&dd->pdev->dev,
  641 + command->sg,
  642 + command->scatter_ents,
  643 + command->direction);
  644 +
  645 + /* Clear the allocated and active bits for the command */
  646 + atomic_set(&port->commands[tag].active, 0);
  647 + release_slot(port, tag);
  648 +
  649 + up(&port->cmd_slot);
  650 +}
  651 +
  652 +/*
  653 + * Internal command completion callback function.
  654 + *
  655 + * This function is normally called by the driver ISR when an internal
  656 + * command completes. It signals the command completion by
  657 + * calling complete().
  658 + *
  659 + * @port Pointer to the port data structure.
  660 + * @tag Tag of the command that has completed.
  661 + * @data Pointer to a completion structure.
  662 + * @status Completion status.
  663 + *
  664 + * return value
  665 + * None
  666 + */
  667 +static void mtip_completion(struct mtip_port *port,
  668 + int tag,
  669 + void *data,
  670 + int status)
  671 +{
  672 + struct mtip_cmd *command = &port->commands[tag];
  673 + struct completion *waiting = data;
  674 + if (unlikely(status == PORT_IRQ_TF_ERR))
  675 + dev_warn(&port->dd->pdev->dev,
  676 + "Internal command %d completed with TFE\n", tag);
  677 +
  678 + command->async_callback = NULL;
  679 + command->comp_func = NULL;
  680 +
  681 + complete(waiting);
  682 +}
  683 +
  684 +/*
  685 + * Helper function for tag logging
  686 + */
  687 +static void print_tags(struct driver_data *dd,
  688 + char *msg,
  689 + unsigned long *tagbits)
  690 +{
  691 + unsigned int tag, count = 0;
  692 +
  693 + for (tag = 0; tag < (dd->slot_groups) * 32; tag++) {
  694 + if (test_bit(tag, tagbits))
  695 + count++;
  696 + }
  697 + if (count)
  698 + dev_info(&dd->pdev->dev, "%s [%i tags]\n", msg, count);
  699 +}
  700 +
  701 +/*
  702 + * Handle an error.
  703 + *
  704 + * @dd Pointer to the DRIVER_DATA structure.
  705 + *
  706 + * return value
  707 + * None
  708 + */
  709 +static void mtip_handle_tfe(struct driver_data *dd)
  710 +{
  711 + int group, tag, bit, reissue;
  712 + struct mtip_port *port;
  713 + struct mtip_cmd *command;
  714 + u32 completed;
  715 + struct host_to_dev_fis *fis;
  716 + unsigned long tagaccum[SLOTBITS_IN_LONGS];
  717 +
  718 + dev_warn(&dd->pdev->dev, "Taskfile error\n");
  719 +
  720 + port = dd->port;
  721 +
  722 + /* Stop the timer to prevent command timeouts. */
  723 + del_timer(&port->cmd_timer);
  724 +
  725 + /* Set eh_active */
  726 + set_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
  727 +
  728 + /* Loop through all the groups */
  729 + for (group = 0; group < dd->slot_groups; group++) {
  730 + completed = readl(port->completed[group]);
  731 +
  732 + /* clear completed status register in the hardware.*/
  733 + writel(completed, port->completed[group]);
  734 +
  735 + /* clear the tag accumulator */
  736 + memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
  737 +
  738 + /* Process successfully completed commands */
  739 + for (bit = 0; bit < 32 && completed; bit++) {
  740 + if (!(completed & (1<<bit)))
  741 + continue;
  742 + tag = (group << 5) + bit;
  743 +
  744 + /* Skip the internal command slot */
  745 + if (tag == MTIP_TAG_INTERNAL)
  746 + continue;
  747 +
  748 + command = &port->commands[tag];
  749 + if (likely(command->comp_func)) {
  750 + set_bit(tag, tagaccum);
  751 + atomic_set(&port->commands[tag].active, 0);
  752 + command->comp_func(port,
  753 + tag,
  754 + command->comp_data,
  755 + 0);
  756 + } else {
  757 + dev_err(&port->dd->pdev->dev,
  758 + "Missing completion func for tag %d",
  759 + tag);
  760 + if (mtip_check_surprise_removal(dd->pdev)) {
  761 + mtip_command_cleanup(dd);
  762 + /* don't proceed further */
  763 + return;
  764 + }
  765 + }
  766 + }
  767 + }
  768 + print_tags(dd, "TFE tags completed:", tagaccum);
  769 +
  770 + /* Restart the port */
  771 + mdelay(20);
  772 + mtip_restart_port(port);
  773 +
  774 + /* clear the tag accumulator */
  775 + memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
  776 +
  777 + /* Loop through all the groups */
  778 + for (group = 0; group < dd->slot_groups; group++) {
  779 + for (bit = 0; bit < 32; bit++) {
  780 + reissue = 1;
  781 + tag = (group << 5) + bit;
  782 +
  783 + /* If the active bit is set re-issue the command */
  784 + if (atomic_read(&port->commands[tag].active) == 0)
  785 + continue;
  786 +
  787 + fis = (struct host_to_dev_fis *)
  788 + port->commands[tag].command;
  789 +
  790 + /* Should re-issue? */
  791 + if (tag == MTIP_TAG_INTERNAL ||
  792 + fis->command == ATA_CMD_SET_FEATURES)
  793 + reissue = 0;
  794 +
  795 + /*
  796 + * First check if this command has
  797 + * exceeded its retries.
  798 + */
  799 + if (reissue &&
  800 + (port->commands[tag].retries-- > 0)) {
  801 +
  802 + set_bit(tag, tagaccum);
  803 +
  804 + /* Update the timeout value. */
  805 + port->commands[tag].comp_time =
  806 + jiffies + msecs_to_jiffies(
  807 + MTIP_NCQ_COMMAND_TIMEOUT_MS);
  808 + /* Re-issue the command. */
  809 + mtip_issue_ncq_command(port, tag);
  810 +
  811 + continue;
  812 + }
  813 +
  814 + /* Retire a command that will not be reissued */
  815 + dev_warn(&port->dd->pdev->dev,
  816 + "retiring tag %d\n", tag);
  817 + atomic_set(&port->commands[tag].active, 0);
  818 +
  819 + if (port->commands[tag].comp_func)
  820 + port->commands[tag].comp_func(
  821 + port,
  822 + tag,
  823 + port->commands[tag].comp_data,
  824 + PORT_IRQ_TF_ERR);
  825 + else
  826 + dev_warn(&port->dd->pdev->dev,
  827 + "Bad completion for tag %d\n",
  828 + tag);
  829 + }
  830 + }
  831 + print_tags(dd, "TFE tags reissued:", tagaccum);
  832 +
  833 + /* clear eh_active */
  834 + clear_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
  835 + wake_up_interruptible(&port->svc_wait);
  836 +
  837 + mod_timer(&port->cmd_timer,
  838 + jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
  839 +}
  840 +
  841 +/*
  842 + * Handle a set device bits interrupt
  843 + */
  844 +static inline void mtip_process_sdbf(struct driver_data *dd)
  845 +{
  846 + struct mtip_port *port = dd->port;
  847 + int group, tag, bit;
  848 + u32 completed;
  849 + struct mtip_cmd *command;
  850 +
  851 + /* walk all bits in all slot groups */
  852 + for (group = 0; group < dd->slot_groups; group++) {
  853 + completed = readl(port->completed[group]);
  854 +
  855 + /* clear completed status register in the hardware.*/
  856 + writel(completed, port->completed[group]);
  857 +
  858 + /* Process completed commands. */
  859 + for (bit = 0;
  860 + (bit < 32) && completed;
  861 + bit++, completed >>= 1) {
  862 + if (completed & 0x01) {
  863 + tag = (group << 5) | bit;
  864 +
  865 + /* skip internal command slot. */
  866 + if (unlikely(tag == MTIP_TAG_INTERNAL))
  867 + continue;
  868 +
  869 + command = &port->commands[tag];
  870 + /* make internal callback */
  871 + if (likely(command->comp_func)) {
  872 + command->comp_func(
  873 + port,
  874 + tag,
  875 + command->comp_data,
  876 + 0);
  877 + } else {
  878 + dev_warn(&dd->pdev->dev,
  879 + "Null completion "
  880 + "for tag %d",
  881 + tag);
  882 +
  883 + if (mtip_check_surprise_removal(
  884 + dd->pdev)) {
  885 + mtip_command_cleanup(dd);
  886 + return;
  887 + }
  888 + }
  889 + }
  890 + }
  891 + }
  892 +}
  893 +
  894 +/*
  895 + * Process legacy pio and d2h interrupts
  896 + */
  897 +static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat)
  898 +{
  899 + struct mtip_port *port = dd->port;
  900 + struct mtip_cmd *cmd = &port->commands[MTIP_TAG_INTERNAL];
  901 +
  902 + if (test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) &&
  903 + (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL])
  904 + & (1 << MTIP_TAG_INTERNAL))) {
  905 + if (cmd->comp_func) {
  906 + cmd->comp_func(port,
  907 + MTIP_TAG_INTERNAL,
  908 + cmd->comp_data,
  909 + 0);
  910 + return;
  911 + }
  912 + }
  913 +
  914 + dev_warn(&dd->pdev->dev, "IRQ status 0x%x ignored.\n", port_stat);
  915 +
  916 + return;
  917 +}
  918 +
  919 +/*
  920 + * Demux and handle errors
  921 + */
  922 +static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat)
  923 +{
  924 + if (likely(port_stat & (PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR)))
  925 + mtip_handle_tfe(dd);
  926 +
  927 + if (unlikely(port_stat & PORT_IRQ_CONNECT)) {
  928 + dev_warn(&dd->pdev->dev,
  929 + "Clearing PxSERR.DIAG.x\n");
  930 + writel((1 << 26), dd->port->mmio + PORT_SCR_ERR);
  931 + }
  932 +
  933 + if (unlikely(port_stat & PORT_IRQ_PHYRDY)) {
  934 + dev_warn(&dd->pdev->dev,
  935 + "Clearing PxSERR.DIAG.n\n");
  936 + writel((1 << 16), dd->port->mmio + PORT_SCR_ERR);
  937 + }
  938 +
  939 + if (unlikely(port_stat & ~PORT_IRQ_HANDLED)) {
  940 + dev_warn(&dd->pdev->dev,
  941 + "Port stat errors %x unhandled\n",
  942 + (port_stat & ~PORT_IRQ_HANDLED));
  943 + }
  944 +}
  945 +
  946 +static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
  947 +{
  948 + struct driver_data *dd = data;
  949 + struct mtip_port *port = dd->port;
  950 + u32 hba_stat, port_stat;
  951 + int rv = IRQ_NONE;
  952 +
  953 + hba_stat = readl(dd->mmio + HOST_IRQ_STAT);
  954 + if (hba_stat) {
  955 + rv = IRQ_HANDLED;
  956 +
  957 + /* Acknowledge the interrupt status on the port.*/
  958 + port_stat = readl(port->mmio + PORT_IRQ_STAT);
  959 + writel(port_stat, port->mmio + PORT_IRQ_STAT);
  960 +
  961 + /* Demux port status */
  962 + if (likely(port_stat & PORT_IRQ_SDB_FIS))
  963 + mtip_process_sdbf(dd);
  964 +
  965 + if (unlikely(port_stat & PORT_IRQ_ERR)) {
  966 + if (unlikely(mtip_check_surprise_removal(dd->pdev))) {
  967 + mtip_command_cleanup(dd);
  968 + /* don't proceed further */
  969 + return IRQ_HANDLED;
  970 + }
  971 +
  972 + mtip_process_errors(dd, port_stat & PORT_IRQ_ERR);
  973 + }
  974 +
  975 + if (unlikely(port_stat & PORT_IRQ_LEGACY))
  976 + mtip_process_legacy(dd, port_stat & PORT_IRQ_LEGACY);
  977 + }
  978 +
  979 + /* acknowledge interrupt */
  980 + writel(hba_stat, dd->mmio + HOST_IRQ_STAT);
  981 +
  982 + return rv;
  983 +}
  984 +
  985 +/*
  986 + * Wrapper for mtip_handle_irq
  987 + * (ignores return code)
  988 + */
  989 +static void mtip_tasklet(unsigned long data)
  990 +{
  991 + mtip_handle_irq((struct driver_data *) data);
  992 +}
  993 +
  994 +/*
  995 + * HBA interrupt subroutine.
  996 + *
  997 + * @irq IRQ number.
  998 + * @instance Pointer to the driver data structure.
  999 + *
  1000 + * return value
  1001 + * IRQ_HANDLED An HBA interrupt was pending and handled.
  1002 + * IRQ_NONE This interrupt was not for the HBA.
  1003 + */
  1004 +static irqreturn_t mtip_irq_handler(int irq, void *instance)
  1005 +{
  1006 + struct driver_data *dd = instance;
  1007 + tasklet_schedule(&dd->tasklet);
  1008 + return IRQ_HANDLED;
  1009 +}
  1010 +
  1011 +static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag)
  1012 +{
  1013 + atomic_set(&port->commands[tag].active, 1);
  1014 + writel(1 << MTIP_TAG_BIT(tag),
  1015 + port->cmd_issue[MTIP_TAG_INDEX(tag)]);
  1016 +}
  1017 +
  1018 +/*
  1019 + * Wait for port to quiesce
  1020 + *
  1021 + * @port Pointer to port data structure
  1022 + * @timeout Max duration to wait (ms)
  1023 + *
  1024 + * return value
  1025 + * 0 Success
  1026 + * -EBUSY Commands still active
  1027 + */
  1028 +static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
  1029 +{
  1030 + unsigned long to;
  1031 + unsigned int n;
  1032 + unsigned int active = 1;
  1033 +
  1034 + to = jiffies + msecs_to_jiffies(timeout);
  1035 + do {
  1036 + if (test_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags) &&
  1037 + test_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags)) {
  1038 + msleep(20);
  1039 + continue; /* svc thd is actively issuing commands */
  1040 + }
  1041 + /*
  1042 + * Ignore s_active bit 0 of array element 0.
  1043 + * This bit will always be set
  1044 + */
  1045 + active = readl(port->s_active[0]) & 0xFFFFFFFE;
  1046 + for (n = 1; n < port->dd->slot_groups; n++)
  1047 + active |= readl(port->s_active[n]);
  1048 +
  1049 + if (!active)
  1050 + break;
  1051 +
  1052 + msleep(20);
  1053 + } while (time_before(jiffies, to));
  1054 +
  1055 + return active ? -EBUSY : 0;
  1056 +}
  1057 +
  1058 +/*
  1059 + * Execute an internal command and wait for the completion.
  1060 + *
  1061 + * @port Pointer to the port data structure.
  1062 + * @fis Pointer to the FIS that describes the command.
  1063 + * @fis_len Length in DWORDS of the FIS.
  1064 + * @buffer DMA accessible for command data.
  1065 + * @buf_len Length, in bytes, of the data buffer.
  1066 + * @opts Command header options, excluding the FIS length
  1067 + * and the number of PRD entries.
  1068 + * @timeout Time in ms to wait for the command to complete.
  1069 + *
  1070 + * return value
  1071 + * 0 Command completed successfully.
  1072 + * -EFAULT The buffer address is not correctly aligned.
  1073 + * -EBUSY Internal command or other IO in progress.
  1074 + * -EAGAIN Time out waiting for command to complete.
  1075 + */
  1076 +static int mtip_exec_internal_command(struct mtip_port *port,
  1077 + void *fis,
  1078 + int fis_len,
  1079 + dma_addr_t buffer,
  1080 + int buf_len,
  1081 + u32 opts,
  1082 + gfp_t atomic,
  1083 + unsigned long timeout)
  1084 +{
  1085 + struct mtip_cmd_sg *command_sg;
  1086 + DECLARE_COMPLETION_ONSTACK(wait);
  1087 + int rv = 0;
  1088 + struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL];
  1089 +
  1090 + /* Make sure the buffer is 8-byte aligned. This is ASIC specific. */
  1091 + if (buffer & 0x00000007) {
  1092 + dev_err(&port->dd->pdev->dev,
  1093 + "SG buffer is not 8 byte aligned\n");
  1094 + return -EFAULT;
  1095 + }
  1096 +
  1097 + /* Only one internal command should be running at a time */
  1098 + if (test_and_set_bit(MTIP_TAG_INTERNAL, port->allocated)) {
  1099 + dev_warn(&port->dd->pdev->dev,
  1100 + "Internal command already active\n");
  1101 + return -EBUSY;
  1102 + }
  1103 + set_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags);
  1104 +
  1105 + if (atomic == GFP_KERNEL) {
  1106 + /* wait for io to complete if non atomic */
  1107 + if (mtip_quiesce_io(port, 5000) < 0) {
  1108 + dev_warn(&port->dd->pdev->dev,
  1109 + "Failed to quiesce IO\n");
  1110 + release_slot(port, MTIP_TAG_INTERNAL);
  1111 + clear_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags);
  1112 + wake_up_interruptible(&port->svc_wait);
  1113 + return -EBUSY;
  1114 + }
  1115 +
  1116 + /* Set the completion function and data for the command. */
  1117 + int_cmd->comp_data = &wait;
  1118 + int_cmd->comp_func = mtip_completion;
  1119 +
  1120 + } else {
  1121 + /* Clear completion - we're going to poll */
  1122 + int_cmd->comp_data = NULL;
  1123 + int_cmd->comp_func = NULL;
  1124 + }
  1125 +
  1126 + /* Copy the command to the command table */
  1127 + memcpy(int_cmd->command, fis, fis_len*4);
  1128 +
  1129 + /* Populate the SG list */
  1130 + int_cmd->command_header->opts =
  1131 + __force_bit2int cpu_to_le32(opts | fis_len);
  1132 + if (buf_len) {
  1133 + command_sg = int_cmd->command + AHCI_CMD_TBL_HDR_SZ;
  1134 +
  1135 + command_sg->info =
  1136 + __force_bit2int cpu_to_le32((buf_len-1) & 0x3FFFFF);
  1137 + command_sg->dba =
  1138 + __force_bit2int cpu_to_le32(buffer & 0xFFFFFFFF);
  1139 + command_sg->dba_upper =
  1140 + __force_bit2int cpu_to_le32((buffer >> 16) >> 16);
  1141 +
  1142 + int_cmd->command_header->opts |=
  1143 + __force_bit2int cpu_to_le32((1 << 16));
  1144 + }
  1145 +
  1146 + /* Populate the command header */
  1147 + int_cmd->command_header->byte_count = 0;
  1148 +
  1149 + /* Issue the command to the hardware */
  1150 + mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL);
  1151 +
  1152 + /* Poll if atomic, wait_for_completion otherwise */
  1153 + if (atomic == GFP_KERNEL) {
  1154 + /* Wait for the command to complete or timeout. */
  1155 + if (wait_for_completion_timeout(
  1156 + &wait,
  1157 + msecs_to_jiffies(timeout)) == 0) {
  1158 + dev_err(&port->dd->pdev->dev,
  1159 + "Internal command did not complete [%d] "
  1160 + "within timeout of %lu ms\n",
  1161 + atomic, timeout);
  1162 + rv = -EAGAIN;
  1163 + }
  1164 +
  1165 + if (readl(port->cmd_issue[MTIP_TAG_INTERNAL])
  1166 + & (1 << MTIP_TAG_INTERNAL)) {
  1167 + dev_warn(&port->dd->pdev->dev,
  1168 + "Retiring internal command but CI is 1.\n");
  1169 + }
  1170 +
  1171 + } else {
  1172 + /* Spin for <timeout> checking if command still outstanding */
  1173 + timeout = jiffies + msecs_to_jiffies(timeout);
  1174 +
  1175 + while ((readl(
  1176 + port->cmd_issue[MTIP_TAG_INTERNAL])
  1177 + & (1 << MTIP_TAG_INTERNAL))
  1178 + && time_before(jiffies, timeout))
  1179 + ;
  1180 +
  1181 + if (readl(port->cmd_issue[MTIP_TAG_INTERNAL])
  1182 + & (1 << MTIP_TAG_INTERNAL)) {
  1183 + dev_err(&port->dd->pdev->dev,
  1184 + "Internal command did not complete [%d]\n",
  1185 + atomic);
  1186 + rv = -EAGAIN;
  1187 + }
  1188 + }
  1189 +
  1190 + /* Clear the allocated and active bits for the internal command. */
  1191 + atomic_set(&int_cmd->active, 0);
  1192 + release_slot(port, MTIP_TAG_INTERNAL);
  1193 + clear_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags);
  1194 + wake_up_interruptible(&port->svc_wait);
  1195 +
  1196 + return rv;
  1197 +}
  1198 +
  1199 +/*
  1200 + * Byte-swap ATA ID strings.
  1201 + *
  1202 + * ATA identify data contains strings in byte-swapped 16-bit words.
  1203 + * They must be swapped (on all architectures) to be usable as C strings.
  1204 + * This function swaps bytes in-place.
  1205 + *
  1206 + * @buf The buffer location of the string
  1207 + * @len The number of bytes to swap
  1208 + *
  1209 + * return value
  1210 + * None
  1211 + */
  1212 +static inline void ata_swap_string(u16 *buf, unsigned int len)
  1213 +{
  1214 + int i;
  1215 + for (i = 0; i < (len/2); i++)
  1216 + be16_to_cpus(&buf[i]);
  1217 +}
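
Identify strings pack two ASCII characters per 16-bit word with the first character in the high byte, which is why the swap is needed before the buffer can be read as a C string on little-endian hosts. A standalone sketch (hypothetical values; assumes a little-endian host):

    #include <stdio.h>
    #include <stdint.h>

    static void swap_string_words(uint16_t *buf, unsigned int len)
    {
            for (unsigned int i = 0; i < len / 2; i++)
                    buf[i] = (uint16_t)((buf[i] >> 8) | (buf[i] << 8));
    }

    int main(void)
    {
            /* "P320": first character lives in the high byte of each word */
            uint16_t model[2] = { 0x5033, 0x3230 };

            /* Before the swap a little-endian host reads this as "3P02" */
            swap_string_words(model, sizeof(model));
            printf("%.4s\n", (char *)model); /* prints "P320" */
            return 0;
    }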
  1218 +
  1219 +/*
  1220 + * Request the device identity information.
  1221 + *
  1222 + * If a user space buffer is not specified, i.e. is NULL, the
  1223 + * identify information is still read from the drive and placed
  1224 + * into the identify data buffer (@e port->identify) in the
  1225 + * port data structure.
  1226 + * When the identify buffer contains valid identify information @e
  1227 + * port->identify_valid is non-zero.
  1228 + *
  1229 + * @port Pointer to the port structure.
  1230 + * @user_buffer A user space buffer where the identify data should be
  1231 + * copied.
  1232 + *
  1233 + * return value
  1234 + * 0 Command completed successfully.
  1235 + * -EFAULT An error occurred while copying data to the user buffer.
  1236 + * -1 Command failed.
  1237 + */
  1238 +static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer)
  1239 +{
  1240 + int rv = 0;
  1241 + struct host_to_dev_fis fis;
  1242 +
  1243 + /* Build the FIS. */
  1244 + memset(&fis, 0, sizeof(struct host_to_dev_fis));
  1245 + fis.type = 0x27;
  1246 + fis.opts = 1 << 7;
  1247 + fis.command = ATA_CMD_ID_ATA;
  1248 +
  1249 + /* Set the identify information as invalid. */
  1250 + port->identify_valid = 0;
  1251 +
  1252 + /* Clear the identify information. */
  1253 + memset(port->identify, 0, sizeof(u16) * ATA_ID_WORDS);
  1254 +
  1255 + /* Execute the command. */
  1256 + if (mtip_exec_internal_command(port,
  1257 + &fis,
  1258 + 5,
  1259 + port->identify_dma,
  1260 + sizeof(u16) * ATA_ID_WORDS,
  1261 + 0,
  1262 + GFP_KERNEL,
  1263 + MTIP_INTERNAL_COMMAND_TIMEOUT_MS)
  1264 + < 0) {
  1265 + rv = -1;
  1266 + goto out;
  1267 + }
  1268 +
  1269 + /*
  1270 + * Perform any necessary byte-swapping. Yes, the kernel does in fact
  1271 + * perform field-sensitive swapping on the string fields.
  1272 + * See the kernel use of ata_id_string() for proof of this.
  1273 + */
  1274 +#ifdef __LITTLE_ENDIAN
  1275 + ata_swap_string(port->identify + 27, 40); /* model string*/
  1276 + ata_swap_string(port->identify + 23, 8); /* firmware string*/
  1277 + ata_swap_string(port->identify + 10, 20); /* serial# string*/
  1278 +#else
  1279 + {
  1280 + int i;
  1281 + for (i = 0; i < ATA_ID_WORDS; i++)
  1282 + port->identify[i] = le16_to_cpu(port->identify[i]);
  1283 + }
  1284 +#endif
  1285 +
  1286 + /* Set the identify buffer as valid. */
  1287 + port->identify_valid = 1;
  1288 +
  1289 + if (user_buffer) {
  1290 + if (copy_to_user(
  1291 + user_buffer,
  1292 + port->identify,
  1293 + ATA_ID_WORDS * sizeof(u16))) {
  1294 + rv = -EFAULT;
  1295 + goto out;
  1296 + }
  1297 + }
  1298 +
  1299 +out:
  1300 + return rv;
  1301 +}
  1302 +
  1303 +/*
  1304 + * Issue a standby immediate command to the device.
  1305 + *
  1306 + * @port Pointer to the port structure.
  1307 + *
  1308 + * return value
  1309 + * 0 Command was executed successfully.
  1310 + * -1 An error occurred while executing the command.
  1311 + */
  1312 +static int mtip_standby_immediate(struct mtip_port *port)
  1313 +{
  1314 + int rv;
  1315 + struct host_to_dev_fis fis;
  1316 +
  1317 + /* Build the FIS. */
  1318 + memset(&fis, 0, sizeof(struct host_to_dev_fis));
  1319 + fis.type = 0x27;
  1320 + fis.opts = 1 << 7;
  1321 + fis.command = ATA_CMD_STANDBYNOW1;
  1322 +
  1323 + /* Execute the command. Use a 15-second timeout for large drives. */
  1324 + rv = mtip_exec_internal_command(port,
  1325 + &fis,
  1326 + 5,
  1327 + 0,
  1328 + 0,
  1329 + 0,
  1330 + GFP_KERNEL,
  1331 + 15000);
  1332 +
  1333 + return rv;
  1334 +}
  1335 +
  1336 +/*
  1337 + * Get the drive capacity.
  1338 + *
  1339 + * @dd Pointer to the device data structure.
  1340 + * @sectors Pointer to the variable that will receive the sector count.
  1341 + *
  1342 + * return value
  1343 + * 1 Capacity was returned successfully.
  1344 + * 0 The identify information is invalid.
  1345 + */
  1346 +static bool mtip_hw_get_capacity(struct driver_data *dd, sector_t *sectors)
  1347 +{
  1348 + struct mtip_port *port = dd->port;
  1349 + u64 total, raw0, raw1, raw2, raw3;
  1350 + raw0 = port->identify[100];
  1351 + raw1 = port->identify[101];
  1352 + raw2 = port->identify[102];
  1353 + raw3 = port->identify[103];
  1354 + total = raw0 | raw1<<16 | raw2<<32 | raw3<<48;
  1355 + *sectors = total;
  1356 + return (bool) !!port->identify_valid;
  1357 +}
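
mtip_hw_get_capacity() assembles the 48-bit native sector count from identify words 100-103, least significant word first. The arithmetic in isolation (the sector count below is a made-up value):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint16_t id[256] = { 0 };

            /* hypothetical identify words for a 781,422,768-sector drive */
            id[100] = 0x90B0;
            id[101] = 0x2E93;

            uint64_t sectors = (uint64_t)id[100] |
                               (uint64_t)id[101] << 16 |
                               (uint64_t)id[102] << 32 |
                               (uint64_t)id[103] << 48;

            printf("%llu sectors (%llu MB)\n",
                   (unsigned long long)sectors,
                   (unsigned long long)(sectors * 512 >> 20));
            return 0;
    }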
  1358 +
  1359 +/*
  1360 + * Reset the HBA.
  1361 + *
  1362 + * Resets the HBA by setting the HBA Reset bit in the Global
  1363 + * HBA Control register. After setting the HBA Reset bit the
  1364 + * function waits for 1 second before reading the HBA Reset
  1365 + * bit to make sure it has cleared. If HBA Reset is not clear
  1366 + * an error is returned. Cannot be used in a non-blocking
  1367 + * context.
  1368 + *
  1369 + * @dd Pointer to the driver data structure.
  1370 + *
  1371 + * return value
  1372 + * 0 The reset was successful.
  1373 + * -1 The HBA Reset bit did not clear.
  1374 + */
  1375 +static int mtip_hba_reset(struct driver_data *dd)
  1376 +{
  1377 + mtip_deinit_port(dd->port);
  1378 +
  1379 + /* Set the reset bit */
  1380 + writel(HOST_RESET, dd->mmio + HOST_CTL);
  1381 +
  1382 + /* Flush */
  1383 + readl(dd->mmio + HOST_CTL);
  1384 +
  1385 + /* Wait for reset to clear */
  1386 + ssleep(1);
  1387 +
  1388 + /* Check the bit has cleared */
  1389 + if (readl(dd->mmio + HOST_CTL) & HOST_RESET) {
  1390 + dev_err(&dd->pdev->dev,
  1391 + "Reset bit did not clear.\n");
  1392 + return -1;
  1393 + }
  1394 +
  1395 + return 0;
  1396 +}
  1397 +
  1398 +/*
  1399 + * Display the identify command data.
  1400 + *
  1401 + * @port Pointer to the port data structure.
  1402 + *
  1403 + * return value
  1404 + * None
  1405 + */
  1406 +static void mtip_dump_identify(struct mtip_port *port)
  1407 +{
  1408 + sector_t sectors;
  1409 + unsigned short revid;
  1410 + char cbuf[42];
  1411 +
  1412 + if (!port->identify_valid)
  1413 + return;
  1414 +
  1415 + strlcpy(cbuf, (char *)(port->identify+10), 21);
  1416 + dev_info(&port->dd->pdev->dev,
  1417 + "Serial No.: %s\n", cbuf);
  1418 +
  1419 + strlcpy(cbuf, (char *)(port->identify+23), 9);
  1420 + dev_info(&port->dd->pdev->dev,
  1421 + "Firmware Ver.: %s\n", cbuf);
  1422 +
  1423 + strlcpy(cbuf, (char *)(port->identify+27), 41);
  1424 + dev_info(&port->dd->pdev->dev, "Model: %s\n", cbuf);
  1425 +
  1426 + if (mtip_hw_get_capacity(port->dd, &sectors))
  1427 + dev_info(&port->dd->pdev->dev,
  1428 + "Capacity: %llu sectors (%llu MB)\n",
  1429 + (u64)sectors,
  1430 + ((u64)sectors) * ATA_SECT_SIZE >> 20);
  1431 +
  1432 + pci_read_config_word(port->dd->pdev, PCI_REVISION_ID, &revid);
  1433 + switch (revid & 0xFF) {
  1434 + case 0x1:
  1435 + strlcpy(cbuf, "A0", 3);
  1436 + break;
  1437 + case 0x3:
  1438 + strlcpy(cbuf, "A2", 3);
  1439 + break;
  1440 + default:
  1441 + strlcpy(cbuf, "?", 2);
  1442 + break;
  1443 + }
  1444 + dev_info(&port->dd->pdev->dev,
  1445 + "Card Type: %s\n", cbuf);
  1446 +}
  1447 +
  1448 +/*
  1449 + * Map the command's scatter list into the command table.
  1450 + *
  1451 + * @command Pointer to the command.
  1452 + * @nents Number of scatter list entries.
  1453 + *
  1454 + * return value
  1455 + * None
  1456 + */
  1457 +static inline void fill_command_sg(struct driver_data *dd,
  1458 + struct mtip_cmd *command,
  1459 + int nents)
  1460 +{
  1461 + int n;
  1462 + unsigned int dma_len;
  1463 + struct mtip_cmd_sg *command_sg;
  1464 + struct scatterlist *sg = command->sg;
  1465 +
  1466 + command_sg = command->command + AHCI_CMD_TBL_HDR_SZ;
  1467 +
  1468 + for (n = 0; n < nents; n++) {
  1469 + dma_len = sg_dma_len(sg);
  1470 + if (dma_len > 0x400000)
  1471 + dev_err(&dd->pdev->dev,
  1472 + "DMA segment length truncated\n");
  1473 + command_sg->info = __force_bit2int
  1474 + cpu_to_le32((dma_len-1) & 0x3FFFFF);
  1475 + command_sg->dba = __force_bit2int
  1476 + cpu_to_le32(sg_dma_address(sg));
  1477 + command_sg->dba_upper = __force_bit2int
  1478 + cpu_to_le32((sg_dma_address(sg) >> 16) >> 16);
  1479 + command_sg++;
  1480 + sg++;
  1481 + }
  1482 +}
  1483 +
  1484 +/*
  1485 + * @brief Execute a drive command.
  1486 + *
  1487 + * return value 0 The command completed successfully.
  1488 + * return value -1 An error occurred while executing the command.
  1489 + */
  1490 +static int exec_drive_task(struct mtip_port *port, u8 *command)
  1491 +{
  1492 + struct host_to_dev_fis fis;
  1493 + struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG);
  1494 +
  1495 + /* Build the FIS. */
  1496 + memset(&fis, 0, sizeof(struct host_to_dev_fis));
  1497 + fis.type = 0x27;
  1498 + fis.opts = 1 << 7;
  1499 + fis.command = command[0];
  1500 + fis.features = command[1];
  1501 + fis.sect_count = command[2];
  1502 + fis.sector = command[3];
  1503 + fis.cyl_low = command[4];
  1504 + fis.cyl_hi = command[5];
  1505 + fis.device = command[6] & ~0x10; /* Clear the dev bit*/
  1506 +
  1507 +
  1508 + dbg_printk(MTIP_DRV_NAME "%s: User Command: cmd %x, feat %x, "
  1509 + "nsect %x, sect %x, lcyl %x, "
  1510 + "hcyl %x, sel %x\n",
  1511 + __func__,
  1512 + command[0],
  1513 + command[1],
  1514 + command[2],
  1515 + command[3],
  1516 + command[4],
  1517 + command[5],
  1518 + command[6]);
  1519 +
  1520 + /* Execute the command. */
  1521 + if (mtip_exec_internal_command(port,
  1522 + &fis,
  1523 + 5,
  1524 + 0,
  1525 + 0,
  1526 + 0,
  1527 + GFP_KERNEL,
  1528 + MTIP_IOCTL_COMMAND_TIMEOUT_MS) < 0) {
  1529 + return -1;
  1530 + }
  1531 +
  1532 + command[0] = reply->command; /* Status*/
  1533 + command[1] = reply->features; /* Error*/
  1534 + command[4] = reply->cyl_low;
  1535 + command[5] = reply->cyl_hi;
  1536 +
  1537 + dbg_printk(MTIP_DRV_NAME "%s: Completion Status: stat %x, "
  1538 + "err %x , cyl_lo %x cyl_hi %x\n",
  1539 + __func__,
  1540 + command[0],
  1541 + command[1],
  1542 + command[4],
  1543 + command[5]);
  1544 +
  1545 + return 0;
  1546 +}
  1547 +
  1548 +/*
  1549 + * @brief Execute a drive command.
  1550 + *
  1551 + * @param port Pointer to the port data structure.
  1552 + * @param command Pointer to the user specified command parameters.
  1553 + * @param user_buffer Pointer to the user space buffer where read sector
  1554 + * data should be copied.
  1555 + *
  1556 + * return value 0 The command completed successfully.
  1557 + * return value -EFAULT An error occurred while copying the completion
  1558 + * data to the user space buffer.
  1559 + * return value -1 An error occurred while executing the command.
  1560 + */
  1561 +static int exec_drive_command(struct mtip_port *port, u8 *command,
  1562 + void __user *user_buffer)
  1563 +{
  1564 + struct host_to_dev_fis fis;
  1565 + struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG);
  1566 +
  1567 + /* Build the FIS. */
  1568 + memset(&fis, 0, sizeof(struct host_to_dev_fis));
  1569 + fis.type = 0x27;
  1570 + fis.opts = 1 << 7;
  1571 + fis.command = command[0];
  1572 + fis.features = command[2];
  1573 + fis.sect_count = command[3];
  1574 + if (fis.command == ATA_CMD_SMART) {
  1575 + fis.sector = command[1];
  1576 + fis.cyl_low = 0x4F;
  1577 + fis.cyl_hi = 0xC2;
  1578 + }
  1579 +
  1580 + dbg_printk(MTIP_DRV_NAME
  1581 + "%s: User Command: cmd %x, sect %x, "
  1582 + "feat %x, sectcnt %x\n",
  1583 + __func__,
  1584 + command[0],
  1585 + command[1],
  1586 + command[2],
  1587 + command[3]);
  1588 +
  1589 + memset(port->sector_buffer, 0x00, ATA_SECT_SIZE);
  1590 +
  1591 + /* Execute the command. */
  1592 + if (mtip_exec_internal_command(port,
  1593 + &fis,
  1594 + 5,
  1595 + port->sector_buffer_dma,
  1596 + (command[3] != 0) ? ATA_SECT_SIZE : 0,
  1597 + 0,
  1598 + GFP_KERNEL,
  1599 + MTIP_IOCTL_COMMAND_TIMEOUT_MS)
  1600 + < 0) {
  1601 + return -1;
  1602 + }
  1603 +
  1604 + /* Collect the completion status. */
  1605 + command[0] = reply->command; /* Status*/
  1606 + command[1] = reply->features; /* Error*/
  1607 + command[2] = command[3];
  1608 +
  1609 + dbg_printk(MTIP_DRV_NAME
  1610 + "%s: Completion Status: stat %x, "
  1611 + "err %x, cmd %x\n",
  1612 + __func__,
  1613 + command[0],
  1614 + command[1],
  1615 + command[2]);
  1616 +
  1617 + if (user_buffer && command[3]) {
  1618 + if (copy_to_user(user_buffer,
  1619 + port->sector_buffer,
  1620 + ATA_SECT_SIZE * command[3])) {
  1621 + return -EFAULT;
  1622 + }
  1623 + }
  1624 +
  1625 + return 0;
  1626 +}
  1627 +
  1628 +/*
  1629 + * Indicates whether a command has a single sector payload.
  1630 + *
  1631 + * @command Command passed to the device to perform a certain event.
  1632 + * @features Features passed to the device with the command.
  1633 + *
  1634 + * return value
  1635 + * 1 command is one that always has a single sector payload,
  1636 + * regardless of the value in the Sector Count field.
  1637 + * 0 otherwise
  1638 + *
  1639 + */
  1640 +static unsigned int implicit_sector(unsigned char command,
  1641 + unsigned char features)
  1642 +{
  1643 + unsigned int rv = 0;
  1644 +
  1645 + /* list of commands that have an implicit sector count of 1 */
  1646 + switch (command) {
  1647 + case ATA_CMD_SEC_SET_PASS:
  1648 + case ATA_CMD_SEC_UNLOCK:
  1649 + case ATA_CMD_SEC_ERASE_PREP:
  1650 + case ATA_CMD_SEC_ERASE_UNIT:
  1651 + case ATA_CMD_SEC_FREEZE_LOCK:
  1652 + case ATA_CMD_SEC_DISABLE_PASS:
  1653 + case ATA_CMD_PMP_READ:
  1654 + case ATA_CMD_PMP_WRITE:
  1655 + rv = 1;
  1656 + break;
  1657 + case ATA_CMD_SET_MAX:
  1658 + if (features == ATA_SET_MAX_UNLOCK)
  1659 + rv = 1;
  1660 + break;
  1661 + case ATA_CMD_SMART:
  1662 + if ((features == ATA_SMART_READ_VALUES) ||
  1663 + (features == ATA_SMART_READ_THRESHOLDS))
  1664 + rv = 1;
  1665 + break;
  1666 + case ATA_CMD_CONF_OVERLAY:
  1667 + if ((features == ATA_DCO_IDENTIFY) ||
  1668 + (features == ATA_DCO_SET))
  1669 + rv = 1;
  1670 + break;
  1671 + }
  1672 + return rv;
  1673 +}
  1674 +
  1675 +/*
  1676 + * Executes a taskfile
  1677 + * See ide_taskfile_ioctl() for derivation
  1678 + */
  1679 +static int exec_drive_taskfile(struct driver_data *dd,
  1680 + void __user *buf,
  1681 + ide_task_request_t *req_task,
  1682 + int outtotal)
  1683 +{
  1684 + struct host_to_dev_fis fis;
  1685 + struct host_to_dev_fis *reply;
  1686 + u8 *outbuf = NULL;
  1687 + u8 *inbuf = NULL;
  1688 + dma_addr_t outbuf_dma = 0;
  1689 + dma_addr_t inbuf_dma = 0;
  1690 + dma_addr_t dma_buffer = 0;
  1691 + int err = 0;
  1692 + unsigned int taskin = 0;
  1693 + unsigned int taskout = 0;
  1694 + u8 nsect = 0;
  1695 + unsigned int timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS;
  1696 + unsigned int force_single_sector;
  1697 + unsigned int transfer_size;
  1698 + unsigned long task_file_data;
  1699 + int intotal = outtotal + req_task->out_size;
  1700 +
  1701 + taskout = req_task->out_size;
  1702 + taskin = req_task->in_size;
  1703 + /* 130560 = 512 * 0xFF*/
  1704 + if (taskin > 130560 || taskout > 130560) {
  1705 + err = -EINVAL;
  1706 + goto abort;
  1707 + }
  1708 +
  1709 + if (taskout) {
  1710 + outbuf = kzalloc(taskout, GFP_KERNEL);
  1711 + if (outbuf == NULL) {
  1712 + err = -ENOMEM;
  1713 + goto abort;
  1714 + }
  1715 + if (copy_from_user(outbuf, buf + outtotal, taskout)) {
  1716 + err = -EFAULT;
  1717 + goto abort;
  1718 + }
  1719 + outbuf_dma = pci_map_single(dd->pdev,
  1720 + outbuf,
  1721 + taskout,
  1722 + DMA_TO_DEVICE);
  1723 + if (outbuf_dma == 0) {
  1724 + err = -ENOMEM;
  1725 + goto abort;
  1726 + }
  1727 + dma_buffer = outbuf_dma;
  1728 + }
  1729 +
  1730 + if (taskin) {
  1731 + inbuf = kzalloc(taskin, GFP_KERNEL);
  1732 + if (inbuf == NULL) {
  1733 + err = -ENOMEM;
  1734 + goto abort;
  1735 + }
  1736 +
  1737 + if (copy_from_user(inbuf, buf + intotal, taskin)) {
  1738 + err = -EFAULT;
  1739 + goto abort;
  1740 + }
  1741 + inbuf_dma = pci_map_single(dd->pdev,
  1742 + inbuf,
  1743 + taskin, DMA_FROM_DEVICE);
  1744 + if (inbuf_dma == 0) {
  1745 + err = -ENOMEM;
  1746 + goto abort;
  1747 + }
  1748 + dma_buffer = inbuf_dma;
  1749 + }
  1750 +
  1751 + /* Only PIO and non-data commands are supported by this ioctl. */
  1752 + switch (req_task->data_phase) {
  1753 + case TASKFILE_OUT:
  1754 + nsect = taskout / ATA_SECT_SIZE;
  1755 + reply = (dd->port->rxfis + RX_FIS_PIO_SETUP);
  1756 + break;
  1757 + case TASKFILE_IN:
  1758 + reply = (dd->port->rxfis + RX_FIS_PIO_SETUP);
  1759 + break;
  1760 + case TASKFILE_NO_DATA:
  1761 + reply = (dd->port->rxfis + RX_FIS_D2H_REG);
  1762 + break;
  1763 + default:
  1764 + err = -EINVAL;
  1765 + goto abort;
  1766 + }
  1767 +
  1768 + /* Build the FIS. */
  1769 + memset(&fis, 0, sizeof(struct host_to_dev_fis));
  1770 +
  1771 + fis.type = 0x27;
  1772 + fis.opts = 1 << 7;
  1773 + fis.command = req_task->io_ports[7];
  1774 + fis.features = req_task->io_ports[1];
  1775 + fis.sect_count = req_task->io_ports[2];
  1776 + fis.lba_low = req_task->io_ports[3];
  1777 + fis.lba_mid = req_task->io_ports[4];
  1778 + fis.lba_hi = req_task->io_ports[5];
  1779 + /* Clear the dev bit*/
  1780 + fis.device = req_task->io_ports[6] & ~0x10;
  1781 +
  1782 + if ((req_task->in_flags.all == 0) && (req_task->out_flags.all & 1)) {
  1783 + req_task->in_flags.all =
  1784 + IDE_TASKFILE_STD_IN_FLAGS |
  1785 + (IDE_HOB_STD_IN_FLAGS << 8);
  1786 + fis.lba_low_ex = req_task->hob_ports[3];
  1787 + fis.lba_mid_ex = req_task->hob_ports[4];
  1788 + fis.lba_hi_ex = req_task->hob_ports[5];
  1789 + fis.features_ex = req_task->hob_ports[1];
  1790 + fis.sect_cnt_ex = req_task->hob_ports[2];
  1791 +
  1792 + } else {
  1793 + req_task->in_flags.all = IDE_TASKFILE_STD_IN_FLAGS;
  1794 + }
  1795 +
  1796 + force_single_sector = implicit_sector(fis.command, fis.features);
  1797 +
  1798 + if ((taskin || taskout) && (!fis.sect_count)) {
  1799 + if (nsect)
  1800 + fis.sect_count = nsect;
  1801 + else {
  1802 + if (!force_single_sector) {
  1803 + dev_warn(&dd->pdev->dev,
  1804 + "data movement but "
  1805 + "sect_count is 0\n");
  1806 + err = -EINVAL;
  1807 + goto abort;
  1808 + }
  1809 + }
  1810 + }
  1811 +
  1812 + dbg_printk(MTIP_DRV_NAME
  1813 + "taskfile: cmd %x, feat %x, nsect %x,"
  1814 + " sect/lbal %x, lcyl/lbam %x, hcyl/lbah %x,"
  1815 + " head/dev %x\n",
  1816 + fis.command,
  1817 + fis.features,
  1818 + fis.sect_count,
  1819 + fis.lba_low,
  1820 + fis.lba_mid,
  1821 + fis.lba_hi,
  1822 + fis.device);
  1823 +
  1824 + switch (fis.command) {
  1825 + case ATA_CMD_DOWNLOAD_MICRO:
  1826 + /* Change timeout for Download Microcode to 60 seconds.*/
  1827 + timeout = 60000;
  1828 + break;
  1829 + case ATA_CMD_SEC_ERASE_UNIT:
  1830 + /* Change timeout for Security Erase Unit to 4 minutes.*/
  1831 + timeout = 240000;
  1832 + break;
  1833 + case ATA_CMD_STANDBYNOW1:
  1834 + /* Change timeout for standby immediate to 10 seconds.*/
  1835 + timeout = 10000;
  1836 + break;
  1837 + case 0xF7:
  1838 + case 0xFA:
  1839 + /* Change timeout for vendor unique command to 10 secs */
  1840 + timeout = 10000;
  1841 + break;
  1842 + case ATA_CMD_SMART:
  1843 + /* Change timeout for vendor unique command to 10 secs */
  1844 + timeout = 10000;
  1845 + break;
  1846 + default:
  1847 + timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS;
  1848 + break;
  1849 + }
  1850 +
  1851 + /* Determine the correct transfer size.*/
  1852 + if (force_single_sector)
  1853 + transfer_size = ATA_SECT_SIZE;
  1854 + else
  1855 + transfer_size = ATA_SECT_SIZE * fis.sect_count;
  1856 +
  1857 + /* Execute the command.*/
  1858 + if (mtip_exec_internal_command(dd->port,
  1859 + &fis,
  1860 + 5,
  1861 + dma_buffer,
  1862 + transfer_size,
  1863 + 0,
  1864 + GFP_KERNEL,
  1865 + timeout) < 0) {
  1866 + err = -EIO;
  1867 + goto abort;
  1868 + }
  1869 +
  1870 + task_file_data = readl(dd->port->mmio+PORT_TFDATA);
  1871 +
  1872 + if ((req_task->data_phase == TASKFILE_IN) && !(task_file_data & 1)) {
  1873 + reply = dd->port->rxfis + RX_FIS_PIO_SETUP;
  1874 + req_task->io_ports[7] = reply->control;
  1875 + } else {
  1876 + reply = dd->port->rxfis + RX_FIS_D2H_REG;
  1877 + req_task->io_ports[7] = reply->command;
  1878 + }
  1879 +
  1880 + /* reclaim the DMA buffers.*/
  1881 + if (inbuf_dma)
  1882 + pci_unmap_single(dd->pdev, inbuf_dma,
  1883 + taskin, DMA_FROM_DEVICE);
  1884 + if (outbuf_dma)
  1885 + pci_unmap_single(dd->pdev, outbuf_dma,
  1886 + taskout, DMA_TO_DEVICE);
  1887 + inbuf_dma = 0;
  1888 + outbuf_dma = 0;
  1889 +
  1890 + /* return the ATA registers to the caller.*/
  1891 + req_task->io_ports[1] = reply->features;
  1892 + req_task->io_ports[2] = reply->sect_count;
  1893 + req_task->io_ports[3] = reply->lba_low;
  1894 + req_task->io_ports[4] = reply->lba_mid;
  1895 + req_task->io_ports[5] = reply->lba_hi;
  1896 + req_task->io_ports[6] = reply->device;
  1897 +
  1898 + if (req_task->out_flags.all & 1) {
  1899 +
  1900 + req_task->hob_ports[3] = reply->lba_low_ex;
  1901 + req_task->hob_ports[4] = reply->lba_mid_ex;
  1902 + req_task->hob_ports[5] = reply->lba_hi_ex;
  1903 + req_task->hob_ports[1] = reply->features_ex;
  1904 + req_task->hob_ports[2] = reply->sect_cnt_ex;
  1905 + }
  1906 +
  1907 + /* Com rest after secure erase or lowlevel format */
  1908 + if (((fis.command == ATA_CMD_SEC_ERASE_UNIT) ||
  1909 + ((fis.command == 0xFC) &&
  1910 + (fis.features == 0x27 || fis.features == 0x72 ||
  1911 + fis.features == 0x62 || fis.features == 0x26))) &&
  1912 + !(reply->command & 1)) {
  1913 + mtip_restart_port(dd->port);
  1914 + }
  1915 +
  1916 + dbg_printk(MTIP_DRV_NAME
  1917 + "%s: Completion: stat %x,"
  1918 + "err %x, sect_cnt %x, lbalo %x,"
  1919 + "lbamid %x, lbahi %x, dev %x\n",
  1920 + __func__,
  1921 + req_task->io_ports[7],
  1922 + req_task->io_ports[1],
  1923 + req_task->io_ports[2],
  1924 + req_task->io_ports[3],
  1925 + req_task->io_ports[4],
  1926 + req_task->io_ports[5],
  1927 + req_task->io_ports[6]);
  1928 +
  1929 + if (taskout) {
  1930 + if (copy_to_user(buf + outtotal, outbuf, taskout)) {
  1931 + err = -EFAULT;
  1932 + goto abort;
  1933 + }
  1934 + }
  1935 + if (taskin) {
  1936 + if (copy_to_user(buf + intotal, inbuf, taskin)) {
  1937 + err = -EFAULT;
  1938 + goto abort;
  1939 + }
  1940 + }
  1941 +abort:
  1942 + if (inbuf_dma)
  1943 + pci_unmap_single(dd->pdev, inbuf_dma,
  1944 + taskin, DMA_FROM_DEVICE);
  1945 + if (outbuf_dma)
  1946 + pci_unmap_single(dd->pdev, outbuf_dma,
  1947 + taskout, DMA_TO_DEVICE);
  1948 + kfree(outbuf);
  1949 + kfree(inbuf);
  1950 +
  1951 + return err;
  1952 +}
  1953 +
  1954 +/*
  1955 + * Handle IOCTL calls from the Block Layer.
  1956 + *
  1957 + * This function is called by the Block Layer when it receives an IOCTL
  1958 + * command that it does not understand. If the IOCTL command is not supported
  1959 + * this function returns -ENOTTY.
  1960 + *
  1961 + * @dd Pointer to the driver data structure.
  1962 + * @cmd IOCTL command passed from the Block Layer.
  1963 + * @arg IOCTL argument passed from the Block Layer.
  1964 + *
  1965 + * return value
  1966 + * 0 The IOCTL completed successfully.
  1967 + * -ENOTTY The specified command is not supported.
  1968 + * -EFAULT An error occurred copying data to a user space buffer.
  1969 + * -EIO An error occurred while executing the command.
  1970 + */
  1971 +static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd,
  1972 + unsigned long arg)
  1973 +{
  1974 + switch (cmd) {
  1975 + case HDIO_GET_IDENTITY:
  1976 + if (mtip_get_identify(dd->port, (void __user *) arg) < 0) {
  1977 + dev_warn(&dd->pdev->dev,
  1978 + "Unable to read identity\n");
  1979 + return -EIO;
  1980 + }
  1981 +
  1982 + break;
  1983 + case HDIO_DRIVE_CMD:
  1984 + {
  1985 + u8 drive_command[4];
  1986 +
  1987 + /* Copy the user command info to our buffer. */
  1988 + if (copy_from_user(drive_command,
  1989 + (void __user *) arg,
  1990 + sizeof(drive_command)))
  1991 + return -EFAULT;
  1992 +
  1993 + /* Execute the drive command. */
  1994 + if (exec_drive_command(dd->port,
  1995 + drive_command,
  1996 + (void __user *) (arg+4)))
  1997 + return -EIO;
  1998 +
  1999 + /* Copy the status back to the users buffer. */
  2000 + if (copy_to_user((void __user *) arg,
  2001 + drive_command,
  2002 + sizeof(drive_command)))
  2003 + return -EFAULT;
  2004 +
  2005 + break;
  2006 + }
  2007 + case HDIO_DRIVE_TASK:
  2008 + {
  2009 + u8 drive_command[7];
  2010 +
  2011 + /* Copy the user command info to our buffer. */
  2012 + if (copy_from_user(drive_command,
  2013 + (void __user *) arg,
  2014 + sizeof(drive_command)))
  2015 + return -EFAULT;
  2016 +
  2017 + /* Execute the drive command. */
  2018 + if (exec_drive_task(dd->port, drive_command))
  2019 + return -EIO;
  2020 +
  2021 + /* Copy the status back to the users buffer. */
  2022 + if (copy_to_user((void __user *) arg,
  2023 + drive_command,
  2024 + sizeof(drive_command)))
  2025 + return -EFAULT;
  2026 +
  2027 + break;
  2028 + }
  2029 + case HDIO_DRIVE_TASKFILE: {
  2030 + ide_task_request_t req_task;
  2031 + int ret, outtotal;
  2032 +
  2033 + if (copy_from_user(&req_task, (void __user *) arg,
  2034 + sizeof(req_task)))
  2035 + return -EFAULT;
  2036 +
  2037 + outtotal = sizeof(req_task);
  2038 +
  2039 + ret = exec_drive_taskfile(dd, (void __user *) arg,
  2040 + &req_task, outtotal);
  2041 +
  2042 + if (copy_to_user((void __user *) arg, &req_task,
  2043 + sizeof(req_task)))
  2044 + return -EFAULT;
  2045 +
  2046 + return ret;
  2047 + }
  2048 +
  2049 + default:
  2050 + return -EINVAL;
  2051 + }
  2052 + return 0;
  2053 +}
  2054 +
  2055 +/*
  2056 + * Submit an IO to the hw
  2057 + *
  2058 + * This function is called by the block layer to issue an io
  2059 + * to the device. Upon completion, the callback function will
  2060 + * be called with the data parameter passed as the callback data.
  2061 + *
  2062 + * @dd Pointer to the driver data structure.
  2063 + * @start First sector to read.
  2064 + * @nsect Number of sectors to read.
  2065 + * @nents Number of entries in scatter list for the read command.
  2066 + * @tag The tag of this read command.
  2067 + * @callback Pointer to the function that should be called
  2068 + * when the read completes.
  2069 + * @data Callback data passed to the callback function
  2070 + * when the read completes.
  2071 + * @barrier If non-zero, this command must be completed before
  2072 + * issuing any other commands.
  2073 + * @dir Direction (read or write)
  2074 + *
  2075 + * return value
  2076 + * None
  2077 + */
  2078 +static void mtip_hw_submit_io(struct driver_data *dd, sector_t start,
  2079 + int nsect, int nents, int tag, void *callback,
  2080 + void *data, int barrier, int dir)
  2081 +{
  2082 + struct host_to_dev_fis *fis;
  2083 + struct mtip_port *port = dd->port;
  2084 + struct mtip_cmd *command = &port->commands[tag];
  2085 +
  2086 + /* Map the scatter list for DMA access */
  2087 + if (dir == READ)
  2088 + nents = dma_map_sg(&dd->pdev->dev, command->sg,
  2089 + nents, DMA_FROM_DEVICE);
  2090 + else
  2091 + nents = dma_map_sg(&dd->pdev->dev, command->sg,
  2092 + nents, DMA_TO_DEVICE);
  2093 +
  2094 + command->scatter_ents = nents;
  2095 +
  2096 + /*
  2097 + * The number of retries for this command before it is
  2098 + * reported as a failure to the upper layers.
  2099 + */
  2100 + command->retries = MTIP_MAX_RETRIES;
  2101 +
  2102 + /* Fill out fis */
  2103 + fis = command->command;
  2104 + fis->type = 0x27;
  2105 + fis->opts = 1 << 7;
  2106 + fis->command =
  2107 + (dir == READ ? ATA_CMD_FPDMA_READ : ATA_CMD_FPDMA_WRITE);
  2108 + *((unsigned int *) &fis->lba_low) = (start & 0xFFFFFF);
  2109 + *((unsigned int *) &fis->lba_low_ex) = ((start >> 24) & 0xFFFFFF);
  2110 + fis->device = 1 << 6;
  2111 + if (barrier)
  2112 + fis->device |= FUA_BIT;
  2113 + fis->features = nsect & 0xFF;
  2114 + fis->features_ex = (nsect >> 8) & 0xFF;
  2115 + fis->sect_count = ((tag << 3) | (tag >> 5));
  2116 + fis->sect_cnt_ex = 0;
  2117 + fis->control = 0;
  2118 + fis->res2 = 0;
  2119 + fis->res3 = 0;
  2120 + fill_command_sg(dd, command, nents);
  2121 +
  2122 + /* Populate the command header */
  2123 + command->command_header->opts =
  2124 + __force_bit2int cpu_to_le32(
  2125 + (nents << 16) | 5 | AHCI_CMD_PREFETCH);
  2126 + command->command_header->byte_count = 0;
  2127 +
  2128 + /*
  2129 + * Set the completion function and data for the command
  2130 + * within this layer.
  2131 + */
  2132 + command->comp_data = dd;
  2133 + command->comp_func = mtip_async_complete;
  2134 + command->direction = (dir == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
  2135 +
  2136 + /*
  2137 + * Set the completion function and data for the command passed
  2138 + * from the upper layer.
  2139 + */
  2140 + command->async_data = data;
  2141 + command->async_callback = callback;
  2142 +
  2143 + /*
  2144 + * To prevent this command from being issued
  2145 + * if an internal command is in progress or error handling is active.
  2146 + */
  2147 + if (unlikely(test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) ||
  2148 + test_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags))) {
  2149 + set_bit(tag, port->cmds_to_issue);
  2150 + set_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags);
  2151 + return;
  2152 + }
  2153 +
  2154 + /* Issue the command to the hardware */
  2155 + mtip_issue_ncq_command(port, tag);
  2156 +
  2157 + /* Set the command's timeout value.*/
  2158 + port->commands[tag].comp_time = jiffies + msecs_to_jiffies(
  2159 + MTIP_NCQ_COMMAND_TIMEOUT_MS);
  2160 +}
  2161 +
  2162 +/*
  2163 + * Release a command slot.
  2164 + *
  2165 + * @dd Pointer to the driver data structure.
  2166 + * @tag Slot tag
  2167 + *
  2168 + * return value
  2169 + * None
  2170 + */
  2171 +static void mtip_hw_release_scatterlist(struct driver_data *dd, int tag)
  2172 +{
  2173 + release_slot(dd->port, tag);
  2174 +}
  2175 +
  2176 +/*
  2177 + * Obtain a command slot and return its associated scatter list.
  2178 + *
  2179 + * @dd Pointer to the driver data structure.
  2180 + * @tag Pointer to an int that will receive the allocated command
  2181 + * slot tag.
  2182 + *
  2183 + * return value
  2184 + * Pointer to the scatter list for the allocated command slot
  2185 + * or NULL if no command slots are available.
  2186 + */
  2187 +static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd,
  2188 + int *tag)
  2189 +{
  2190 + /*
  2191 + * It is possible that, even with this semaphore, a thread
  2192 + * may think that no command slots are available. Therefore, we
  2193 + * need to make an attempt to get_slot().
  2194 + */
  2195 + down(&dd->port->cmd_slot);
  2196 + *tag = get_slot(dd->port);
  2197 +
  2198 + if (unlikely(*tag < 0))
  2199 + return NULL;
  2200 +
  2201 + return dd->port->commands[*tag].sg;
  2202 +}
  2203 +
  2204 +/*
  2205 + * Sysfs register/status dump.
  2206 + *
  2207 + * @dev Pointer to the device structure, passed by the kernrel.
  2208 + * @attr Pointer to the device_attribute structure passed by the kernel.
  2209 + * @buf Pointer to the char buffer that will receive the stats info.
  2210 + *
  2211 + * return value
  2212 + * The size, in bytes, of the data copied into buf.
  2213 + */
  2214 +static ssize_t hw_show_registers(struct device *dev,
  2215 + struct device_attribute *attr,
  2216 + char *buf)
  2217 +{
  2218 + u32 group_allocated;
  2219 + struct driver_data *dd = dev_to_disk(dev)->private_data;
  2220 + int size = 0;
  2221 + int n;
  2222 +
  2223 + size += sprintf(&buf[size], "%s:\ns_active:\n", __func__);
  2224 +
  2225 + for (n = 0; n < dd->slot_groups; n++)
  2226 + size += sprintf(&buf[size], "0x%08x\n",
  2227 + readl(dd->port->s_active[n]));
  2228 +
  2229 + size += sprintf(&buf[size], "Command Issue:\n");
  2230 +
  2231 + for (n = 0; n < dd->slot_groups; n++)
  2232 + size += sprintf(&buf[size], "0x%08x\n",
  2233 + readl(dd->port->cmd_issue[n]));
  2234 +
  2235 + size += sprintf(&buf[size], "Allocated:\n");
  2236 +
  2237 + for (n = 0; n < dd->slot_groups; n++) {
  2238 + if (sizeof(long) > sizeof(u32))
  2239 + group_allocated =
  2240 + dd->port->allocated[n/2] >> (32*(n&1));
  2241 + else
  2242 + group_allocated = dd->port->allocated[n];
  2243 + size += sprintf(&buf[size], "0x%08x\n",
  2244 + group_allocated);
  2245 + }
  2246 +
  2247 + size += sprintf(&buf[size], "completed:\n");
  2248 +
  2249 + for (n = 0; n < dd->slot_groups; n++)
  2250 + size += sprintf(&buf[size], "0x%08x\n",
  2251 + readl(dd->port->completed[n]));
  2252 +
  2253 + size += sprintf(&buf[size], "PORT_IRQ_STAT 0x%08x\n",
  2254 + readl(dd->port->mmio + PORT_IRQ_STAT));
  2255 + size += sprintf(&buf[size], "HOST_IRQ_STAT 0x%08x\n",
  2256 + readl(dd->mmio + HOST_IRQ_STAT));
  2257 +
  2258 + return size;
  2259 +}
  2260 +static DEVICE_ATTR(registers, S_IRUGO, hw_show_registers, NULL);
  2261 +
  2262 +/*
  2263 + * Create the sysfs related attributes.
  2264 + *
  2265 + * @dd Pointer to the driver data structure.
  2266 + * @kobj Pointer to the kobj for the block device.
  2267 + *
  2268 + * return value
  2269 + * 0 Operation completed successfully.
  2270 + * -EINVAL Invalid parameter.
  2271 + */
  2272 +static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj)
  2273 +{
  2274 + if (!kobj || !dd)
  2275 + return -EINVAL;
  2276 +
  2277 + if (sysfs_create_file(kobj, &dev_attr_registers.attr))
  2278 + dev_warn(&dd->pdev->dev,
  2279 + "Error creating registers sysfs entry\n");
  2280 + return 0;
  2281 +}
  2282 +
  2283 +/*
  2284 + * Remove the sysfs related attributes.
  2285 + *
  2286 + * @dd Pointer to the driver data structure.
  2287 + * @kobj Pointer to the kobj for the block device.
  2288 + *
  2289 + * return value
  2290 + * 0 Operation completed successfully.
  2291 + * -EINVAL Invalid parameter.
  2292 + */
  2293 +static int mtip_hw_sysfs_exit(struct driver_data *dd, struct kobject *kobj)
  2294 +{
  2295 + if (!kobj || !dd)
  2296 + return -EINVAL;
  2297 +
  2298 + sysfs_remove_file(kobj, &dev_attr_registers.attr);
  2299 +
  2300 + return 0;
  2301 +}
  2302 +
  2303 +/*
  2304 + * Perform any init/resume time hardware setup
  2305 + *
  2306 + * @dd Pointer to the driver data structure.
  2307 + *
  2308 + * return value
  2309 + * None
  2310 + */
  2311 +static inline void hba_setup(struct driver_data *dd)
  2312 +{
  2313 + u32 hwdata;
  2314 + hwdata = readl(dd->mmio + HOST_HSORG);
  2315 +
  2316 + /* interrupt bug workaround: use only 1 IS bit.*/
  2317 + writel(hwdata |
  2318 + HSORG_DISABLE_SLOTGRP_INTR |
  2319 + HSORG_DISABLE_SLOTGRP_PXIS,
  2320 + dd->mmio + HOST_HSORG);
  2321 +}
  2322 +
  2323 +/*
  2324 + * Detect the details of the product, and store anything needed
  2325 + * into the driver data structure. This includes product type and
  2326 + * version and number of slot groups.
  2327 + *
  2328 + * @dd Pointer to the driver data structure.
  2329 + *
  2330 + * return value
  2331 + * None
  2332 + */
  2333 +static void mtip_detect_product(struct driver_data *dd)
  2334 +{
  2335 + u32 hwdata;
  2336 + unsigned int rev, slotgroups;
  2337 +
  2338 + /*
  2339 + * HBA base + 0xFC [15:0] - vendor-specific hardware interface
  2340 + * info register:
  2341 + * [15:8] hardware/software interface rev#
  2342 + * [ 3] asic-style interface
  2343 + * [ 2:0] number of slot groups, minus 1 (only valid for asic-style).
  2344 + */
  2345 + hwdata = readl(dd->mmio + HOST_HSORG);
  2346 +
  2347 + dd->product_type = MTIP_PRODUCT_UNKNOWN;
  2348 + dd->slot_groups = 1;
  2349 +
  2350 + if (hwdata & 0x8) {
  2351 + dd->product_type = MTIP_PRODUCT_ASICFPGA;
  2352 + rev = (hwdata & HSORG_HWREV) >> 8;
  2353 + slotgroups = (hwdata & HSORG_SLOTGROUPS) + 1;
  2354 + dev_info(&dd->pdev->dev,
  2355 + "ASIC-FPGA design, HS rev 0x%x, "
  2356 + "%i slot groups [%i slots]\n",
  2357 + rev,
  2358 + slotgroups,
  2359 + slotgroups * 32);
  2360 +
  2361 + if (slotgroups > MTIP_MAX_SLOT_GROUPS) {
  2362 + dev_warn(&dd->pdev->dev,
  2363 + "Warning: driver only supports "
  2364 + "%i slot groups.\n", MTIP_MAX_SLOT_GROUPS);
  2365 + slotgroups = MTIP_MAX_SLOT_GROUPS;
  2366 + }
  2367 + dd->slot_groups = slotgroups;
  2368 + return;
  2369 + }
  2370 +
  2371 + dev_warn(&dd->pdev->dev, "Unrecognized product id\n");
  2372 +}
  2373 +
  2374 +/*
  2375 + * Blocking wait for FTL rebuild to complete
  2376 + *
  2377 + * @dd Pointer to the DRIVER_DATA structure.
  2378 + *
  2379 + * return value
  2380 + * 0 FTL rebuild completed successfully
  2381 + * -EFAULT FTL rebuild error/timeout/interruption
  2382 + */
  2383 +static int mtip_ftl_rebuild_poll(struct driver_data *dd)
  2384 +{
  2385 + unsigned long timeout, cnt = 0, start;
  2386 +
  2387 + dev_warn(&dd->pdev->dev,
  2388 + "FTL rebuild in progress. Polling for completion.\n");
  2389 +
  2390 + start = jiffies;
  2391 + dd->ftlrebuildflag = 1;
  2392 + timeout = jiffies + msecs_to_jiffies(MTIP_FTL_REBUILD_TIMEOUT_MS);
  2393 +
  2394 + do {
  2395 + if (mtip_check_surprise_removal(dd->pdev))
  2396 + return -EFAULT;
  2397 +
  2398 + if (mtip_get_identify(dd->port, NULL) < 0)
  2399 + return -EFAULT;
  2400 +
  2401 + if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
  2402 + MTIP_FTL_REBUILD_MAGIC) {
  2403 + ssleep(1);
  2404 + /* Print message every 3 minutes */
  2405 + if (cnt++ >= 180) {
  2406 + dev_warn(&dd->pdev->dev,
  2407 + "FTL rebuild in progress (%d secs).\n",
  2408 + jiffies_to_msecs(jiffies - start) / 1000);
  2409 + cnt = 0;
  2410 + }
  2411 + } else {
  2412 + dev_warn(&dd->pdev->dev,
  2413 + "FTL rebuild complete (%d secs).\n",
  2414 + jiffies_to_msecs(jiffies - start) / 1000);
  2415 + dd->ftlrebuildflag = 0;
  2416 + mtip_block_initialize(dd);
  2417 + break;
  2418 + }
  2419 + ssleep(10);
  2420 + } while (time_before(jiffies, timeout));
  2421 +
  2422 + /* Check for timeout */
  2423 + if (dd->ftlrebuildflag) {
  2424 + dev_err(&dd->pdev->dev,
  2425 + "Timed out waiting for FTL rebuild to complete (%d secs).\n",
  2426 + jiffies_to_msecs(jiffies - start) / 1000);
  2427 + return -EFAULT;
  2428 + }
  2429 +
  2430 + return 0;
  2431 +}
  2432 +
  2433 +/*
  2434 + * service thread to issue queued commands
  2435 + *
  2436 + * @data Pointer to the driver data structure.
  2437 + *
  2438 + * return value
  2439 + * 0
  2440 + */
  2441 +
  2442 +static int mtip_service_thread(void *data)
  2443 +{
  2444 + struct driver_data *dd = (struct driver_data *)data;
  2445 + unsigned long slot, slot_start, slot_wrap;
  2446 + unsigned int num_cmd_slots = dd->slot_groups * 32;
  2447 + struct mtip_port *port = dd->port;
  2448 +
  2449 + while (1) {
  2450 + /*
  2451 + * the condition is to check neither an internal command is
  2452 + * is in progress nor error handling is active
  2453 + */
  2454 + wait_event_interruptible(port->svc_wait, (port->flags) &&
  2455 + !test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) &&
  2456 + !test_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags));
  2457 +
  2458 + if (kthread_should_stop())
  2459 + break;
  2460 +
  2461 + set_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags);
  2462 + if (test_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags)) {
  2463 + slot = 1;
  2464 + /* used to restrict the loop to one iteration */
  2465 + slot_start = num_cmd_slots;
  2466 + slot_wrap = 0;
  2467 + while (1) {
  2468 + slot = find_next_bit(port->cmds_to_issue,
  2469 + num_cmd_slots, slot);
  2470 + if (slot_wrap == 1) {
  2471 + if ((slot_start >= slot) ||
  2472 + (slot >= num_cmd_slots))
  2473 + break;
  2474 + }
  2475 + if (unlikely(slot_start == num_cmd_slots))
  2476 + slot_start = slot;
  2477 +
  2478 + if (unlikely(slot == num_cmd_slots)) {
  2479 + slot = 1;
  2480 + slot_wrap = 1;
  2481 + continue;
  2482 + }
  2483 +
  2484 + /* Issue the command to the hardware */
  2485 + mtip_issue_ncq_command(port, slot);
  2486 +
  2487 + /* Set the command's timeout value.*/
  2488 + port->commands[slot].comp_time = jiffies +
  2489 + msecs_to_jiffies(MTIP_NCQ_COMMAND_TIMEOUT_MS);
  2490 +
  2491 + clear_bit(slot, port->cmds_to_issue);
  2492 + }
  2493 +
  2494 + clear_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags);
  2495 + } else if (test_bit(MTIP_FLAG_REBUILD_BIT, &port->flags)) {
  2496 + mtip_ftl_rebuild_poll(dd);
  2497 + clear_bit(MTIP_FLAG_REBUILD_BIT, &port->flags);
  2498 + }
  2499 + clear_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags);
  2500 +
  2501 + if (test_bit(MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT, &port->flags))
  2502 + break;
  2503 + }
  2504 + return 0;
  2505 +}
  2506 +
  2507 +/*
  2508 + * Called once for each card.
  2509 + *
  2510 + * @dd Pointer to the driver data structure.
  2511 + *
  2512 + * return value
  2513 + * 0 on success, else an error code.
  2514 + */
  2515 +static int mtip_hw_init(struct driver_data *dd)
  2516 +{
  2517 + int i;
  2518 + int rv;
  2519 + unsigned int num_command_slots;
  2520 +
  2521 + dd->mmio = pcim_iomap_table(dd->pdev)[MTIP_ABAR];
  2522 +
  2523 + mtip_detect_product(dd);
  2524 + if (dd->product_type == MTIP_PRODUCT_UNKNOWN) {
  2525 + rv = -EIO;
  2526 + goto out1;
  2527 + }
  2528 + num_command_slots = dd->slot_groups * 32;
  2529 +
  2530 + hba_setup(dd);
  2531 +
  2532 + tasklet_init(&dd->tasklet, mtip_tasklet, (unsigned long)dd);
  2533 +
  2534 + dd->port = kzalloc(sizeof(struct mtip_port), GFP_KERNEL);
  2535 + if (!dd->port) {
  2536 + dev_err(&dd->pdev->dev,
  2537 + "Memory allocation: port structure\n");
  2538 + return -ENOMEM;
  2539 + }
  2540 +
  2541 + /* Counting semaphore to track command slot usage */
  2542 + sema_init(&dd->port->cmd_slot, num_command_slots - 1);
  2543 +
  2544 + /* Spinlock to prevent concurrent issue */
  2545 + spin_lock_init(&dd->port->cmd_issue_lock);
  2546 +
  2547 + /* Set the port mmio base address. */
  2548 + dd->port->mmio = dd->mmio + PORT_OFFSET;
  2549 + dd->port->dd = dd;
  2550 +
  2551 + /* Allocate memory for the command list. */
  2552 + dd->port->command_list =
  2553 + dmam_alloc_coherent(&dd->pdev->dev,
  2554 + HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2),
  2555 + &dd->port->command_list_dma,
  2556 + GFP_KERNEL);
  2557 + if (!dd->port->command_list) {
  2558 + dev_err(&dd->pdev->dev,
  2559 + "Memory allocation: command list\n");
  2560 + rv = -ENOMEM;
  2561 + goto out1;
  2562 + }
  2563 +
  2564 + /* Clear the memory we have allocated. */
  2565 + memset(dd->port->command_list,
  2566 + 0,
  2567 + HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2));
  2568 +
  2569 + /* Setup the addresse of the RX FIS. */
  2570 + dd->port->rxfis = dd->port->command_list + HW_CMD_SLOT_SZ;
  2571 + dd->port->rxfis_dma = dd->port->command_list_dma + HW_CMD_SLOT_SZ;
  2572 +
  2573 + /* Setup the address of the command tables. */
  2574 + dd->port->command_table = dd->port->rxfis + AHCI_RX_FIS_SZ;
  2575 + dd->port->command_tbl_dma = dd->port->rxfis_dma + AHCI_RX_FIS_SZ;
  2576 +
  2577 + /* Setup the address of the identify data. */
  2578 + dd->port->identify = dd->port->command_table +
  2579 + HW_CMD_TBL_AR_SZ;
  2580 + dd->port->identify_dma = dd->port->command_tbl_dma +
  2581 + HW_CMD_TBL_AR_SZ;
  2582 +
  2583 + /* Setup the address of the sector buffer. */
  2584 + dd->port->sector_buffer = (void *) dd->port->identify + ATA_SECT_SIZE;
  2585 + dd->port->sector_buffer_dma = dd->port->identify_dma + ATA_SECT_SIZE;
  2586 +
  2587 + /* Point the command headers at the command tables. */
  2588 + for (i = 0; i < num_command_slots; i++) {
  2589 + dd->port->commands[i].command_header =
  2590 + dd->port->command_list +
  2591 + (sizeof(struct mtip_cmd_hdr) * i);
  2592 + dd->port->commands[i].command_header_dma =
  2593 + dd->port->command_list_dma +
  2594 + (sizeof(struct mtip_cmd_hdr) * i);
  2595 +
  2596 + dd->port->commands[i].command =
  2597 + dd->port->command_table + (HW_CMD_TBL_SZ * i);
  2598 + dd->port->commands[i].command_dma =
  2599 + dd->port->command_tbl_dma + (HW_CMD_TBL_SZ * i);
  2600 +
  2601 + if (readl(dd->mmio + HOST_CAP) & HOST_CAP_64)
  2602 + dd->port->commands[i].command_header->ctbau =
  2603 + __force_bit2int cpu_to_le32(
  2604 + (dd->port->commands[i].command_dma >> 16) >> 16);
  2605 + dd->port->commands[i].command_header->ctba =
  2606 + __force_bit2int cpu_to_le32(
  2607 + dd->port->commands[i].command_dma & 0xFFFFFFFF);
  2608 +
  2609 + /*
  2610 + * If this is not done, a bug is reported by the stock
  2611 + * FC11 i386. Due to the fact that it has lots of kernel
  2612 + * debugging enabled.
  2613 + */
  2614 + sg_init_table(dd->port->commands[i].sg, MTIP_MAX_SG);
  2615 +
  2616 + /* Mark all commands as currently inactive.*/
  2617 + atomic_set(&dd->port->commands[i].active, 0);
  2618 + }
  2619 +
  2620 + /* Setup the pointers to the extended s_active and CI registers. */
  2621 + for (i = 0; i < dd->slot_groups; i++) {
  2622 + dd->port->s_active[i] =
  2623 + dd->port->mmio + i*0x80 + PORT_SCR_ACT;
  2624 + dd->port->cmd_issue[i] =
  2625 + dd->port->mmio + i*0x80 + PORT_COMMAND_ISSUE;
  2626 + dd->port->completed[i] =
  2627 + dd->port->mmio + i*0x80 + PORT_SDBV;
  2628 + }
  2629 +
  2630 + /* Reset the HBA. */
  2631 + if (mtip_hba_reset(dd) < 0) {
  2632 + dev_err(&dd->pdev->dev,
  2633 + "Card did not reset within timeout\n");
  2634 + rv = -EIO;
  2635 + goto out2;
  2636 + }
  2637 +
  2638 + mtip_init_port(dd->port);
  2639 + mtip_start_port(dd->port);
  2640 +
  2641 + /* Setup the ISR and enable interrupts. */
  2642 + rv = devm_request_irq(&dd->pdev->dev,
  2643 + dd->pdev->irq,
  2644 + mtip_irq_handler,
  2645 + IRQF_SHARED,
  2646 + dev_driver_string(&dd->pdev->dev),
  2647 + dd);
  2648 +
  2649 + if (rv) {
  2650 + dev_err(&dd->pdev->dev,
  2651 + "Unable to allocate IRQ %d\n", dd->pdev->irq);
  2652 + goto out2;
  2653 + }
  2654 +
  2655 + /* Enable interrupts on the HBA. */
  2656 + writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
  2657 + dd->mmio + HOST_CTL);
  2658 +
  2659 + init_timer(&dd->port->cmd_timer);
  2660 + init_waitqueue_head(&dd->port->svc_wait);
  2661 +
  2662 + dd->port->cmd_timer.data = (unsigned long int) dd->port;
  2663 + dd->port->cmd_timer.function = mtip_timeout_function;
  2664 + mod_timer(&dd->port->cmd_timer,
  2665 + jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
  2666 +
  2667 + if (mtip_get_identify(dd->port, NULL) < 0) {
  2668 + rv = -EFAULT;
  2669 + goto out3;
  2670 + }
  2671 +
  2672 + if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
  2673 + MTIP_FTL_REBUILD_MAGIC) {
  2674 + set_bit(MTIP_FLAG_REBUILD_BIT, &dd->port->flags);
  2675 + return MTIP_FTL_REBUILD_MAGIC;
  2676 + }
  2677 + mtip_dump_identify(dd->port);
  2678 + return rv;
  2679 +
  2680 +out3:
  2681 + del_timer_sync(&dd->port->cmd_timer);
  2682 +
  2683 + /* Disable interrupts on the HBA. */
  2684 + writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
  2685 + dd->mmio + HOST_CTL);
  2686 +
  2687 + /*Release the IRQ. */
  2688 + devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
  2689 +
  2690 +out2:
  2691 + mtip_deinit_port(dd->port);
  2692 +
  2693 + /* Free the command/command header memory. */
  2694 + dmam_free_coherent(&dd->pdev->dev,
  2695 + HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2),
  2696 + dd->port->command_list,
  2697 + dd->port->command_list_dma);
  2698 +out1:
  2699 + /* Free the memory allocated for the for structure. */
  2700 + kfree(dd->port);
  2701 +
  2702 + return rv;
  2703 +}
  2704 +
  2705 +/*
  2706 + * Called to deinitialize an interface.
  2707 + *
  2708 + * @dd Pointer to the driver data structure.
  2709 + *
  2710 + * return value
  2711 + * 0
  2712 + */
  2713 +static int mtip_hw_exit(struct driver_data *dd)
  2714 +{
  2715 + /*
  2716 + * Send standby immediate (E0h) to the drive so that it
  2717 + * saves its state.
  2718 + */
  2719 + if (atomic_read(&dd->drv_cleanup_done) != true) {
  2720 +
  2721 + mtip_standby_immediate(dd->port);
  2722 +
  2723 + /* de-initialize the port. */
  2724 + mtip_deinit_port(dd->port);
  2725 +
  2726 + /* Disable interrupts on the HBA. */
  2727 + writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
  2728 + dd->mmio + HOST_CTL);
  2729 + }
  2730 +
  2731 + del_timer_sync(&dd->port->cmd_timer);
  2732 +
  2733 + /* Release the IRQ. */
  2734 + devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
  2735 +
  2736 + /* Stop the bottom half tasklet. */
  2737 + tasklet_kill(&dd->tasklet);
  2738 +
  2739 + /* Free the command/command header memory. */
  2740 + dmam_free_coherent(&dd->pdev->dev,
  2741 + HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2),
  2742 + dd->port->command_list,
  2743 + dd->port->command_list_dma);
  2744 + /* Free the memory allocated for the for structure. */
  2745 + kfree(dd->port);
  2746 +
  2747 + return 0;
  2748 +}
  2749 +
  2750 +/*
  2751 + * Issue a Standby Immediate command to the device.
  2752 + *
  2753 + * This function is called by the Block Layer just before the
  2754 + * system powers off during a shutdown.
  2755 + *
  2756 + * @dd Pointer to the driver data structure.
  2757 + *
  2758 + * return value
  2759 + * 0
  2760 + */
  2761 +static int mtip_hw_shutdown(struct driver_data *dd)
  2762 +{
  2763 + /*
  2764 + * Send standby immediate (E0h) to the drive so that it
  2765 + * saves its state.
  2766 + */
  2767 + mtip_standby_immediate(dd->port);
  2768 +
  2769 + return 0;
  2770 +}
  2771 +
  2772 +/*
  2773 + * Suspend function
  2774 + *
  2775 + * This function is called by the Block Layer just before the
  2776 + * system hibernates.
  2777 + *
  2778 + * @dd Pointer to the driver data structure.
  2779 + *
  2780 + * return value
  2781 + * 0 Suspend was successful
  2782 + * -EFAULT Suspend was not successful
  2783 + */
  2784 +static int mtip_hw_suspend(struct driver_data *dd)
  2785 +{
  2786 + /*
  2787 + * Send standby immediate (E0h) to the drive
  2788 + * so that it saves its state.
  2789 + */
  2790 + if (mtip_standby_immediate(dd->port) != 0) {
  2791 + dev_err(&dd->pdev->dev,
  2792 + "Failed standby-immediate command\n");
  2793 + return -EFAULT;
  2794 + }
  2795 +
  2796 + /* Disable interrupts on the HBA.*/
  2797 + writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
  2798 + dd->mmio + HOST_CTL);
  2799 + mtip_deinit_port(dd->port);
  2800 +
  2801 + return 0;
  2802 +}
  2803 +
  2804 +/*
  2805 + * Resume function
  2806 + *
  2807 + * This function is called by the Block Layer as the
  2808 + * system resumes.
  2809 + *
  2810 + * @dd Pointer to the driver data structure.
  2811 + *
  2812 + * return value
  2813 + * 0 Resume was successful
  2814 + * -EFAULT Resume was not successful
  2815 + */
  2816 +static int mtip_hw_resume(struct driver_data *dd)
  2817 +{
  2818 + /* Perform any needed hardware setup steps */
  2819 + hba_setup(dd);
  2820 +
  2821 + /* Reset the HBA */
  2822 + if (mtip_hba_reset(dd) != 0) {
  2823 + dev_err(&dd->pdev->dev,
  2824 + "Unable to reset the HBA\n");
  2825 + return -EFAULT;
  2826 + }
  2827 +
  2828 + /*
  2829 + * Enable the port, DMA engine, and FIS reception specific
  2830 + * h/w in controller.
  2831 + */
  2832 + mtip_init_port(dd->port);
  2833 + mtip_start_port(dd->port);
  2834 +
  2835 + /* Enable interrupts on the HBA.*/
  2836 + writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
  2837 + dd->mmio + HOST_CTL);
  2838 +
  2839 + return 0;
  2840 +}
  2841 +
  2842 +/*
  2843 + * Helper function for reusing disk name
  2844 + * upon hot insertion.
  2845 + */
  2846 +static int rssd_disk_name_format(char *prefix,
  2847 + int index,
  2848 + char *buf,
  2849 + int buflen)
  2850 +{
  2851 + const int base = 'z' - 'a' + 1;
  2852 + char *begin = buf + strlen(prefix);
  2853 + char *end = buf + buflen;
  2854 + char *p;
  2855 + int unit;
  2856 +
  2857 + p = end - 1;
  2858 + *p = '\0';
  2859 + unit = base;
  2860 + do {
  2861 + if (p == begin)
  2862 + return -EINVAL;
  2863 + *--p = 'a' + (index % unit);
  2864 + index = (index / unit) - 1;
  2865 + } while (index >= 0);
  2866 +
  2867 + memmove(begin, p, end - p);
  2868 + memcpy(buf, prefix, strlen(prefix));
  2869 +
  2870 + return 0;
  2871 +}
  2872 +
  2873 +/*
  2874 + * Block layer IOCTL handler.
  2875 + *
  2876 + * @dev Pointer to the block_device structure.
  2877 + * @mode ignored
  2878 + * @cmd IOCTL command passed from the user application.
  2879 + * @arg Argument passed from the user application.
  2880 + *
  2881 + * return value
  2882 + * 0 IOCTL completed successfully.
  2883 + * -ENOTTY IOCTL not supported or invalid driver data
  2884 + * structure pointer.
  2885 + */
  2886 +static int mtip_block_ioctl(struct block_device *dev,
  2887 + fmode_t mode,
  2888 + unsigned cmd,
  2889 + unsigned long arg)
  2890 +{
  2891 + struct driver_data *dd = dev->bd_disk->private_data;
  2892 +
  2893 + if (!capable(CAP_SYS_ADMIN))
  2894 + return -EACCES;
  2895 +
  2896 + if (!dd)
  2897 + return -ENOTTY;
  2898 +
  2899 + switch (cmd) {
  2900 + case BLKFLSBUF:
  2901 + return -ENOTTY;
  2902 + default:
  2903 + return mtip_hw_ioctl(dd, cmd, arg);
  2904 + }
  2905 +}
  2906 +
  2907 +#ifdef CONFIG_COMPAT
  2908 +/*
  2909 + * Block layer compat IOCTL handler.
  2910 + *
  2911 + * @dev Pointer to the block_device structure.
  2912 + * @mode ignored
  2913 + * @cmd IOCTL command passed from the user application.
  2914 + * @arg Argument passed from the user application.
  2915 + *
  2916 + * return value
  2917 + * 0 IOCTL completed successfully.
  2918 + * -ENOTTY IOCTL not supported or invalid driver data
  2919 + * structure pointer.
  2920 + */
  2921 +static int mtip_block_compat_ioctl(struct block_device *dev,
  2922 + fmode_t mode,
  2923 + unsigned cmd,
  2924 + unsigned long arg)
  2925 +{
  2926 + struct driver_data *dd = dev->bd_disk->private_data;
  2927 +
  2928 + if (!capable(CAP_SYS_ADMIN))
  2929 + return -EACCES;
  2930 +
  2931 + if (!dd)
  2932 + return -ENOTTY;
  2933 +
  2934 + switch (cmd) {
  2935 + case BLKFLSBUF:
  2936 + return -ENOTTY;
  2937 + case HDIO_DRIVE_TASKFILE: {
  2938 + struct mtip_compat_ide_task_request_s __user *compat_req_task;
  2939 + ide_task_request_t req_task;
  2940 + int compat_tasksize, outtotal, ret;
  2941 +
  2942 + compat_tasksize =
  2943 + sizeof(struct mtip_compat_ide_task_request_s);
  2944 +
  2945 + compat_req_task =
  2946 + (struct mtip_compat_ide_task_request_s __user *) arg;
  2947 +
  2948 + if (copy_from_user(&req_task, (void __user *) arg,
  2949 + compat_tasksize - (2 * sizeof(compat_long_t))))
  2950 + return -EFAULT;
  2951 +
  2952 + if (get_user(req_task.out_size, &compat_req_task->out_size))
  2953 + return -EFAULT;
  2954 +
  2955 + if (get_user(req_task.in_size, &compat_req_task->in_size))
  2956 + return -EFAULT;
  2957 +
  2958 + outtotal = sizeof(struct mtip_compat_ide_task_request_s);
  2959 +
  2960 + ret = exec_drive_taskfile(dd, (void __user *) arg,
  2961 + &req_task, outtotal);
  2962 +
  2963 + if (copy_to_user((void __user *) arg, &req_task,
  2964 + compat_tasksize -
  2965 + (2 * sizeof(compat_long_t))))
  2966 + return -EFAULT;
  2967 +
  2968 + if (put_user(req_task.out_size, &compat_req_task->out_size))
  2969 + return -EFAULT;
  2970 +
  2971 + if (put_user(req_task.in_size, &compat_req_task->in_size))
  2972 + return -EFAULT;
  2973 +
  2974 + return ret;
  2975 + }
  2976 + default:
  2977 + return mtip_hw_ioctl(dd, cmd, arg);
  2978 + }
  2979 +}
  2980 +#endif
  2981 +
  2982 +/*
  2983 + * Obtain the geometry of the device.
  2984 + *
  2985 + * You may think that this function is obsolete, but some applications,
  2986 + * fdisk for example still used CHS values. This function describes the
  2987 + * device as having 224 heads and 56 sectors per cylinder. These values are
  2988 + * chosen so that each cylinder is aligned on a 4KB boundary. Since a
  2989 + * partition is described in terms of a start and end cylinder this means
  2990 + * that each partition is also 4KB aligned. Non-aligned partitions adversely
  2991 + * affects performance.
  2992 + *
  2993 + * @dev Pointer to the block_device strucutre.
  2994 + * @geo Pointer to a hd_geometry structure.
  2995 + *
  2996 + * return value
  2997 + * 0 Operation completed successfully.
  2998 + * -ENOTTY An error occurred while reading the drive capacity.
  2999 + */
  3000 +static int mtip_block_getgeo(struct block_device *dev,
  3001 + struct hd_geometry *geo)
  3002 +{
  3003 + struct driver_data *dd = dev->bd_disk->private_data;
  3004 + sector_t capacity;
  3005 +
  3006 + if (!dd)
  3007 + return -ENOTTY;
  3008 +
  3009 + if (!(mtip_hw_get_capacity(dd, &capacity))) {
  3010 + dev_warn(&dd->pdev->dev,
  3011 + "Could not get drive capacity.\n");
  3012 + return -ENOTTY;
  3013 + }
  3014 +
  3015 + geo->heads = 224;
  3016 + geo->sectors = 56;
  3017 + sector_div(capacity, (geo->heads * geo->sectors));
  3018 + geo->cylinders = capacity;
  3019 + return 0;
  3020 +}
  3021 +
  3022 +/*
  3023 + * Block device operation function.
  3024 + *
  3025 + * This structure contains pointers to the functions required by the block
  3026 + * layer.
  3027 + */
  3028 +static const struct block_device_operations mtip_block_ops = {
  3029 + .ioctl = mtip_block_ioctl,
  3030 +#ifdef CONFIG_COMPAT
  3031 + .compat_ioctl = mtip_block_compat_ioctl,
  3032 +#endif
  3033 + .getgeo = mtip_block_getgeo,
  3034 + .owner = THIS_MODULE
  3035 +};
  3036 +
  3037 +/*
  3038 + * Block layer make request function.
  3039 + *
  3040 + * This function is called by the kernel to process a BIO for
  3041 + * the P320 device.
  3042 + *
  3043 + * @queue Pointer to the request queue. Unused other than to obtain
  3044 + * the driver data structure.
  3045 + * @bio Pointer to the BIO.
  3046 + *
  3047 + */
  3048 +static void mtip_make_request(struct request_queue *queue, struct bio *bio)
  3049 +{
  3050 + struct driver_data *dd = queue->queuedata;
  3051 + struct scatterlist *sg;
  3052 + struct bio_vec *bvec;
  3053 + int nents = 0;
  3054 + int tag = 0;
  3055 +
  3056 + if (unlikely(!bio_has_data(bio))) {
  3057 + blk_queue_flush(queue, 0);
  3058 + bio_endio(bio, 0);
  3059 + return;
  3060 + }
  3061 +
  3062 + sg = mtip_hw_get_scatterlist(dd, &tag);
  3063 + if (likely(sg != NULL)) {
  3064 + blk_queue_bounce(queue, &bio);
  3065 +
  3066 + if (unlikely((bio)->bi_vcnt > MTIP_MAX_SG)) {
  3067 + dev_warn(&dd->pdev->dev,
  3068 + "Maximum number of SGL entries exceeded");
  3069 + bio_io_error(bio);
  3070 + mtip_hw_release_scatterlist(dd, tag);
  3071 + return;
  3072 + }
  3073 +
  3074 + /* Create the scatter list for this bio. */
  3075 + bio_for_each_segment(bvec, bio, nents) {
  3076 + sg_set_page(&sg[nents],
  3077 + bvec->bv_page,
  3078 + bvec->bv_len,
  3079 + bvec->bv_offset);
  3080 + }
  3081 +
  3082 + /* Issue the read/write. */
  3083 + mtip_hw_submit_io(dd,
  3084 + bio->bi_sector,
  3085 + bio_sectors(bio),
  3086 + nents,
  3087 + tag,
  3088 + bio_endio,
  3089 + bio,
  3090 + bio->bi_rw & REQ_FUA,
  3091 + bio_data_dir(bio));
  3092 + } else
  3093 + bio_io_error(bio);
  3094 +}
  3095 +
  3096 +/*
  3097 + * Block layer initialization function.
  3098 + *
  3099 + * This function is called once by the PCI layer for each P320
  3100 + * device that is connected to the system.
  3101 + *
  3102 + * @dd Pointer to the driver data structure.
  3103 + *
  3104 + * return value
  3105 + * 0 on success else an error code.
  3106 + */
  3107 +static int mtip_block_initialize(struct driver_data *dd)
  3108 +{
  3109 + int rv = 0, wait_for_rebuild = 0;
  3110 + sector_t capacity;
  3111 + unsigned int index = 0;
  3112 + struct kobject *kobj;
  3113 + unsigned char thd_name[16];
  3114 +
  3115 + if (dd->disk)
  3116 + goto skip_create_disk; /* hw init done, before rebuild */
  3117 +
  3118 + /* Initialize the protocol layer. */
  3119 + wait_for_rebuild = mtip_hw_init(dd);
  3120 + if (wait_for_rebuild < 0) {
  3121 + dev_err(&dd->pdev->dev,
  3122 + "Protocol layer initialization failed\n");
  3123 + rv = -EINVAL;
  3124 + goto protocol_init_error;
  3125 + }
  3126 +
  3127 + dd->disk = alloc_disk(MTIP_MAX_MINORS);
  3128 + if (dd->disk == NULL) {
  3129 + dev_err(&dd->pdev->dev,
  3130 + "Unable to allocate gendisk structure\n");
  3131 + rv = -EINVAL;
  3132 + goto alloc_disk_error;
  3133 + }
  3134 +
  3135 + /* Generate the disk name, implemented same as in sd.c */
  3136 + do {
  3137 + if (!ida_pre_get(&rssd_index_ida, GFP_KERNEL))
  3138 + goto ida_get_error;
  3139 +
  3140 + spin_lock(&rssd_index_lock);
  3141 + rv = ida_get_new(&rssd_index_ida, &index);
  3142 + spin_unlock(&rssd_index_lock);
  3143 + } while (rv == -EAGAIN);
  3144 +
  3145 + if (rv)
  3146 + goto ida_get_error;
  3147 +
  3148 + rv = rssd_disk_name_format("rssd",
  3149 + index,
  3150 + dd->disk->disk_name,
  3151 + DISK_NAME_LEN);
  3152 + if (rv)
  3153 + goto disk_index_error;
  3154 +
  3155 + dd->disk->driverfs_dev = &dd->pdev->dev;
  3156 + dd->disk->major = dd->major;
  3157 + dd->disk->first_minor = dd->instance * MTIP_MAX_MINORS;
  3158 + dd->disk->fops = &mtip_block_ops;
  3159 + dd->disk->private_data = dd;
  3160 + dd->index = index;
  3161 +
  3162 + /*
  3163 + * if rebuild pending, start the service thread, and delay the block
  3164 + * queue creation and add_disk()
  3165 + */
  3166 + if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
  3167 + goto start_service_thread;
  3168 +
  3169 +skip_create_disk:
  3170 + /* Allocate the request queue. */
  3171 + dd->queue = blk_alloc_queue(GFP_KERNEL);
  3172 + if (dd->queue == NULL) {
  3173 + dev_err(&dd->pdev->dev,
  3174 + "Unable to allocate request queue\n");
  3175 + rv = -ENOMEM;
  3176 + goto block_queue_alloc_init_error;
  3177 + }
  3178 +
  3179 + /* Attach our request function to the request queue. */
  3180 + blk_queue_make_request(dd->queue, mtip_make_request);
  3181 +
  3182 + dd->disk->queue = dd->queue;
  3183 + dd->queue->queuedata = dd;
  3184 +
  3185 + /* Set device limits. */
  3186 + set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags);
  3187 + blk_queue_max_segments(dd->queue, MTIP_MAX_SG);
  3188 + blk_queue_physical_block_size(dd->queue, 4096);
  3189 + blk_queue_io_min(dd->queue, 4096);
  3190 + blk_queue_flush(dd->queue, 0);
  3191 +
  3192 + /* Set the capacity of the device in 512 byte sectors. */
  3193 + if (!(mtip_hw_get_capacity(dd, &capacity))) {
  3194 + dev_warn(&dd->pdev->dev,
  3195 + "Could not read drive capacity\n");
  3196 + rv = -EIO;
  3197 + goto read_capacity_error;
  3198 + }
  3199 + set_capacity(dd->disk, capacity);
  3200 +
  3201 + /* Enable the block device and add it to /dev */
  3202 + add_disk(dd->disk);
  3203 +
  3204 + /*
  3205 + * Now that the disk is active, initialize any sysfs attributes
  3206 + * managed by the protocol layer.
  3207 + */
  3208 + kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
  3209 + if (kobj) {
  3210 + mtip_hw_sysfs_init(dd, kobj);
  3211 + kobject_put(kobj);
  3212 + }
  3213 +
  3214 + if (dd->mtip_svc_handler)
  3215 + return rv; /* service thread created for handling rebuild */
  3216 +
  3217 +start_service_thread:
  3218 + sprintf(thd_name, "mtip_svc_thd_%02d", index);
  3219 +
  3220 + dd->mtip_svc_handler = kthread_run(mtip_service_thread,
  3221 + dd, thd_name);
  3222 +
  3223 + if (IS_ERR(dd->mtip_svc_handler)) {
  3224 + printk(KERN_ERR "mtip32xx: service thread failed to start\n");
  3225 + dd->mtip_svc_handler = NULL;
  3226 + rv = -EFAULT;
  3227 + goto kthread_run_error;
  3228 + }
  3229 +
  3230 + return rv;
  3231 +
  3232 +kthread_run_error:
  3233 + /* Delete our gendisk. This also removes the device from /dev */
  3234 + del_gendisk(dd->disk);
  3235 +
  3236 +read_capacity_error:
  3237 + blk_cleanup_queue(dd->queue);
  3238 +
  3239 +block_queue_alloc_init_error:
  3240 +disk_index_error:
  3241 + spin_lock(&rssd_index_lock);
  3242 + ida_remove(&rssd_index_ida, index);
  3243 + spin_unlock(&rssd_index_lock);
  3244 +
  3245 +ida_get_error:
  3246 + put_disk(dd->disk);
  3247 +
  3248 +alloc_disk_error:
  3249 + mtip_hw_exit(dd); /* De-initialize the protocol layer. */
  3250 +
  3251 +protocol_init_error:
  3252 + return rv;
  3253 +}
  3254 +
  3255 +/*
  3256 + * Block layer deinitialization function.
  3257 + *
  3258 + * Called by the PCI layer as each P320 device is removed.
  3259 + *
  3260 + * @dd Pointer to the driver data structure.
  3261 + *
  3262 + * return value
  3263 + * 0
  3264 + */
  3265 +static int mtip_block_remove(struct driver_data *dd)
  3266 +{
  3267 + struct kobject *kobj;
  3268 +
  3269 + if (dd->mtip_svc_handler) {
  3270 + set_bit(MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT, &dd->port->flags);
  3271 + wake_up_interruptible(&dd->port->svc_wait);
  3272 + kthread_stop(dd->mtip_svc_handler);
  3273 + }
  3274 +
  3275 + /* Clean up the sysfs attributes managed by the protocol layer. */
  3276 + kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
  3277 + if (kobj) {
  3278 + mtip_hw_sysfs_exit(dd, kobj);
  3279 + kobject_put(kobj);
  3280 + }
  3281 +
  3282 + /*
  3283 + * Delete our gendisk structure. This also removes the device
  3284 + * from /dev
  3285 + */
  3286 + del_gendisk(dd->disk);
  3287 + blk_cleanup_queue(dd->queue);
  3288 + dd->disk = NULL;
  3289 + dd->queue = NULL;
  3290 +
  3291 + /* De-initialize the protocol layer. */
  3292 + mtip_hw_exit(dd);
  3293 +
  3294 + return 0;
  3295 +}
  3296 +
  3297 +/*
  3298 + * Function called by the PCI layer when just before the
  3299 + * machine shuts down.
  3300 + *
  3301 + * If a protocol layer shutdown function is present it will be called
  3302 + * by this function.
  3303 + *
  3304 + * @dd Pointer to the driver data structure.
  3305 + *
  3306 + * return value
  3307 + * 0
  3308 + */
  3309 +static int mtip_block_shutdown(struct driver_data *dd)
  3310 +{
  3311 + dev_info(&dd->pdev->dev,
  3312 + "Shutting down %s ...\n", dd->disk->disk_name);
  3313 +
  3314 + /* Delete our gendisk structure, and cleanup the blk queue. */
  3315 + del_gendisk(dd->disk);
  3316 + blk_cleanup_queue(dd->queue);
  3317 + dd->disk = NULL;
  3318 + dd->queue = NULL;
  3319 +
  3320 + mtip_hw_shutdown(dd);
  3321 + return 0;
  3322 +}
  3323 +
  3324 +static int mtip_block_suspend(struct driver_data *dd)
  3325 +{
  3326 + dev_info(&dd->pdev->dev,
  3327 + "Suspending %s ...\n", dd->disk->disk_name);
  3328 + mtip_hw_suspend(dd);
  3329 + return 0;
  3330 +}
  3331 +
  3332 +static int mtip_block_resume(struct driver_data *dd)
  3333 +{
  3334 + dev_info(&dd->pdev->dev, "Resuming %s ...\n",
  3335 + dd->disk->disk_name);
  3336 + mtip_hw_resume(dd);
  3337 + return 0;
  3338 +}
  3339 +
  3340 +/*
  3341 + * Called for each supported PCI device detected.
  3342 + *
  3343 + * This function allocates the private data structure, enables the
  3344 + * PCI device and then calls the block layer initialization function.
  3345 + *
  3346 + * return value
  3347 + * 0 on success else an error code.
  3348 + */
  3349 +static int mtip_pci_probe(struct pci_dev *pdev,
  3350 + const struct pci_device_id *ent)
  3351 +{
  3352 + int rv = 0;
  3353 + struct driver_data *dd = NULL;
  3354 +
  3355 + /* Allocate memory for this devices private data. */
  3356 + dd = kzalloc(sizeof(struct driver_data), GFP_KERNEL);
  3357 + if (dd == NULL) {
  3358 + dev_err(&pdev->dev,
  3359 + "Unable to allocate memory for driver data\n");
  3360 + return -ENOMEM;
  3361 + }
  3362 +
  3363 + /* Set the atomic variable as 1 in case of SRSI */
  3364 + atomic_set(&dd->drv_cleanup_done, true);
  3365 +
  3366 + atomic_set(&dd->resumeflag, false);
  3367 +
  3368 + /* Attach the private data to this PCI device. */
  3369 + pci_set_drvdata(pdev, dd);
  3370 +
  3371 + rv = pcim_enable_device(pdev);
  3372 + if (rv < 0) {
  3373 + dev_err(&pdev->dev, "Unable to enable device\n");
  3374 + goto iomap_err;
  3375 + }
  3376 +
  3377 + /* Map BAR5 to memory. */
  3378 + rv = pcim_iomap_regions(pdev, 1 << MTIP_ABAR, MTIP_DRV_NAME);
  3379 + if (rv < 0) {
  3380 + dev_err(&pdev->dev, "Unable to map regions\n");
  3381 + goto iomap_err;
  3382 + }
  3383 +
  3384 + if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
  3385 + rv = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
  3386 +
  3387 + if (rv) {
  3388 + rv = pci_set_consistent_dma_mask(pdev,
  3389 + DMA_BIT_MASK(32));
  3390 + if (rv) {
  3391 + dev_warn(&pdev->dev,
  3392 + "64-bit DMA enable failed\n");
  3393 + goto setmask_err;
  3394 + }
  3395 + }
  3396 + }
  3397 +
  3398 + pci_set_master(pdev);
  3399 +
  3400 + if (pci_enable_msi(pdev)) {
  3401 + dev_warn(&pdev->dev,
  3402 + "Unable to enable MSI interrupt.\n");
  3403 + goto block_initialize_err;
  3404 + }
  3405 +
  3406 + /* Copy the info we may need later into the private data structure. */
  3407 + dd->major = mtip_major;
  3408 + dd->instance = instance;
  3409 + dd->pdev = pdev;
  3410 +
  3411 + /* Initialize the block layer. */
  3412 + rv = mtip_block_initialize(dd);
  3413 + if (rv < 0) {
  3414 + dev_err(&pdev->dev,
  3415 + "Unable to initialize block layer\n");
  3416 + goto block_initialize_err;
  3417 + }
  3418 +
  3419 + /*
  3420 + * Increment the instance count so that each device has a unique
  3421 + * instance number.
  3422 + */
  3423 + instance++;
  3424 +
  3425 + goto done;
  3426 +
  3427 +block_initialize_err:
  3428 + pci_disable_msi(pdev);
  3429 +
  3430 +setmask_err:
  3431 + pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
  3432 +
  3433 +iomap_err:
  3434 + kfree(dd);
  3435 + pci_set_drvdata(pdev, NULL);
  3436 + return rv;
  3437 +done:
  3438 + /* Set the atomic variable as 0 in case of SRSI */
  3439 + atomic_set(&dd->drv_cleanup_done, true);
  3440 +
  3441 + return rv;
  3442 +}
  3443 +
  3444 +/*
  3445 + * Called for each probed device when the device is removed or the
  3446 + * driver is unloaded.
  3447 + *
  3448 + * return value
  3449 + * None
  3450 + */
  3451 +static void mtip_pci_remove(struct pci_dev *pdev)
  3452 +{
  3453 + struct driver_data *dd = pci_get_drvdata(pdev);
  3454 + int counter = 0;
  3455 +
  3456 + if (mtip_check_surprise_removal(pdev)) {
  3457 + while (atomic_read(&dd->drv_cleanup_done) == false) {
  3458 + counter++;
  3459 + msleep(20);
  3460 + if (counter == 10) {
  3461 + /* Cleanup the outstanding commands */
  3462 + mtip_command_cleanup(dd);
  3463 + break;
  3464 + }
  3465 + }
  3466 + }
  3467 + /* Set the atomic variable as 1 in case of SRSI */
  3468 + atomic_set(&dd->drv_cleanup_done, true);
  3469 +
  3470 + /* Clean up the block layer. */
  3471 + mtip_block_remove(dd);
  3472 +
  3473 + pci_disable_msi(pdev);
  3474 +
  3475 + kfree(dd);
  3476 + pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
  3477 +}
  3478 +
  3479 +/*
  3480 + * Called for each probed device when the device is suspended.
  3481 + *
  3482 + * return value
  3483 + * 0 Success
  3484 + * <0 Error
  3485 + */
  3486 +static int mtip_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
  3487 +{
  3488 + int rv = 0;
  3489 + struct driver_data *dd = pci_get_drvdata(pdev);
  3490 +
  3491 + if (!dd) {
  3492 + dev_err(&pdev->dev,
  3493 + "Driver private datastructure is NULL\n");
  3494 + return -EFAULT;
  3495 + }
  3496 +
  3497 + atomic_set(&dd->resumeflag, true);
  3498 +
  3499 + /* Disable ports & interrupts then send standby immediate */
  3500 + rv = mtip_block_suspend(dd);
  3501 + if (rv < 0) {
  3502 + dev_err(&pdev->dev,
  3503 + "Failed to suspend controller\n");
  3504 + return rv;
  3505 + }
  3506 +
  3507 + /*
  3508 + * Save the pci config space to pdev structure &
  3509 + * disable the device
  3510 + */
  3511 + pci_save_state(pdev);
  3512 + pci_disable_device(pdev);
  3513 +
  3514 + /* Move to Low power state*/
  3515 + pci_set_power_state(pdev, PCI_D3hot);
  3516 +
  3517 + return rv;
  3518 +}
  3519 +
  3520 +/*
  3521 + * Called for each probed device when the device is resumed.
  3522 + *
  3523 + * return value
  3524 + * 0 Success
  3525 + * <0 Error
  3526 + */
  3527 +static int mtip_pci_resume(struct pci_dev *pdev)
  3528 +{
  3529 + int rv = 0;
  3530 + struct driver_data *dd;
  3531 +
  3532 + dd = pci_get_drvdata(pdev);
  3533 + if (!dd) {
  3534 + dev_err(&pdev->dev,
  3535 + "Driver private datastructure is NULL\n");
  3536 + return -EFAULT;
  3537 + }
  3538 +
  3539 + /* Move the device to active State */
  3540 + pci_set_power_state(pdev, PCI_D0);
  3541 +
  3542 + /* Restore PCI configuration space */
  3543 + pci_restore_state(pdev);
  3544 +
  3545 + /* Enable the PCI device*/
  3546 + rv = pcim_enable_device(pdev);
  3547 + if (rv < 0) {
  3548 + dev_err(&pdev->dev,
  3549 + "Failed to enable card during resume\n");
  3550 + goto err;
  3551 + }
  3552 + pci_set_master(pdev);
  3553 +
  3554 + /*
  3555 + * Calls hbaReset, initPort, & startPort function
  3556 + * then enables interrupts
  3557 + */
  3558 + rv = mtip_block_resume(dd);
  3559 + if (rv < 0)
  3560 + dev_err(&pdev->dev, "Unable to resume\n");
  3561 +
  3562 +err:
  3563 + atomic_set(&dd->resumeflag, false);
  3564 +
  3565 + return rv;
  3566 +}
  3567 +
  3568 +/*
  3569 + * Shutdown routine
  3570 + *
  3571 + * return value
  3572 + * None
  3573 + */
  3574 +static void mtip_pci_shutdown(struct pci_dev *pdev)
  3575 +{
  3576 + struct driver_data *dd = pci_get_drvdata(pdev);
  3577 + if (dd)
  3578 + mtip_block_shutdown(dd);
  3579 +}
  3580 +
  3581 +/* Table of device ids supported by this driver. */
  3582 +static DEFINE_PCI_DEVICE_TABLE(mtip_pci_tbl) = {
  3583 + { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320_DEVICE_ID) },
  3584 + { 0 }
  3585 +};
  3586 +
  3587 +/* Structure that describes the PCI driver functions. */
  3588 +static struct pci_driver mtip_pci_driver = {
  3589 + .name = MTIP_DRV_NAME,
  3590 + .id_table = mtip_pci_tbl,
  3591 + .probe = mtip_pci_probe,
  3592 + .remove = mtip_pci_remove,
  3593 + .suspend = mtip_pci_suspend,
  3594 + .resume = mtip_pci_resume,
  3595 + .shutdown = mtip_pci_shutdown,
  3596 +};
  3597 +
  3598 +MODULE_DEVICE_TABLE(pci, mtip_pci_tbl);
  3599 +
  3600 +/*
  3601 + * Module initialization function.
  3602 + *
  3603 + * Called once when the module is loaded. This function allocates a major
  3604 + * block device number to the Cyclone devices and registers the PCI layer
  3605 + * of the driver.
  3606 + *
  3607 + * Return value
  3608 + * 0 on success else error code.
  3609 + */
  3610 +static int __init mtip_init(void)
  3611 +{
  3612 + printk(KERN_INFO MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n");
  3613 +
  3614 + /* Allocate a major block device number to use with this driver. */
  3615 + mtip_major = register_blkdev(0, MTIP_DRV_NAME);
  3616 + if (mtip_major < 0) {
  3617 + printk(KERN_ERR "Unable to register block device (%d)\n",
  3618 + mtip_major);
  3619 + return -EBUSY;
  3620 + }
  3621 +
  3622 + /* Register our PCI operations. */
  3623 + return pci_register_driver(&mtip_pci_driver);
  3624 +}
  3625 +
  3626 +/*
  3627 + * Module de-initialization function.
  3628 + *
  3629 + * Called once when the module is unloaded. This function deallocates
  3630 + * the major block device number allocated by mtip_init() and
  3631 + * unregisters the PCI layer of the driver.
  3632 + *
  3633 + * Return value
  3634 + * none
  3635 + */
  3636 +static void __exit mtip_exit(void)
  3637 +{
  3638 + /* Release the allocated major block device number. */
  3639 + unregister_blkdev(mtip_major, MTIP_DRV_NAME);
  3640 +
  3641 + /* Unregister the PCI driver. */
  3642 + pci_unregister_driver(&mtip_pci_driver);
  3643 +}
  3644 +
  3645 +MODULE_AUTHOR("Micron Technology, Inc");
  3646 +MODULE_DESCRIPTION("Micron RealSSD PCIe Block Driver");
  3647 +MODULE_LICENSE("GPL");
  3648 +MODULE_VERSION(MTIP_DRV_VERSION);
  3649 +
  3650 +module_init(mtip_init);
  3651 +module_exit(mtip_exit);
drivers/block/mtip32xx/mtip32xx.h
  1 +/*
  2 + * mtip32xx.h - Header file for the P320 SSD Block Driver
  3 + * Copyright (C) 2011 Micron Technology, Inc.
  4 + *
  5 + * Portions of this code were derived from works subjected to the
  6 + * following copyright:
  7 + * Copyright (C) 2009 Integrated Device Technology, Inc.
  8 + *
  9 + * This program is free software; you can redistribute it and/or modify
  10 + * it under the terms of the GNU General Public License as published by
  11 + * the Free Software Foundation; either version 2 of the License, or
  12 + * (at your option) any later version.
  13 + *
  14 + * This program is distributed in the hope that it will be useful,
  15 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17 + * GNU General Public License for more details.
  18 + *
  19 + */
  20 +
  21 +#ifndef __MTIP32XX_H__
  22 +#define __MTIP32XX_H__
  23 +
  24 +#include <linux/spinlock.h>
  25 +#include <linux/rwsem.h>
  26 +#include <linux/ata.h>
  27 +#include <linux/interrupt.h>
  28 +#include <linux/genhd.h>
  29 +#include <linux/version.h>
  30 +
  31 +/* Offset of Subsystem Device ID in PCI configuration space */
  32 +#define PCI_SUBSYSTEM_DEVICEID 0x2E
  33 +
  34 +/* Offset of Device Control register in PCIe extended capabilities space */
  35 +#define PCIE_CONFIG_EXT_DEVICE_CONTROL_OFFSET 0x48
  36 +
  37 +/* # of times to retry timed out IOs */
  38 +#define MTIP_MAX_RETRIES 5
  39 +
  40 +/* Various timeout values in ms */
  41 +#define MTIP_NCQ_COMMAND_TIMEOUT_MS 5000
  42 +#define MTIP_IOCTL_COMMAND_TIMEOUT_MS 5000
  43 +#define MTIP_INTERNAL_COMMAND_TIMEOUT_MS 5000
  44 +
  45 +/* check for timeouts every 500ms */
  46 +#define MTIP_TIMEOUT_CHECK_PERIOD 500
  47 +
  48 +/* ftl rebuild */
  49 +#define MTIP_FTL_REBUILD_OFFSET 142
  50 +#define MTIP_FTL_REBUILD_MAGIC 0xED51
  51 +#define MTIP_FTL_REBUILD_TIMEOUT_MS 2400000
  52 +
  53 +/* Macro to extract the tag bit number from a tag value. */
  54 +#define MTIP_TAG_BIT(tag) ((tag) & 0x1F)
  55 +
  56 +/*
  57 + * Macro to extract the tag index from a tag value. The index
  58 + * is used to access the correct s_active/Command Issue register based
  59 + * on the tag value.
  60 + */
  61 +#define MTIP_TAG_INDEX(tag) ((tag) >> 5)
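/*
 * Illustrative sketch (editorial note, not part of this commit): for
 * tag 37, MTIP_TAG_INDEX(37) == 1 and MTIP_TAG_BIT(37) == 5, so issuing
 * that command sets bit 5 of the second Command Issue register, roughly:
 *
 *	writel(1 << MTIP_TAG_BIT(tag), port->cmd_issue[MTIP_TAG_INDEX(tag)]);
 */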
  62 +
  63 +/*
  64 + * Maximum number of scatter gather entries
  65 + * a single command may have.
  66 + */
  67 +#define MTIP_MAX_SG 128
  68 +
  69 +/*
  70 + * Maximum number of slot groups (Command Issue & s_active registers)
  71 + * NOTE: This is the driver maximum; check dd->slot_groups for actual value.
  72 + */
  73 +#define MTIP_MAX_SLOT_GROUPS 8
  74 +
  75 +/* Internal command tag. */
  76 +#define MTIP_TAG_INTERNAL 0
  77 +
  78 +/* Micron Vendor ID & P320x SSD Device ID */
  79 +#define PCI_VENDOR_ID_MICRON 0x1344
  80 +#define P320_DEVICE_ID 0x5150
  81 +
  82 +/* Driver name and version strings */
  83 +#define MTIP_DRV_NAME "mtip32xx"
  84 +#define MTIP_DRV_VERSION "1.2.6os3"
  85 +
  86 +/* Maximum number of minor device numbers per device. */
  87 +#define MTIP_MAX_MINORS 16
  88 +
  89 +/* Maximum number of supported command slots. */
  90 +#define MTIP_MAX_COMMAND_SLOTS (MTIP_MAX_SLOT_GROUPS * 32)
  91 +
  92 +/*
  93 + * Per-tag bitfield size in longs.
  94 + * Linux bit manipulation functions
  95 + * (e.g. test_and_set_bit, find_next_zero_bit)
  96 + * manipulate memory in longs, so we try to make the math work.
  97 + * Take the slot groups and find the number of longs, rounding up.
  98 + * Careful! i386 and x86_64 use different size longs!
  99 + */
  100 +#define U32_PER_LONG (sizeof(long) / sizeof(u32))
  101 +#define SLOTBITS_IN_LONGS ((MTIP_MAX_SLOT_GROUPS + \
  102 + (U32_PER_LONG-1))/U32_PER_LONG)
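/*
 * Worked example (editorial note, not part of this commit): with
 * MTIP_MAX_SLOT_GROUPS == 8, U32_PER_LONG is 1 on i386 (4-byte long),
 * giving SLOTBITS_IN_LONGS == 8; on x86_64 (8-byte long) it is 2,
 * giving (8 + 1) / 2 == 4. Either way the bitfield spans all
 * 8 * 32 == 256 possible tags.
 */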
  103 +
  104 +/* BAR number used to access the HBA registers. */
  105 +#define MTIP_ABAR 5
  106 +
  107 +/* Forced Unit Access Bit */
  108 +#define FUA_BIT 0x80
  109 +
  110 +#ifdef DEBUG
  111 + #define dbg_printk(format, arg...) \
  112 + printk(pr_fmt(format), ##arg)
  113 +#else
  114 + #define dbg_printk(format, arg...)
  115 +#endif
  116 +
  117 +#define __force_bit2int (unsigned int __force)
  118 +
  119 +/* below are bit numbers in 'flags' defined in mtip_port */
  120 +#define MTIP_FLAG_IC_ACTIVE_BIT 0
  121 +#define MTIP_FLAG_EH_ACTIVE_BIT 1
  122 +#define MTIP_FLAG_SVC_THD_ACTIVE_BIT 2
  123 +#define MTIP_FLAG_ISSUE_CMDS_BIT 4
  124 +#define MTIP_FLAG_REBUILD_BIT 5
  125 +#define MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT 8
  126 +
  127 +/* Register Frame Information Structure (FIS), host to device. */
  128 +struct host_to_dev_fis {
  129 + /*
  130 + * FIS type.
  131 + * - 27h Register FIS, host to device.
  132 + * - 34h Register FIS, device to host.
  133 + * - 39h DMA Activate FIS, device to host.
  134 + * - 41h DMA Setup FIS, bi-directional.
  135 + * - 46h Data FIS, bi-directional.
  136 + * - 58h BIST Activate FIS, bi-directional.
  137 + * - 5Fh PIO Setup FIS, device to host.
  138 + * - A1h Set Device Bits FIS, device to host.
  139 + */
  140 + unsigned char type;
  141 + unsigned char opts;
  142 + unsigned char command;
  143 + unsigned char features;
  144 +
  145 + union {
  146 + unsigned char lba_low;
  147 + unsigned char sector;
  148 + };
  149 + union {
  150 + unsigned char lba_mid;
  151 + unsigned char cyl_low;
  152 + };
  153 + union {
  154 + unsigned char lba_hi;
  155 + unsigned char cyl_hi;
  156 + };
  157 + union {
  158 + unsigned char device;
  159 + unsigned char head;
  160 + };
  161 +
  162 + union {
  163 + unsigned char lba_low_ex;
  164 + unsigned char sector_ex;
  165 + };
  166 + union {
  167 + unsigned char lba_mid_ex;
  168 + unsigned char cyl_low_ex;
  169 + };
  170 + union {
  171 + unsigned char lba_hi_ex;
  172 + unsigned char cyl_hi_ex;
  173 + };
  174 + unsigned char features_ex;
  175 +
  176 + unsigned char sect_count;
  177 + unsigned char sect_cnt_ex;
  178 + unsigned char res2;
  179 + unsigned char control;
  180 +
  181 + unsigned int res3;
  182 +};
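/*
 * Illustrative sketch (editorial note, not part of this commit): a
 * Register FIS for a 48-bit READ DMA EXT of "nsect" sectors starting at
 * "lba" (both hypothetical locals) might be assembled along these lines;
 * exact option bits are device specific:
 *
 *	struct host_to_dev_fis fis = { 0 };
 *
 *	fis.type        = 0x27;			// Register FIS, host to device
 *	fis.opts        = 1 << 7;		// update the command register
 *	fis.command     = ATA_CMD_READ_EXT;	// READ DMA EXT, from <linux/ata.h>
 *	fis.device      = 1 << 6;		// LBA mode
 *	fis.lba_low     = lba & 0xff;
 *	fis.lba_mid     = (lba >> 8) & 0xff;
 *	fis.lba_hi      = (lba >> 16) & 0xff;
 *	fis.lba_low_ex  = (lba >> 24) & 0xff;
 *	fis.lba_mid_ex  = (lba >> 32) & 0xff;
 *	fis.lba_hi_ex   = (lba >> 40) & 0xff;
 *	fis.sect_count  = nsect & 0xff;
 *	fis.sect_cnt_ex = (nsect >> 8) & 0xff;
 */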
  183 +
  184 +/* Command header structure. */
  185 +struct mtip_cmd_hdr {
  186 + /*
  187 + * Command options.
  188 + * - Bits 31:16 Number of PRD entries.
  189 + * - Bits 15:8 Unused in this implementation.
  190 + * - Bit 7 Prefetch bit, informs the drive to prefetch PRD entries.
  191 + * - Bit 6 Write bit, should be set when writing data to the device.
  192 + * - Bit 5 Unused in this implementation.
  193 + * - Bits 4:0 Length of the command FIS in DWords (DWord = 4 bytes).
  194 + */
  195 + unsigned int opts;
  196 + /* This field is unused when using NCQ. */
  197 + union {
  198 + unsigned int byte_count;
  199 + unsigned int status;
  200 + };
  201 + /*
  202 + * Lower 32 bits of the command table address associated with this
  203 + * header. The command table addresses must be 128 byte aligned.
  204 + */
  205 + unsigned int ctba;
  206 + /*
  207 + * If 64 bit addressing is used this field is the upper 32 bits
  208 + * of the command table address associated with this command.
  209 + */
  210 + unsigned int ctbau;
  211 + /* Reserved and unused. */
  212 + unsigned int res[4];
  213 +};
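/*
 * Illustrative sketch (editorial note, not part of this commit): for a
 * write built from "nents" PRD entries (hypothetical local) and the
 * 5-DWord Register FIS above (20 bytes / 4), the options field packs
 * roughly as:
 *
 *	hdr->opts = ((unsigned int)nents << 16) |	// bits 31:16, PRD count
 *		    (1 << 6) |				// bit 6, write direction
 *		    5;					// bits 4:0, FIS length in DWords
 */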
  214 +
  215 +/* Command scatter gather structure (PRD). */
  216 +struct mtip_cmd_sg {
  217 + /*
  218 + * Low 32 bits of the data buffer address. For P320 this
  219 + * address must be 8 byte aligned signified by bits 2:0 being
  220 + * set to 0.
  221 + */
  222 + unsigned int dba;
  223 + /*
  224 + * When 64 bit addressing is used this field is the upper
  225 + * 32 bits of the data buffer address.
  226 + */
  227 + unsigned int dba_upper;
  228 + /* Unused. */
  229 + unsigned int reserved;
  230 + /*
  231 + * Bit 31: interrupt when this data block has been transferred.
  232 + * Bits 30..22: reserved
  233 + * Bits 21..0: byte count (minus 1). For P320 the byte count must be
  234 + * 8 byte aligned signified by bits 2:0 being set to 1.
  235 + */
  236 + unsigned int info;
  237 +};
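/*
 * Illustrative sketch (editorial note, not part of this commit):
 * filling one PRD entry "prd" from a DMA-mapped scatterlist element
 * "sgel" (both hypothetical locals) could look like:
 *
 *	prd->info      = 0x3fffff & (sg_dma_len(sgel) - 1); // byte count - 1
 *	prd->dba       = (unsigned int)sg_dma_address(sgel);
 *	prd->dba_upper = (unsigned int)(sg_dma_address(sgel) >> 32);
 */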
  238 +struct mtip_port;
  239 +
  240 +/* Structure used to describe a command. */
  241 +struct mtip_cmd {
  242 +
  243 + struct mtip_cmd_hdr *command_header; /* ptr to command header entry */
  244 +
  245 + dma_addr_t command_header_dma; /* corresponding physical address */
  246 +
  247 + void *command; /* ptr to command table entry */
  248 +
  249 + dma_addr_t command_dma; /* corresponding physical address */
  250 +
  251 + void *comp_data; /* data passed to completion function comp_func() */
  252 + /*
  253 + * Completion function called by the ISR upon completion of
  254 + * a command.
  255 + */
  256 + void (*comp_func)(struct mtip_port *port,
  257 + int tag,
  258 + void *data,
  259 + int status);
  260 + /* Additional callback function that may be called by comp_func() */
  261 + void (*async_callback)(void *data, int status);
  262 +
  263 + void *async_data; /* Addl. data passed to async_callback() */
  264 +
  265 + int scatter_ents; /* Number of scatter list entries used */
  266 +
  267 + struct scatterlist sg[MTIP_MAX_SG]; /* Scatter list entries */
  268 +
  269 + int retries; /* The number of retries left for this command. */
  270 +
  271 + int direction; /* Data transfer direction */
  272 +
  273 + unsigned long comp_time; /* command completion time, in jiffies */
  274 +
  275 + atomic_t active; /* indicates whether this command was sent to the drive. */
  276 +};
  277 +
  278 +/* Structure used to describe a port. */
  279 +struct mtip_port {
  280 + /* Pointer back to the driver data for this port. */
  281 + struct driver_data *dd;
  282 + /*
  283 + * Used to determine if the data pointed to by the
  284 + * identify field is valid.
  285 + */
  286 + unsigned long identify_valid;
  287 + /* Base address of the memory mapped IO for the port. */
  288 + void __iomem *mmio;
  289 + /* Array of pointers to the memory mapped s_active registers. */
  290 + void __iomem *s_active[MTIP_MAX_SLOT_GROUPS];
  291 + /* Array of pointers to the memory mapped completed registers. */
  292 + void __iomem *completed[MTIP_MAX_SLOT_GROUPS];
  293 + /* Array of pointers to the memory mapped Command Issue registers. */
  294 + void __iomem *cmd_issue[MTIP_MAX_SLOT_GROUPS];
  295 + /*
  296 + * Pointer to the beginning of the command header memory as used
  297 + * by the driver.
  298 + */
  299 + void *command_list;
  300 + /*
  301 + * Pointer to the beginning of the command header memory as used
  302 + * by the DMA.
  303 + */
  304 + dma_addr_t command_list_dma;
  305 + /*
  306 + * Pointer to the beginning of the RX FIS memory as used
  307 + * by the driver.
  308 + */
  309 + void *rxfis;
  310 + /*
  311 + * Pointer to the beginning of the RX FIS memory as used
  312 + * by the DMA.
  313 + */
  314 + dma_addr_t rxfis_dma;
  315 + /*
  316 + * Pointer to the beginning of the command table memory as used
  317 + * by the driver.
  318 + */
  319 + void *command_table;
  320 + /*
  321 + * Pointer to the beginning of the command table memory as used
  322 + * by the DMA.
  323 + */
  324 + dma_addr_t command_tbl_dma;
  325 + /*
  326 + * Pointer to the beginning of the identify data memory as used
  327 + * by the driver.
  328 + */
  329 + u16 *identify;
  330 + /*
  331 + * Pointer to the beginning of the identify data memory as used
  332 + * by the DMA.
  333 + */
  334 + dma_addr_t identify_dma;
  335 + /*
  336 + * Pointer to the beginning of a sector buffer that is used
  337 + * by the driver when issuing internal commands.
  338 + */
  339 + u16 *sector_buffer;
  340 + /*
  341 + * Pointer to the beginning of a sector buffer that is used
  342 + * by the DMA when the driver issues internal commands.
  343 + */
  344 + dma_addr_t sector_buffer_dma;
  345 + /*
  346 + * Bit significant, used to determine if a command slot has
  347 + * been allocated. i.e. the slot is in use. Bits are cleared
  348 + * when the command slot and all associated data structures
  349 + * are no longer needed.
  350 + */
  351 + unsigned long allocated[SLOTBITS_IN_LONGS];
  352 + /*
  353 + * used to queue commands when an internal command is in progress
  354 + * or error handling is active
  355 + */
  356 + unsigned long cmds_to_issue[SLOTBITS_IN_LONGS];
  357 + /*
  358 + * Array of command slots. Structure includes pointers to the
  359 + * command header and command table, and completion function and data
  360 + * pointers.
  361 + */
  362 + struct mtip_cmd commands[MTIP_MAX_COMMAND_SLOTS];
  363 + /* Used by mtip_service_thread to wait for an event */
  364 + wait_queue_head_t svc_wait;
  365 + /*
  366 + * indicates the state of the port. Also, helps the service thread
  367 + * to determine its action on wake up.
  368 + */
  369 + unsigned long flags;
  370 + /*
  371 + * Timer used to complete commands that have been active for too long.
  372 + */
  373 + struct timer_list cmd_timer;
  374 + /*
  375 + * Semaphore used to block threads if there are no
  376 + * command slots available.
  377 + */
  378 + struct semaphore cmd_slot;
  379 + /* Spinlock for working around command-issue bug. */
  380 + spinlock_t cmd_issue_lock;
  381 +};
  382 +
  383 +/*
  384 + * Driver private data structure.
  385 + *
  386 + * One structure is allocated per probed device.
  387 + */
  388 +struct driver_data {
  389 + void __iomem *mmio; /* Base address of the HBA registers. */
  390 +
  391 + int major; /* Major device number. */
  392 +
  393 + int instance; /* Instance number. First device probed is 0, ... */
  394 +
  395 + struct gendisk *disk; /* Pointer to our gendisk structure. */
  396 +
  397 + struct pci_dev *pdev; /* Pointer to the PCI device structure. */
  398 +
  399 + struct request_queue *queue; /* Our request queue. */
  400 +
  401 + struct mtip_port *port; /* Pointer to the port data structure. */
  402 +
  403 + /* Tasklet used to process the bottom half of the ISR. */
  404 + struct tasklet_struct tasklet;
  405 +
  406 + unsigned product_type; /* magic value declaring the product type */
  407 +
  408 + unsigned slot_groups; /* number of slot groups the product supports */
  409 +
  410 + atomic_t drv_cleanup_done; /* Atomic variable for SRSI */
  411 +
  412 + unsigned long index; /* Index to determine the disk name */
  413 +
  414 + unsigned int ftlrebuildflag; /* FTL rebuild flag */
  415 +
  416 + atomic_t resumeflag; /* Atomic variable to track suspend/resume */
  417 +
  418 + atomic_t eh_active; /* Flag for error handling tracking */
  419 +
  420 + struct task_struct *mtip_svc_handler; /* task_struct of svc thd */
  421 +};
  422 +
  423 +#endif
drivers/block/xen-blkback/blkback.c
... ... @@ -39,9 +39,6 @@
39 39 #include <linux/list.h>
40 40 #include <linux/delay.h>
41 41 #include <linux/freezer.h>
42   -#include <linux/loop.h>
43   -#include <linux/falloc.h>
44   -#include <linux/fs.h>
45 42  
46 43 #include <xen/events.h>
47 44 #include <xen/page.h>
... ... @@ -362,7 +359,7 @@
362 359 {
363 360 struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
364 361 int i;
365   - int nseg = req->nr_segments;
  362 + int nseg = req->u.rw.nr_segments;
366 363 int ret = 0;
367 364  
368 365 /*
369 366  
370 367  
... ... @@ -416,30 +413,25 @@
416 413 return ret;
417 414 }
418 415  
419   -static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req)
  416 +static int dispatch_discard_io(struct xen_blkif *blkif,
  417 + struct blkif_request *req)
420 418 {
421 419 int err = 0;
422 420 int status = BLKIF_RSP_OKAY;
423 421 struct block_device *bdev = blkif->vbd.bdev;
424 422  
425   - if (blkif->blk_backend_type == BLKIF_BACKEND_PHY)
426   - /* just forward the discard request */
  423 + blkif->st_ds_req++;
  424 +
  425 + xen_blkif_get(blkif);
  426 + if (blkif->blk_backend_type == BLKIF_BACKEND_PHY ||
  427 + blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
  428 + unsigned long secure = (blkif->vbd.discard_secure &&
  429 + (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
  430 + BLKDEV_DISCARD_SECURE : 0;
427 431 err = blkdev_issue_discard(bdev,
428 432 req->u.discard.sector_number,
429 433 req->u.discard.nr_sectors,
430   - GFP_KERNEL, 0);
431   - else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
432   - /* punch a hole in the backing file */
433   - struct loop_device *lo = bdev->bd_disk->private_data;
434   - struct file *file = lo->lo_backing_file;
435   -
436   - if (file->f_op->fallocate)
437   - err = file->f_op->fallocate(file,
438   - FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
439   - req->u.discard.sector_number << 9,
440   - req->u.discard.nr_sectors << 9);
441   - else
442   - err = -EOPNOTSUPP;
  434 + GFP_KERNEL, secure);
443 435 } else
444 436 err = -EOPNOTSUPP;
445 437  
... ... @@ -449,7 +441,9 @@
449 441 } else if (err)
450 442 status = BLKIF_RSP_ERROR;
451 443  
452   - make_response(blkif, req->id, req->operation, status);
  444 + make_response(blkif, req->u.discard.id, req->operation, status);
  445 + xen_blkif_put(blkif);
  446 + return err;
453 447 }
454 448  
455 449 static void xen_blk_drain_io(struct xen_blkif *blkif)
... ... @@ -573,8 +567,11 @@
573 567  
574 568 /* Apply all sanity checks to /private copy/ of request. */
575 569 barrier();
576   -
577   - if (dispatch_rw_block_io(blkif, &req, pending_req))
  570 + if (unlikely(req.operation == BLKIF_OP_DISCARD)) {
  571 + free_req(pending_req);
  572 + if (dispatch_discard_io(blkif, &req))
  573 + break;
  574 + } else if (dispatch_rw_block_io(blkif, &req, pending_req))
578 575 break;
579 576  
580 577 /* Yield point for this unbounded loop. */
... ... @@ -633,10 +630,6 @@
633 630 blkif->st_f_req++;
634 631 operation = WRITE_FLUSH;
635 632 break;
636   - case BLKIF_OP_DISCARD:
637   - blkif->st_ds_req++;
638   - operation = REQ_DISCARD;
639   - break;
640 633 default:
641 634 operation = 0; /* make gcc happy */
642 635 goto fail_response;
... ... @@ -644,9 +637,9 @@
644 637 }
645 638  
646 639 /* Check that the number of segments is sane. */
647   - nseg = req->nr_segments;
648   - if (unlikely(nseg == 0 && operation != WRITE_FLUSH &&
649   - operation != REQ_DISCARD) ||
  640 + nseg = req->u.rw.nr_segments;
  641 +
  642 + if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
650 643 unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
651 644 pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
652 645 nseg);
653 646  
... ... @@ -654,12 +647,12 @@
654 647 goto fail_response;
655 648 }
656 649  
657   - preq.dev = req->handle;
  650 + preq.dev = req->u.rw.handle;
658 651 preq.sector_number = req->u.rw.sector_number;
659 652 preq.nr_sects = 0;
660 653  
661 654 pending_req->blkif = blkif;
662   - pending_req->id = req->id;
  655 + pending_req->id = req->u.rw.id;
663 656 pending_req->operation = req->operation;
664 657 pending_req->status = BLKIF_RSP_OKAY;
665 658 pending_req->nr_pages = nseg;
... ... @@ -707,7 +700,7 @@
707 700 * the hypercall to unmap the grants - that is all done in
708 701 * xen_blkbk_unmap.
709 702 */
710   - if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg))
  703 + if (xen_blkbk_map(req, pending_req, seg))
711 704 goto fail_flush;
712 705  
713 706 /*
714 707  
715 708  
... ... @@ -739,23 +732,16 @@
739 732  
740 733 /* This will be hit if the operation was a flush or discard. */
741 734 if (!bio) {
742   - BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD);
  735 + BUG_ON(operation != WRITE_FLUSH);
743 736  
744   - if (operation == WRITE_FLUSH) {
745   - bio = bio_alloc(GFP_KERNEL, 0);
746   - if (unlikely(bio == NULL))
747   - goto fail_put_bio;
  737 + bio = bio_alloc(GFP_KERNEL, 0);
  738 + if (unlikely(bio == NULL))
  739 + goto fail_put_bio;
748 740  
749   - biolist[nbio++] = bio;
750   - bio->bi_bdev = preq.bdev;
751   - bio->bi_private = pending_req;
752   - bio->bi_end_io = end_block_io_op;
753   - } else if (operation == REQ_DISCARD) {
754   - xen_blk_discard(blkif, req);
755   - xen_blkif_put(blkif);
756   - free_req(pending_req);
757   - return 0;
758   - }
  741 + biolist[nbio++] = bio;
  742 + bio->bi_bdev = preq.bdev;
  743 + bio->bi_private = pending_req;
  744 + bio->bi_end_io = end_block_io_op;
759 745 }
760 746  
761 747 /*
... ... @@ -784,7 +770,7 @@
784 770 xen_blkbk_unmap(pending_req);
785 771 fail_response:
786 772 /* Haven't submitted any bio's yet. */
787   - make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
  773 + make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR);
788 774 free_req(pending_req);
789 775 msleep(1); /* back off a bit */
790 776 return -EIO;
drivers/block/xen-blkback/common.h
... ... @@ -60,58 +60,66 @@
60 60 char dummy;
61 61 };
62 62  
63   -/* i386 protocol version */
64   -#pragma pack(push, 4)
65   -
66 63 struct blkif_x86_32_request_rw {
  64 + uint8_t nr_segments; /* number of segments */
  65 + blkif_vdev_t handle; /* only for read/write requests */
  66 + uint64_t id; /* private guest value, echoed in resp */
67 67 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
68 68 struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
69   -};
  69 +} __attribute__((__packed__));
70 70  
71 71 struct blkif_x86_32_request_discard {
  72 + uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
  73 + blkif_vdev_t _pad1; /* was "handle" for read/write requests */
  74 + uint64_t id; /* private guest value, echoed in resp */
72 75 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
73   - uint64_t nr_sectors;
74   -};
  76 + uint64_t nr_sectors;
  77 +} __attribute__((__packed__));
75 78  
76 79 struct blkif_x86_32_request {
77 80 uint8_t operation; /* BLKIF_OP_??? */
78   - uint8_t nr_segments; /* number of segments */
79   - blkif_vdev_t handle; /* only for read/write requests */
80   - uint64_t id; /* private guest value, echoed in resp */
81 81 union {
82 82 struct blkif_x86_32_request_rw rw;
83 83 struct blkif_x86_32_request_discard discard;
84 84 } u;
85   -};
  85 +} __attribute__((__packed__));
  86 +
  87 +/* i386 protocol version */
  88 +#pragma pack(push, 4)
86 89 struct blkif_x86_32_response {
87 90 uint64_t id; /* copied from request */
88 91 uint8_t operation; /* copied from request */
89 92 int16_t status; /* BLKIF_RSP_??? */
90 93 };
91 94 #pragma pack(pop)
92   -
93 95 /* x86_64 protocol version */
94 96  
95 97 struct blkif_x86_64_request_rw {
  98 + uint8_t nr_segments; /* number of segments */
  99 + blkif_vdev_t handle; /* only for read/write requests */
  100 + uint32_t _pad1; /* offsetof(blkif_reqest..,u.rw.id)==8 */
  101 + uint64_t id;
96 102 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
97 103 struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
98   -};
  104 +} __attribute__((__packed__));
99 105  
100 106 struct blkif_x86_64_request_discard {
  107 + uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
  108 + blkif_vdev_t _pad1; /* was "handle" for read/write requests */
  109 + uint32_t _pad2; /* offsetof(blkif_..,u.discard.id)==8 */
  110 + uint64_t id;
101 111 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
102   - uint64_t nr_sectors;
103   -};
  112 + uint64_t nr_sectors;
  113 +} __attribute__((__packed__));
104 114  
105 115 struct blkif_x86_64_request {
106 116 uint8_t operation; /* BLKIF_OP_??? */
107   - uint8_t nr_segments; /* number of segments */
108   - blkif_vdev_t handle; /* only for read/write requests */
109   - uint64_t __attribute__((__aligned__(8))) id;
110 117 union {
111 118 struct blkif_x86_64_request_rw rw;
112 119 struct blkif_x86_64_request_discard discard;
113 120 } u;
114   -};
  121 +} __attribute__((__packed__));
  122 +
115 123 struct blkif_x86_64_response {
116 124 uint64_t __attribute__((__aligned__(8))) id;
117 125 uint8_t operation; /* copied from request */
... ... @@ -156,6 +164,7 @@
156 164 /* Cached size parameter. */
157 165 sector_t size;
158 166 bool flush_support;
  167 + bool discard_secure;
159 168 };
160 169  
161 170 struct backend_info;
162 171  
163 172  
164 173  
... ... @@ -237,22 +246,23 @@
237 246 {
238 247 int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
239 248 dst->operation = src->operation;
240   - dst->nr_segments = src->nr_segments;
241   - dst->handle = src->handle;
242   - dst->id = src->id;
243 249 switch (src->operation) {
244 250 case BLKIF_OP_READ:
245 251 case BLKIF_OP_WRITE:
246 252 case BLKIF_OP_WRITE_BARRIER:
247 253 case BLKIF_OP_FLUSH_DISKCACHE:
  254 + dst->u.rw.nr_segments = src->u.rw.nr_segments;
  255 + dst->u.rw.handle = src->u.rw.handle;
  256 + dst->u.rw.id = src->u.rw.id;
248 257 dst->u.rw.sector_number = src->u.rw.sector_number;
249 258 barrier();
250   - if (n > dst->nr_segments)
251   - n = dst->nr_segments;
  259 + if (n > dst->u.rw.nr_segments)
  260 + n = dst->u.rw.nr_segments;
252 261 for (i = 0; i < n; i++)
253 262 dst->u.rw.seg[i] = src->u.rw.seg[i];
254 263 break;
255 264 case BLKIF_OP_DISCARD:
  265 + dst->u.discard.flag = src->u.discard.flag;
256 266 dst->u.discard.sector_number = src->u.discard.sector_number;
257 267 dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
258 268 break;
259 269  
260 270  
261 271  
... ... @@ -266,22 +276,23 @@
266 276 {
267 277 int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
268 278 dst->operation = src->operation;
269   - dst->nr_segments = src->nr_segments;
270   - dst->handle = src->handle;
271   - dst->id = src->id;
272 279 switch (src->operation) {
273 280 case BLKIF_OP_READ:
274 281 case BLKIF_OP_WRITE:
275 282 case BLKIF_OP_WRITE_BARRIER:
276 283 case BLKIF_OP_FLUSH_DISKCACHE:
  284 + dst->u.rw.nr_segments = src->u.rw.nr_segments;
  285 + dst->u.rw.handle = src->u.rw.handle;
  286 + dst->u.rw.id = src->u.rw.id;
277 287 dst->u.rw.sector_number = src->u.rw.sector_number;
278 288 barrier();
279   - if (n > dst->nr_segments)
280   - n = dst->nr_segments;
  289 + if (n > dst->u.rw.nr_segments)
  290 + n = dst->u.rw.nr_segments;
281 291 for (i = 0; i < n; i++)
282 292 dst->u.rw.seg[i] = src->u.rw.seg[i];
283 293 break;
284 294 case BLKIF_OP_DISCARD:
  295 + dst->u.discard.flag = src->u.discard.flag;
285 296 dst->u.discard.sector_number = src->u.discard.sector_number;
286 297 dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
287 298 break;
drivers/block/xen-blkback/xenbus.c
... ... @@ -338,6 +338,9 @@
338 338 if (q && q->flush_flags)
339 339 vbd->flush_support = true;
340 340  
  341 + if (q && blk_queue_secdiscard(q))
  342 + vbd->discard_secure = true;
  343 +
341 344 DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
342 345 handle, blkif->domid);
343 346 return 0;
... ... @@ -419,6 +422,15 @@
419 422 }
420 423 state = 1;
421 424 blkif->blk_backend_type = BLKIF_BACKEND_PHY;
  425 + }
  426 + /* Optional. */
  427 + err = xenbus_printf(xbt, dev->nodename,
  428 + "discard-secure", "%d",
  429 + blkif->vbd.discard_secure);
  430 + if (err) {
  431 + xenbus_dev_fatal(dev, err,
  432 + "writting discard-secure");
  433 + goto kfree;
422 434 }
423 435 }
424 436 } else {
drivers/block/xen-blkfront.c
... ... @@ -98,7 +98,8 @@
98 98 unsigned long shadow_free;
99 99 unsigned int feature_flush;
100 100 unsigned int flush_op;
101   - unsigned int feature_discard;
  101 + unsigned int feature_discard:1;
  102 + unsigned int feature_secdiscard:1;
102 103 unsigned int discard_granularity;
103 104 unsigned int discard_alignment;
104 105 int is_ready;
105 106  
... ... @@ -135,15 +136,15 @@
135 136 {
136 137 unsigned long free = info->shadow_free;
137 138 BUG_ON(free >= BLK_RING_SIZE);
138   - info->shadow_free = info->shadow[free].req.id;
139   - info->shadow[free].req.id = 0x0fffffee; /* debug */
  139 + info->shadow_free = info->shadow[free].req.u.rw.id;
  140 + info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
140 141 return free;
141 142 }
142 143  
143 144 static void add_id_to_freelist(struct blkfront_info *info,
144 145 unsigned long id)
145 146 {
146   - info->shadow[id].req.id = info->shadow_free;
  147 + info->shadow[id].req.u.rw.id = info->shadow_free;
147 148 info->shadow[id].request = NULL;
148 149 info->shadow_free = id;
149 150 }
... ... @@ -156,7 +157,7 @@
156 157 if (end > nr_minors) {
157 158 unsigned long *bitmap, *old;
158 159  
159   - bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
  160 + bitmap = kcalloc(BITS_TO_LONGS(end), sizeof(*bitmap),
160 161 GFP_KERNEL);
161 162 if (bitmap == NULL)
162 163 return -ENOMEM;
163 164  
... ... @@ -287,9 +288,9 @@
287 288 id = get_id_from_freelist(info);
288 289 info->shadow[id].request = req;
289 290  
290   - ring_req->id = id;
  291 + ring_req->u.rw.id = id;
291 292 ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
292   - ring_req->handle = info->handle;
  293 + ring_req->u.rw.handle = info->handle;
293 294  
294 295 ring_req->operation = rq_data_dir(req) ?
295 296 BLKIF_OP_WRITE : BLKIF_OP_READ;
296 297  
297 298  
298 299  
299 300  
... ... @@ -305,16 +306,21 @@
305 306 ring_req->operation = info->flush_op;
306 307 }
307 308  
308   - if (unlikely(req->cmd_flags & REQ_DISCARD)) {
  309 + if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) {
309 310 /* id, sector_number and handle are set above. */
310 311 ring_req->operation = BLKIF_OP_DISCARD;
311   - ring_req->nr_segments = 0;
312 312 ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
  313 + if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
  314 + ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
  315 + else
  316 + ring_req->u.discard.flag = 0;
313 317 } else {
314   - ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
315   - BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
  318 + ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req,
  319 + info->sg);
  320 + BUG_ON(ring_req->u.rw.nr_segments >
  321 + BLKIF_MAX_SEGMENTS_PER_REQUEST);
316 322  
317   - for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
  323 + for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
318 324 buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
319 325 fsect = sg->offset >> 9;
320 326 lsect = fsect + (sg->length >> 9) - 1;
... ... @@ -424,6 +430,8 @@
424 430 blk_queue_max_discard_sectors(rq, get_capacity(gd));
425 431 rq->limits.discard_granularity = info->discard_granularity;
426 432 rq->limits.discard_alignment = info->discard_alignment;
  433 + if (info->feature_secdiscard)
  434 + queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
427 435 }
428 436  
429 437 /* Hard sector size and max sectors impersonate the equiv. hardware. */
... ... @@ -705,7 +713,9 @@
705 713 static void blkif_completion(struct blk_shadow *s)
706 714 {
707 715 int i;
708   - for (i = 0; i < s->req.nr_segments; i++)
  716 + /* Do not use this with BLKIF_OP_DISCARD, as nr_segments shares
  717 + * its location with the discard flag. */
  718 + for (i = 0; i < s->req.u.rw.nr_segments; i++)
709 719 gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
710 720 }
711 721  
... ... @@ -736,7 +746,8 @@
736 746 id = bret->id;
737 747 req = info->shadow[id].request;
738 748  
739   - blkif_completion(&info->shadow[id]);
  749 + if (bret->operation != BLKIF_OP_DISCARD)
  750 + blkif_completion(&info->shadow[id]);
740 751  
741 752 add_id_to_freelist(info, id);
742 753  
743 754  
... ... @@ -749,7 +760,9 @@
749 760 info->gd->disk_name);
750 761 error = -EOPNOTSUPP;
751 762 info->feature_discard = 0;
  763 + info->feature_secdiscard = 0;
752 764 queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
  765 + queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
753 766 }
754 767 __blk_end_request_all(req, error);
755 768 break;
... ... @@ -763,7 +776,7 @@
763 776 error = -EOPNOTSUPP;
764 777 }
765 778 if (unlikely(bret->status == BLKIF_RSP_ERROR &&
766   - info->shadow[id].req.nr_segments == 0)) {
  779 + info->shadow[id].req.u.rw.nr_segments == 0)) {
767 780 printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n",
768 781 info->flush_op == BLKIF_OP_WRITE_BARRIER ?
769 782 "barrier" : "flush disk cache",
... ... @@ -984,8 +997,8 @@
984 997 INIT_WORK(&info->work, blkif_restart_queue);
985 998  
986 999 for (i = 0; i < BLK_RING_SIZE; i++)
987   - info->shadow[i].req.id = i+1;
988   - info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
  1000 + info->shadow[i].req.u.rw.id = i+1;
  1001 + info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
989 1002  
990 1003 /* Front end dir is a number, which is used as the id. */
991 1004 info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
992 1005  
... ... @@ -1019,9 +1032,9 @@
1019 1032 /* Stage 2: Set up free list. */
1020 1033 memset(&info->shadow, 0, sizeof(info->shadow));
1021 1034 for (i = 0; i < BLK_RING_SIZE; i++)
1022   - info->shadow[i].req.id = i+1;
  1035 + info->shadow[i].req.u.rw.id = i+1;
1023 1036 info->shadow_free = info->ring.req_prod_pvt;
1024   - info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
  1037 + info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
1025 1038  
1026 1039 /* Stage 3: Find pending requests and requeue them. */
1027 1040 for (i = 0; i < BLK_RING_SIZE; i++) {
1028 1041  
1029 1042  
... ... @@ -1034,17 +1047,19 @@
1034 1047 *req = copy[i].req;
1035 1048  
1036 1049 /* We get a new request id, and must reset the shadow state. */
1037   - req->id = get_id_from_freelist(info);
1038   - memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
  1050 + req->u.rw.id = get_id_from_freelist(info);
  1051 + memcpy(&info->shadow[req->u.rw.id], &copy[i], sizeof(copy[i]));
1039 1052  
  1053 + if (req->operation != BLKIF_OP_DISCARD) {
1040 1054 /* Rewrite any grant references invalidated by susp/resume. */
1041   - for (j = 0; j < req->nr_segments; j++)
1042   - gnttab_grant_foreign_access_ref(
1043   - req->u.rw.seg[j].gref,
1044   - info->xbdev->otherend_id,
1045   - pfn_to_mfn(info->shadow[req->id].frame[j]),
1046   - rq_data_dir(info->shadow[req->id].request));
1047   - info->shadow[req->id].req = *req;
  1055 + for (j = 0; j < req->u.rw.nr_segments; j++)
  1056 + gnttab_grant_foreign_access_ref(
  1057 + req->u.rw.seg[j].gref,
  1058 + info->xbdev->otherend_id,
  1059 + pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
  1060 + rq_data_dir(info->shadow[req->u.rw.id].request));
  1061 + }
  1062 + info->shadow[req->u.rw.id].req = *req;
1048 1063  
1049 1064 info->ring.req_prod_pvt++;
1050 1065 }
1051 1066  
... ... @@ -1135,11 +1150,13 @@
1135 1150 char *type;
1136 1151 unsigned int discard_granularity;
1137 1152 unsigned int discard_alignment;
  1153 + unsigned int discard_secure;
1138 1154  
1139 1155 type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
1140 1156 if (IS_ERR(type))
1141 1157 return;
1142 1158  
  1159 + info->feature_secdiscard = 0;
1143 1160 if (strncmp(type, "phy", 3) == 0) {
1144 1161 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1145 1162 "discard-granularity", "%u", &discard_granularity,
... ... @@ -1150,6 +1167,12 @@
1150 1167 info->discard_granularity = discard_granularity;
1151 1168 info->discard_alignment = discard_alignment;
1152 1169 }
  1170 + err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
  1171 + "discard-secure", "%d", &discard_secure,
  1172 + NULL);
  1173 + if (!err)
  1174 + info->feature_secdiscard = discard_secure;
  1175 +
1153 1176 } else if (strncmp(type, "file", 4) == 0)
1154 1177 info->feature_discard = 1;
1155 1178  
include/xen/interface/io/blkif.h
... ... @@ -84,6 +84,21 @@
84 84 * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
85 85 * http://www.seagate.com/staticfiles/support/disc/manuals/
86 86 * Interface%20manuals/100293068c.pdf
  87 + * The backend can optionally provide three extra XenBus attributes to
  88 + * further optimize the discard functionality:
  89 + * 'discard-alignment' - Devices that support discard functionality may
  90 + * internally allocate space in units that are bigger than the exported
  91 + * logical block size. The discard-alignment parameter indicates how many bytes
  92 + * the beginning of the partition is offset from the internal allocation unit's
  93 + * natural alignment.
  94 + * 'discard-granularity' - Devices that support discard functionality may
  95 + * internally allocate space using units that are bigger than the logical block
  96 + * size. The discard-granularity parameter indicates the size of the internal
  97 + * allocation unit in bytes if reported by the device. Otherwise the
  98 + * discard-granularity will be set to match the device's physical block size.
  99 + * 'discard-secure' - All copies of the discarded sectors (potentially created
  100 + * by garbage collection) must also be erased. To use this feature, the flag
  101 + * BLKIF_DISCARD_SECURE must be set in the blkif_request_discard.
87 102 */
88 103 #define BLKIF_OP_DISCARD 5
89 104  
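/*
 * Illustrative sketch (editorial note, not part of this commit): a
 * frontend issuing a secure erase of "nsec" sectors starting at "sec"
 * (hypothetical locals, with "ring_req" a struct blkif_request pointer)
 * would fill the shared-ring request along these lines:
 *
 *	ring_req->operation               = BLKIF_OP_DISCARD;
 *	ring_req->u.discard.flag          = BLKIF_DISCARD_SECURE;
 *	ring_req->u.discard.id            = id;
 *	ring_req->u.discard.sector_number = sec;
 *	ring_req->u.discard.nr_sectors    = nsec;
 *
 * A backend that did not advertise discard-secure simply ignores the flag.
 */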
... ... @@ -95,6 +110,12 @@
95 110 #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
96 111  
97 112 struct blkif_request_rw {
  113 + uint8_t nr_segments; /* number of segments */
  114 + blkif_vdev_t handle; /* only for read/write requests */
  115 +#ifdef CONFIG_X86_64
  116 + uint32_t _pad1; /* offsetof(blkif_request,u.rw.id) == 8 */
  117 +#endif
  118 + uint64_t id; /* private guest value, echoed in resp */
98 119 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
99 120 struct blkif_request_segment {
100 121 grant_ref_t gref; /* reference to I/O buffer frame */
101 122  
102 123  
103 124  
104 125  
... ... @@ -102,23 +123,28 @@
102 123 /* @last_sect: last sector in frame to transfer (inclusive). */
103 124 uint8_t first_sect, last_sect;
104 125 } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
105   -};
  126 +} __attribute__((__packed__));
106 127  
107 128 struct blkif_request_discard {
  129 + uint8_t flag; /* BLKIF_DISCARD_SECURE or zero. */
  130 +#define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */
  131 + blkif_vdev_t _pad1; /* only for read/write requests */
  132 +#ifdef CONFIG_X86_64
  133 + uint32_t _pad2; /* offsetof(blkif_req..,u.discard.id)==8*/
  134 +#endif
  135 + uint64_t id; /* private guest value, echoed in resp */
108 136 blkif_sector_t sector_number;
109   - uint64_t nr_sectors;
110   -};
  137 + uint64_t nr_sectors;
  138 + uint8_t _pad3;
  139 +} __attribute__((__packed__));
111 140  
112 141 struct blkif_request {
113 142 uint8_t operation; /* BLKIF_OP_??? */
114   - uint8_t nr_segments; /* number of segments */
115   - blkif_vdev_t handle; /* only for read/write requests */
116   - uint64_t id; /* private guest value, echoed in resp */
117 143 union {
118 144 struct blkif_request_rw rw;
119 145 struct blkif_request_discard discard;
120 146 } u;
121   -};
  147 +} __attribute__((__packed__));
122 148  
123 149 struct blkif_response {
124 150 uint64_t id; /* copied from request */