Commit 21e98932dcf15fe7eabd09a35f2020e0dd86b685
Exists in smarc-l5.0.0_1.0.0-ga and in 5 other branches
Merge git://git.infradead.org/users/willy/linux-nvme
Pull NVMe driver fixes from Matthew Wilcox:
 "Now that actual hardware has been released (don't have any yet myself),
  people are starting to want some of these fixes merged."

Willy doesn't have hardware? Guys...

* git://git.infradead.org/users/willy/linux-nvme:
  NVMe: Cancel outstanding IOs on queue deletion
  NVMe: Free admin queue memory on initialisation failure
  NVMe: Use ida for nvme device instance
  NVMe: Fix whitespace damage in nvme_init
  NVMe: handle allocation failure in nvme_map_user_pages()
  NVMe: Fix uninitialized iod compiler warning
  NVMe: Do not set IO queue depth beyond device max
  NVMe: Set block queue max sectors
  NVMe: use namespace id for nvme_get_features
  NVMe: replace nvme_ns with nvme_dev for user admin
  NVMe: Fix nvme module init when nvme_major is set
  NVMe: Set request queue logical block size
Showing 2 changed files (side-by-side diff)
drivers/block/nvme.c
... | ... | @@ -79,6 +79,7 @@ |
79 | 79 | char serial[20]; |
80 | 80 | char model[40]; |
81 | 81 | char firmware_rev[8]; |
82 | + u32 max_hw_sectors; | |
82 | 83 | }; |
83 | 84 | |
84 | 85 | /* |
85 | 86 | |
86 | 87 | |
... | ... | @@ -835,15 +836,15 @@ |
835 | 836 | } |
836 | 837 | |
837 | 838 | static int nvme_get_features(struct nvme_dev *dev, unsigned fid, |
838 | - unsigned dword11, dma_addr_t dma_addr) | |
839 | + unsigned nsid, dma_addr_t dma_addr) | |
839 | 840 | { |
840 | 841 | struct nvme_command c; |
841 | 842 | |
842 | 843 | memset(&c, 0, sizeof(c)); |
843 | 844 | c.features.opcode = nvme_admin_get_features; |
845 | + c.features.nsid = cpu_to_le32(nsid); | |
844 | 846 | c.features.prp1 = cpu_to_le64(dma_addr); |
845 | 847 | c.features.fid = cpu_to_le32(fid); |
846 | - c.features.dword11 = cpu_to_le32(dword11); | |
847 | 848 | |
848 | 849 | return nvme_submit_admin_cmd(dev, &c, NULL); |
849 | 850 | } |
850 | 851 | |
... | ... | @@ -862,11 +863,51 @@ |
862 | 863 | return nvme_submit_admin_cmd(dev, &c, result); |
863 | 864 | } |
864 | 865 | |
866 | +/** | |
867 | + * nvme_cancel_ios - Cancel outstanding I/Os | |
868 | + * @queue: The queue to cancel I/Os on | |
869 | + * @timeout: True to only cancel I/Os which have timed out | |
870 | + */ | |
871 | +static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout) | |
872 | +{ | |
873 | + int depth = nvmeq->q_depth - 1; | |
874 | + struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); | |
875 | + unsigned long now = jiffies; | |
876 | + int cmdid; | |
877 | + | |
878 | + for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) { | |
879 | + void *ctx; | |
880 | + nvme_completion_fn fn; | |
881 | + static struct nvme_completion cqe = { | |
882 | + .status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1, | |
883 | + }; | |
884 | + | |
885 | + if (timeout && !time_after(now, info[cmdid].timeout)) | |
886 | + continue; | |
887 | + dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d\n", cmdid); | |
888 | + ctx = cancel_cmdid(nvmeq, cmdid, &fn); | |
889 | + fn(nvmeq->dev, ctx, &cqe); | |
890 | + } | |
891 | +} | |
892 | + | |
893 | +static void nvme_free_queue_mem(struct nvme_queue *nvmeq) | |
894 | +{ | |
895 | + dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), | |
896 | + (void *)nvmeq->cqes, nvmeq->cq_dma_addr); | |
897 | + dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), | |
898 | + nvmeq->sq_cmds, nvmeq->sq_dma_addr); | |
899 | + kfree(nvmeq); | |
900 | +} | |
901 | + | |
865 | 902 | static void nvme_free_queue(struct nvme_dev *dev, int qid) |
866 | 903 | { |
867 | 904 | struct nvme_queue *nvmeq = dev->queues[qid]; |
868 | 905 | int vector = dev->entry[nvmeq->cq_vector].vector; |
869 | 906 | |
907 | + spin_lock_irq(&nvmeq->q_lock); | |
908 | + nvme_cancel_ios(nvmeq, false); | |
909 | + spin_unlock_irq(&nvmeq->q_lock); | |
910 | + | |
870 | 911 | irq_set_affinity_hint(vector, NULL); |
871 | 912 | free_irq(vector, nvmeq); |
872 | 913 | |
873 | 914 | |
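Note: the new nvme_cancel_ios() unifies the timeout scan and forced cancellation — it walks the command-id bitmap and completes each outstanding command with an ABORT status, optionally skipping commands that have not yet reached their deadline. A standalone userspace sketch of the same scan pattern (simplified types; the real driver uses for_each_set_bit(), cancel_cmdid() and the per-command completion function):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define MAX_CMDS 8

struct cmd_slot {
	bool in_flight;
	time_t deadline;	/* absolute expiry, analogous to info[cmdid].timeout */
};

static void cancel_ios(struct cmd_slot *slots, int depth, bool timeout_only)
{
	time_t now = time(NULL);

	for (int cmdid = 0; cmdid < depth; cmdid++) {
		if (!slots[cmdid].in_flight)
			continue;
		if (timeout_only && now <= slots[cmdid].deadline)
			continue;		/* not timed out yet, leave it alone */
		printf("Cancelling I/O %d\n", cmdid);
		slots[cmdid].in_flight = false;	/* stands in for cancel_cmdid() + fn() */
	}
}

int main(void)
{
	struct cmd_slot slots[MAX_CMDS] = {
		[1] = { true, 0 },			/* long past its deadline */
		[3] = { true, time(NULL) + 60 },	/* still within its deadline */
	};

	cancel_ios(slots, MAX_CMDS, true);	/* kthread path: only timed-out I/Os */
	cancel_ios(slots, MAX_CMDS, false);	/* queue-deletion path: everything left */
	return 0;
}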
... | ... | @@ -876,18 +917,15 @@ |
876 | 917 | adapter_delete_cq(dev, qid); |
877 | 918 | } |
878 | 919 | |
879 | - dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), | |
880 | - (void *)nvmeq->cqes, nvmeq->cq_dma_addr); | |
881 | - dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), | |
882 | - nvmeq->sq_cmds, nvmeq->sq_dma_addr); | |
883 | - kfree(nvmeq); | |
920 | + nvme_free_queue_mem(nvmeq); | |
884 | 921 | } |
885 | 922 | |
886 | 923 | static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, |
887 | 924 | int depth, int vector) |
888 | 925 | { |
889 | 926 | struct device *dmadev = &dev->pci_dev->dev; |
890 | - unsigned extra = (depth / 8) + (depth * sizeof(struct nvme_cmd_info)); | |
927 | + unsigned extra = DIV_ROUND_UP(depth, 8) + (depth * | |
928 | + sizeof(struct nvme_cmd_info)); | |
891 | 929 | struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL); |
892 | 930 | if (!nvmeq) |
893 | 931 | return NULL; |
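Note: the sizing change in nvme_alloc_queue() matters whenever the queue depth is not a multiple of 8 — depth / 8 truncates and under-allocates the command-id bitmap, while DIV_ROUND_UP(depth, 8) rounds up to the next whole byte. A standalone check of the difference (the DIV_ROUND_UP definition mirrors the kernel macro):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	for (unsigned depth = 62; depth <= 65; depth++)
		printf("depth %u: old %u bytes, fixed %u bytes\n",
		       depth, depth / 8, DIV_ROUND_UP(depth, 8));
	return 0;
}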
... | ... | @@ -975,7 +1013,7 @@ |
975 | 1013 | |
976 | 1014 | static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev) |
977 | 1015 | { |
978 | - int result; | |
1016 | + int result = 0; | |
979 | 1017 | u32 aqa; |
980 | 1018 | u64 cap; |
981 | 1019 | unsigned long timeout; |
982 | 1020 | |
983 | 1021 | |
984 | 1022 | |
... | ... | @@ -1005,17 +1043,22 @@ |
1005 | 1043 | timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies; |
1006 | 1044 | dev->db_stride = NVME_CAP_STRIDE(cap); |
1007 | 1045 | |
1008 | - while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) { | |
1046 | + while (!result && !(readl(&dev->bar->csts) & NVME_CSTS_RDY)) { | |
1009 | 1047 | msleep(100); |
1010 | 1048 | if (fatal_signal_pending(current)) |
1011 | - return -EINTR; | |
1049 | + result = -EINTR; | |
1012 | 1050 | if (time_after(jiffies, timeout)) { |
1013 | 1051 | dev_err(&dev->pci_dev->dev, |
1014 | 1052 | "Device not ready; aborting initialisation\n"); |
1015 | - return -ENODEV; | |
1053 | + result = -ENODEV; | |
1016 | 1054 | } |
1017 | 1055 | } |
1018 | 1056 | |
1057 | + if (result) { | |
1058 | + nvme_free_queue_mem(nvmeq); | |
1059 | + return result; | |
1060 | + } | |
1061 | + | |
1019 | 1062 | result = queue_request_irq(dev, nvmeq, "nvme admin"); |
1020 | 1063 | dev->queues[0] = nvmeq; |
1021 | 1064 | return result; |
... | ... | @@ -1037,6 +1080,8 @@ |
1037 | 1080 | offset = offset_in_page(addr); |
1038 | 1081 | count = DIV_ROUND_UP(offset + length, PAGE_SIZE); |
1039 | 1082 | pages = kcalloc(count, sizeof(*pages), GFP_KERNEL); |
1083 | + if (!pages) | |
1084 | + return ERR_PTR(-ENOMEM); | |
1040 | 1085 | |
1041 | 1086 | err = get_user_pages_fast(addr, count, 1, pages); |
1042 | 1087 | if (err < count) { |
1043 | 1088 | |
1044 | 1089 | |
... | ... | @@ -1146,14 +1191,13 @@ |
1146 | 1191 | return status; |
1147 | 1192 | } |
1148 | 1193 | |
1149 | -static int nvme_user_admin_cmd(struct nvme_ns *ns, | |
1194 | +static int nvme_user_admin_cmd(struct nvme_dev *dev, | |
1150 | 1195 | struct nvme_admin_cmd __user *ucmd) |
1151 | 1196 | { |
1152 | - struct nvme_dev *dev = ns->dev; | |
1153 | 1197 | struct nvme_admin_cmd cmd; |
1154 | 1198 | struct nvme_command c; |
1155 | 1199 | int status, length; |
1156 | - struct nvme_iod *iod; | |
1200 | + struct nvme_iod *uninitialized_var(iod); | |
1157 | 1201 | |
1158 | 1202 | if (!capable(CAP_SYS_ADMIN)) |
1159 | 1203 | return -EACCES; |
... | ... | @@ -1204,7 +1248,7 @@ |
1204 | 1248 | case NVME_IOCTL_ID: |
1205 | 1249 | return ns->ns_id; |
1206 | 1250 | case NVME_IOCTL_ADMIN_CMD: |
1207 | - return nvme_user_admin_cmd(ns, (void __user *)arg); | |
1251 | + return nvme_user_admin_cmd(ns->dev, (void __user *)arg); | |
1208 | 1252 | case NVME_IOCTL_SUBMIT_IO: |
1209 | 1253 | return nvme_submit_io(ns, (void __user *)arg); |
1210 | 1254 | default: |
... | ... | @@ -1218,26 +1262,6 @@ |
1218 | 1262 | .compat_ioctl = nvme_ioctl, |
1219 | 1263 | }; |
1220 | 1264 | |
1221 | -static void nvme_timeout_ios(struct nvme_queue *nvmeq) | |
1222 | -{ | |
1223 | - int depth = nvmeq->q_depth - 1; | |
1224 | - struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); | |
1225 | - unsigned long now = jiffies; | |
1226 | - int cmdid; | |
1227 | - | |
1228 | - for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) { | |
1229 | - void *ctx; | |
1230 | - nvme_completion_fn fn; | |
1231 | - static struct nvme_completion cqe = { .status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1, }; | |
1232 | - | |
1233 | - if (!time_after(now, info[cmdid].timeout)) | |
1234 | - continue; | |
1235 | - dev_warn(nvmeq->q_dmadev, "Timing out I/O %d\n", cmdid); | |
1236 | - ctx = cancel_cmdid(nvmeq, cmdid, &fn); | |
1237 | - fn(nvmeq->dev, ctx, &cqe); | |
1238 | - } | |
1239 | -} | |
1240 | - | |
1241 | 1265 | static void nvme_resubmit_bios(struct nvme_queue *nvmeq) |
1242 | 1266 | { |
1243 | 1267 | while (bio_list_peek(&nvmeq->sq_cong)) { |
... | ... | @@ -1269,7 +1293,7 @@ |
1269 | 1293 | spin_lock_irq(&nvmeq->q_lock); |
1270 | 1294 | if (nvme_process_cq(nvmeq)) |
1271 | 1295 | printk("process_cq did something\n"); |
1272 | - nvme_timeout_ios(nvmeq); | |
1296 | + nvme_cancel_ios(nvmeq, true); | |
1273 | 1297 | nvme_resubmit_bios(nvmeq); |
1274 | 1298 | spin_unlock_irq(&nvmeq->q_lock); |
1275 | 1299 | } |
... | ... | @@ -1339,6 +1363,9 @@ |
1339 | 1363 | ns->disk = disk; |
1340 | 1364 | lbaf = id->flbas & 0xf; |
1341 | 1365 | ns->lba_shift = id->lbaf[lbaf].ds; |
1366 | + blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); | |
1367 | + if (dev->max_hw_sectors) | |
1368 | + blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors); | |
1342 | 1369 | |
1343 | 1370 | disk->major = nvme_major; |
1344 | 1371 | disk->minors = NVME_MINORS; |
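Note: the LBA format's ds field is a power-of-two shift, so the size handed to blk_queue_logical_block_size() is 1 << ns->lba_shift; ds = 9 gives 512-byte blocks and ds = 12 gives 4096-byte blocks. A standalone check (sample ds values only):

#include <stdio.h>

int main(void)
{
	unsigned ds_values[] = { 9, 12 };

	for (unsigned i = 0; i < sizeof(ds_values) / sizeof(ds_values[0]); i++)
		printf("ds=%u -> logical block size %u bytes\n",
		       ds_values[i], 1u << ds_values[i]);
	return 0;
}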
... | ... | @@ -1383,7 +1410,7 @@ |
1383 | 1410 | |
1384 | 1411 | static int __devinit nvme_setup_io_queues(struct nvme_dev *dev) |
1385 | 1412 | { |
1386 | - int result, cpu, i, nr_io_queues, db_bar_size; | |
1413 | + int result, cpu, i, nr_io_queues, db_bar_size, q_depth; | |
1387 | 1414 | |
1388 | 1415 | nr_io_queues = num_online_cpus(); |
1389 | 1416 | result = set_queue_count(dev, nr_io_queues); |
1390 | 1417 | |
... | ... | @@ -1429,9 +1456,10 @@ |
1429 | 1456 | cpu = cpumask_next(cpu, cpu_online_mask); |
1430 | 1457 | } |
1431 | 1458 | |
1459 | + q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1, | |
1460 | + NVME_Q_DEPTH); | |
1432 | 1461 | for (i = 0; i < nr_io_queues; i++) { |
1433 | - dev->queues[i + 1] = nvme_create_queue(dev, i + 1, | |
1434 | - NVME_Q_DEPTH, i); | |
1462 | + dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i); | |
1435 | 1463 | if (IS_ERR(dev->queues[i + 1])) |
1436 | 1464 | return PTR_ERR(dev->queues[i + 1]); |
1437 | 1465 | dev->queue_count++; |
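Note: CAP.MQES is a 0's-based field (maximum entries per queue minus one), so the device limit is MQES + 1; the new code clamps that against the driver's own NVME_Q_DEPTH instead of always requesting NVME_Q_DEPTH entries. A standalone sketch of the clamp (the NVME_Q_DEPTH value below is assumed purely for illustration):

#include <stdio.h>
#include <stdint.h>

#define NVME_CAP_MQES(cap)	((cap) & 0xffff)
#define NVME_Q_DEPTH		1024	/* assumed driver default, for illustration */

static int min_int(int a, int b) { return a < b ? a : b; }

int main(void)
{
	uint64_t caps[] = { 0x003f, 0x07ff, 0xffff };	/* sample CAP.MQES encodings */

	for (unsigned i = 0; i < 3; i++) {
		int q_depth = min_int((int)NVME_CAP_MQES(caps[i]) + 1, NVME_Q_DEPTH);
		printf("MQES=%u -> q_depth=%d\n",
		       (unsigned)NVME_CAP_MQES(caps[i]), q_depth);
	}
	return 0;
}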
... | ... | @@ -1480,6 +1508,10 @@ |
1480 | 1508 | memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); |
1481 | 1509 | memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); |
1482 | 1510 | memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); |
1511 | + if (ctrl->mdts) { | |
1512 | + int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; | |
1513 | + dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9); | |
1514 | + } | |
1483 | 1515 | |
1484 | 1516 | id_ns = mem; |
1485 | 1517 | for (i = 1; i <= nn; i++) { |
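Note: MDTS expresses the maximum transfer size as a power of two in units of the minimum memory page size, which is itself 1 << (12 + CAP.MPSMIN) bytes; converting to 512-byte sectors gives 1 << (mdts + shift - 9) with shift = MPSMIN + 12. For example, MPSMIN = 0 (4 KiB pages) and mdts = 5 yields 256 sectors, i.e. 128 KiB. A standalone check of the conversion:

#include <stdio.h>

int main(void)
{
	unsigned mpsmin = 0;		/* CAP.MPSMIN: min page size = 1 << (12 + mpsmin) bytes */
	unsigned shift = mpsmin + 12;

	for (unsigned mdts = 1; mdts <= 6; mdts++) {
		unsigned max_hw_sectors = 1u << (mdts + shift - 9);
		printf("mdts=%u -> %u sectors (%u KiB)\n",
		       mdts, max_hw_sectors, max_hw_sectors / 2);
	}
	return 0;
}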
... | ... | @@ -1523,8 +1555,6 @@ |
1523 | 1555 | list_del(&dev->node); |
1524 | 1556 | spin_unlock(&dev_list_lock); |
1525 | 1557 | |
1526 | - /* TODO: wait all I/O finished or cancel them */ | |
1527 | - | |
1528 | 1558 | list_for_each_entry_safe(ns, next, &dev->namespaces, list) { |
1529 | 1559 | list_del(&ns->list); |
1530 | 1560 | del_gendisk(ns->disk); |
1531 | 1561 | |
1532 | 1562 | |
... | ... | @@ -1560,15 +1590,33 @@ |
1560 | 1590 | dma_pool_destroy(dev->prp_small_pool); |
1561 | 1591 | } |
1562 | 1592 | |
1563 | -/* XXX: Use an ida or something to let remove / add work correctly */ | |
1564 | -static void nvme_set_instance(struct nvme_dev *dev) | |
1593 | +static DEFINE_IDA(nvme_instance_ida); | |
1594 | + | |
1595 | +static int nvme_set_instance(struct nvme_dev *dev) | |
1565 | 1596 | { |
1566 | - static int instance; | |
1567 | - dev->instance = instance++; | |
1597 | + int instance, error; | |
1598 | + | |
1599 | + do { | |
1600 | + if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL)) | |
1601 | + return -ENODEV; | |
1602 | + | |
1603 | + spin_lock(&dev_list_lock); | |
1604 | + error = ida_get_new(&nvme_instance_ida, &instance); | |
1605 | + spin_unlock(&dev_list_lock); | |
1606 | + } while (error == -EAGAIN); | |
1607 | + | |
1608 | + if (error) | |
1609 | + return -ENODEV; | |
1610 | + | |
1611 | + dev->instance = instance; | |
1612 | + return 0; | |
1568 | 1613 | } |
1569 | 1614 | |
1570 | 1615 | static void nvme_release_instance(struct nvme_dev *dev) |
1571 | 1616 | { |
1617 | + spin_lock(&dev_list_lock); | |
1618 | + ida_remove(&nvme_instance_ida, dev->instance); | |
1619 | + spin_unlock(&dev_list_lock); | |
1572 | 1620 | } |
1573 | 1621 | |
1574 | 1622 | static int __devinit nvme_probe(struct pci_dev *pdev, |
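Note: the ida_pre_get()/ida_get_new() loop above is the classic pre-load-and-retry idiom, with dev_list_lock serializing the non-atomic ida_get_new() step. On kernels that provide it, ida_simple_get()/ida_simple_remove() wrap the same dance, including the locking — a hedged alternative sketch in kernel style, not what this commit actually uses:

static DEFINE_IDA(nvme_instance_ida);

static int nvme_set_instance(struct nvme_dev *dev)
{
	/* start = 0, end = 0: allocate the lowest free id with no upper bound */
	int instance = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL);

	if (instance < 0)
		return instance;

	dev->instance = instance;
	return 0;
}

static void nvme_release_instance(struct nvme_dev *dev)
{
	ida_simple_remove(&nvme_instance_ida, dev->instance);
}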
... | ... | @@ -1601,7 +1649,10 @@ |
1601 | 1649 | pci_set_drvdata(pdev, dev); |
1602 | 1650 | dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); |
1603 | 1651 | dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); |
1604 | - nvme_set_instance(dev); | |
1652 | + result = nvme_set_instance(dev); | |
1653 | + if (result) | |
1654 | + goto disable; | |
1655 | + | |
1605 | 1656 | dev->entry[0].vector = pdev->irq; |
1606 | 1657 | |
1607 | 1658 | result = nvme_setup_prp_pools(dev); |
1608 | 1659 | |
1609 | 1660 | |
... | ... | @@ -1704,15 +1755,17 @@ |
1704 | 1755 | |
1705 | 1756 | static int __init nvme_init(void) |
1706 | 1757 | { |
1707 | - int result = -EBUSY; | |
1758 | + int result; | |
1708 | 1759 | |
1709 | 1760 | nvme_thread = kthread_run(nvme_kthread, NULL, "nvme"); |
1710 | 1761 | if (IS_ERR(nvme_thread)) |
1711 | 1762 | return PTR_ERR(nvme_thread); |
1712 | 1763 | |
1713 | - nvme_major = register_blkdev(nvme_major, "nvme"); | |
1714 | - if (nvme_major <= 0) | |
1764 | + result = register_blkdev(nvme_major, "nvme"); | |
1765 | + if (result < 0) | |
1715 | 1766 | goto kill_kthread; |
1767 | + else if (result > 0) | |
1768 | + nvme_major = result; | |
1716 | 1769 | |
1717 | 1770 | result = pci_register_driver(&nvme_driver); |
1718 | 1771 | if (result) |
include/linux/nvme.h
... | ... | @@ -35,8 +35,10 @@ |
35 | 35 | __u64 acq; /* Admin CQ Base Address */ |
36 | 36 | }; |
37 | 37 | |
38 | +#define NVME_CAP_MQES(cap) ((cap) & 0xffff) | |
38 | 39 | #define NVME_CAP_TIMEOUT(cap) (((cap) >> 24) & 0xff) |
39 | 40 | #define NVME_CAP_STRIDE(cap) (((cap) >> 32) & 0xf) |
41 | +#define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf) | |
40 | 42 | |
41 | 43 | enum { |
42 | 44 | NVME_CC_ENABLE = 1 << 0, |
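Note: each macro pulls a fixed field out of the 64-bit CAP register — MQES in bits 15:0, TIMEOUT in bits 31:24, DSTRD (doorbell stride) in bits 35:32, MPSMIN in bits 51:48. A standalone check using a made-up register value purely for illustration:

#include <stdio.h>
#include <stdint.h>

#define NVME_CAP_MQES(cap)	((cap) & 0xffff)
#define NVME_CAP_TIMEOUT(cap)	(((cap) >> 24) & 0xff)
#define NVME_CAP_STRIDE(cap)	(((cap) >> 32) & 0xf)
#define NVME_CAP_MPSMIN(cap)	(((cap) >> 48) & 0xf)

int main(void)
{
	uint64_t cap = 0x0000002030010fffULL;	/* hypothetical CAP value */

	printf("MQES=%u TIMEOUT=%u STRIDE=%u MPSMIN=%u\n",
	       (unsigned)NVME_CAP_MQES(cap),
	       (unsigned)NVME_CAP_TIMEOUT(cap),
	       (unsigned)NVME_CAP_STRIDE(cap),
	       (unsigned)NVME_CAP_MPSMIN(cap));
	return 0;
}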