Diffstat (limited to 'drivers/nvme/host/pci.c')
-rw-r--r-- | drivers/nvme/host/pci.c | 189
1 file changed, 77 insertions, 112 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f0f8027644bb..5b95c94ee40f 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -36,14 +36,15 @@
 #define SQ_SIZE(q)	((q)->q_depth << (q)->sqes)
 #define CQ_SIZE(q)	((q)->q_depth * sizeof(struct nvme_completion))
 
-#define SGES_PER_PAGE	(PAGE_SIZE / sizeof(struct nvme_sgl_desc))
+#define SGES_PER_PAGE	(NVME_CTRL_PAGE_SIZE / sizeof(struct nvme_sgl_desc))
 
 /*
  * These can be higher, but we need to ensure that any command doesn't
  * require an sg allocation that needs more than a page of data.
  */
-#define NVME_MAX_KB_SZ	4096
-#define NVME_MAX_SEGS	127
+#define NVME_MAX_KB_SZ	8192
+#define NVME_MAX_SEGS	128
+#define NVME_MAX_NR_ALLOCATIONS	5
 
 static int use_threaded_interrupts;
 module_param(use_threaded_interrupts, int, 0444);
@@ -110,6 +111,7 @@ struct nvme_queue;
 
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
 static void nvme_delete_io_queues(struct nvme_dev *dev);
+static void nvme_update_attrs(struct nvme_dev *dev);
 
 /*
  * Represents an NVM Express device. Each nvme_dev is a PCI function.
@@ -144,9 +146,9 @@ struct nvme_dev {
 	mempool_t *iod_mempool;
 
 	/* shadow doorbell buffer support: */
-	u32 *dbbuf_dbs;
+	__le32 *dbbuf_dbs;
 	dma_addr_t dbbuf_dbs_dma_addr;
-	u32 *dbbuf_eis;
+	__le32 *dbbuf_eis;
 	dma_addr_t dbbuf_eis_dma_addr;
 
 	/* host memory buffer support: */
@@ -208,13 +210,18 @@ struct nvme_queue {
 #define NVMEQ_SQ_CMB		1
 #define NVMEQ_DELETE_ERROR	2
 #define NVMEQ_POLLED		3
-	u32 *dbbuf_sq_db;
-	u32 *dbbuf_cq_db;
-	u32 *dbbuf_sq_ei;
-	u32 *dbbuf_cq_ei;
+	__le32 *dbbuf_sq_db;
+	__le32 *dbbuf_cq_db;
+	__le32 *dbbuf_sq_ei;
+	__le32 *dbbuf_cq_ei;
 	struct completion delete_done;
 };
 
+union nvme_descriptor {
+	struct nvme_sgl_desc	*sg_list;
+	__le64			*prp_list;
+};
+
 /*
  * The nvme_iod describes the data in an I/O.
  *
@@ -224,7 +231,6 @@ struct nvme_queue {
 struct nvme_iod {
 	struct nvme_request req;
 	struct nvme_command cmd;
-	bool use_sgl;
 	bool aborted;
 	s8 nr_allocations;	/* PRP list pool allocations. 0 means small
 				   pool in use */
@@ -232,6 +238,7 @@ struct nvme_iod {
 	dma_addr_t first_dma;
 	dma_addr_t meta_dma;
 	struct sg_table sgt;
+	union nvme_descriptor list[NVME_MAX_NR_ALLOCATIONS];
 };
 
 static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev)
@@ -343,11 +350,11 @@ static inline int nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, u16 old)
 }
 
 /* Update dbbuf and return true if an MMIO is required */
-static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db,
-					      volatile u32 *dbbuf_ei)
+static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db,
+					      volatile __le32 *dbbuf_ei)
 {
 	if (dbbuf_db) {
-		u16 old_value;
+		u16 old_value, event_idx;
 
 		/*
 		 * Ensure that the queue is written before updating
@@ -355,8 +362,8 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db,
 		 */
 		wmb();
 
-		old_value = *dbbuf_db;
-		*dbbuf_db = value;
+		old_value = le32_to_cpu(*dbbuf_db);
+		*dbbuf_db = cpu_to_le32(value);
 
 		/*
 		 * Ensure that the doorbell is updated before reading the event
@@ -366,7 +373,8 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db,
 		 */
 		mb();
 
-		if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value))
+		event_idx = le32_to_cpu(*dbbuf_ei);
+		if (!nvme_dbbuf_need_event(event_idx, value, old_value))
 			return false;
 	}
 
@@ -380,19 +388,9 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db,
  */
 static int nvme_pci_npages_prp(void)
 {
-	unsigned nprps = DIV_ROUND_UP(NVME_MAX_KB_SZ + NVME_CTRL_PAGE_SIZE,
-				      NVME_CTRL_PAGE_SIZE);
-	return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8);
-}
-
-/*
- * Calculates the number of pages needed for the SGL segments. For example a 4k
- * page can accommodate 256 SGL descriptors.
- */
-static int nvme_pci_npages_sgl(void)
-{
-	return DIV_ROUND_UP(NVME_MAX_SEGS * sizeof(struct nvme_sgl_desc),
-			PAGE_SIZE);
+	unsigned max_bytes = (NVME_MAX_KB_SZ * 1024) + NVME_CTRL_PAGE_SIZE;
+	unsigned nprps = DIV_ROUND_UP(max_bytes, NVME_CTRL_PAGE_SIZE);
+	return DIV_ROUND_UP(8 * nprps, NVME_CTRL_PAGE_SIZE - 8);
 }
 
 static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@ -508,16 +506,10 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
 	spin_unlock(&nvmeq->sq_lock);
 }
 
-static void **nvme_pci_iod_list(struct request *req)
-{
-	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	return (void **)(iod->sgt.sgl + blk_rq_nr_phys_segments(req));
-}
-
-static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
+static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req,
+				     int nseg)
 {
 	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
-	int nseg = blk_rq_nr_phys_segments(req);
 	unsigned int avg_seg_size;
 
 	avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg);
@@ -539,7 +531,7 @@ static void nvme_free_prps(struct nvme_dev *dev, struct request *req)
 	int i;
 
 	for (i = 0; i < iod->nr_allocations; i++) {
-		__le64 *prp_list = nvme_pci_iod_list(req)[i];
+		__le64 *prp_list = iod->list[i].prp_list;
 		dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]);
 
 		dma_pool_free(dev->prp_page_pool, prp_list, dma_addr);
@@ -547,22 +539,6 @@ static void nvme_free_prps(struct nvme_dev *dev, struct request *req)
 	}
 }
 
-static void nvme_free_sgls(struct nvme_dev *dev, struct request *req)
-{
-	const int last_sg = SGES_PER_PAGE - 1;
-	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	dma_addr_t dma_addr = iod->first_dma;
-	int i;
-
-	for (i = 0; i < iod->nr_allocations; i++) {
-		struct nvme_sgl_desc *sg_list = nvme_pci_iod_list(req)[i];
-		dma_addr_t next_dma_addr = le64_to_cpu((sg_list[last_sg]).addr);
-
-		dma_pool_free(dev->prp_page_pool, sg_list, dma_addr);
-		dma_addr = next_dma_addr;
-	}
-}
-
 static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -578,10 +554,11 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 
 	dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0);
 	if (iod->nr_allocations == 0)
-		dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
+		dma_pool_free(dev->prp_small_pool, iod->list[0].sg_list,
+			      iod->first_dma);
+	else if (iod->nr_allocations == 1)
+		dma_pool_free(dev->prp_page_pool, iod->list[0].sg_list,
 			      iod->first_dma);
-	else if (iod->use_sgl)
-		nvme_free_sgls(dev, req);
 	else
 		nvme_free_prps(dev, req);
 	mempool_free(iod->sgt.sgl, dev->iod_mempool);
@@ -612,7 +589,6 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
 	u64 dma_addr = sg_dma_address(sg);
 	int offset = dma_addr & (NVME_CTRL_PAGE_SIZE - 1);
 	__le64 *prp_list;
-	void **list = nvme_pci_iod_list(req);
 	dma_addr_t prp_dma;
 	int nprps, i;
 
@@ -650,7 +626,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
 		iod->nr_allocations = -1;
 		return BLK_STS_RESOURCE;
 	}
-	list[0] = prp_list;
+	iod->list[0].prp_list = prp_list;
 	iod->first_dma = prp_dma;
 	i = 0;
 	for (;;) {
@@ -659,7 +635,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
 			prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
 			if (!prp_list)
 				goto free_prps;
-			list[iod->nr_allocations++] = prp_list;
+			iod->list[iod->nr_allocations++].prp_list = prp_list;
 			prp_list[0] = old_prp_list[i - 1];
 			old_prp_list[i - 1] = cpu_to_le64(prp_dma);
 			i = 1;
@@ -704,13 +680,8 @@ static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge,
 		dma_addr_t dma_addr, int entries)
 {
 	sge->addr = cpu_to_le64(dma_addr);
-	if (entries < SGES_PER_PAGE) {
-		sge->length = cpu_to_le32(entries * sizeof(*sge));
-		sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4;
-	} else {
-		sge->length = cpu_to_le32(PAGE_SIZE);
-		sge->type = NVME_SGL_FMT_SEG_DESC << 4;
-	}
+	sge->length = cpu_to_le32(entries * sizeof(*sge));
+	sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4;
 }
 
 static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
@@ -746,34 +717,16 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
 		return BLK_STS_RESOURCE;
 	}
 
-	nvme_pci_iod_list(req)[0] = sg_list;
+	iod->list[0].sg_list = sg_list;
 	iod->first_dma = sgl_dma;
 
 	nvme_pci_sgl_set_seg(&cmd->dptr.sgl, sgl_dma, entries);
-
 	do {
-		if (i == SGES_PER_PAGE) {
-			struct nvme_sgl_desc *old_sg_desc = sg_list;
-			struct nvme_sgl_desc *link = &old_sg_desc[i - 1];
-
-			sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
-			if (!sg_list)
-				goto free_sgls;
-
-			i = 0;
-			nvme_pci_iod_list(req)[iod->nr_allocations++] = sg_list;
-			sg_list[i++] = *link;
-			nvme_pci_sgl_set_seg(link, sgl_dma, entries);
-		}
-
 		nvme_pci_sgl_set_data(&sg_list[i++], sg);
 		sg = sg_next(sg);
 	} while (--entries > 0);
 
 	return BLK_STS_OK;
-free_sgls:
-	nvme_free_sgls(dev, req);
-	return BLK_STS_RESOURCE;
 }
 
 static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
@@ -855,8 +808,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		goto out_free_sg;
 	}
 
-	iod->use_sgl = nvme_pci_use_sgls(dev, req);
-	if (iod->use_sgl)
+	if (nvme_pci_use_sgls(dev, req, iod->sgt.nents))
 		ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw);
 	else
 		ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
@@ -1361,7 +1313,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
 	else
 		nvme_poll_irqdisable(nvmeq);
 
-	if (blk_mq_request_completed(req)) {
+	if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) {
 		dev_warn(dev->ctrl.device,
 			 "I/O %d QID %d timeout, completion polled\n",
 			 req->tag, nvmeq->qid);
@@ -1922,6 +1874,8 @@ static void nvme_map_cmb(struct nvme_dev *dev)
 	if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) ==
 			(NVME_CMBSZ_WDS | NVME_CMBSZ_RDS))
 		pci_p2pmem_publish(pdev, true);
+
+	nvme_update_attrs(dev);
 }
 
 static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
@@ -2208,6 +2162,11 @@ static const struct attribute_group *nvme_pci_dev_attr_groups[] = {
 	NULL,
 };
 
+static void nvme_update_attrs(struct nvme_dev *dev)
+{
+	sysfs_update_group(&dev->ctrl.device->kobj, &nvme_pci_dev_attrs_group);
+}
+
 /*
  * nirqs is the number of interrupts available for write and read
  * queues. The core already reserved an interrupt for the admin queue.
@@ -2332,10 +2291,12 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	if (dev->cmb_use_sqes) {
 		result = nvme_cmb_qdepth(dev, nr_io_queues,
				sizeof(struct nvme_command));
-		if (result > 0)
+		if (result > 0) {
 			dev->q_depth = result;
-		else
+			dev->ctrl.sqsize = result - 1;
+		} else {
 			dev->cmb_use_sqes = false;
+		}
 	}
 
 	do {
@@ -2506,18 +2467,12 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 {
 	int result = -ENOMEM;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
-	int dma_address_bits = 64;
 
 	if (pci_enable_device_mem(pdev))
 		return result;
 
 	pci_set_master(pdev);
 
-	if (dev->ctrl.quirks & NVME_QUIRK_DMA_ADDRESS_BITS_48)
-		dma_address_bits = 48;
-	if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(dma_address_bits)))
-		goto disable;
-
 	if (readl(dev->bar + NVME_REG_CSTS) == -1) {
 		result = -ENODEV;
 		goto disable;
@@ -2530,13 +2485,12 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 	 */
 	result = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
 	if (result < 0)
-		return result;
+		goto disable;
 
 	dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 
 	dev->q_depth = min_t(u32, NVME_CAP_MQES(dev->ctrl.cap) + 1,
				io_queue_depth);
-	dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */
 	dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
 	dev->dbs = dev->bar + 4096;
 
@@ -2577,15 +2531,20 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 		dev_warn(dev->ctrl.device, "IO queue depth clamped to %d\n",
 			 dev->q_depth);
 	}
-
+	dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */
 	nvme_map_cmb(dev);
 
 	pci_enable_pcie_error_reporting(pdev);
 	pci_save_state(pdev);
 
-	return nvme_pci_configure_admin_queue(dev);
+	result = nvme_pci_configure_admin_queue(dev);
+	if (result)
+		goto free_irq;
+	return result;
 
+ free_irq:
+	pci_free_irq_vectors(pdev);
  disable:
 	pci_disable_device(pdev);
 	return result;
 }
@@ -2697,11 +2656,8 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
 }
 
 static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
 {
-	size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl());
-	size_t alloc_size = sizeof(__le64 *) * npages +
-			    sizeof(struct scatterlist) * NVME_MAX_SEGS;
+	size_t alloc_size = sizeof(struct scatterlist) * NVME_MAX_SEGS;
 
-	WARN_ON_ONCE(alloc_size > PAGE_SIZE);
 	dev->iod_mempool = mempool_create_node(1, mempool_kmalloc,
 			mempool_kfree, (void *)alloc_size, GFP_KERNEL,
@@ -2963,7 +2919,7 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
 
 	dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);
 	if (!dev)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 	INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
 	mutex_init(&dev->shutdown_lock);
 
@@ -2991,7 +2947,11 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
 			     quirks);
 	if (ret)
 		goto out_put_device;
-
+
+	if (dev->ctrl.quirks & NVME_QUIRK_DMA_ADDRESS_BITS_48)
+		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+	else
+		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
 	dma_set_min_align_mask(&pdev->dev, NVME_CTRL_PAGE_SIZE - 1);
 	dma_set_max_seg_size(&pdev->dev, 0xffffffff);
 
@@ -3024,8 +2984,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	int result = -ENOMEM;
 
 	dev = nvme_pci_alloc_dev(pdev, id);
-	if (!dev)
-		return -ENOMEM;
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
 
 	result = nvme_dev_map(dev);
 	if (result)
@@ -3095,6 +3055,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	nvme_start_ctrl(&dev->ctrl);
 	nvme_put_ctrl(&dev->ctrl);
+	flush_work(&dev->ctrl.scan_work);
 	return 0;
 
 out_disable:
@@ -3415,6 +3376,8 @@ static const struct pci_device_id nvme_id_table[] = {
 	{ PCI_DEVICE(0x10ec, 0x5762),	/* ADATA SX6000LNP */
 		.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN |
				NVME_QUIRK_BOGUS_NID, },
+	{ PCI_DEVICE(0x10ec, 0x5763),	/* ADATA SX6000PNP */
+		.driver_data = NVME_QUIRK_BOGUS_NID, },
 	{ PCI_DEVICE(0x1cc1, 0x8201),	/* ADATA SX8200PNP 512GB */
 		.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
				NVME_QUIRK_IGNORE_DEV_SUBNQN, },
@@ -3493,7 +3456,8 @@ static const struct pci_device_id nvme_id_table[] = {
 		.driver_data = NVME_QUIRK_SINGLE_VECTOR |
				NVME_QUIRK_128_BYTES_SQES |
				NVME_QUIRK_SHARED_TAGS |
-				NVME_QUIRK_SKIP_CID_GEN },
+				NVME_QUIRK_SKIP_CID_GEN |
+				NVME_QUIRK_IDENTIFY_CNS },
 	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
 	{ 0, }
 };
@@ -3521,8 +3485,9 @@ static int __init nvme_init(void)
 	BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
 	BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);
-	BUILD_BUG_ON(DIV_ROUND_UP(nvme_pci_npages_prp(), NVME_CTRL_PAGE_SIZE) >
-		     S8_MAX);
+	BUILD_BUG_ON(NVME_MAX_SEGS > SGES_PER_PAGE);
+	BUILD_BUG_ON(sizeof(struct scatterlist) * NVME_MAX_SEGS > PAGE_SIZE);
+	BUILD_BUG_ON(nvme_pci_npages_prp() > NVME_MAX_NR_ALLOCATIONS);
 
 	return pci_register_driver(&nvme_driver);
 }
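The rewritten nvme_pci_npages_prp() is what sizes the new NVME_MAX_NR_ALLOCATIONS bound that the added BUILD_BUG_ON() enforces: a worst-case 8192 KB transfer starting at an arbitrary offset inside a controller page needs 2049 PRP entries, and each 4 KB PRP list page carries one chain pointer in its last slot, leaving 511 usable entries, so five list pages always suffice. A minimal stand-alone sketch of the same arithmetic (NVME_CTRL_PAGE_SIZE assumed to be 4096, its value in the mainline headers):

#include <stdio.h>

/* Constants as set by this patch; NVME_CTRL_PAGE_SIZE assumed 4096. */
#define NVME_CTRL_PAGE_SIZE	4096
#define NVME_MAX_KB_SZ		8192
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	/* A maximal transfer may start at any offset within a controller
	 * page, so one extra page worth of PRP entries is budgeted. */
	unsigned max_bytes = (NVME_MAX_KB_SZ * 1024) + NVME_CTRL_PAGE_SIZE;
	unsigned nprps = DIV_ROUND_UP(max_bytes, NVME_CTRL_PAGE_SIZE);
	/* Each PRP entry is 8 bytes; the last slot of every list page is
	 * a chain pointer, leaving NVME_CTRL_PAGE_SIZE/8 - 1 usable. */
	unsigned npages = DIV_ROUND_UP(8 * nprps, NVME_CTRL_PAGE_SIZE - 8);

	printf("nprps = %u, npages = %u\n", nprps, npages);
	/* prints: nprps = 2049, npages = 5 == NVME_MAX_NR_ALLOCATIONS */
	return 0;
}

The companion BUILD_BUG_ON(NVME_MAX_SEGS > SGES_PER_PAGE) plays the same role on the SGL side: with a 4 KB controller page, one descriptor page holds 256 SGL descriptors, so the raised NVME_MAX_SEGS of 128 always fits in a single segment, which is why the segment-chaining loop and nvme_free_sgls() could be deleted outright.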
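The __le32 conversions in the shadow doorbell path are a byte-order fix rather than a refactor: the dbbuf memory is shared with the controller in little-endian format, so on big-endian hosts the old code wrote and compared native-endian values. Whether an MMIO doorbell write is still required is decided by nvme_dbbuf_need_event(), whose signature appears in the hunk context above; a small stand-alone illustration of that u16 wraparound window test (the helper mirrors the driver's implementation, and the sample values are invented for the demonstration):

#include <stdint.h>
#include <stdio.h>

typedef uint16_t u16;

/* The driver's window test: an MMIO write is needed when the event index
 * the controller last advertised lies in (old, new], computed mod 2^16. */
static int nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, u16 old)
{
	return (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old);
}

int main(void)
{
	/* Doorbell moved past the event index: controller needs a kick. */
	printf("%d\n", nvme_dbbuf_need_event(10, 11, 9));	/* 1 */
	/* Event index not reached yet: controller is still polling. */
	printf("%d\n", nvme_dbbuf_need_event(20, 11, 9));	/* 0 */
	/* The u16 casts keep the window correct across 16-bit wraparound. */
	printf("%d\n", nvme_dbbuf_need_event(0xffff, 1, 0xfffe));	/* 1 */
	return 0;
}

On little-endian machines the le32_to_cpu()/cpu_to_le32() pair compiles away, so the patched nvme_dbbuf_update_and_check_event() only changes behaviour where it was previously wrong.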