diff options
Diffstat (limited to 'drivers/infiniband/ulp/srpt/ib_srpt.c')
-rw-r--r-- | drivers/infiniband/ulp/srpt/ib_srpt.c | 1325 |
1 files changed, 433 insertions, 892 deletions
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index bc5470c43d26..25bdaeef2520 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -91,74 +91,32 @@ MODULE_PARM_DESC(srpt_service_guid, " instead of using the node_guid of the first HCA."); static struct ib_client srpt_client; -static void srpt_release_channel(struct srpt_rdma_ch *ch); +static void srpt_release_cmd(struct se_cmd *se_cmd); +static void srpt_free_ch(struct kref *kref); static int srpt_queue_status(struct se_cmd *cmd); +static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc); +static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc); +static void srpt_process_wait_list(struct srpt_rdma_ch *ch); -/** - * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE. - */ -static inline -enum dma_data_direction opposite_dma_dir(enum dma_data_direction dir) -{ - switch (dir) { - case DMA_TO_DEVICE: return DMA_FROM_DEVICE; - case DMA_FROM_DEVICE: return DMA_TO_DEVICE; - default: return dir; - } -} - -/** - * srpt_sdev_name() - Return the name associated with the HCA. - * - * Examples are ib0, ib1, ... - */ -static inline const char *srpt_sdev_name(struct srpt_device *sdev) -{ - return sdev->device->name; -} - -static enum rdma_ch_state srpt_get_ch_state(struct srpt_rdma_ch *ch) -{ - unsigned long flags; - enum rdma_ch_state state; - - spin_lock_irqsave(&ch->spinlock, flags); - state = ch->state; - spin_unlock_irqrestore(&ch->spinlock, flags); - return state; -} - -static enum rdma_ch_state -srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new_state) -{ - unsigned long flags; - enum rdma_ch_state prev; - - spin_lock_irqsave(&ch->spinlock, flags); - prev = ch->state; - ch->state = new_state; - spin_unlock_irqrestore(&ch->spinlock, flags); - return prev; -} - -/** - * srpt_test_and_set_ch_state() - Test and set the channel state. - * - * Returns true if and only if the channel state has been set to the new state. +/* + * The only allowed channel state changes are those that change the channel + * state into a state with a higher numerical value. Hence the new > prev test. */ -static bool -srpt_test_and_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state old, - enum rdma_ch_state new) +static bool srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new) { unsigned long flags; enum rdma_ch_state prev; + bool changed = false; spin_lock_irqsave(&ch->spinlock, flags); prev = ch->state; - if (prev == old) + if (new > prev) { ch->state = new; + changed = true; + } spin_unlock_irqrestore(&ch->spinlock, flags); - return prev == old; + + return changed; } /** @@ -180,7 +138,7 @@ static void srpt_event_handler(struct ib_event_handler *handler, return; pr_debug("ASYNC event= %d on device= %s\n", event->event, - srpt_sdev_name(sdev)); + sdev->device->name); switch (event->event) { case IB_EVENT_PORT_ERR: @@ -218,25 +176,39 @@ static void srpt_srq_event(struct ib_event *event, void *ctx) pr_info("SRQ event %d\n", event->event); } +static const char *get_ch_state_name(enum rdma_ch_state s) +{ + switch (s) { + case CH_CONNECTING: + return "connecting"; + case CH_LIVE: + return "live"; + case CH_DISCONNECTING: + return "disconnecting"; + case CH_DRAINING: + return "draining"; + case CH_DISCONNECTED: + return "disconnected"; + } + return "???"; +} + /** * srpt_qp_event() - QP event callback function. */ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) { pr_debug("QP event %d on cm_id=%p sess_name=%s state=%d\n", - event->event, ch->cm_id, ch->sess_name, srpt_get_ch_state(ch)); + event->event, ch->cm_id, ch->sess_name, ch->state); switch (event->event) { case IB_EVENT_COMM_EST: ib_cm_notify(ch->cm_id, event->event); break; case IB_EVENT_QP_LAST_WQE_REACHED: - if (srpt_test_and_set_ch_state(ch, CH_DRAINING, - CH_RELEASING)) - srpt_release_channel(ch); - else - pr_debug("%s: state %d - ignored LAST_WQE.\n", - ch->sess_name, srpt_get_ch_state(ch)); + pr_debug("%s-%d, state %s: received Last WQE event.\n", + ch->sess_name, ch->qp->qp_num, + get_ch_state_name(ch->state)); break; default: pr_err("received unrecognized IB QP event %d\n", event->event); @@ -279,7 +251,7 @@ static void srpt_get_class_port_info(struct ib_dm_mad *mad) struct ib_class_port_info *cif; cif = (struct ib_class_port_info *)mad->data; - memset(cif, 0, sizeof *cif); + memset(cif, 0, sizeof(*cif)); cif->base_version = 1; cif->class_version = 1; cif->resp_time_value = 20; @@ -338,13 +310,13 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, return; } - memset(iocp, 0, sizeof *iocp); + memset(iocp, 0, sizeof(*iocp)); strcpy(iocp->id_string, SRPT_ID_STRING); iocp->guid = cpu_to_be64(srpt_service_guid); - iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id); - iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id); - iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver); - iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id); + iocp->vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id); + iocp->device_id = cpu_to_be32(sdev->device->attrs.vendor_part_id); + iocp->device_version = cpu_to_be16(sdev->device->attrs.hw_ver); + iocp->subsys_vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id); iocp->subsys_device_id = 0x0; iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS); iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS); @@ -388,7 +360,7 @@ static void srpt_get_svc_entries(u64 ioc_guid, } svc_entries = (struct ib_dm_svc_entries *)mad->data; - memset(svc_entries, 0, sizeof *svc_entries); + memset(svc_entries, 0, sizeof(*svc_entries)); svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid); snprintf(svc_entries->service_entries[0].name, sizeof(svc_entries->service_entries[0].name), @@ -453,6 +425,7 @@ static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent, * srpt_mad_recv_handler() - MAD reception callback function. */ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf, struct ib_mad_recv_wc *mad_wc) { struct srpt_port *sport = (struct srpt_port *)mad_agent->context; @@ -481,7 +454,7 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, rsp->ah = ah; dm_mad = rsp->mad; - memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad); + memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof(*dm_mad)); dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; dm_mad->mad_hdr.status = 0; @@ -529,7 +502,7 @@ static int srpt_refresh_port(struct srpt_port *sport) struct ib_port_attr port_attr; int ret; - memset(&port_modify, 0, sizeof port_modify); + memset(&port_modify, 0, sizeof(port_modify)); port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; port_modify.clr_port_cap_mask = 0; @@ -550,7 +523,7 @@ static int srpt_refresh_port(struct srpt_port *sport) goto err_query_port; if (!sport->mad_agent) { - memset(®_req, 0, sizeof reg_req); + memset(®_req, 0, sizeof(reg_req)); reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT; reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION; set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask); @@ -778,12 +751,12 @@ static int srpt_post_recv(struct srpt_device *sdev, struct ib_recv_wr wr, *bad_wr; BUG_ON(!sdev); - wr.wr_id = encode_wr_id(SRPT_RECV, ioctx->ioctx.index); - list.addr = ioctx->ioctx.dma; list.length = srp_max_req_size; list.lkey = sdev->pd->local_dma_lkey; + ioctx->ioctx.cqe.done = srpt_recv_done; + wr.wr_cqe = &ioctx->ioctx.cqe; wr.next = NULL; wr.sg_list = &list; wr.num_sge = 1; @@ -819,8 +792,9 @@ static int srpt_post_send(struct srpt_rdma_ch *ch, list.length = len; list.lkey = sdev->pd->local_dma_lkey; + ioctx->ioctx.cqe.done = srpt_send_done; wr.next = NULL; - wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index); + wr.wr_cqe = &ioctx->ioctx.cqe; wr.sg_list = &list; wr.num_sge = 1; wr.opcode = IB_WR_SEND; @@ -837,6 +811,39 @@ out: } /** + * srpt_zerolength_write() - Perform a zero-length RDMA write. + * + * A quote from the InfiniBand specification: C9-88: For an HCA responder + * using Reliable Connection service, for each zero-length RDMA READ or WRITE + * request, the R_Key shall not be validated, even if the request includes + * Immediate data. + */ +static int srpt_zerolength_write(struct srpt_rdma_ch *ch) +{ + struct ib_send_wr wr, *bad_wr; + + memset(&wr, 0, sizeof(wr)); + wr.opcode = IB_WR_RDMA_WRITE; + wr.wr_cqe = &ch->zw_cqe; + wr.send_flags = IB_SEND_SIGNALED; + return ib_post_send(ch->qp, &wr, &bad_wr); +} + +static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct srpt_rdma_ch *ch = cq->cq_context; + + if (wc->status == IB_WC_SUCCESS) { + srpt_process_wait_list(ch); + } else { + if (srpt_set_ch_state(ch, CH_DISCONNECTED)) + schedule_work(&ch->release_work); + else + WARN_ONCE("%s-%d\n", ch->sess_name, ch->qp->qp_num); + } +} + +/** * srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request. * @ioctx: Pointer to the I/O context associated with the request. * @srp_cmd: Pointer to the SRP_CMD request data. @@ -899,14 +906,14 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, db = (struct srp_direct_buf *)(srp_cmd->add_data + add_cdb_offset); - memcpy(ioctx->rbufs, db, sizeof *db); + memcpy(ioctx->rbufs, db, sizeof(*db)); *data_len = be32_to_cpu(db->len); } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) || ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) { idb = (struct srp_indirect_buf *)(srp_cmd->add_data + add_cdb_offset); - ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db; + ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof(*db); if (ioctx->n_rbuf > (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) { @@ -925,7 +932,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, ioctx->rbufs = &ioctx->single_rbuf; else { ioctx->rbufs = - kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC); + kmalloc(ioctx->n_rbuf * sizeof(*db), GFP_ATOMIC); if (!ioctx->rbufs) { ioctx->n_rbuf = 0; ret = -ENOMEM; @@ -934,7 +941,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, } db = idb->desc_list; - memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db); + memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof(*db)); *data_len = be32_to_cpu(idb->len); } out: @@ -952,7 +959,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp) struct ib_qp_attr *attr; int ret; - attr = kzalloc(sizeof *attr, GFP_KERNEL); + attr = kzalloc(sizeof(*attr), GFP_KERNEL); if (!attr) return -ENOMEM; @@ -1052,13 +1059,13 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch, BUG_ON(!ch); BUG_ON(!ioctx); - BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius); + BUG_ON(ioctx->n_rdma && !ioctx->rdma_wrs); while (ioctx->n_rdma) - kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge); + kfree(ioctx->rdma_wrs[--ioctx->n_rdma].wr.sg_list); - kfree(ioctx->rdma_ius); - ioctx->rdma_ius = NULL; + kfree(ioctx->rdma_wrs); + ioctx->rdma_wrs = NULL; if (ioctx->mapped_sg_count) { sg = ioctx->sg; @@ -1066,7 +1073,7 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch, dir = ioctx->cmd.data_direction; BUG_ON(dir == DMA_NONE); ib_dma_unmap_sg(ch->sport->sdev->device, sg, ioctx->sg_cnt, - opposite_dma_dir(dir)); + target_reverse_dma_direction(&ioctx->cmd)); ioctx->mapped_sg_count = 0; } } @@ -1082,7 +1089,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, struct scatterlist *sg, *sg_orig; int sg_cnt; enum dma_data_direction dir; - struct rdma_iu *riu; + struct ib_rdma_wr *riu; struct srp_direct_buf *db; dma_addr_t dma_addr; struct ib_sge *sge; @@ -1103,29 +1110,30 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, ioctx->sg_cnt = sg_cnt = cmd->t_data_nents; count = ib_dma_map_sg(ch->sport->sdev->device, sg, sg_cnt, - opposite_dma_dir(dir)); + target_reverse_dma_direction(cmd)); if (unlikely(!count)) return -EAGAIN; ioctx->mapped_sg_count = count; - if (ioctx->rdma_ius && ioctx->n_rdma_ius) - nrdma = ioctx->n_rdma_ius; + if (ioctx->rdma_wrs && ioctx->n_rdma_wrs) + nrdma = ioctx->n_rdma_wrs; else { nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf; - ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu, GFP_KERNEL); - if (!ioctx->rdma_ius) + ioctx->rdma_wrs = kcalloc(nrdma, sizeof(*ioctx->rdma_wrs), + GFP_KERNEL); + if (!ioctx->rdma_wrs) goto free_mem; - ioctx->n_rdma_ius = nrdma; + ioctx->n_rdma_wrs = nrdma; } db = ioctx->rbufs; tsize = cmd->data_length; dma_len = ib_sg_dma_len(dev, &sg[0]); - riu = ioctx->rdma_ius; + riu = ioctx->rdma_wrs; /* * For each remote desc - calculate the #ib_sge. @@ -1139,9 +1147,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) { rsize = be32_to_cpu(db->len); raddr = be64_to_cpu(db->va); - riu->raddr = raddr; + riu->remote_addr = raddr; riu->rkey = be32_to_cpu(db->key); - riu->sge_cnt = 0; + riu->wr.num_sge = 0; /* calculate how many sge required for this remote_buf */ while (rsize > 0 && tsize > 0) { @@ -1165,33 +1173,35 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, rsize = 0; } - ++riu->sge_cnt; + ++riu->wr.num_sge; - if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) { + if (rsize > 0 && + riu->wr.num_sge == SRPT_DEF_SG_PER_WQE) { ++ioctx->n_rdma; - riu->sge = - kmalloc(riu->sge_cnt * sizeof *riu->sge, - GFP_KERNEL); - if (!riu->sge) + riu->wr.sg_list = kmalloc_array(riu->wr.num_sge, + sizeof(*riu->wr.sg_list), + GFP_KERNEL); + if (!riu->wr.sg_list) goto free_mem; ++riu; - riu->sge_cnt = 0; - riu->raddr = raddr; + riu->wr.num_sge = 0; + riu->remote_addr = raddr; riu->rkey = be32_to_cpu(db->key); } } ++ioctx->n_rdma; - riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge, - GFP_KERNEL); - if (!riu->sge) + riu->wr.sg_list = kmalloc_array(riu->wr.num_sge, + sizeof(*riu->wr.sg_list), + GFP_KERNEL); + if (!riu->wr.sg_list) goto free_mem; } db = ioctx->rbufs; tsize = cmd->data_length; - riu = ioctx->rdma_ius; + riu = ioctx->rdma_wrs; sg = sg_orig; dma_len = ib_sg_dma_len(dev, &sg[0]); dma_addr = ib_sg_dma_address(dev, &sg[0]); @@ -1200,7 +1210,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, for (i = 0, j = 0; j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) { rsize = be32_to_cpu(db->len); - sge = riu->sge; + sge = riu->wr.sg_list; k = 0; while (rsize > 0 && tsize > 0) { @@ -1232,9 +1242,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, } ++k; - if (k == riu->sge_cnt && rsize > 0 && tsize > 0) { + if (k == riu->wr.num_sge && rsize > 0 && tsize > 0) { ++riu; - sge = riu->sge; + sge = riu->wr.sg_list; k = 0; } else if (rsize > 0 && tsize > 0) ++sge; @@ -1277,8 +1287,8 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch) ioctx->n_rbuf = 0; ioctx->rbufs = NULL; ioctx->n_rdma = 0; - ioctx->n_rdma_ius = 0; - ioctx->rdma_ius = NULL; + ioctx->n_rdma_wrs = 0; + ioctx->rdma_wrs = NULL; ioctx->mapped_sg_count = 0; init_completion(&ioctx->tx_done); ioctx->queue_status_only = false; @@ -1306,10 +1316,7 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) /* * If the command is in a state where the target core is waiting for - * the ib_srpt driver, change the state to the next state. Changing - * the state of the command from SRPT_STATE_NEED_DATA to - * SRPT_STATE_DATA_IN ensures that srpt_xmit_response() will call this - * function a second time. + * the ib_srpt driver, change the state to the next state. */ spin_lock_irqsave(&ioctx->spinlock, flags); @@ -1318,25 +1325,17 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) case SRPT_STATE_NEED_DATA: ioctx->state = SRPT_STATE_DATA_IN; break; - case SRPT_STATE_DATA_IN: case SRPT_STATE_CMD_RSP_SENT: case SRPT_STATE_MGMT_RSP_SENT: ioctx->state = SRPT_STATE_DONE; break; default: + WARN_ONCE(true, "%s: unexpected I/O context state %d\n", + __func__, state); break; } spin_unlock_irqrestore(&ioctx->spinlock, flags); - if (state == SRPT_STATE_DONE) { - struct srpt_rdma_ch *ch = ioctx->ch; - - BUG_ON(ch->sess == NULL); - - target_put_sess_cmd(&ioctx->cmd); - goto out; - } - pr_debug("Aborting cmd with state %d and tag %lld\n", state, ioctx->cmd.tag); @@ -1344,19 +1343,16 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) case SRPT_STATE_NEW: case SRPT_STATE_DATA_IN: case SRPT_STATE_MGMT: + case SRPT_STATE_DONE: /* * Do nothing - defer abort processing until * srpt_queue_response() is invoked. */ - WARN_ON(!transport_check_aborted_status(&ioctx->cmd, false)); break; case SRPT_STATE_NEED_DATA: - /* DMA_TO_DEVICE (write) - RDMA read error. */ - - /* XXX(hch): this is a horrible layering violation.. */ - spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags); - ioctx->cmd.transport_state &= ~CMD_T_ACTIVE; - spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags); + pr_debug("tag %#llx: RDMA read error\n", ioctx->cmd.tag); + transport_generic_request_failure(&ioctx->cmd, + TCM_CHECK_CONDITION_ABORT_CMD); break; case SRPT_STATE_CMD_RSP_SENT: /* @@ -1364,134 +1360,63 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) * not been received in time. */ srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); - target_put_sess_cmd(&ioctx->cmd); + transport_generic_free_cmd(&ioctx->cmd, 0); break; case SRPT_STATE_MGMT_RSP_SENT: - srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - target_put_sess_cmd(&ioctx->cmd); + transport_generic_free_cmd(&ioctx->cmd, 0); break; default: WARN(1, "Unexpected command state (%d)", state); break; } -out: return state; } /** - * srpt_handle_send_err_comp() - Process an IB_WC_SEND error completion. - */ -static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id) -{ - struct srpt_send_ioctx *ioctx; - enum srpt_command_state state; - u32 index; - - atomic_inc(&ch->sq_wr_avail); - - index = idx_from_wr_id(wr_id); - ioctx = ch->ioctx_ring[index]; - state = srpt_get_cmd_state(ioctx); - - WARN_ON(state != SRPT_STATE_CMD_RSP_SENT - && state != SRPT_STATE_MGMT_RSP_SENT - && state != SRPT_STATE_NEED_DATA - && state != SRPT_STATE_DONE); - - /* If SRP_RSP sending failed, undo the ch->req_lim change. */ - if (state == SRPT_STATE_CMD_RSP_SENT - || state == SRPT_STATE_MGMT_RSP_SENT) - atomic_dec(&ch->req_lim); - - srpt_abort_cmd(ioctx); -} - -/** - * srpt_handle_send_comp() - Process an IB send completion notification. - */ -static void srpt_handle_send_comp(struct srpt_rdma_ch *ch, - struct srpt_send_ioctx *ioctx) -{ - enum srpt_command_state state; - - atomic_inc(&ch->sq_wr_avail); - - state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - - if (WARN_ON(state != SRPT_STATE_CMD_RSP_SENT - && state != SRPT_STATE_MGMT_RSP_SENT - && state != SRPT_STATE_DONE)) - pr_debug("state = %d\n", state); - - if (state != SRPT_STATE_DONE) { - srpt_unmap_sg_to_ib_sge(ch, ioctx); - transport_generic_free_cmd(&ioctx->cmd, 0); - } else { - pr_err("IB completion has been received too late for" - " wr_id = %u.\n", ioctx->ioctx.index); - } -} - -/** - * srpt_handle_rdma_comp() - Process an IB RDMA completion notification. - * * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping * the data that has been transferred via IB RDMA had to be postponed until the * check_stop_free() callback. None of this is necessary anymore and needs to * be cleaned up. */ -static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch, - struct srpt_send_ioctx *ioctx, - enum srpt_opcode opcode) +static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc) { + struct srpt_rdma_ch *ch = cq->cq_context; + struct srpt_send_ioctx *ioctx = + container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe); + WARN_ON(ioctx->n_rdma <= 0); atomic_add(ioctx->n_rdma, &ch->sq_wr_avail); - if (opcode == SRPT_RDMA_READ_LAST) { - if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA, - SRPT_STATE_DATA_IN)) - target_execute_cmd(&ioctx->cmd); - else - pr_err("%s[%d]: wrong state = %d\n", __func__, - __LINE__, srpt_get_cmd_state(ioctx)); - } else if (opcode == SRPT_RDMA_ABORT) { - ioctx->rdma_aborted = true; - } else { - WARN(true, "unexpected opcode %d\n", opcode); + if (unlikely(wc->status != IB_WC_SUCCESS)) { + pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n", + ioctx, wc->status); + srpt_abort_cmd(ioctx); + return; } + + if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA, + SRPT_STATE_DATA_IN)) + target_execute_cmd(&ioctx->cmd); + else + pr_err("%s[%d]: wrong state = %d\n", __func__, + __LINE__, srpt_get_cmd_state(ioctx)); } -/** - * srpt_handle_rdma_err_comp() - Process an IB RDMA error completion. - */ -static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch, - struct srpt_send_ioctx *ioctx, - enum srpt_opcode opcode) +static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) { - enum srpt_command_state state; + struct srpt_send_ioctx *ioctx = + container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe); - state = srpt_get_cmd_state(ioctx); - switch (opcode) { - case SRPT_RDMA_READ_LAST: - if (ioctx->n_rdma <= 0) { - pr_err("Received invalid RDMA read" - " error completion with idx %d\n", - ioctx->ioctx.index); - break; - } - atomic_add(ioctx->n_rdma, &ch->sq_wr_avail); - if (state == SRPT_STATE_NEED_DATA) - srpt_abort_cmd(ioctx); - else - pr_err("%s[%d]: wrong state = %d\n", - __func__, __LINE__, state); - break; - case SRPT_RDMA_WRITE_LAST: - break; - default: - pr_err("%s[%d]: opcode = %u\n", __func__, __LINE__, opcode); - break; + if (unlikely(wc->status != IB_WC_SUCCESS)) { + /* + * Note: if an RDMA write error completion is received that + * means that a SEND also has been posted. Defer further + * processing of the associated command until the send error + * completion has been received. + */ + pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n", + ioctx, wc->status); } } @@ -1531,7 +1456,7 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, sense_data_len = ioctx->cmd.scsi_sense_length; WARN_ON(sense_data_len > sizeof(ioctx->sense_data)); - memset(srp_rsp, 0, sizeof *srp_rsp); + memset(srp_rsp, 0, sizeof(*srp_rsp)); srp_rsp->opcode = SRP_RSP; srp_rsp->req_lim_delta = cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); @@ -1581,7 +1506,7 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, srp_rsp = ioctx->ioctx.buf; BUG_ON(!srp_rsp); - memset(srp_rsp, 0, sizeof *srp_rsp); + memset(srp_rsp, 0, sizeof(*srp_rsp)); srp_rsp->opcode = SRP_RSP; srp_rsp->req_lim_delta = @@ -1595,80 +1520,6 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, return resp_len; } -#define NO_SUCH_LUN ((uint64_t)-1LL) - -/* - * SCSI LUN addressing method. See also SAM-2 and the section about - * eight byte LUNs. - */ -enum scsi_lun_addr_method { - SCSI_LUN_ADDR_METHOD_PERIPHERAL = 0, - SCSI_LUN_ADDR_METHOD_FLAT = 1, - SCSI_LUN_ADDR_METHOD_LUN = 2, - SCSI_LUN_ADDR_METHOD_EXTENDED_LUN = 3, -}; - -/* - * srpt_unpack_lun() - Convert from network LUN to linear LUN. - * - * Convert an 2-byte, 4-byte, 6-byte or 8-byte LUN structure in network byte - * order (big endian) to a linear LUN. Supports three LUN addressing methods: - * peripheral, flat and logical unit. See also SAM-2, section 4.9.4 (page 40). - */ -static uint64_t srpt_unpack_lun(const uint8_t *lun, int len) -{ - uint64_t res = NO_SUCH_LUN; - int addressing_method; - - if (unlikely(len < 2)) { - pr_err("Illegal LUN length %d, expected 2 bytes or more\n", - len); - goto out; - } - - switch (len) { - case 8: - if ((*((__be64 *)lun) & - cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0) - goto out_err; - break; - case 4: - if (*((__be16 *)&lun[2]) != 0) - goto out_err; - break; - case 6: - if (*((__be32 *)&lun[2]) != 0) - goto out_err; - break; - case 2: - break; - default: - goto out_err; - } - - addressing_method = (*lun) >> 6; /* highest two bits of byte 0 */ - switch (addressing_method) { - case SCSI_LUN_ADDR_METHOD_PERIPHERAL: - case SCSI_LUN_ADDR_METHOD_FLAT: - case SCSI_LUN_ADDR_METHOD_LUN: - res = *(lun + 1) | (((*lun) & 0x3f) << 8); - break; - - case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN: - default: - pr_err("Unimplemented LUN addressing method %u\n", - addressing_method); - break; - } - -out: - return res; - -out_err: - pr_err("Support for multi-level LUNs has not yet been implemented\n"); - goto out; -} - static int srpt_check_stop_free(struct se_cmd *cmd) { struct srpt_send_ioctx *ioctx = container_of(cmd, @@ -1680,16 +1531,14 @@ static int srpt_check_stop_free(struct se_cmd *cmd) /** * srpt_handle_cmd() - Process SRP_CMD. */ -static int srpt_handle_cmd(struct srpt_rdma_ch *ch, - struct srpt_recv_ioctx *recv_ioctx, - struct srpt_send_ioctx *send_ioctx) +static void srpt_handle_cmd(struct srpt_rdma_ch *ch, + struct srpt_recv_ioctx *recv_ioctx, + struct srpt_send_ioctx *send_ioctx) { struct se_cmd *cmd; struct srp_cmd *srp_cmd; - uint64_t unpacked_lun; u64 data_len; enum dma_data_direction dir; - sense_reason_t ret; int rc; BUG_ON(!send_ioctx); @@ -1717,65 +1566,23 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch, if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) { pr_err("0x%llx: parsing SRP descriptor table failed.\n", srp_cmd->tag); - ret = TCM_INVALID_CDB_FIELD; - goto send_sense; + goto release_ioctx; } - unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_cmd->lun, - sizeof(srp_cmd->lun)); rc = target_submit_cmd(cmd, ch->sess, srp_cmd->cdb, - &send_ioctx->sense_data[0], unpacked_lun, data_len, - TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF); + &send_ioctx->sense_data[0], + scsilun_to_int(&srp_cmd->lun), data_len, + TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF); if (rc != 0) { - ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - goto send_sense; + pr_debug("target_submit_cmd() returned %d for tag %#llx\n", rc, + srp_cmd->tag); + goto release_ioctx; } - return 0; - -send_sense: - transport_send_check_condition_and_sense(cmd, ret, 0); - return -1; -} - -/** - * srpt_rx_mgmt_fn_tag() - Process a task management function by tag. - * @ch: RDMA channel of the task management request. - * @fn: Task management function to perform. - * @req_tag: Tag of the SRP task management request. - * @mgmt_ioctx: I/O context of the task management request. - * - * Returns zero if the target core will process the task management - * request asynchronously. - * - * Note: It is assumed that the initiator serializes tag-based task management - * requests. - */ -static int srpt_rx_mgmt_fn_tag(struct srpt_send_ioctx *ioctx, u64 tag) -{ - struct srpt_device *sdev; - struct srpt_rdma_ch *ch; - struct srpt_send_ioctx *target; - int ret, i; + return; - ret = -EINVAL; - ch = ioctx->ch; - BUG_ON(!ch); - BUG_ON(!ch->sport); - sdev = ch->sport->sdev; - BUG_ON(!sdev); - spin_lock_irq(&sdev->spinlock); - for (i = 0; i < ch->rq_size; ++i) { - target = ch->ioctx_ring[i]; - if (target->cmd.se_lun == ioctx->cmd.se_lun && - target->cmd.tag == tag && - srpt_get_cmd_state(target) != SRPT_STATE_DONE) { - ret = 0; - /* now let the target core abort &target->cmd; */ - break; - } - } - spin_unlock_irq(&sdev->spinlock); - return ret; +release_ioctx: + send_ioctx->state = SRPT_STATE_DONE; + srpt_release_cmd(cmd); } static int srp_tmr_to_tcm(int fn) @@ -1811,8 +1618,6 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, struct srp_tsk_mgmt *srp_tsk; struct se_cmd *cmd; struct se_session *sess = ch->sess; - uint64_t unpacked_lun; - uint32_t tag = 0; int tcm_tmr; int rc; @@ -1828,26 +1633,10 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT); send_ioctx->cmd.tag = srp_tsk->tag; tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func); - if (tcm_tmr < 0) { - send_ioctx->cmd.se_tmr_req->response = - TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED; - goto fail; - } - unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_tsk->lun, - sizeof(srp_tsk->lun)); - - if (srp_tsk->tsk_mgmt_func == SRP_TSK_ABORT_TASK) { - rc = srpt_rx_mgmt_fn_tag(send_ioctx, srp_tsk->task_tag); - if (rc < 0) { - send_ioctx->cmd.se_tmr_req->response = - TMR_TASK_DOES_NOT_EXIST; - goto fail; - } - tag = srp_tsk->task_tag; - } - rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL, unpacked_lun, - srp_tsk, tcm_tmr, GFP_KERNEL, tag, - TARGET_SCF_ACK_KREF); + rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL, + scsilun_to_int(&srp_tsk->lun), srp_tsk, tcm_tmr, + GFP_KERNEL, srp_tsk->task_tag, + TARGET_SCF_ACK_KREF); if (rc != 0) { send_ioctx->cmd.se_tmr_req->response = TMR_FUNCTION_REJECTED; goto fail; @@ -1867,7 +1656,6 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *send_ioctx) { struct srp_cmd *srp_cmd; - enum rdma_ch_state ch_state; BUG_ON(!ch); BUG_ON(!recv_ioctx); @@ -1876,13 +1664,12 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch, recv_ioctx->ioctx.dma, srp_max_req_size, DMA_FROM_DEVICE); - ch_state = srpt_get_ch_state(ch); - if (unlikely(ch_state == CH_CONNECTING)) { + if (unlikely(ch->state == CH_CONNECTING)) { list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list); goto out; } - if (unlikely(ch_state != CH_LIVE)) + if (unlikely(ch->state != CH_LIVE)) goto out; srp_cmd = recv_ioctx->ioctx.buf; @@ -1926,32 +1713,48 @@ out: return; } -static void srpt_process_rcv_completion(struct ib_cq *cq, - struct srpt_rdma_ch *ch, - struct ib_wc *wc) +static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc) { - struct srpt_device *sdev = ch->sport->sdev; - struct srpt_recv_ioctx *ioctx; - u32 index; + struct srpt_rdma_ch *ch = cq->cq_context; + struct srpt_recv_ioctx *ioctx = + container_of(wc->wr_cqe, struct srpt_recv_ioctx, ioctx.cqe); - index = idx_from_wr_id(wc->wr_id); if (wc->status == IB_WC_SUCCESS) { int req_lim; req_lim = atomic_dec_return(&ch->req_lim); if (unlikely(req_lim < 0)) pr_err("req_lim = %d < 0\n", req_lim); - ioctx = sdev->ioctx_ring[index]; srpt_handle_new_iu(ch, ioctx, NULL); } else { - pr_info("receiving failed for idx %u with status %d\n", - index, wc->status); + pr_info("receiving failed for ioctx %p with status %d\n", + ioctx, wc->status); + } +} + +/* + * This function must be called from the context in which RDMA completions are + * processed because it accesses the wait list without protection against + * access from other threads. + */ +static void srpt_process_wait_list(struct srpt_rdma_ch *ch) +{ + struct srpt_send_ioctx *ioctx; + + while (!list_empty(&ch->cmd_wait_list) && + ch->state >= CH_LIVE && + (ioctx = srpt_get_send_ioctx(ch)) != NULL) { + struct srpt_recv_ioctx *recv_ioctx; + + recv_ioctx = list_first_entry(&ch->cmd_wait_list, + struct srpt_recv_ioctx, + wait_list); + list_del(&recv_ioctx->wait_list); + srpt_handle_new_iu(ch, recv_ioctx, ioctx); } } /** - * srpt_process_send_completion() - Process an IB send completion. - * * Note: Although this has not yet been observed during tests, at least in * theory it is possible that the srpt_get_send_ioctx() call invoked by * srpt_handle_new_iu() fails. This is possible because the req_lim_delta @@ -1964,106 +1767,33 @@ static void srpt_process_rcv_completion(struct ib_cq *cq, * are queued on cmd_wait_list. The code below processes these delayed * requests one at a time. */ -static void srpt_process_send_completion(struct ib_cq *cq, - struct srpt_rdma_ch *ch, - struct ib_wc *wc) +static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc) { - struct srpt_send_ioctx *send_ioctx; - uint32_t index; - enum srpt_opcode opcode; - - index = idx_from_wr_id(wc->wr_id); - opcode = opcode_from_wr_id(wc->wr_id); - send_ioctx = ch->ioctx_ring[index]; - if (wc->status == IB_WC_SUCCESS) { - if (opcode == SRPT_SEND) - srpt_handle_send_comp(ch, send_ioctx); - else { - WARN_ON(opcode != SRPT_RDMA_ABORT && - wc->opcode != IB_WC_RDMA_READ); - srpt_handle_rdma_comp(ch, send_ioctx, opcode); - } - } else { - if (opcode == SRPT_SEND) { - pr_info("sending response for idx %u failed" - " with status %d\n", index, wc->status); - srpt_handle_send_err_comp(ch, wc->wr_id); - } else if (opcode != SRPT_RDMA_MID) { - pr_info("RDMA t %d for idx %u failed with" - " status %d\n", opcode, index, wc->status); - srpt_handle_rdma_err_comp(ch, send_ioctx, opcode); - } - } + struct srpt_rdma_ch *ch = cq->cq_context; + struct srpt_send_ioctx *ioctx = + container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe); + enum srpt_command_state state; - while (unlikely(opcode == SRPT_SEND - && !list_empty(&ch->cmd_wait_list) - && srpt_get_ch_state(ch) == CH_LIVE - && (send_ioctx = srpt_get_send_ioctx(ch)) != NULL)) { - struct srpt_recv_ioctx *recv_ioctx; + state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - recv_ioctx = list_first_entry(&ch->cmd_wait_list, - struct srpt_recv_ioctx, - wait_list); - list_del(&recv_ioctx->wait_list); - srpt_handle_new_iu(ch, recv_ioctx, send_ioctx); - } -} + WARN_ON(state != SRPT_STATE_CMD_RSP_SENT && + state != SRPT_STATE_MGMT_RSP_SENT); -static void srpt_process_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch) -{ - struct ib_wc *const wc = ch->wc; - int i, n; + atomic_inc(&ch->sq_wr_avail); - WARN_ON(cq != ch->cq); + if (wc->status != IB_WC_SUCCESS) + pr_info("sending response for ioctx 0x%p failed" + " with status %d\n", ioctx, wc->status); - ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) { - for (i = 0; i < n; i++) { - if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV) - srpt_process_rcv_completion(cq, ch, &wc[i]); - else - srpt_process_send_completion(cq, ch, &wc[i]); - } + if (state != SRPT_STATE_DONE) { + srpt_unmap_sg_to_ib_sge(ch, ioctx); + transport_generic_free_cmd(&ioctx->cmd, 0); + } else { + pr_err("IB completion has been received too late for" + " wr_id = %u.\n", ioctx->ioctx.index); } -} -/** - * srpt_completion() - IB completion queue callback function. - * - * Notes: - * - It is guaranteed that a completion handler will never be invoked - * concurrently on two different CPUs for the same completion queue. See also - * Documentation/infiniband/core_locking.txt and the implementation of - * handle_edge_irq() in kernel/irq/chip.c. - * - When threaded IRQs are enabled, completion handlers are invoked in thread - * context instead of interrupt context. - */ -static void srpt_completion(struct ib_cq *cq, void *ctx) -{ - struct srpt_rdma_ch *ch = ctx; - - wake_up_interruptible(&ch->wait_queue); -} - -static int srpt_compl_thread(void *arg) -{ - struct srpt_rdma_ch *ch; - - /* Hibernation / freezing of the SRPT kernel thread is not supported. */ - current->flags |= PF_NOFREEZE; - - ch = arg; - BUG_ON(!ch); - pr_info("Session %s: kernel thread %s (PID %d) started\n", - ch->sess_name, ch->thread->comm, current->pid); - while (!kthread_should_stop()) { - wait_event_interruptible(ch->wait_queue, - (srpt_process_completion(ch->cq, ch), - kthread_should_stop())); - } - pr_info("Session %s: kernel thread %s (PID %d) stopped\n", - ch->sess_name, ch->thread->comm, current->pid); - return 0; + srpt_process_wait_list(ch); } /** @@ -2075,20 +1805,18 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) struct srpt_port *sport = ch->sport; struct srpt_device *sdev = sport->sdev; u32 srp_sq_size = sport->port_attrib.srp_sq_size; - struct ib_cq_init_attr cq_attr = {}; int ret; WARN_ON(ch->rq_size < 1); ret = -ENOMEM; - qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL); + qp_init = kzalloc(sizeof(*qp_init), GFP_KERNEL); if (!qp_init) goto out; retry: - cq_attr.cqe = ch->rq_size + srp_sq_size; - ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, - &cq_attr); + ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + srp_sq_size, + 0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE); if (IS_ERR(ch->cq)) { ret = PTR_ERR(ch->cq); pr_err("failed to create CQ cqe= %d ret= %d\n", @@ -2131,18 +1859,6 @@ retry: if (ret) goto err_destroy_qp; - init_waitqueue_head(&ch->wait_queue); - - pr_debug("creating thread for session %s\n", ch->sess_name); - - ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl"); - if (IS_ERR(ch->thread)) { - pr_err("failed to create kernel thread %ld\n", - PTR_ERR(ch->thread)); - ch->thread = NULL; - goto err_destroy_qp; - } - out: kfree(qp_init); return ret; @@ -2150,182 +1866,113 @@ out: err_destroy_qp: ib_destroy_qp(ch->qp); err_destroy_cq: - ib_destroy_cq(ch->cq); + ib_free_cq(ch->cq); goto out; } static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch) { - if (ch->thread) - kthread_stop(ch->thread); - ib_destroy_qp(ch->qp); - ib_destroy_cq(ch->cq); + ib_free_cq(ch->cq); } /** - * __srpt_close_ch() - Close an RDMA channel by setting the QP error state. + * srpt_close_ch() - Close an RDMA channel. * - * Reset the QP and make sure all resources associated with the channel will - * be deallocated at an appropriate time. + * Make sure all resources associated with the channel will be deallocated at + * an appropriate time. * - * Note: The caller must hold ch->sport->sdev->spinlock. + * Returns true if and only if the channel state has been modified into + * CH_DRAINING. */ -static void __srpt_close_ch(struct srpt_rdma_ch *ch) +static bool srpt_close_ch(struct srpt_rdma_ch *ch) { - enum rdma_ch_state prev_state; - unsigned long flags; + int ret; - spin_lock_irqsave(&ch->spinlock, flags); - prev_state = ch->state; - switch (prev_state) { - case CH_CONNECTING: - case CH_LIVE: - ch->state = CH_DISCONNECTING; - break; - default: - break; + if (!srpt_set_ch_state(ch, CH_DRAINING)) { + pr_debug("%s-%d: already closed\n", ch->sess_name, + ch->qp->qp_num); + return false; } - spin_unlock_irqrestore(&ch->spinlock, flags); - switch (prev_state) { - case CH_CONNECTING: - ib_send_cm_rej(ch->cm_id, IB_CM_REJ_NO_RESOURCES, NULL, 0, - NULL, 0); - /* fall through */ - case CH_LIVE: - if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0) - pr_err("sending CM DREQ failed.\n"); - break; - case CH_DISCONNECTING: - break; - case CH_DRAINING: - case CH_RELEASING: - break; - } -} + kref_get(&ch->kref); -/** - * srpt_close_ch() - Close an RDMA channel. - */ -static void srpt_close_ch(struct srpt_rdma_ch *ch) -{ - struct srpt_device *sdev; - - sdev = ch->sport->sdev; - spin_lock_irq(&sdev->spinlock); - __srpt_close_ch(ch); - spin_unlock_irq(&sdev->spinlock); -} - -/** - * srpt_shutdown_session() - Whether or not a session may be shut down. - */ -static int srpt_shutdown_session(struct se_session *se_sess) -{ - struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr; - unsigned long flags; + ret = srpt_ch_qp_err(ch); + if (ret < 0) + pr_err("%s-%d: changing queue pair into error state failed: %d\n", + ch->sess_name, ch->qp->qp_num, ret); - spin_lock_irqsave(&ch->spinlock, flags); - if (ch->in_shutdown) { - spin_unlock_irqrestore(&ch->spinlock, flags); - return true; + pr_debug("%s-%d: queued zerolength write\n", ch->sess_name, + ch->qp->qp_num); + ret = srpt_zerolength_write(ch); + if (ret < 0) { + pr_err("%s-%d: queuing zero-length write failed: %d\n", + ch->sess_name, ch->qp->qp_num, ret); + if (srpt_set_ch_state(ch, CH_DISCONNECTED)) + schedule_work(&ch->release_work); + else + WARN_ON_ONCE(true); } - ch->in_shutdown = true; - target_sess_cmd_list_set_waiting(se_sess); - spin_unlock_irqrestore(&ch->spinlock, flags); + kref_put(&ch->kref, srpt_free_ch); return true; } -/** - * srpt_drain_channel() - Drain a channel by resetting the IB queue pair. - * @cm_id: Pointer to the CM ID of the channel to be drained. - * - * Note: Must be called from inside srpt_cm_handler to avoid a race between - * accessing sdev->spinlock and the call to kfree(sdev) in srpt_remove_one() - * (the caller of srpt_cm_handler holds the cm_id spinlock; srpt_remove_one() - * waits until all target sessions for the associated IB device have been - * unregistered and target session registration involves a call to - * ib_destroy_cm_id(), which locks the cm_id spinlock and hence waits until - * this function has finished). +/* + * Change the channel state into CH_DISCONNECTING. If a channel has not yet + * reached the connected state, close it. If a channel is in the connected + * state, send a DREQ. If a DREQ has been received, send a DREP. Note: it is + * the responsibility of the caller to ensure that this function is not + * invoked concurrently with the code that accepts a connection. This means + * that this function must either be invoked from inside a CM callback + * function or that it must be invoked with the srpt_port.mutex held. */ -static void srpt_drain_channel(struct ib_cm_id *cm_id) +static int srpt_disconnect_ch(struct srpt_rdma_ch *ch) { - struct srpt_device *sdev; - struct srpt_rdma_ch *ch; int ret; - bool do_reset = false; - WARN_ON_ONCE(irqs_disabled()); + if (!srpt_set_ch_state(ch, CH_DISCONNECTING)) + return -ENOTCONN; - sdev = cm_id->context; - BUG_ON(!sdev); - spin_lock_irq(&sdev->spinlock); - list_for_each_entry(ch, &sdev->rch_list, list) { - if (ch->cm_id == cm_id) { - do_reset = srpt_test_and_set_ch_state(ch, - CH_CONNECTING, CH_DRAINING) || - srpt_test_and_set_ch_state(ch, - CH_LIVE, CH_DRAINING) || - srpt_test_and_set_ch_state(ch, - CH_DISCONNECTING, CH_DRAINING); - break; - } - } - spin_unlock_irq(&sdev->spinlock); + ret = ib_send_cm_dreq(ch->cm_id, NULL, 0); + if (ret < 0) + ret = ib_send_cm_drep(ch->cm_id, NULL, 0); - if (do_reset) { - if (ch->sess) - srpt_shutdown_session(ch->sess); + if (ret < 0 && srpt_close_ch(ch)) + ret = 0; - ret = srpt_ch_qp_err(ch); - if (ret < 0) - pr_err("Setting queue pair in error state" - " failed: %d\n", ret); - } + return ret; } -/** - * srpt_find_channel() - Look up an RDMA channel. - * @cm_id: Pointer to the CM ID of the channel to be looked up. - * - * Return NULL if no matching RDMA channel has been found. - */ -static struct srpt_rdma_ch *srpt_find_channel(struct srpt_device *sdev, - struct ib_cm_id *cm_id) +static void __srpt_close_all_ch(struct srpt_device *sdev) { struct srpt_rdma_ch *ch; - bool found; - WARN_ON_ONCE(irqs_disabled()); - BUG_ON(!sdev); + lockdep_assert_held(&sdev->mutex); - found = false; - spin_lock_irq(&sdev->spinlock); list_for_each_entry(ch, &sdev->rch_list, list) { - if (ch->cm_id == cm_id) { - found = true; - break; - } + if (srpt_disconnect_ch(ch) >= 0) + pr_info("Closing channel %s-%d because target %s has been disabled\n", + ch->sess_name, ch->qp->qp_num, + sdev->device->name); + srpt_close_ch(ch); } - spin_unlock_irq(&sdev->spinlock); - - return found ? ch : NULL; } /** - * srpt_release_channel() - Release channel resources. - * - * Schedules the actual release because: - * - Calling the ib_destroy_cm_id() call from inside an IB CM callback would - * trigger a deadlock. - * - It is not safe to call TCM transport_* functions from interrupt context. + * srpt_shutdown_session() - Whether or not a session may be shut down. */ -static void srpt_release_channel(struct srpt_rdma_ch *ch) +static int srpt_shutdown_session(struct se_session *se_sess) +{ + return 1; +} + +static void srpt_free_ch(struct kref *kref) { - schedule_work(&ch->release_work); + struct srpt_rdma_ch *ch = container_of(kref, struct srpt_rdma_ch, kref); + + kfree(ch); } static void srpt_release_channel_work(struct work_struct *w) @@ -2335,8 +1982,8 @@ static void srpt_release_channel_work(struct work_struct *w) struct se_session *se_sess; ch = container_of(w, struct srpt_rdma_ch, release_work); - pr_debug("ch = %p; ch->sess = %p; release_done = %p\n", ch, ch->sess, - ch->release_done); + pr_debug("%s: %s-%d; release_done = %p\n", __func__, ch->sess_name, + ch->qp->qp_num, ch->release_done); sdev = ch->sport->sdev; BUG_ON(!sdev); @@ -2344,6 +1991,7 @@ static void srpt_release_channel_work(struct work_struct *w) se_sess = ch->sess; BUG_ON(!se_sess); + target_sess_cmd_list_set_waiting(se_sess); target_wait_for_sess_cmds(se_sess); transport_deregister_session_configfs(se_sess); @@ -2358,16 +2006,15 @@ static void srpt_release_channel_work(struct work_struct *w) ch->sport->sdev, ch->rq_size, ch->rsp_size, DMA_TO_DEVICE); - spin_lock_irq(&sdev->spinlock); - list_del(&ch->list); - spin_unlock_irq(&sdev->spinlock); - + mutex_lock(&sdev->mutex); + list_del_init(&ch->list); if (ch->release_done) complete(ch->release_done); + mutex_unlock(&sdev->mutex); wake_up(&sdev->ch_releaseQ); - kfree(ch); + kref_put(&ch->kref, srpt_free_ch); } /** @@ -2413,9 +2060,9 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]), be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8])); - rsp = kzalloc(sizeof *rsp, GFP_KERNEL); - rej = kzalloc(sizeof *rej, GFP_KERNEL); - rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL); + rsp = kzalloc(sizeof(*rsp), GFP_KERNEL); + rej = kzalloc(sizeof(*rej), GFP_KERNEL); + rep_param = kzalloc(sizeof(*rep_param), GFP_KERNEL); if (!rsp || !rej || !rep_param) { ret = -ENOMEM; @@ -2444,7 +2091,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) { rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN; - spin_lock_irq(&sdev->spinlock); + mutex_lock(&sdev->mutex); list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) { if (!memcmp(ch->i_port_id, req->initiator_port_id, 16) @@ -2452,26 +2099,16 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, && param->port == ch->sport->port && param->listen_id == ch->sport->sdev->cm_id && ch->cm_id) { - enum rdma_ch_state ch_state; - - ch_state = srpt_get_ch_state(ch); - if (ch_state != CH_CONNECTING - && ch_state != CH_LIVE) + if (srpt_disconnect_ch(ch) < 0) continue; - - /* found an existing channel */ - pr_debug("Found existing channel %s" - " cm_id= %p state= %d\n", - ch->sess_name, ch->cm_id, ch_state); - - __srpt_close_ch(ch); - + pr_info("Relogin - closed existing channel %s\n", + ch->sess_name); rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_TERMINATED; } } - spin_unlock_irq(&sdev->spinlock); + mutex_unlock(&sdev->mutex); } else rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED; @@ -2487,7 +2124,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, goto reject; } - ch = kzalloc(sizeof *ch, GFP_KERNEL); + ch = kzalloc(sizeof(*ch), GFP_KERNEL); if (!ch) { rej->reason = cpu_to_be32( SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); @@ -2496,11 +2133,14 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, goto reject; } + kref_init(&ch->kref); + ch->zw_cqe.done = srpt_zerolength_write_done; INIT_WORK(&ch->release_work, srpt_release_channel_work); memcpy(ch->i_port_id, req->initiator_port_id, 16); memcpy(ch->t_port_id, req->target_port_id, 16); ch->sport = &sdev->port[param->port - 1]; ch->cm_id = cm_id; + cm_id->context = ch; /* * Avoid QUEUE_FULL conditions by limiting the number of buffers used * for the SRP protocol to the command queue size. @@ -2600,7 +2240,7 @@ try_again: /* create cm reply */ rep_param->qp_num = ch->qp->qp_num; rep_param->private_data = (void *)rsp; - rep_param->private_data_len = sizeof *rsp; + rep_param->private_data_len = sizeof(*rsp); rep_param->rnr_retry_count = 7; rep_param->flow_control = 1; rep_param->failover_accepted = 0; @@ -2615,14 +2255,14 @@ try_again: goto release_channel; } - spin_lock_irq(&sdev->spinlock); + mutex_lock(&sdev->mutex); list_add_tail(&ch->list, &sdev->rch_list); - spin_unlock_irq(&sdev->spinlock); + mutex_unlock(&sdev->mutex); goto out; release_channel: - srpt_set_ch_state(ch, CH_RELEASING); + srpt_disconnect_ch(ch); transport_deregister_session_configfs(ch->sess); transport_deregister_session(ch->sess); ch->sess = NULL; @@ -2644,7 +2284,7 @@ reject: | SRP_BUF_FORMAT_INDIRECT); ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, - (void *)rej, sizeof *rej); + (void *)rej, sizeof(*rej)); out: kfree(rep_param); @@ -2654,10 +2294,23 @@ out: return ret; } -static void srpt_cm_rej_recv(struct ib_cm_id *cm_id) +static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch, + enum ib_cm_rej_reason reason, + const u8 *private_data, + u8 private_data_len) { - pr_info("Received IB REJ for cm_id %p.\n", cm_id); - srpt_drain_channel(cm_id); + char *priv = NULL; + int i; + + if (private_data_len && (priv = kmalloc(private_data_len * 3 + 1, + GFP_KERNEL))) { + for (i = 0; i < private_data_len; i++) + sprintf(priv + 3 * i, " %02x", private_data[i]); + } + pr_info("Received CM REJ for ch %s-%d; reason %d%s%s.\n", + ch->sess_name, ch->qp->qp_num, reason, private_data_len ? + "; private data" : "", priv ? priv : " (?)"); + kfree(priv); } /** @@ -2666,87 +2319,23 @@ static void srpt_cm_rej_recv(struct ib_cm_id *cm_id) * An IB_CM_RTU_RECEIVED message indicates that the connection is established * and that the recipient may begin transmitting (RTU = ready to use). */ -static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id) +static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch) { - struct srpt_rdma_ch *ch; int ret; - ch = srpt_find_channel(cm_id->context, cm_id); - BUG_ON(!ch); - - if (srpt_test_and_set_ch_state(ch, CH_CONNECTING, CH_LIVE)) { - struct srpt_recv_ioctx *ioctx, *ioctx_tmp; - + if (srpt_set_ch_state(ch, CH_LIVE)) { ret = srpt_ch_qp_rts(ch, ch->qp); - list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list, - wait_list) { - list_del(&ioctx->wait_list); - srpt_handle_new_iu(ch, ioctx, NULL); - } - if (ret) + if (ret == 0) { + /* Trigger wait list processing. */ + ret = srpt_zerolength_write(ch); + WARN_ONCE(ret < 0, "%d\n", ret); + } else { srpt_close_ch(ch); + } } } -static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id) -{ - pr_info("Received IB TimeWait exit for cm_id %p.\n", cm_id); - srpt_drain_channel(cm_id); -} - -static void srpt_cm_rep_error(struct ib_cm_id *cm_id) -{ - pr_info("Received IB REP error for cm_id %p.\n", cm_id); - srpt_drain_channel(cm_id); -} - -/** - * srpt_cm_dreq_recv() - Process reception of a DREQ message. - */ -static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id) -{ - struct srpt_rdma_ch *ch; - unsigned long flags; - bool send_drep = false; - - ch = srpt_find_channel(cm_id->context, cm_id); - BUG_ON(!ch); - - pr_debug("cm_id= %p ch->state= %d\n", cm_id, srpt_get_ch_state(ch)); - - spin_lock_irqsave(&ch->spinlock, flags); - switch (ch->state) { - case CH_CONNECTING: - case CH_LIVE: - send_drep = true; - ch->state = CH_DISCONNECTING; - break; - case CH_DISCONNECTING: - case CH_DRAINING: - case CH_RELEASING: - WARN(true, "unexpected channel state %d\n", ch->state); - break; - } - spin_unlock_irqrestore(&ch->spinlock, flags); - - if (send_drep) { - if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0) - pr_err("Sending IB DREP failed.\n"); - pr_info("Received DREQ and sent DREP for session %s.\n", - ch->sess_name); - } -} - -/** - * srpt_cm_drep_recv() - Process reception of a DREP message. - */ -static void srpt_cm_drep_recv(struct ib_cm_id *cm_id) -{ - pr_info("Received InfiniBand DREP message for cm_id %p.\n", cm_id); - srpt_drain_channel(cm_id); -} - /** * srpt_cm_handler() - IB connection manager callback function. * @@ -2759,6 +2348,7 @@ static void srpt_cm_drep_recv(struct ib_cm_id *cm_id) */ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) { + struct srpt_rdma_ch *ch = cm_id->context; int ret; ret = 0; @@ -2768,32 +2358,39 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) event->private_data); break; case IB_CM_REJ_RECEIVED: - srpt_cm_rej_recv(cm_id); + srpt_cm_rej_recv(ch, event->param.rej_rcvd.reason, + event->private_data, + IB_CM_REJ_PRIVATE_DATA_SIZE); break; case IB_CM_RTU_RECEIVED: case IB_CM_USER_ESTABLISHED: - srpt_cm_rtu_recv(cm_id); + srpt_cm_rtu_recv(ch); break; case IB_CM_DREQ_RECEIVED: - srpt_cm_dreq_recv(cm_id); + srpt_disconnect_ch(ch); break; case IB_CM_DREP_RECEIVED: - srpt_cm_drep_recv(cm_id); + pr_info("Received CM DREP message for ch %s-%d.\n", + ch->sess_name, ch->qp->qp_num); + srpt_close_ch(ch); break; case IB_CM_TIMEWAIT_EXIT: - srpt_cm_timewait_exit(cm_id); + pr_info("Received CM TimeWait exit for ch %s-%d.\n", + ch->sess_name, ch->qp->qp_num); + srpt_close_ch(ch); break; case IB_CM_REP_ERROR: - srpt_cm_rep_error(cm_id); + pr_info("Received CM REP error for ch %s-%d.\n", ch->sess_name, + ch->qp->qp_num); break; case IB_CM_DREQ_ERROR: - pr_info("Received IB DREQ ERROR event.\n"); + pr_info("Received CM DREQ ERROR event.\n"); break; case IB_CM_MRA_RECEIVED: - pr_info("Received IB MRA event\n"); + pr_info("Received CM MRA event\n"); break; default: - pr_err("received unrecognized IB CM event %d\n", event->event); + pr_err("received unrecognized CM event %d\n", event->event); break; } @@ -2808,12 +2405,8 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *ioctx) { - struct ib_rdma_wr wr; struct ib_send_wr *bad_wr; - struct rdma_iu *riu; - int i; - int ret; - int sq_wr_avail; + int sq_wr_avail, ret, i; enum dma_data_direction dir; const int n_rdma = ioctx->n_rdma; @@ -2829,59 +2422,32 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, } } - ioctx->rdma_aborted = false; - ret = 0; - riu = ioctx->rdma_ius; - memset(&wr, 0, sizeof wr); - - for (i = 0; i < n_rdma; ++i, ++riu) { - if (dir == DMA_FROM_DEVICE) { - wr.wr.opcode = IB_WR_RDMA_WRITE; - wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ? - SRPT_RDMA_WRITE_LAST : - SRPT_RDMA_MID, - ioctx->ioctx.index); - } else { - wr.wr.opcode = IB_WR_RDMA_READ; - wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ? - SRPT_RDMA_READ_LAST : - SRPT_RDMA_MID, - ioctx->ioctx.index); - } - wr.wr.next = NULL; - wr.remote_addr = riu->raddr; - wr.rkey = riu->rkey; - wr.wr.num_sge = riu->sge_cnt; - wr.wr.sg_list = riu->sge; + for (i = 0; i < n_rdma; i++) { + struct ib_send_wr *wr = &ioctx->rdma_wrs[i].wr; - /* only get completion event for the last rdma write */ - if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE) - wr.wr.send_flags = IB_SEND_SIGNALED; + wr->opcode = (dir == DMA_FROM_DEVICE) ? + IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; - ret = ib_post_send(ch->qp, &wr.wr, &bad_wr); - if (ret) - break; + if (i == n_rdma - 1) { + /* only get completion event for the last rdma read */ + if (dir == DMA_TO_DEVICE) { + wr->send_flags = IB_SEND_SIGNALED; + ioctx->rdma_cqe.done = srpt_rdma_read_done; + } else { + ioctx->rdma_cqe.done = srpt_rdma_write_done; + } + wr->wr_cqe = &ioctx->rdma_cqe; + wr->next = NULL; + } else { + wr->wr_cqe = NULL; + wr->next = &ioctx->rdma_wrs[i + 1].wr; + } } + ret = ib_post_send(ch->qp, &ioctx->rdma_wrs->wr, &bad_wr); if (ret) pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n", __func__, __LINE__, ret, i, n_rdma); - if (ret && i > 0) { - wr.wr.num_sge = 0; - wr.wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index); - wr.wr.send_flags = IB_SEND_SIGNALED; - while (ch->state == CH_LIVE && - ib_post_send(ch->qp, &wr.wr, &bad_wr) != 0) { - pr_info("Trying to abort failed RDMA transfer [%d]\n", - ioctx->ioctx.index); - msleep(1000); - } - while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) { - pr_info("Waiting until RDMA abort finished [%d]\n", - ioctx->ioctx.index); - msleep(1000); - } - } out: if (unlikely(dir == DMA_TO_DEVICE && ret < 0)) atomic_add(n_rdma, &ch->sq_wr_avail); @@ -2933,41 +2499,14 @@ static int srpt_write_pending_status(struct se_cmd *se_cmd) */ static int srpt_write_pending(struct se_cmd *se_cmd) { - struct srpt_rdma_ch *ch; - struct srpt_send_ioctx *ioctx; + struct srpt_send_ioctx *ioctx = + container_of(se_cmd, struct srpt_send_ioctx, cmd); + struct srpt_rdma_ch *ch = ioctx->ch; enum srpt_command_state new_state; - enum rdma_ch_state ch_state; - int ret; - - ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA); WARN_ON(new_state == SRPT_STATE_DONE); - - ch = ioctx->ch; - BUG_ON(!ch); - - ch_state = srpt_get_ch_state(ch); - switch (ch_state) { - case CH_CONNECTING: - WARN(true, "unexpected channel state %d\n", ch_state); - ret = -EINVAL; - goto out; - case CH_LIVE: - break; - case CH_DISCONNECTING: - case CH_DRAINING: - case CH_RELEASING: - pr_debug("cmd with tag %lld: channel disconnecting\n", - ioctx->cmd.tag); - srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN); - ret = -EINVAL; - goto out; - } - ret = srpt_xfer_data(ch, ioctx); - -out: - return ret; + return srpt_xfer_data(ch, ioctx); } static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status) @@ -3098,36 +2637,25 @@ static void srpt_refresh_port_work(struct work_struct *work) srpt_refresh_port(sport); } -static int srpt_ch_list_empty(struct srpt_device *sdev) -{ - int res; - - spin_lock_irq(&sdev->spinlock); - res = list_empty(&sdev->rch_list); - spin_unlock_irq(&sdev->spinlock); - - return res; -} - /** * srpt_release_sdev() - Free the channel resources associated with a target. */ static int srpt_release_sdev(struct srpt_device *sdev) { - struct srpt_rdma_ch *ch, *tmp_ch; - int res; + int i, res; WARN_ON_ONCE(irqs_disabled()); BUG_ON(!sdev); - spin_lock_irq(&sdev->spinlock); - list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) - __srpt_close_ch(ch); - spin_unlock_irq(&sdev->spinlock); + mutex_lock(&sdev->mutex); + for (i = 0; i < ARRAY_SIZE(sdev->port); i++) + sdev->port[i].enabled = false; + __srpt_close_all_ch(sdev); + mutex_unlock(&sdev->mutex); res = wait_event_interruptible(sdev->ch_releaseQ, - srpt_ch_list_empty(sdev)); + list_empty_careful(&sdev->rch_list)); if (res) pr_err("%s: interrupted.\n", __func__); @@ -3181,23 +2709,20 @@ static void srpt_add_one(struct ib_device *device) pr_debug("device = %p, device->dma_ops = %p\n", device, device->dma_ops); - sdev = kzalloc(sizeof *sdev, GFP_KERNEL); + sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); if (!sdev) goto err; sdev->device = device; INIT_LIST_HEAD(&sdev->rch_list); init_waitqueue_head(&sdev->ch_releaseQ); - spin_lock_init(&sdev->spinlock); - - if (ib_query_device(device, &sdev->dev_attr)) - goto free_dev; + mutex_init(&sdev->mutex); sdev->pd = ib_alloc_pd(device); if (IS_ERR(sdev->pd)) goto free_dev; - sdev->srq_size = min(srpt_srq_size, sdev->dev_attr.max_srq_wr); + sdev->srq_size = min(srpt_srq_size, sdev->device->attrs.max_srq_wr); srq_attr.event_handler = srpt_srq_event; srq_attr.srq_context = (void *)sdev; @@ -3211,7 +2736,7 @@ static void srpt_add_one(struct ib_device *device) goto err_pd; pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n", - __func__, sdev->srq_size, sdev->dev_attr.max_srq_wr, + __func__, sdev->srq_size, sdev->device->attrs.max_srq_wr, device->name); if (!srpt_service_guid) @@ -3263,7 +2788,7 @@ static void srpt_add_one(struct ib_device *device) if (srpt_refresh_port(sport)) { pr_err("MAD registration failed for %s-%d.\n", - srpt_sdev_name(sdev), i); + sdev->device->name, i); goto err_ring; } snprintf(sport->port_guid, sizeof(sport->port_guid), @@ -3412,24 +2937,26 @@ static void srpt_release_cmd(struct se_cmd *se_cmd) static void srpt_close_session(struct se_session *se_sess) { DECLARE_COMPLETION_ONSTACK(release_done); - struct srpt_rdma_ch *ch; - struct srpt_device *sdev; - unsigned long res; - - ch = se_sess->fabric_sess_ptr; - WARN_ON(ch->sess != se_sess); + struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr; + struct srpt_device *sdev = ch->sport->sdev; + bool wait; - pr_debug("ch %p state %d\n", ch, srpt_get_ch_state(ch)); + pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num, + ch->state); - sdev = ch->sport->sdev; - spin_lock_irq(&sdev->spinlock); + mutex_lock(&sdev->mutex); BUG_ON(ch->release_done); ch->release_done = &release_done; - __srpt_close_ch(ch); - spin_unlock_irq(&sdev->spinlock); + wait = !list_empty(&ch->list); + srpt_disconnect_ch(ch); + mutex_unlock(&sdev->mutex); - res = wait_for_completion_timeout(&release_done, 60 * HZ); - WARN_ON(res == 0); + if (!wait) + return; + + while (wait_for_completion_timeout(&release_done, 180 * HZ) == 0) + pr_info("%s(%s-%d state %d): still waiting ...\n", __func__, + ch->sess_name, ch->qp->qp_num, ch->state); } /** @@ -3637,6 +3164,8 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item, { struct se_portal_group *se_tpg = to_tpg(item); struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + struct srpt_device *sdev = sport->sdev; + struct srpt_rdma_ch *ch; unsigned long tmp; int ret; @@ -3650,11 +3179,24 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item, pr_err("Illegal value for srpt_tpg_store_enable: %lu\n", tmp); return -EINVAL; } - if (tmp == 1) - sport->enabled = true; - else - sport->enabled = false; + if (sport->enabled == tmp) + goto out; + sport->enabled = tmp; + if (sport->enabled) + goto out; + mutex_lock(&sdev->mutex); + list_for_each_entry(ch, &sdev->rch_list, list) { + if (ch->sport == sport) { + pr_debug("%s: ch %p %s-%d\n", __func__, ch, + ch->sess_name, ch->qp->qp_num); + srpt_disconnect_ch(ch); + srpt_close_ch(ch); + } + } + mutex_unlock(&sdev->mutex); + +out: return count; } @@ -3746,7 +3288,6 @@ static struct configfs_attribute *srpt_wwn_attrs[] = { static const struct target_core_fabric_ops srpt_template = { .module = THIS_MODULE, .name = "srpt", - .node_acl_size = sizeof(struct srpt_node_acl), .get_fabric_name = srpt_get_fabric_name, .tpg_get_wwn = srpt_get_fabric_wwn, .tpg_get_tag = srpt_get_tag, |