Commit 77369ed31daac51f4827c50d30f233c45480235a

Authored by Jack Morgenstein
Committed by Roland Dreier
1 parent ec914c52d6

[IB] uverbs: have kernel return QP capabilities

Move the computation of QP capabilities (max scatter/gather entries,
max inline data, etc) into the kernel, and have the uverbs module
return the values as part of the create QP response.  This keeps
precise knowledge of device limits in the low-level kernel driver.

This requires an ABI bump, so while we're making changes, get rid of
the max_sge parameter for the modify SRQ command -- it's not used and
shouldn't be there.

Signed-off-by: Jack Morgenstein <jackm@mellanox.co.il>
Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

Showing 8 changed files with 98 additions and 16 deletions (side-by-side diff)

drivers/infiniband/core/uverbs_cmd.c
... ... @@ -708,7 +708,7 @@
708 708 resp->wc[i].opcode = wc[i].opcode;
709 709 resp->wc[i].vendor_err = wc[i].vendor_err;
710 710 resp->wc[i].byte_len = wc[i].byte_len;
711   - resp->wc[i].imm_data = wc[i].imm_data;
  711 + resp->wc[i].imm_data = (__u32 __force) wc[i].imm_data;
712 712 resp->wc[i].qp_num = wc[i].qp_num;
713 713 resp->wc[i].src_qp = wc[i].src_qp;
714 714 resp->wc[i].wc_flags = wc[i].wc_flags;
... ... @@ -908,7 +908,12 @@
908 908 if (ret)
909 909 goto err_destroy;
910 910  
911   - resp.qp_handle = uobj->uobject.id;
  911 + resp.qp_handle = uobj->uobject.id;
  912 + resp.max_recv_sge = attr.cap.max_recv_sge;
  913 + resp.max_send_sge = attr.cap.max_send_sge;
  914 + resp.max_recv_wr = attr.cap.max_recv_wr;
  915 + resp.max_send_wr = attr.cap.max_send_wr;
  916 + resp.max_inline_data = attr.cap.max_inline_data;
912 917  
913 918 if (copy_to_user((void __user *) (unsigned long) cmd.response,
914 919 &resp, sizeof resp)) {
... ... @@ -1135,7 +1140,7 @@
1135 1140 next->num_sge = user_wr->num_sge;
1136 1141 next->opcode = user_wr->opcode;
1137 1142 next->send_flags = user_wr->send_flags;
1138   - next->imm_data = user_wr->imm_data;
  1143 + next->imm_data = (__be32 __force) user_wr->imm_data;
1139 1144  
1140 1145 if (qp->qp_type == IB_QPT_UD) {
1141 1146 next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr,
... ... @@ -1701,7 +1706,6 @@
1701 1706 }
1702 1707  
1703 1708 attr.max_wr = cmd.max_wr;
1704   - attr.max_sge = cmd.max_sge;
1705 1709 attr.srq_limit = cmd.srq_limit;
1706 1710  
1707 1711 ret = ib_modify_srq(srq, &attr, cmd.attr_mask);
drivers/infiniband/hw/mthca/mthca_cmd.c
... ... @@ -1060,6 +1060,8 @@
1060 1060 dev_lim->hca.arbel.resize_srq = field & 1;
1061 1061 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET);
1062 1062 dev_lim->max_sg = min_t(int, field, dev_lim->max_sg);
  1063 + MTHCA_GET(size, outbox, QUERY_DEV_LIM_MAX_DESC_SZ_RQ_OFFSET);
  1064 + dev_lim->max_desc_sz = min_t(int, size, dev_lim->max_desc_sz);
1063 1065 MTHCA_GET(size, outbox, QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET);
1064 1066 dev_lim->mpt_entry_sz = size;
1065 1067 MTHCA_GET(field, outbox, QUERY_DEV_LIM_PBL_SZ_OFFSET);
drivers/infiniband/hw/mthca/mthca_dev.h
... ... @@ -131,6 +131,7 @@
131 131 int max_sg;
132 132 int num_qps;
133 133 int max_wqes;
  134 + int max_desc_sz;
134 135 int max_qp_init_rdma;
135 136 int reserved_qps;
136 137 int num_srqs;
drivers/infiniband/hw/mthca/mthca_main.c
... ... @@ -168,6 +168,7 @@
168 168 mdev->limits.max_srq_wqes = dev_lim->max_srq_sz;
169 169 mdev->limits.reserved_srqs = dev_lim->reserved_srqs;
170 170 mdev->limits.reserved_eecs = dev_lim->reserved_eecs;
  171 + mdev->limits.max_desc_sz = dev_lim->max_desc_sz;
171 172 /*
172 173 * Subtract 1 from the limit because we need to allocate a
173 174 * spare CQE so the HCA HW can tell the difference between an
drivers/infiniband/hw/mthca/mthca_provider.c
... ... @@ -616,11 +616,11 @@
616 616 return ERR_PTR(err);
617 617 }
618 618  
619   - init_attr->cap.max_inline_data = 0;
620 619 init_attr->cap.max_send_wr = qp->sq.max;
621 620 init_attr->cap.max_recv_wr = qp->rq.max;
622 621 init_attr->cap.max_send_sge = qp->sq.max_gs;
623 622 init_attr->cap.max_recv_sge = qp->rq.max_gs;
  623 + init_attr->cap.max_inline_data = qp->max_inline_data;
624 624  
625 625 return &qp->ibqp;
626 626 }
drivers/infiniband/hw/mthca/mthca_provider.h
... ... @@ -251,6 +251,7 @@
251 251 struct mthca_wq sq;
252 252 enum ib_sig_type sq_policy;
253 253 int send_wqe_offset;
  254 + int max_inline_data;
254 255  
255 256 u64 *wrid;
256 257 union mthca_buf queue;
drivers/infiniband/hw/mthca/mthca_qp.c
... ... @@ -885,6 +885,48 @@
885 885 return err;
886 886 }
887 887  
  888 +static void mthca_adjust_qp_caps(struct mthca_dev *dev,
  889 + struct mthca_pd *pd,
  890 + struct mthca_qp *qp)
  891 +{
  892 + int max_data_size;
  893 +
  894 + /*
  895 + * Calculate the maximum size of WQE s/g segments, excluding
  896 + * the next segment and other non-data segments.
  897 + */
  898 + max_data_size = min(dev->limits.max_desc_sz, 1 << qp->sq.wqe_shift) -
  899 + sizeof (struct mthca_next_seg);
  900 +
  901 + switch (qp->transport) {
  902 + case MLX:
  903 + max_data_size -= 2 * sizeof (struct mthca_data_seg);
  904 + break;
  905 +
  906 + case UD:
  907 + if (mthca_is_memfree(dev))
  908 + max_data_size -= sizeof (struct mthca_arbel_ud_seg);
  909 + else
  910 + max_data_size -= sizeof (struct mthca_tavor_ud_seg);
  911 + break;
  912 +
  913 + default:
  914 + max_data_size -= sizeof (struct mthca_raddr_seg);
  915 + break;
  916 + }
  917 +
  918 + /* We don't support inline data for kernel QPs (yet). */
  919 + if (!pd->ibpd.uobject)
  920 + qp->max_inline_data = 0;
  921 + else
  922 + qp->max_inline_data = max_data_size - MTHCA_INLINE_HEADER_SIZE;
  923 +
  924 + qp->sq.max_gs = max_data_size / sizeof (struct mthca_data_seg);
  925 + qp->rq.max_gs = (min(dev->limits.max_desc_sz, 1 << qp->rq.wqe_shift) -
  926 + sizeof (struct mthca_next_seg)) /
  927 + sizeof (struct mthca_data_seg);
  928 +}
  929 +
888 930 /*
889 931 * Allocate and register buffer for WQEs. qp->rq.max, sq.max,
890 932 * rq.max_gs and sq.max_gs must all be assigned.
891 933  
892 934  
893 935  
894 936  
895 937  
896 938  
... ... @@ -902,27 +944,53 @@
902 944 size = sizeof (struct mthca_next_seg) +
903 945 qp->rq.max_gs * sizeof (struct mthca_data_seg);
904 946  
  947 + if (size > dev->limits.max_desc_sz)
  948 + return -EINVAL;
  949 +
905 950 for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
906 951 qp->rq.wqe_shift++)
907 952 ; /* nothing */
908 953  
909   - size = sizeof (struct mthca_next_seg) +
910   - qp->sq.max_gs * sizeof (struct mthca_data_seg);
  954 + size = qp->sq.max_gs * sizeof (struct mthca_data_seg);
911 955 switch (qp->transport) {
912 956 case MLX:
913 957 size += 2 * sizeof (struct mthca_data_seg);
914 958 break;
  959 +
915 960 case UD:
916   - if (mthca_is_memfree(dev))
917   - size += sizeof (struct mthca_arbel_ud_seg);
918   - else
919   - size += sizeof (struct mthca_tavor_ud_seg);
  961 + size += mthca_is_memfree(dev) ?
  962 + sizeof (struct mthca_arbel_ud_seg) :
  963 + sizeof (struct mthca_tavor_ud_seg);
920 964 break;
  965 +
  966 + case UC:
  967 + size += sizeof (struct mthca_raddr_seg);
  968 + break;
  969 +
  970 + case RC:
  971 + size += sizeof (struct mthca_raddr_seg);
  972 + /*
  973 + * An atomic op will require an atomic segment, a
  974 + * remote address segment and one scatter entry.
  975 + */
  976 + size = max_t(int, size,
  977 + sizeof (struct mthca_atomic_seg) +
  978 + sizeof (struct mthca_raddr_seg) +
  979 + sizeof (struct mthca_data_seg));
  980 + break;
  981 +
921 982 default:
922   - /* bind seg is as big as atomic + raddr segs */
923   - size += sizeof (struct mthca_bind_seg);
  983 + break;
924 984 }
925 985  
  986 + /* Make sure that we have enough space for a bind request */
  987 + size = max_t(int, size, sizeof (struct mthca_bind_seg));
  988 +
  989 + size += sizeof (struct mthca_next_seg);
  990 +
  991 + if (size > dev->limits.max_desc_sz)
  992 + return -EINVAL;
  993 +
926 994 for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
927 995 qp->sq.wqe_shift++)
928 996 ; /* nothing */
... ... @@ -1065,6 +1133,8 @@
1065 1133 mthca_unmap_memfree(dev, qp);
1066 1134 return ret;
1067 1135 }
  1136 +
  1137 + mthca_adjust_qp_caps(dev, pd, qp);
1068 1138  
1069 1139 /*
1070 1140 * If this is a userspace QP, we're done now. The doorbells
include/rdma/ib_user_verbs.h
... ... @@ -43,7 +43,7 @@
43 43 * Increment this value if any changes that break userspace ABI
44 44 * compatibility are made.
45 45 */
46   -#define IB_USER_VERBS_ABI_VERSION 3
  46 +#define IB_USER_VERBS_ABI_VERSION 4
47 47  
48 48 enum {
49 49 IB_USER_VERBS_CMD_GET_CONTEXT,
... ... @@ -333,6 +333,11 @@
333 333 struct ib_uverbs_create_qp_resp {
334 334 __u32 qp_handle;
335 335 __u32 qpn;
  336 + __u32 max_send_wr;
  337 + __u32 max_recv_wr;
  338 + __u32 max_send_sge;
  339 + __u32 max_recv_sge;
  340 + __u32 max_inline_data;
336 341 };
337 342  
338 343 /*
339 344  
... ... @@ -552,9 +557,7 @@
552 557 __u32 srq_handle;
553 558 __u32 attr_mask;
554 559 __u32 max_wr;
555   - __u32 max_sge;
556 560 __u32 srq_limit;
557   - __u32 reserved;
558 561 __u64 driver_data[0];
559 562 };
560 563