Commit 77369ed31daac51f4827c50d30f233c45480235a
Committed by
Roland Dreier
1 parent
ec914c52d6
Exists in
master
and in
7 other branches
[IB] uverbs: have kernel return QP capabilities
Move the computation of QP capabilities (max scatter/gather entries, max inline data, etc) into the kernel, and have the uverbs module return the values as part of the create QP response. This keeps precise knowledge of device limits in the low-level kernel driver. This requires an ABI bump, so while we're making changes, get rid of the max_sge parameter for the modify SRQ command -- it's not used and shouldn't be there. Signed-off-by: Jack Morgenstein <jackm@mellanox.co.il> Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Showing 8 changed files with 98 additions and 16 deletions (side-by-side diff)
- drivers/infiniband/core/uverbs_cmd.c
- drivers/infiniband/hw/mthca/mthca_cmd.c
- drivers/infiniband/hw/mthca/mthca_dev.h
- drivers/infiniband/hw/mthca/mthca_main.c
- drivers/infiniband/hw/mthca/mthca_provider.c
- drivers/infiniband/hw/mthca/mthca_provider.h
- drivers/infiniband/hw/mthca/mthca_qp.c
- include/rdma/ib_user_verbs.h
drivers/infiniband/core/uverbs_cmd.c
... | ... | @@ -708,7 +708,7 @@ |
708 | 708 | resp->wc[i].opcode = wc[i].opcode; |
709 | 709 | resp->wc[i].vendor_err = wc[i].vendor_err; |
710 | 710 | resp->wc[i].byte_len = wc[i].byte_len; |
711 | - resp->wc[i].imm_data = wc[i].imm_data; | |
711 | + resp->wc[i].imm_data = (__u32 __force) wc[i].imm_data; | |
712 | 712 | resp->wc[i].qp_num = wc[i].qp_num; |
713 | 713 | resp->wc[i].src_qp = wc[i].src_qp; |
714 | 714 | resp->wc[i].wc_flags = wc[i].wc_flags; |
... | ... | @@ -908,7 +908,12 @@ |
908 | 908 | if (ret) |
909 | 909 | goto err_destroy; |
910 | 910 | |
911 | - resp.qp_handle = uobj->uobject.id; | |
911 | + resp.qp_handle = uobj->uobject.id; | |
912 | + resp.max_recv_sge = attr.cap.max_recv_sge; | |
913 | + resp.max_send_sge = attr.cap.max_send_sge; | |
914 | + resp.max_recv_wr = attr.cap.max_recv_wr; | |
915 | + resp.max_send_wr = attr.cap.max_send_wr; | |
916 | + resp.max_inline_data = attr.cap.max_inline_data; | |
912 | 917 | |
913 | 918 | if (copy_to_user((void __user *) (unsigned long) cmd.response, |
914 | 919 | &resp, sizeof resp)) { |
... | ... | @@ -1135,7 +1140,7 @@ |
1135 | 1140 | next->num_sge = user_wr->num_sge; |
1136 | 1141 | next->opcode = user_wr->opcode; |
1137 | 1142 | next->send_flags = user_wr->send_flags; |
1138 | - next->imm_data = user_wr->imm_data; | |
1143 | + next->imm_data = (__be32 __force) user_wr->imm_data; | |
1139 | 1144 | |
1140 | 1145 | if (qp->qp_type == IB_QPT_UD) { |
1141 | 1146 | next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr, |
... | ... | @@ -1701,7 +1706,6 @@ |
1701 | 1706 | } |
1702 | 1707 | |
1703 | 1708 | attr.max_wr = cmd.max_wr; |
1704 | - attr.max_sge = cmd.max_sge; | |
1705 | 1709 | attr.srq_limit = cmd.srq_limit; |
1706 | 1710 | |
1707 | 1711 | ret = ib_modify_srq(srq, &attr, cmd.attr_mask); |
drivers/infiniband/hw/mthca/mthca_cmd.c
... | ... | @@ -1060,6 +1060,8 @@ |
1060 | 1060 | dev_lim->hca.arbel.resize_srq = field & 1; |
1061 | 1061 | MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET); |
1062 | 1062 | dev_lim->max_sg = min_t(int, field, dev_lim->max_sg); |
1063 | + MTHCA_GET(size, outbox, QUERY_DEV_LIM_MAX_DESC_SZ_RQ_OFFSET); | |
1064 | + dev_lim->max_desc_sz = min_t(int, size, dev_lim->max_desc_sz); | |
1063 | 1065 | MTHCA_GET(size, outbox, QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET); |
1064 | 1066 | dev_lim->mpt_entry_sz = size; |
1065 | 1067 | MTHCA_GET(field, outbox, QUERY_DEV_LIM_PBL_SZ_OFFSET); |
drivers/infiniband/hw/mthca/mthca_dev.h
drivers/infiniband/hw/mthca/mthca_main.c
... | ... | @@ -168,6 +168,7 @@ |
168 | 168 | mdev->limits.max_srq_wqes = dev_lim->max_srq_sz; |
169 | 169 | mdev->limits.reserved_srqs = dev_lim->reserved_srqs; |
170 | 170 | mdev->limits.reserved_eecs = dev_lim->reserved_eecs; |
171 | + mdev->limits.max_desc_sz = dev_lim->max_desc_sz; | |
171 | 172 | /* |
172 | 173 | * Subtract 1 from the limit because we need to allocate a |
173 | 174 | * spare CQE so the HCA HW can tell the difference between an |
drivers/infiniband/hw/mthca/mthca_provider.c
... | ... | @@ -616,11 +616,11 @@ |
616 | 616 | return ERR_PTR(err); |
617 | 617 | } |
618 | 618 | |
619 | - init_attr->cap.max_inline_data = 0; | |
620 | 619 | init_attr->cap.max_send_wr = qp->sq.max; |
621 | 620 | init_attr->cap.max_recv_wr = qp->rq.max; |
622 | 621 | init_attr->cap.max_send_sge = qp->sq.max_gs; |
623 | 622 | init_attr->cap.max_recv_sge = qp->rq.max_gs; |
623 | + init_attr->cap.max_inline_data = qp->max_inline_data; | |
624 | 624 | |
625 | 625 | return &qp->ibqp; |
626 | 626 | } |
drivers/infiniband/hw/mthca/mthca_provider.h
drivers/infiniband/hw/mthca/mthca_qp.c
... | ... | @@ -885,6 +885,48 @@ |
885 | 885 | return err; |
886 | 886 | } |
887 | 887 | |
888 | +static void mthca_adjust_qp_caps(struct mthca_dev *dev, | |
889 | + struct mthca_pd *pd, | |
890 | + struct mthca_qp *qp) | |
891 | +{ | |
892 | + int max_data_size; | |
893 | + | |
894 | + /* | |
895 | + * Calculate the maximum size of WQE s/g segments, excluding | |
896 | + * the next segment and other non-data segments. | |
897 | + */ | |
898 | + max_data_size = min(dev->limits.max_desc_sz, 1 << qp->sq.wqe_shift) - | |
899 | + sizeof (struct mthca_next_seg); | |
900 | + | |
901 | + switch (qp->transport) { | |
902 | + case MLX: | |
903 | + max_data_size -= 2 * sizeof (struct mthca_data_seg); | |
904 | + break; | |
905 | + | |
906 | + case UD: | |
907 | + if (mthca_is_memfree(dev)) | |
908 | + max_data_size -= sizeof (struct mthca_arbel_ud_seg); | |
909 | + else | |
910 | + max_data_size -= sizeof (struct mthca_tavor_ud_seg); | |
911 | + break; | |
912 | + | |
913 | + default: | |
914 | + max_data_size -= sizeof (struct mthca_raddr_seg); | |
915 | + break; | |
916 | + } | |
917 | + | |
918 | + /* We don't support inline data for kernel QPs (yet). */ | |
919 | + if (!pd->ibpd.uobject) | |
920 | + qp->max_inline_data = 0; | |
921 | + else | |
922 | + qp->max_inline_data = max_data_size - MTHCA_INLINE_HEADER_SIZE; | |
923 | + | |
924 | + qp->sq.max_gs = max_data_size / sizeof (struct mthca_data_seg); | |
925 | + qp->rq.max_gs = (min(dev->limits.max_desc_sz, 1 << qp->rq.wqe_shift) - | |
926 | + sizeof (struct mthca_next_seg)) / | |
927 | + sizeof (struct mthca_data_seg); | |
928 | +} | |
929 | + | |
888 | 930 | /* |
889 | 931 | * Allocate and register buffer for WQEs. qp->rq.max, sq.max, |
890 | 932 | * rq.max_gs and sq.max_gs must all be assigned. |
891 | 933 | |
892 | 934 | |
893 | 935 | |
894 | 936 | |
895 | 937 | |
896 | 938 | |
... | ... | @@ -902,27 +944,53 @@ |
902 | 944 | size = sizeof (struct mthca_next_seg) + |
903 | 945 | qp->rq.max_gs * sizeof (struct mthca_data_seg); |
904 | 946 | |
947 | + if (size > dev->limits.max_desc_sz) | |
948 | + return -EINVAL; | |
949 | + | |
905 | 950 | for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size; |
906 | 951 | qp->rq.wqe_shift++) |
907 | 952 | ; /* nothing */ |
908 | 953 | |
909 | - size = sizeof (struct mthca_next_seg) + | |
910 | - qp->sq.max_gs * sizeof (struct mthca_data_seg); | |
954 | + size = qp->sq.max_gs * sizeof (struct mthca_data_seg); | |
911 | 955 | switch (qp->transport) { |
912 | 956 | case MLX: |
913 | 957 | size += 2 * sizeof (struct mthca_data_seg); |
914 | 958 | break; |
959 | + | |
915 | 960 | case UD: |
916 | - if (mthca_is_memfree(dev)) | |
917 | - size += sizeof (struct mthca_arbel_ud_seg); | |
918 | - else | |
919 | - size += sizeof (struct mthca_tavor_ud_seg); | |
961 | + size += mthca_is_memfree(dev) ? | |
962 | + sizeof (struct mthca_arbel_ud_seg) : | |
963 | + sizeof (struct mthca_tavor_ud_seg); | |
920 | 964 | break; |
965 | + | |
966 | + case UC: | |
967 | + size += sizeof (struct mthca_raddr_seg); | |
968 | + break; | |
969 | + | |
970 | + case RC: | |
971 | + size += sizeof (struct mthca_raddr_seg); | |
972 | + /* | |
973 | + * An atomic op will require an atomic segment, a | |
974 | + * remote address segment and one scatter entry. | |
975 | + */ | |
976 | + size = max_t(int, size, | |
977 | + sizeof (struct mthca_atomic_seg) + | |
978 | + sizeof (struct mthca_raddr_seg) + | |
979 | + sizeof (struct mthca_data_seg)); | |
980 | + break; | |
981 | + | |
921 | 982 | default: |
922 | - /* bind seg is as big as atomic + raddr segs */ | |
923 | - size += sizeof (struct mthca_bind_seg); | |
983 | + break; | |
924 | 984 | } |
925 | 985 | |
986 | + /* Make sure that we have enough space for a bind request */ | |
987 | + size = max_t(int, size, sizeof (struct mthca_bind_seg)); | |
988 | + | |
989 | + size += sizeof (struct mthca_next_seg); | |
990 | + | |
991 | + if (size > dev->limits.max_desc_sz) | |
992 | + return -EINVAL; | |
993 | + | |
926 | 994 | for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size; |
927 | 995 | qp->sq.wqe_shift++) |
928 | 996 | ; /* nothing */ |
... | ... | @@ -1065,6 +1133,8 @@ |
1065 | 1133 | mthca_unmap_memfree(dev, qp); |
1066 | 1134 | return ret; |
1067 | 1135 | } |
1136 | + | |
1137 | + mthca_adjust_qp_caps(dev, pd, qp); | |
1068 | 1138 | |
1069 | 1139 | /* |
1070 | 1140 | * If this is a userspace QP, we're done now. The doorbells |
include/rdma/ib_user_verbs.h
... | ... | @@ -43,7 +43,7 @@ |
43 | 43 | * Increment this value if any changes that break userspace ABI |
44 | 44 | * compatibility are made. |
45 | 45 | */ |
46 | -#define IB_USER_VERBS_ABI_VERSION 3 | |
46 | +#define IB_USER_VERBS_ABI_VERSION 4 | |
47 | 47 | |
48 | 48 | enum { |
49 | 49 | IB_USER_VERBS_CMD_GET_CONTEXT, |
... | ... | @@ -333,6 +333,11 @@ |
333 | 333 | struct ib_uverbs_create_qp_resp { |
334 | 334 | __u32 qp_handle; |
335 | 335 | __u32 qpn; |
336 | + __u32 max_send_wr; | |
337 | + __u32 max_recv_wr; | |
338 | + __u32 max_send_sge; | |
339 | + __u32 max_recv_sge; | |
340 | + __u32 max_inline_data; | |
336 | 341 | }; |
337 | 342 | |
338 | 343 | /* |
339 | 344 | |
... | ... | @@ -552,9 +557,7 @@ |
552 | 557 | __u32 srq_handle; |
553 | 558 | __u32 attr_mask; |
554 | 559 | __u32 max_wr; |
555 | - __u32 max_sge; | |
556 | 560 | __u32 srq_limit; |
557 | - __u32 reserved; | |
558 | 561 | __u64 driver_data[0]; |
559 | 562 | }; |
560 | 563 |