Commit dd5f03beb4f76ae65d76d8c22a8815e424fc607c

Authored by Matan Barak
Committed by Roland Dreier
1 parent 7e22e91102

IB/core: Ethernet L2 attributes in verbs/cm structures

This patch adds support for Ethernet L2 attributes in the
verbs/cm/cma structures.

When dealing with L2 Ethernet, we should use smac, dmac, vlan ID and
priority in a manner similar to the way the IB L2 (and the L4 PKEY)
attributes are used.

Thus, those attributes were added to the following structures (the new
fields are sketched in abridged form just after the list):

* ib_ah_attr - added dmac
* ib_qp_attr - added smac and vlan_id (sl remains the vlan priority)
* ib_wc - added smac, vlan_id
* ib_sa_path_rec - added smac, dmac, vlan_id
* cm_av - added smac and vlan_id
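
In abridged form, the new fields look like this (unrelated members
elided; this just collects the include/rdma/ib_verbs.h and
include/rdma/ib_sa.h hunks shown below):

	struct ib_ah_attr {
		/* ... existing members ... */
		u8	dmac[ETH_ALEN];
		u16	vlan_id;
	};

	struct ib_wc {
		/* ... existing members ... */
		u8	smac[ETH_ALEN];
		u16	vlan_id;
	};

	struct ib_qp_attr {
		/* ... existing members ... */
		u8	smac[ETH_ALEN];
		u8	alt_smac[ETH_ALEN];
		u16	vlan_id;
		u16	alt_vlan_id;
	};

	struct ib_sa_path_rec {
		/* ... existing members ... */
		u8	smac[ETH_ALEN];
		u8	dmac[ETH_ALEN];
		u16	vlan_id;
	};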

For the path record structure, extra care was taken to exclude the new
fields when packing it into wire format, so we don't break the IB CM
and SA wire protocol.
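
Concretely, path_rec_table has no entries for the new fields, so they
are never packed onto the wire; on receive they are explicitly reset
right after unpacking, as the sa_query.c hunk below does:

	ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
		  mad->data, &rec);
	rec.vlan_id = 0xffff;		/* 0xffff means "no VLAN" */
	memset(rec.dmac, 0, ETH_ALEN);
	memset(rec.smac, 0, ETH_ALEN);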

On the active side, the CM fills its internal structures from the path
provided by the ULP.  We add code there to take the ETH L2 attributes
and place them in the CM address handle (struct cm_av).

On the passive side, the CM fills its internal structures from the WC
associated with the REQ message.  We add code there to take the ETH L2
attributes from the WC.

When the HW driver provides the required ETH L2 attributes in the WC,
it sets the IB_WC_WITH_SMAC and IB_WC_WITH_VLAN flags.  The IB core
code checks for the presence of these flags and, in their absence,
falls back to address resolution inside the ib_init_ah_from_wc()
helper function.
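
A provider driver that has the L2 information at poll time would
report it roughly as follows (a minimal sketch: fill_eth_l2_wc() and
the hw_cqe layout are hypothetical, only the WC fields and flags come
from this patch):

	static void fill_eth_l2_wc(struct ib_wc *wc, const struct hw_cqe *cqe)
	{
		/* hypothetical CQE fields carrying the received L2 info */
		memcpy(wc->smac, cqe->smac, ETH_ALEN);
		wc->vlan_id = cqe->vlan_id;
		/*
		 * Both flags must be set for ib_init_ah_from_wc() to trust
		 * the WC and skip the address-resolution fallback.
		 */
		wc->wc_flags |= IB_WC_WITH_SMAC | IB_WC_WITH_VLAN;
	}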

ib_modify_qp_is_ok is also updated to consider the link layer: some
parameters are mandatory for the Ethernet link layer, while they are
irrelevant for IB.  Vendor drivers are modified to support the new
function signature.
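
This results in two calling styles, sketched here (port is the QP's
port number; compare the mlx4 and mthca hunks below):

	/* Ethernet-capable driver: resolve the port's actual link layer. */
	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
				rdma_port_get_link_layer(ibqp->device, port)))
		return -EINVAL;

	/* IB-only driver: the Ethernet attributes are never required. */
	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
				IB_LINK_LAYER_UNSPECIFIED))
		return -EINVAL;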

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>

18 changed files with 343 additions and 24 deletions

drivers/infiniband/core/addr.c
... ... @@ -86,6 +86,8 @@
86 86 }
87 87 EXPORT_SYMBOL(rdma_addr_size);
88 88  
  89 +static struct rdma_addr_client self;
  90 +
89 91 void rdma_addr_register_client(struct rdma_addr_client *client)
90 92 {
91 93 atomic_set(&client->refcount, 1);
... ... @@ -119,7 +121,8 @@
119 121 }
120 122 EXPORT_SYMBOL(rdma_copy_addr);
121 123  
122   -int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
  124 +int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
  125 + u16 *vlan_id)
123 126 {
124 127 struct net_device *dev;
125 128 int ret = -EADDRNOTAVAIL;
... ... @@ -142,6 +145,8 @@
142 145 return ret;
143 146  
144 147 ret = rdma_copy_addr(dev_addr, dev, NULL);
  148 + if (vlan_id)
  149 + *vlan_id = rdma_vlan_dev_vlan_id(dev);
145 150 dev_put(dev);
146 151 break;
147 152  
... ... @@ -153,6 +158,8 @@
153 158 &((struct sockaddr_in6 *) addr)->sin6_addr,
154 159 dev, 1)) {
155 160 ret = rdma_copy_addr(dev_addr, dev, NULL);
  161 + if (vlan_id)
  162 + *vlan_id = rdma_vlan_dev_vlan_id(dev);
156 163 break;
157 164 }
158 165 }
... ... @@ -238,7 +245,7 @@
238 245 src_in->sin_addr.s_addr = fl4.saddr;
239 246  
240 247 if (rt->dst.dev->flags & IFF_LOOPBACK) {
241   - ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
  248 + ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
242 249 if (!ret)
243 250 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
244 251 goto put;
... ... @@ -286,7 +293,7 @@
286 293 }
287 294  
288 295 if (dst->dev->flags & IFF_LOOPBACK) {
289   - ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
  296 + ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
290 297 if (!ret)
291 298 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
292 299 goto put;
... ... @@ -437,6 +444,88 @@
437 444 }
438 445 EXPORT_SYMBOL(rdma_addr_cancel);
439 446  
  447 +struct resolve_cb_context {
  448 + struct rdma_dev_addr *addr;
  449 + struct completion comp;
  450 +};
  451 +
  452 +static void resolve_cb(int status, struct sockaddr *src_addr,
  453 + struct rdma_dev_addr *addr, void *context)
  454 +{
  455 + memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
  456 + rdma_dev_addr));
  457 + complete(&((struct resolve_cb_context *)context)->comp);
  458 +}
  459 +
  460 +int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
  461 + u16 *vlan_id)
  462 +{
  463 + int ret = 0;
  464 + struct rdma_dev_addr dev_addr;
  465 + struct resolve_cb_context ctx;
  466 + struct net_device *dev;
  467 +
  468 + union {
  469 + struct sockaddr _sockaddr;
  470 + struct sockaddr_in _sockaddr_in;
  471 + struct sockaddr_in6 _sockaddr_in6;
  472 + } sgid_addr, dgid_addr;
  473 +
  474 +
  475 + ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
  476 + if (ret)
  477 + return ret;
  478 +
  479 + ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
  480 + if (ret)
  481 + return ret;
  482 +
  483 + memset(&dev_addr, 0, sizeof(dev_addr));
  484 +
  485 + ctx.addr = &dev_addr;
  486 + init_completion(&ctx.comp);
  487 + ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
  488 + &dev_addr, 1000, resolve_cb, &ctx);
  489 + if (ret)
  490 + return ret;
  491 +
  492 + wait_for_completion(&ctx.comp);
  493 +
  494 + memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
  495 + dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
  496 + if (!dev)
  497 + return -ENODEV;
  498 + if (vlan_id)
  499 + *vlan_id = rdma_vlan_dev_vlan_id(dev);
  500 + dev_put(dev);
  501 + return ret;
  502 +}
  503 +EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
  504 +
  505 +int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
  506 +{
  507 + int ret = 0;
  508 + struct rdma_dev_addr dev_addr;
  509 + union {
  510 + struct sockaddr _sockaddr;
  511 + struct sockaddr_in _sockaddr_in;
  512 + struct sockaddr_in6 _sockaddr_in6;
  513 + } gid_addr;
  514 +
  515 + ret = rdma_gid2ip(&gid_addr._sockaddr, sgid);
  516 +
  517 + if (ret)
  518 + return ret;
  519 + memset(&dev_addr, 0, sizeof(dev_addr));
  520 + ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
  521 + if (ret)
  522 + return ret;
  523 +
  524 + memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
  525 + return ret;
  526 +}
  527 +EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
  528 +
440 529 static int netevent_callback(struct notifier_block *self, unsigned long event,
441 530 void *ctx)
442 531 {
443 532  
... ... @@ -461,11 +550,13 @@
461 550 return -ENOMEM;
462 551  
463 552 register_netevent_notifier(&nb);
  553 + rdma_addr_register_client(&self);
464 554 return 0;
465 555 }
466 556  
467 557 static void __exit addr_cleanup(void)
468 558 {
  559 + rdma_addr_unregister_client(&self);
469 560 unregister_netevent_notifier(&nb);
470 561 destroy_workqueue(addr_wq);
471 562 }
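
The two helpers exported above are consumed later in this patch:
ib_init_ah_from_wc() reverse-resolves the peer MAC from the GRH when
the WC lacks L2 info, and the CMA derives the local SMAC from its own
SGID. Condensed from the call sites below (RoCE port assumed):

	/* verbs.c fallback: local GID is the GRH dgid, peer is its sgid */
	ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
					 ah_attr->dmac, &ah_attr->vlan_id);

	/* cma.c: derive the source MAC from our own GID; VLAN not needed */
	ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL);
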
drivers/infiniband/core/cm.c
... ... @@ -47,6 +47,7 @@
47 47 #include <linux/sysfs.h>
48 48 #include <linux/workqueue.h>
49 49 #include <linux/kdev_t.h>
  50 +#include <linux/etherdevice.h>
50 51  
51 52 #include <rdma/ib_cache.h>
52 53 #include <rdma/ib_cm.h>
... ... @@ -177,6 +178,8 @@
177 178 struct ib_ah_attr ah_attr;
178 179 u16 pkey_index;
179 180 u8 timeout;
  181 + u8 valid;
  182 + u8 smac[ETH_ALEN];
180 183 };
181 184  
182 185 struct cm_work {
... ... @@ -346,6 +349,23 @@
346 349 grh, &av->ah_attr);
347 350 }
348 351  
  352 +int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac)
  353 +{
  354 + struct cm_id_private *cm_id_priv;
  355 +
  356 + cm_id_priv = container_of(id, struct cm_id_private, id);
  357 +
  358 + if (smac != NULL)
  359 + memcpy(cm_id_priv->av.smac, smac, sizeof(cm_id_priv->av.smac));
  360 +
  361 + if (alt_smac != NULL)
  362 + memcpy(cm_id_priv->alt_av.smac, alt_smac,
  363 + sizeof(cm_id_priv->alt_av.smac));
  364 +
  365 + return 0;
  366 +}
  367 +EXPORT_SYMBOL(ib_update_cm_av);
  368 +
349 369 static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
350 370 {
351 371 struct cm_device *cm_dev;
... ... @@ -376,6 +396,9 @@
376 396 ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
377 397 &av->ah_attr);
378 398 av->timeout = path->packet_life_time + 1;
  399 + memcpy(av->smac, path->smac, sizeof(av->smac));
  400 +
  401 + av->valid = 1;
379 402 return 0;
380 403 }
381 404  
... ... @@ -1554,6 +1577,9 @@
1554 1577  
1555 1578 cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
1556 1579 cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
  1580 +
  1581 + memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
  1582 + work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
1557 1583 ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
1558 1584 if (ret) {
1559 1585 ib_get_cached_gid(work->port->cm_dev->ib_device,
... ... @@ -3500,6 +3526,30 @@
3500 3526 *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
3501 3527 IB_QP_DEST_QPN | IB_QP_RQ_PSN;
3502 3528 qp_attr->ah_attr = cm_id_priv->av.ah_attr;
  3529 + if (!cm_id_priv->av.valid)
  3530 + return -EINVAL;
  3531 + if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) {
  3532 + qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id;
  3533 + *qp_attr_mask |= IB_QP_VID;
  3534 + }
  3535 + if (!is_zero_ether_addr(cm_id_priv->av.smac)) {
  3536 + memcpy(qp_attr->smac, cm_id_priv->av.smac,
  3537 + sizeof(qp_attr->smac));
  3538 + *qp_attr_mask |= IB_QP_SMAC;
  3539 + }
  3540 + if (cm_id_priv->alt_av.valid) {
  3541 + if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) {
  3542 + qp_attr->alt_vlan_id =
  3543 + cm_id_priv->alt_av.ah_attr.vlan_id;
  3544 + *qp_attr_mask |= IB_QP_ALT_VID;
  3545 + }
  3546 + if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) {
  3547 + memcpy(qp_attr->alt_smac,
  3548 + cm_id_priv->alt_av.smac,
  3549 + sizeof(qp_attr->alt_smac));
  3550 + *qp_attr_mask |= IB_QP_ALT_SMAC;
  3551 + }
  3552 + }
3503 3553 qp_attr->path_mtu = cm_id_priv->path_mtu;
3504 3554 qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
3505 3555 qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
drivers/infiniband/core/cma.c
... ... @@ -340,7 +340,7 @@
340 340 int ret;
341 341  
342 342 if (addr->sa_family != AF_IB) {
343   - ret = rdma_translate_ip(addr, dev_addr);
  343 + ret = rdma_translate_ip(addr, dev_addr, NULL);
344 344 } else {
345 345 cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
346 346 ret = 0;
... ... @@ -603,6 +603,7 @@
603 603 {
604 604 struct ib_qp_attr qp_attr;
605 605 int qp_attr_mask, ret;
  606 + union ib_gid sgid;
606 607  
607 608 mutex_lock(&id_priv->qp_mutex);
608 609 if (!id_priv->id.qp) {
... ... @@ -625,6 +626,20 @@
625 626 if (ret)
626 627 goto out;
627 628  
  629 + ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
  630 + qp_attr.ah_attr.grh.sgid_index, &sgid);
  631 + if (ret)
  632 + goto out;
  633 +
  634 + if (rdma_node_get_transport(id_priv->cma_dev->device->node_type)
  635 + == RDMA_TRANSPORT_IB &&
  636 + rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)
  637 + == IB_LINK_LAYER_ETHERNET) {
  638 + ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL);
  639 +
  640 + if (ret)
  641 + goto out;
  642 + }
628 643 if (conn_param)
629 644 qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
630 645 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
... ... @@ -725,6 +740,7 @@
725 740 else
726 741 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
727 742 qp_attr_mask);
  743 +
728 744 if (qp_attr->qp_state == IB_QPS_RTR)
729 745 qp_attr->rq_psn = id_priv->seq_num;
730 746 break;
... ... @@ -1266,6 +1282,15 @@
1266 1282 struct rdma_id_private *listen_id, *conn_id;
1267 1283 struct rdma_cm_event event;
1268 1284 int offset, ret;
  1285 + u8 smac[ETH_ALEN];
  1286 + u8 alt_smac[ETH_ALEN];
  1287 + u8 *psmac = smac;
  1288 + u8 *palt_smac = alt_smac;
  1289 + int is_iboe = ((rdma_node_get_transport(cm_id->device->node_type) ==
  1290 + RDMA_TRANSPORT_IB) &&
  1291 + (rdma_port_get_link_layer(cm_id->device,
  1292 + ib_event->param.req_rcvd.port) ==
  1293 + IB_LINK_LAYER_ETHERNET));
1269 1294  
1270 1295 listen_id = cm_id->context;
1271 1296 if (!cma_check_req_qp_type(&listen_id->id, ib_event))
1272 1297  
... ... @@ -1310,12 +1335,29 @@
1310 1335 if (ret)
1311 1336 goto err3;
1312 1337  
  1338 + if (is_iboe) {
  1339 + if (ib_event->param.req_rcvd.primary_path != NULL)
  1340 + rdma_addr_find_smac_by_sgid(
  1341 + &ib_event->param.req_rcvd.primary_path->sgid,
  1342 + psmac, NULL);
  1343 + else
  1344 + psmac = NULL;
  1345 + if (ib_event->param.req_rcvd.alternate_path != NULL)
  1346 + rdma_addr_find_smac_by_sgid(
  1347 + &ib_event->param.req_rcvd.alternate_path->sgid,
  1348 + palt_smac, NULL);
  1349 + else
  1350 + palt_smac = NULL;
  1351 + }
1313 1352 /*
1314 1353 * Acquire mutex to prevent user executing rdma_destroy_id()
1315 1354 * while we're accessing the cm_id.
1316 1355 */
1317 1356 mutex_lock(&lock);
1318   - if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD))
  1357 + if (is_iboe)
  1358 + ib_update_cm_av(cm_id, psmac, palt_smac);
  1359 + if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
  1360 + (conn_id->id.qp_type != IB_QPT_UD))
1319 1361 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
1320 1362 mutex_unlock(&lock);
1321 1363 mutex_unlock(&conn_id->handler_mutex);
... ... @@ -1474,7 +1516,7 @@
1474 1516 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
1475 1517 conn_id->state = RDMA_CM_CONNECT;
1476 1518  
1477   - ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
  1519 + ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL);
1478 1520 if (ret) {
1479 1521 mutex_unlock(&conn_id->handler_mutex);
1480 1522 rdma_destroy_id(new_cm_id);
1481 1523  
... ... @@ -1873,8 +1915,8 @@
1873 1915 struct cma_work *work;
1874 1916 int ret;
1875 1917 struct net_device *ndev = NULL;
1876   - u16 vid;
1877 1918  
  1919 +
1878 1920 work = kzalloc(sizeof *work, GFP_KERNEL);
1879 1921 if (!work)
1880 1922 return -ENOMEM;
1881 1923  
... ... @@ -1897,10 +1939,14 @@
1897 1939 goto err2;
1898 1940 }
1899 1941  
1900   - vid = rdma_vlan_dev_vlan_id(ndev);
  1942 + route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev);
  1943 + memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
  1944 + memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len);
1901 1945  
1902   - iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid);
1903   - iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid);
  1946 + iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr,
  1947 + route->path_rec->vlan_id);
  1948 + iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr,
  1949 + route->path_rec->vlan_id);
1904 1950  
1905 1951 route->path_rec->hop_limit = 1;
1906 1952 route->path_rec->reversible = 1;
drivers/infiniband/core/sa_query.c
... ... @@ -42,7 +42,7 @@
42 42 #include <linux/kref.h>
43 43 #include <linux/idr.h>
44 44 #include <linux/workqueue.h>
45   -
  45 +#include <uapi/linux/if_ether.h>
46 46 #include <rdma/ib_pack.h>
47 47 #include <rdma/ib_cache.h>
48 48 #include "sa.h"
... ... @@ -556,6 +556,13 @@
556 556 ah_attr->grh.hop_limit = rec->hop_limit;
557 557 ah_attr->grh.traffic_class = rec->traffic_class;
558 558 }
  559 + if (force_grh) {
  560 + memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
  561 + ah_attr->vlan_id = rec->vlan_id;
  562 + } else {
  563 + ah_attr->vlan_id = 0xffff;
  564 + }
  565 +
559 566 return 0;
560 567 }
561 568 EXPORT_SYMBOL(ib_init_ah_from_path);
... ... @@ -670,6 +677,9 @@
670 677  
671 678 ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
672 679 mad->data, &rec);
  680 + rec.vlan_id = 0xffff;
  681 + memset(rec.dmac, 0, ETH_ALEN);
  682 + memset(rec.smac, 0, ETH_ALEN);
673 683 query->callback(status, &rec, query->context);
674 684 } else
675 685 query->callback(status, NULL, query->context);
drivers/infiniband/core/verbs.c
... ... @@ -44,6 +44,7 @@
44 44  
45 45 #include <rdma/ib_verbs.h>
46 46 #include <rdma/ib_cache.h>
  47 +#include <rdma/ib_addr.h>
47 48  
48 49 int ib_rate_to_mult(enum ib_rate rate)
49 50 {
50 51  
... ... @@ -192,8 +193,28 @@
192 193 u32 flow_class;
193 194 u16 gid_index;
194 195 int ret;
  196 + int is_eth = (rdma_port_get_link_layer(device, port_num) ==
  197 + IB_LINK_LAYER_ETHERNET);
195 198  
196 199 memset(ah_attr, 0, sizeof *ah_attr);
  200 + if (is_eth) {
  201 + if (!(wc->wc_flags & IB_WC_GRH))
  202 + return -EPROTOTYPE;
  203 +
  204 + if (wc->wc_flags & IB_WC_WITH_SMAC &&
  205 + wc->wc_flags & IB_WC_WITH_VLAN) {
  206 + memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
  207 + ah_attr->vlan_id = wc->vlan_id;
  208 + } else {
  209 + ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
  210 + ah_attr->dmac, &ah_attr->vlan_id);
  211 + if (ret)
  212 + return ret;
  213 + }
  214 + } else {
  215 + ah_attr->vlan_id = 0xffff;
  216 + }
  217 +
197 218 ah_attr->dlid = wc->slid;
198 219 ah_attr->sl = wc->sl;
199 220 ah_attr->src_path_bits = wc->dlid_path_bits;
200 221  
... ... @@ -476,7 +497,9 @@
476 497 static const struct {
477 498 int valid;
478 499 enum ib_qp_attr_mask req_param[IB_QPT_MAX];
  500 + enum ib_qp_attr_mask req_param_add_eth[IB_QPT_MAX];
479 501 enum ib_qp_attr_mask opt_param[IB_QPT_MAX];
  502 + enum ib_qp_attr_mask opt_param_add_eth[IB_QPT_MAX];
480 503 } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
481 504 [IB_QPS_RESET] = {
482 505 [IB_QPS_RESET] = { .valid = 1 },
... ... @@ -557,6 +580,12 @@
557 580 IB_QP_MAX_DEST_RD_ATOMIC |
558 581 IB_QP_MIN_RNR_TIMER),
559 582 },
  583 + .req_param_add_eth = {
  584 + [IB_QPT_RC] = (IB_QP_SMAC),
  585 + [IB_QPT_UC] = (IB_QP_SMAC),
  586 + [IB_QPT_XRC_INI] = (IB_QP_SMAC),
  587 + [IB_QPT_XRC_TGT] = (IB_QP_SMAC)
  588 + },
560 589 .opt_param = {
561 590 [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
562 591 IB_QP_QKEY),
... ... @@ -576,7 +605,21 @@
576 605 IB_QP_QKEY),
577 606 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
578 607 IB_QP_QKEY),
579   - }
  608 + },
  609 + .opt_param_add_eth = {
  610 + [IB_QPT_RC] = (IB_QP_ALT_SMAC |
  611 + IB_QP_VID |
  612 + IB_QP_ALT_VID),
  613 + [IB_QPT_UC] = (IB_QP_ALT_SMAC |
  614 + IB_QP_VID |
  615 + IB_QP_ALT_VID),
  616 + [IB_QPT_XRC_INI] = (IB_QP_ALT_SMAC |
  617 + IB_QP_VID |
  618 + IB_QP_ALT_VID),
  619 + [IB_QPT_XRC_TGT] = (IB_QP_ALT_SMAC |
  620 + IB_QP_VID |
  621 + IB_QP_ALT_VID)
  622 + }
580 623 }
581 624 },
582 625 [IB_QPS_RTR] = {
... ... @@ -779,7 +822,8 @@
779 822 };
780 823  
781 824 int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
782   - enum ib_qp_type type, enum ib_qp_attr_mask mask)
  825 + enum ib_qp_type type, enum ib_qp_attr_mask mask,
  826 + enum rdma_link_layer ll)
783 827 {
784 828 enum ib_qp_attr_mask req_param, opt_param;
785 829  
... ... @@ -797,6 +841,13 @@
797 841  
798 842 req_param = qp_state_table[cur_state][next_state].req_param[type];
799 843 opt_param = qp_state_table[cur_state][next_state].opt_param[type];
  844 +
  845 + if (ll == IB_LINK_LAYER_ETHERNET) {
  846 + req_param |= qp_state_table[cur_state][next_state].
  847 + req_param_add_eth[type];
  848 + opt_param |= qp_state_table[cur_state][next_state].
  849 + opt_param_add_eth[type];
  850 + }
800 851  
801 852 if ((mask & req_param) != req_param)
802 853 return 0;
drivers/infiniband/hw/ehca/ehca_qp.c
... ... @@ -1329,7 +1329,7 @@
1329 1329 qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state;
1330 1330 if (!smi_reset2init &&
1331 1331 !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type,
1332   - attr_mask)) {
  1332 + attr_mask, IB_LINK_LAYER_UNSPECIFIED)) {
1333 1333 ret = -EINVAL;
1334 1334 ehca_err(ibqp->device,
1335 1335 "Invalid qp transition new_state=%x cur_state=%x "
drivers/infiniband/hw/ipath/ipath_qp.c
... ... @@ -463,7 +463,7 @@
463 463 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
464 464  
465 465 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
466   - attr_mask))
  466 + attr_mask, IB_LINK_LAYER_UNSPECIFIED))
467 467 goto inval;
468 468  
469 469 if (attr_mask & IB_QP_AV) {
drivers/infiniband/hw/mlx4/qp.c
... ... @@ -1561,13 +1561,18 @@
1561 1561 struct mlx4_ib_qp *qp = to_mqp(ibqp);
1562 1562 enum ib_qp_state cur_state, new_state;
1563 1563 int err = -EINVAL;
1564   -
  1564 + int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
1565 1565 mutex_lock(&qp->mutex);
1566 1566  
1567 1567 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
1568 1568 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
1569 1569  
1570   - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
  1570 + if (cur_state == new_state && cur_state == IB_QPS_RESET)
  1571 + p = IB_LINK_LAYER_UNSPECIFIED;
  1572 +
  1573 + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
  1574 + attr_mask,
  1575 + rdma_port_get_link_layer(&dev->ib_dev, p))) {
1571 1576 pr_debug("qpn 0x%x: invalid attribute mask specified "
1572 1577 "for transition %d to %d. qp_type %d,"
1573 1578 " attr_mask 0x%x\n",
drivers/infiniband/hw/mlx5/qp.c
... ... @@ -1616,7 +1616,8 @@
1616 1616 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
1617 1617  
1618 1618 if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
1619   - !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
  1619 + !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
  1620 + IB_LINK_LAYER_UNSPECIFIED))
1620 1621 goto out;
1621 1622  
1622 1623 if ((attr_mask & IB_QP_PORT) &&
drivers/infiniband/hw/mthca/mthca_qp.c
... ... @@ -860,7 +860,8 @@
860 860  
861 861 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
862 862  
863   - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
  863 + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
  864 + IB_LINK_LAYER_UNSPECIFIED)) {
864 865 mthca_dbg(dev, "Bad QP transition (transport %d) "
865 866 "%d->%d with attr 0x%08x\n",
866 867 qp->transport, cur_state, new_state,
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
... ... @@ -1326,7 +1326,8 @@
1326 1326 new_qps = old_qps;
1327 1327 spin_unlock_irqrestore(&qp->q_lock, flags);
1328 1328  
1329   - if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
  1329 + if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask,
  1330 + IB_LINK_LAYER_UNSPECIFIED)) {
1330 1331 pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
1331 1332 "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
1332 1333 __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
drivers/infiniband/hw/qib/qib_qp.c
... ... @@ -585,7 +585,7 @@
585 585 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
586 586  
587 587 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
588   - attr_mask))
  588 + attr_mask, IB_LINK_LAYER_UNSPECIFIED))
589 589 goto inval;
590 590  
591 591 if (attr_mask & IB_QP_AV) {
include/linux/mlx4/device.h
... ... @@ -1095,6 +1095,7 @@
1095 1095 int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc);
1096 1096 int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
1097 1097 u8 *pg, u16 *ratelimit);
  1098 +int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx);
1098 1099 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
1099 1100 int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
1100 1101 void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
include/rdma/ib_addr.h
... ... @@ -42,6 +42,7 @@
42 42 #include <linux/if_vlan.h>
43 43 #include <rdma/ib_verbs.h>
44 44 #include <rdma/ib_pack.h>
  45 +#include <net/ipv6.h>
45 46  
46 47 struct rdma_addr_client {
47 48 atomic_t refcount;
... ... @@ -72,7 +73,8 @@
72 73 * rdma_translate_ip - Translate a local IP address to an RDMA hardware
73 74 * address.
74 75 */
75   -int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr);
  76 +int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
  77 + u16 *vlan_id);
76 78  
77 79 /**
78 80 * rdma_resolve_ip - Resolve source and destination IP addresses to
... ... @@ -104,6 +106,10 @@
104 106  
105 107 int rdma_addr_size(struct sockaddr *addr);
106 108  
  109 +int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
  110 +int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *smac,
  111 + u16 *vlan_id);
  112 +
107 113 static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
108 114 {
109 115 return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9];
... ... @@ -140,6 +146,40 @@
140 146 memcpy(gid->raw + 13, mac + 3, 3);
141 147 memcpy(gid->raw + 8, mac, 3);
142 148 gid->raw[8] ^= 2;
  149 +}
  150 +
  151 +static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid)
  152 +{
  153 + switch (addr->sa_family) {
  154 + case AF_INET:
  155 + ipv6_addr_set_v4mapped(((struct sockaddr_in *)
  156 + addr)->sin_addr.s_addr,
  157 + (struct in6_addr *)gid);
  158 + break;
  159 + case AF_INET6:
  160 + memcpy(gid->raw, &((struct sockaddr_in6 *)addr)->sin6_addr, 16);
  161 + break;
  162 + default:
  163 + return -EINVAL;
  164 + }
  165 + return 0;
  166 +}
  167 +
  168 +/* Important - sockaddr should be a union of sockaddr_in and sockaddr_in6 */
  169 +static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid)
  170 +{
  171 + if (ipv6_addr_v4mapped((struct in6_addr *)gid)) {
  172 + struct sockaddr_in *out_in = (struct sockaddr_in *)out;
  173 + memset(out_in, 0, sizeof(*out_in));
  174 + out_in->sin_family = AF_INET;
  175 + memcpy(&out_in->sin_addr.s_addr, gid->raw + 12, 4);
  176 + } else {
  177 + struct sockaddr_in6 *out_in = (struct sockaddr_in6 *)out;
  178 + memset(out_in, 0, sizeof(*out_in));
  179 + out_in->sin6_family = AF_INET6;
  180 + memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16);
  181 + }
  182 + return 0;
143 183 }
144 184  
145 185 static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev)
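
The conversion helpers above underpin the rdma_addr_find_* functions
in addr.c; as the comment warns, the output sockaddr must be a union
large enough for either address family. A minimal usage sketch (sgid
and dgid are union ib_gid values; assumed, not from this hunk):

	union {
		struct sockaddr		_sockaddr;
		struct sockaddr_in	_sockaddr_in;
		struct sockaddr_in6	_sockaddr_in6;
	} gid_addr;

	/* GID -> sockaddr: v4-mapped GIDs come back as AF_INET */
	rdma_gid2ip(&gid_addr._sockaddr, &sgid);

	/* sockaddr -> GID: AF_INET addresses are stored v4-mapped */
	rdma_ip2gid(&gid_addr._sockaddr, &dgid);
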
include/rdma/ib_cm.h
... ... @@ -601,5 +601,6 @@
601 601 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
602 602 struct ib_cm_sidr_rep_param *param);
603 603  
  604 +int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac);
604 605 #endif /* IB_CM_H */
include/rdma/ib_pack.h
... ... @@ -34,6 +34,7 @@
34 34 #define IB_PACK_H
35 35  
36 36 #include <rdma/ib_verbs.h>
  37 +#include <uapi/linux/if_ether.h>
37 38  
38 39 enum {
39 40 IB_LRH_BYTES = 8,
include/rdma/ib_sa.h
... ... @@ -154,6 +154,9 @@
154 154 u8 packet_life_time_selector;
155 155 u8 packet_life_time;
156 156 u8 preference;
  157 + u8 smac[ETH_ALEN];
  158 + u8 dmac[ETH_ALEN];
  159 + u16 vlan_id;
157 160 };
158 161  
159 162 #define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0)
include/rdma/ib_verbs.h
... ... @@ -48,6 +48,7 @@
48 48 #include <linux/rwsem.h>
49 49 #include <linux/scatterlist.h>
50 50 #include <linux/workqueue.h>
  51 +#include <uapi/linux/if_ether.h>
51 52  
52 53 #include <linux/atomic.h>
53 54 #include <asm/uaccess.h>
... ... @@ -472,6 +473,8 @@
472 473 u8 static_rate;
473 474 u8 ah_flags;
474 475 u8 port_num;
  476 + u8 dmac[ETH_ALEN];
  477 + u16 vlan_id;
475 478 };
476 479  
477 480 enum ib_wc_status {
... ... @@ -524,6 +527,8 @@
524 527 IB_WC_WITH_IMM = (1<<1),
525 528 IB_WC_WITH_INVALIDATE = (1<<2),
526 529 IB_WC_IP_CSUM_OK = (1<<3),
  530 + IB_WC_WITH_SMAC = (1<<4),
  531 + IB_WC_WITH_VLAN = (1<<5),
527 532 };
528 533  
529 534 struct ib_wc {
... ... @@ -544,6 +549,8 @@
544 549 u8 sl;
545 550 u8 dlid_path_bits;
546 551 u8 port_num; /* valid only for DR SMPs on switches */
  552 + u8 smac[ETH_ALEN];
  553 + u16 vlan_id;
547 554 };
548 555  
549 556 enum ib_cq_notify_flags {
... ... @@ -721,7 +728,11 @@
721 728 IB_QP_MAX_DEST_RD_ATOMIC = (1<<17),
722 729 IB_QP_PATH_MIG_STATE = (1<<18),
723 730 IB_QP_CAP = (1<<19),
724   - IB_QP_DEST_QPN = (1<<20)
  731 + IB_QP_DEST_QPN = (1<<20),
  732 + IB_QP_SMAC = (1<<21),
  733 + IB_QP_ALT_SMAC = (1<<22),
  734 + IB_QP_VID = (1<<23),
  735 + IB_QP_ALT_VID = (1<<24),
725 736 };
726 737  
727 738 enum ib_qp_state {
... ... @@ -771,6 +782,10 @@
771 782 u8 rnr_retry;
772 783 u8 alt_port_num;
773 784 u8 alt_timeout;
  785 + u8 smac[ETH_ALEN];
  786 + u8 alt_smac[ETH_ALEN];
  787 + u16 vlan_id;
  788 + u16 alt_vlan_id;
774 789 };
775 790  
776 791 enum ib_wr_opcode {
... ... @@ -1488,6 +1503,7 @@
1488 1503 * @next_state: Next QP state
1489 1504 * @type: QP type
1490 1505 * @mask: Mask of supplied QP attributes
  1506 + * @ll : link layer of port
1491 1507 *
1492 1508 * This function is a helper function that a low-level driver's
1493 1509 * modify_qp method can use to validate the consumer's input. It
... ... @@ -1496,7 +1512,8 @@
1496 1512 * and that the attribute mask supplied is allowed for the transition.
1497 1513 */
1498 1514 int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
1499   - enum ib_qp_type type, enum ib_qp_attr_mask mask);
  1515 + enum ib_qp_type type, enum ib_qp_attr_mask mask,
  1516 + enum rdma_link_layer ll);
1500 1517  
1501 1518 int ib_register_event_handler (struct ib_event_handler *event_handler);
1502 1519 int ib_unregister_event_handler(struct ib_event_handler *event_handler);