Commit fba75200ad92892bf32d8d6f1443c6f1e4f48676

Authored by Bryan O'Sullivan
Committed by Linus Torvalds
1 parent 85322947d7

[PATCH] IB/ipath: fixes to performance get counters for IB compliance

This patch fixes some problems uncovered during IB compliance testing, so
that the Performance Get Counters packet returns the right values for its
error counters.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Cc: "Michael S. Tsirkin" <mst@mellanox.co.il>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 9 changed files with 93 additions and 27 deletions (side-by-side diff)

drivers/infiniband/hw/ipath/ipath_driver.c
... ... @@ -460,6 +460,8 @@
460 460 * by ipath_setup_htconfig.
461 461 */
462 462 dd->ipath_flags = 0;
  463 + dd->ipath_lli_counter = 0;
  464 + dd->ipath_lli_errors = 0;
463 465  
464 466 if (dd->ipath_f_bus(dd, pdev))
465 467 ipath_dev_err(dd, "Failed to setup config space; "
... ... @@ -942,6 +944,18 @@
942 944 "tlen=%x opcode=%x egridx=%x: %s\n",
943 945 eflags, l, etype, tlen, bthbytes[0],
944 946 ips_get_index((__le32 *) rc), emsg);
  947 + /* Count local link integrity errors. */
  948 + if (eflags & (INFINIPATH_RHF_H_ICRCERR |
  949 + INFINIPATH_RHF_H_VCRCERR)) {
  950 + u8 n = (dd->ipath_ibcctrl >>
  951 + INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
  952 + INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
  953 +
  954 + if (++dd->ipath_lli_counter > n) {
  955 + dd->ipath_lli_counter = 0;
  956 + dd->ipath_lli_errors++;
  957 + }
  958 + }
945 959 } else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
946 960 int ret = __ipath_verbs_rcv(dd, rc + 1,
947 961 ebuf, tlen);
... ... @@ -949,6 +963,9 @@
949 963 ipath_cdbg(VERBOSE,
950 964 "received IB packet, "
951 965 "not SMA (QP=%x)\n", qp);
  966 + if (dd->ipath_lli_counter)
  967 + dd->ipath_lli_counter--;
  968 +
952 969 } else if (etype == RCVHQ_RCV_TYPE_EAGER) {
953 970 if (qp == IPATH_KD_QP &&
954 971 bthbytes[0] == ipath_layer_rcv_opcode &&
drivers/infiniband/hw/ipath/ipath_intr.c
... ... @@ -262,6 +262,7 @@
262 262 | IPATH_LINKACTIVE |
263 263 IPATH_LINKARMED);
264 264 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
  265 + dd->ipath_lli_counter = 0;
265 266 if (!noprint) {
266 267 if (((dd->ipath_lastibcstat >>
267 268 INFINIPATH_IBCS_LINKSTATE_SHIFT) &
drivers/infiniband/hw/ipath/ipath_kernel.h
... ... @@ -507,6 +507,11 @@
507 507 u8 ipath_pci_cacheline;
508 508 /* LID mask control */
509 509 u8 ipath_lmc;
  510 +
  511 + /* local link integrity counter */
  512 + u32 ipath_lli_counter;
  513 + /* local link integrity errors */
  514 + u32 ipath_lli_errors;
510 515 };
511 516  
512 517 extern struct list_head ipath_dev_list;
drivers/infiniband/hw/ipath/ipath_layer.c
... ... @@ -1032,19 +1032,22 @@
1032 1032 ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
1033 1033 cntrs->link_error_recovery_counter =
1034 1034 ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
  1035 + /*
  1036 + * The link downed counter counts when the other side downs the
  1037 + * connection. We add in the number of times we downed the link
  1038 + * due to local link integrity errors to compensate.
  1039 + */
1035 1040 cntrs->link_downed_counter =
1036 1041 ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
1037 1042 cntrs->port_rcv_errors =
1038 1043 ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
1039 1044 ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
1040 1045 ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
1041   - ipath_snap_cntr(dd, dd->ipath_cregs->cr_errrcvflowctrlcnt) +
1042 1046 ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
1043 1047 ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
1044 1048 ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
1045 1049 ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
1046 1050 ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
1047   - ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlinkcnt) +
1048 1051 ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
1049 1052 cntrs->port_rcv_remphys_errors =
1050 1053 ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
... ... @@ -1058,6 +1061,8 @@
1058 1061 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
1059 1062 cntrs->port_rcv_packets =
1060 1063 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
  1064 + cntrs->local_link_integrity_errors = dd->ipath_lli_errors;
  1065 + cntrs->excessive_buffer_overrun_errors = 0; /* XXX */
1061 1066  
1062 1067 ret = 0;
1063 1068  
drivers/infiniband/hw/ipath/ipath_layer.h
... ... @@ -55,6 +55,8 @@
55 55 u64 port_rcv_data;
56 56 u64 port_xmit_packets;
57 57 u64 port_rcv_packets;
  58 + u32 local_link_integrity_errors;
  59 + u32 excessive_buffer_overrun_errors;
58 60 };
59 61  
60 62 /*
drivers/infiniband/hw/ipath/ipath_mad.c
... ... @@ -613,6 +613,9 @@
613 613 #define IB_PMA_SEL_PORT_RCV_ERRORS __constant_htons(0x0008)
614 614 #define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS __constant_htons(0x0010)
615 615 #define IB_PMA_SEL_PORT_XMIT_DISCARDS __constant_htons(0x0040)
  616 +#define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS __constant_htons(0x0200)
  617 +#define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS __constant_htons(0x0400)
  618 +#define IB_PMA_SEL_PORT_VL15_DROPPED __constant_htons(0x0800)
616 619 #define IB_PMA_SEL_PORT_XMIT_DATA __constant_htons(0x1000)
617 620 #define IB_PMA_SEL_PORT_RCV_DATA __constant_htons(0x2000)
618 621 #define IB_PMA_SEL_PORT_XMIT_PACKETS __constant_htons(0x4000)
... ... @@ -859,6 +862,10 @@
859 862 cntrs.port_rcv_data -= dev->z_port_rcv_data;
860 863 cntrs.port_xmit_packets -= dev->z_port_xmit_packets;
861 864 cntrs.port_rcv_packets -= dev->z_port_rcv_packets;
  865 + cntrs.local_link_integrity_errors -=
  866 + dev->z_local_link_integrity_errors;
  867 + cntrs.excessive_buffer_overrun_errors -=
  868 + dev->z_excessive_buffer_overrun_errors;
862 869  
863 870 memset(pmp->data, 0, sizeof(pmp->data));
864 871  
... ... @@ -896,6 +903,16 @@
896 903 else
897 904 p->port_xmit_discards =
898 905 cpu_to_be16((u16)cntrs.port_xmit_discards);
  906 + if (cntrs.local_link_integrity_errors > 0xFUL)
  907 + cntrs.local_link_integrity_errors = 0xFUL;
  908 + if (cntrs.excessive_buffer_overrun_errors > 0xFUL)
  909 + cntrs.excessive_buffer_overrun_errors = 0xFUL;
  910 + p->lli_ebor_errors = (cntrs.local_link_integrity_errors << 4) |
  911 + cntrs.excessive_buffer_overrun_errors;
  912 + if (dev->n_vl15_dropped > 0xFFFFUL)
  913 + p->vl15_dropped = __constant_cpu_to_be16(0xFFFF);
  914 + else
  915 + p->vl15_dropped = cpu_to_be16((u16)dev->n_vl15_dropped);
899 916 if (cntrs.port_xmit_data > 0xFFFFFFFFUL)
900 917 p->port_xmit_data = __constant_cpu_to_be32(0xFFFFFFFF);
901 918 else
... ... @@ -990,6 +1007,17 @@
990 1007 if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS)
991 1008 dev->z_port_xmit_discards = cntrs.port_xmit_discards;
992 1009  
  1010 + if (p->counter_select & IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS)
  1011 + dev->z_local_link_integrity_errors =
  1012 + cntrs.local_link_integrity_errors;
  1013 +
  1014 + if (p->counter_select & IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS)
  1015 + dev->z_excessive_buffer_overrun_errors =
  1016 + cntrs.excessive_buffer_overrun_errors;
  1017 +
  1018 + if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED)
  1019 + dev->n_vl15_dropped = 0;
  1020 +
993 1021 if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA)
994 1022 dev->z_port_xmit_data = cntrs.port_xmit_data;
995 1023  
996 1024  
... ... @@ -1275,32 +1303,8 @@
1275 1303 struct ib_wc *in_wc, struct ib_grh *in_grh,
1276 1304 struct ib_mad *in_mad, struct ib_mad *out_mad)
1277 1305 {
1278   - struct ipath_ibdev *dev = to_idev(ibdev);
1279 1306 int ret;
1280 1307  
1281   - /*
1282   - * Snapshot current HW counters to "clear" them.
1283   - * This should be done when the driver is loaded except that for
1284   - * some reason we get a zillion errors when brining up the link.
1285   - */
1286   - if (dev->rcv_errors == 0) {
1287   - struct ipath_layer_counters cntrs;
1288   -
1289   - ipath_layer_get_counters(to_idev(ibdev)->dd, &cntrs);
1290   - dev->rcv_errors++;
1291   - dev->z_symbol_error_counter = cntrs.symbol_error_counter;
1292   - dev->z_link_error_recovery_counter =
1293   - cntrs.link_error_recovery_counter;
1294   - dev->z_link_downed_counter = cntrs.link_downed_counter;
1295   - dev->z_port_rcv_errors = cntrs.port_rcv_errors + 1;
1296   - dev->z_port_rcv_remphys_errors =
1297   - cntrs.port_rcv_remphys_errors;
1298   - dev->z_port_xmit_discards = cntrs.port_xmit_discards;
1299   - dev->z_port_xmit_data = cntrs.port_xmit_data;
1300   - dev->z_port_rcv_data = cntrs.port_rcv_data;
1301   - dev->z_port_xmit_packets = cntrs.port_xmit_packets;
1302   - dev->z_port_rcv_packets = cntrs.port_rcv_packets;
1303   - }
1304 1308 switch (in_mad->mad_hdr.mgmt_class) {
1305 1309 case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
1306 1310 case IB_MGMT_CLASS_SUBN_LID_ROUTED:
drivers/infiniband/hw/ipath/ipath_ud.c
... ... @@ -560,7 +560,16 @@
560 560 spin_lock_irqsave(&rq->lock, flags);
561 561 if (rq->tail == rq->head) {
562 562 spin_unlock_irqrestore(&rq->lock, flags);
563   - dev->n_pkt_drops++;
  563 + /*
  564 + * Count VL15 packets dropped due to no receive buffer.
  565 + * Otherwise, count them as buffer overruns since usually,
  566 + * the HW will be able to receive packets even if there are
  567 + * no QPs with posted receive buffers.
  568 + */
  569 + if (qp->ibqp.qp_num == 0)
  570 + dev->n_vl15_dropped++;
  571 + else
  572 + dev->rcv_errors++;
564 573 goto bail;
565 574 }
566 575 /* Silently drop packets which are too big. */
drivers/infiniband/hw/ipath/ipath_verbs.c
... ... @@ -981,6 +981,7 @@
981 981 */
982 982 static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
983 983 {
  984 + struct ipath_layer_counters cntrs;
984 985 struct ipath_ibdev *idev;
985 986 struct ib_device *dev;
986 987 int ret;
... ... @@ -1030,6 +1031,25 @@
1030 1031 idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
1031 1032 idev->pma_counter_select[5] = IB_PMA_PORT_XMIT_WAIT;
1032 1033 idev->link_width_enabled = 3; /* 1x or 4x */
  1034 +
  1035 + /* Snapshot current HW counters to "clear" them. */
  1036 + ipath_layer_get_counters(dd, &cntrs);
  1037 + idev->z_symbol_error_counter = cntrs.symbol_error_counter;
  1038 + idev->z_link_error_recovery_counter =
  1039 + cntrs.link_error_recovery_counter;
  1040 + idev->z_link_downed_counter = cntrs.link_downed_counter;
  1041 + idev->z_port_rcv_errors = cntrs.port_rcv_errors;
  1042 + idev->z_port_rcv_remphys_errors =
  1043 + cntrs.port_rcv_remphys_errors;
  1044 + idev->z_port_xmit_discards = cntrs.port_xmit_discards;
  1045 + idev->z_port_xmit_data = cntrs.port_xmit_data;
  1046 + idev->z_port_rcv_data = cntrs.port_rcv_data;
  1047 + idev->z_port_xmit_packets = cntrs.port_xmit_packets;
  1048 + idev->z_port_rcv_packets = cntrs.port_rcv_packets;
  1049 + idev->z_local_link_integrity_errors =
  1050 + cntrs.local_link_integrity_errors;
  1051 + idev->z_excessive_buffer_overrun_errors =
  1052 + cntrs.excessive_buffer_overrun_errors;
1033 1053  
1034 1054 /*
1035 1055 * The system image GUID is supposed to be the same for all
drivers/infiniband/hw/ipath/ipath_verbs.h
... ... @@ -460,6 +460,8 @@
460 460 u64 z_port_xmit_packets; /* starting count for PMA */
461 461 u64 z_port_rcv_packets; /* starting count for PMA */
462 462 u32 z_pkey_violations; /* starting count for PMA */
  463 + u32 z_local_link_integrity_errors; /* starting count for PMA */
  464 + u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */
463 465 u32 n_rc_resends;
464 466 u32 n_rc_acks;
465 467 u32 n_rc_qacks;
... ... @@ -469,6 +471,7 @@
469 471 u32 n_other_naks;
470 472 u32 n_timeouts;
471 473 u32 n_pkt_drops;
  474 + u32 n_vl15_dropped;
472 475 u32 n_wqe_errs;
473 476 u32 n_rdma_dup_busy;
474 477 u32 n_piowait;