Commit fa45459e5e4507402350f1cdaf44b7455602770c

Authored by Ayaz Abdulla
Committed by Jeff Garzik
1 parent 3bb8a18ae8

[PATCH] forcedeth: TSO fix for large buffers

This contains a bug fix for large buffers.  Originally, if a tx buffer to
be sent was larger than the maximum size a tx descriptor can hold, its
length would overwrite other control bits.  With this patch, the buffer is
split over multiple descriptors.  The fragments are also now set up in
forward order.

Signed-off-by: Ayaz Abdulla <aabdulla@nvidia.com>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Jeff Garzik <jgarzik@pobox.com>

Showing 1 changed file with 100 additions and 64 deletions
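For illustration only (not part of the commit), the sketch below mimics the splitting logic the patch adds to nv_start_xmit(): a buffer larger than NV_TX2_TSO_MAX_SIZE (1 << 14 bytes) is carved into chunks, one tx descriptor per chunk, with the required descriptor count computed up front.  It is a standalone userspace program; count_tx_entries() is a hypothetical helper, and only the two constants mirror the driver's macros.

/*
 * Standalone sketch of the per-descriptor splitting introduced by the
 * patch.  Not driver code: count_tx_entries() is a hypothetical helper,
 * only the constants mirror the forcedeth macros.
 */
#include <stdio.h>

#define NV_TX2_TSO_MAX_SHIFT 14
#define NV_TX2_TSO_MAX_SIZE  (1 << NV_TX2_TSO_MAX_SHIFT)

/* Number of tx descriptors needed for a buffer of 'size' bytes. */
static unsigned int count_tx_entries(unsigned int size)
{
	return (size >> NV_TX2_TSO_MAX_SHIFT) +
	       ((size & (NV_TX2_TSO_MAX_SIZE - 1)) ? 1 : 0);
}

int main(void)
{
	unsigned int size = 60000;	/* e.g. a large TSO payload */
	unsigned int offset = 0;

	printf("%u bytes -> %u descriptors\n", size, count_tx_entries(size));

	/* Same chunking loop as the patch: at most MAX_SIZE per descriptor. */
	while (size) {
		unsigned int bcnt = (size > NV_TX2_TSO_MAX_SIZE) ?
				    NV_TX2_TSO_MAX_SIZE : size;
		printf("descriptor: offset=%u len=%u\n", offset, bcnt);
		offset += bcnt;
		size -= bcnt;
	}
	return 0;
}

For a 60000-byte payload this reports 4 descriptors (3 full 16 KB chunks plus the remainder), matching the entries calculation the patch performs before reserving ring slots.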

drivers/net/forcedeth.c
... ... @@ -101,6 +101,7 @@
101 101 * 0.46: 20 Oct 2005: Add irq optimization modes.
102 102 * 0.47: 26 Oct 2005: Add phyaddr 0 in phy scan.
103 103 * 0.48: 24 Dec 2005: Disable TSO, bugfix for pci_map_single
  104 + * 0.49: 10 Dec 2005: Fix tso for large buffers.
104 105 *
105 106 * Known bugs:
106 107 * We suspect that on some hardware no TX done interrupts are generated.
... ... @@ -112,7 +113,7 @@
112 113 * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few
113 114 * superfluous timer interrupts from the nic.
114 115 */
115   -#define FORCEDETH_VERSION "0.48"
  116 +#define FORCEDETH_VERSION "0.49"
116 117 #define DRV_NAME "forcedeth"
117 118  
118 119 #include <linux/module.h>
... ... @@ -349,6 +350,8 @@
349 350 #define NV_TX2_VALID (1<<31)
350 351 #define NV_TX2_TSO (1<<28)
351 352 #define NV_TX2_TSO_SHIFT 14
  353 +#define NV_TX2_TSO_MAX_SHIFT 14
  354 +#define NV_TX2_TSO_MAX_SIZE (1<<NV_TX2_TSO_MAX_SHIFT)
352 355 #define NV_TX2_CHECKSUM_L3 (1<<27)
353 356 #define NV_TX2_CHECKSUM_L4 (1<<26)
354 357  
... ... @@ -408,15 +411,15 @@
408 411 #define NV_WATCHDOG_TIMEO (5*HZ)
409 412  
410 413 #define RX_RING 128
411   -#define TX_RING 64
  414 +#define TX_RING 256
412 415 /*
413 416 * If your nic mysteriously hangs then try to reduce the limits
414 417 * to 1/0: It might be required to set NV_TX_LASTPACKET in the
415 418 * last valid ring entry. But this would be impossible to
416 419 * implement - probably a disassembly error.
417 420 */
418   -#define TX_LIMIT_STOP 63
419   -#define TX_LIMIT_START 62
  421 +#define TX_LIMIT_STOP 255
  422 +#define TX_LIMIT_START 254
420 423  
421 424 /* rx/tx mac addr + type + vlan + align + slack*/
422 425 #define NV_RX_HEADERS (64)
... ... @@ -535,6 +538,7 @@
535 538 unsigned int next_tx, nic_tx;
536 539 struct sk_buff *tx_skbuff[TX_RING];
537 540 dma_addr_t tx_dma[TX_RING];
  541 + unsigned int tx_dma_len[TX_RING];
538 542 u32 tx_flags;
539 543 };
540 544  
... ... @@ -935,6 +939,7 @@
935 939 else
936 940 np->tx_ring.ex[i].FlagLen = 0;
937 941 np->tx_skbuff[i] = NULL;
  942 + np->tx_dma[i] = 0;
938 943 }
939 944 }
940 945  
... ... @@ -945,30 +950,27 @@
945 950 return nv_alloc_rx(dev);
946 951 }
947 952  
948   -static void nv_release_txskb(struct net_device *dev, unsigned int skbnr)
  953 +static int nv_release_txskb(struct net_device *dev, unsigned int skbnr)
949 954 {
950 955 struct fe_priv *np = netdev_priv(dev);
951   - struct sk_buff *skb = np->tx_skbuff[skbnr];
952   - unsigned int j, entry, fragments;
953   -
954   - dprintk(KERN_INFO "%s: nv_release_txskb for skbnr %d, skb %p\n",
955   - dev->name, skbnr, np->tx_skbuff[skbnr]);
956   -
957   - entry = skbnr;
958   - if ((fragments = skb_shinfo(skb)->nr_frags) != 0) {
959   - for (j = fragments; j >= 1; j--) {
960   - skb_frag_t *frag = &skb_shinfo(skb)->frags[j-1];
961   - pci_unmap_page(np->pci_dev, np->tx_dma[entry],
962   - frag->size,
963   - PCI_DMA_TODEVICE);
964   - entry = (entry - 1) % TX_RING;
965   - }
  956 +
  957 + dprintk(KERN_INFO "%s: nv_release_txskb for skbnr %d\n",
  958 + dev->name, skbnr);
  959 +
  960 + if (np->tx_dma[skbnr]) {
  961 + pci_unmap_page(np->pci_dev, np->tx_dma[skbnr],
  962 + np->tx_dma_len[skbnr],
  963 + PCI_DMA_TODEVICE);
  964 + np->tx_dma[skbnr] = 0;
966 965 }
967   - pci_unmap_single(np->pci_dev, np->tx_dma[entry],
968   - skb->len - skb->data_len,
969   - PCI_DMA_TODEVICE);
970   - dev_kfree_skb_irq(skb);
971   - np->tx_skbuff[skbnr] = NULL;
  966 +
  967 + if (np->tx_skbuff[skbnr]) {
  968 + dev_kfree_skb_irq(np->tx_skbuff[skbnr]);
  969 + np->tx_skbuff[skbnr] = NULL;
  970 + return 1;
  971 + } else {
  972 + return 0;
  973 + }
972 974 }
973 975  
974 976 static void nv_drain_tx(struct net_device *dev)
... ... @@ -981,10 +983,8 @@
981 983 np->tx_ring.orig[i].FlagLen = 0;
982 984 else
983 985 np->tx_ring.ex[i].FlagLen = 0;
984   - if (np->tx_skbuff[i]) {
985   - nv_release_txskb(dev, i);
  986 + if (nv_release_txskb(dev, i))
986 987 np->stats.tx_dropped++;
987   - }
988 988 }
989 989 }
990 990  
... ... @@ -1021,68 +1021,105 @@
1021 1021 static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
1022 1022 {
1023 1023 struct fe_priv *np = netdev_priv(dev);
  1024 + u32 tx_flags = 0;
1024 1025 u32 tx_flags_extra = (np->desc_ver == DESC_VER_1 ? NV_TX_LASTPACKET : NV_TX2_LASTPACKET);
1025 1026 unsigned int fragments = skb_shinfo(skb)->nr_frags;
1026   - unsigned int nr = (np->next_tx + fragments) % TX_RING;
  1027 + unsigned int nr = (np->next_tx - 1) % TX_RING;
  1028 + unsigned int start_nr = np->next_tx % TX_RING;
1027 1029 unsigned int i;
  1030 + u32 offset = 0;
  1031 + u32 bcnt;
  1032 + u32 size = skb->len-skb->data_len;
  1033 + u32 entries = (size >> NV_TX2_TSO_MAX_SHIFT) + ((size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
1028 1034  
  1035 + /* add fragments to entries count */
  1036 + for (i = 0; i < fragments; i++) {
  1037 + entries += (skb_shinfo(skb)->frags[i].size >> NV_TX2_TSO_MAX_SHIFT) +
  1038 + ((skb_shinfo(skb)->frags[i].size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
  1039 + }
  1040 +
1029 1041 spin_lock_irq(&np->lock);
1030 1042  
1031   - if ((np->next_tx - np->nic_tx + fragments) > TX_LIMIT_STOP) {
  1043 + if ((np->next_tx - np->nic_tx + entries - 1) > TX_LIMIT_STOP) {
1032 1044 spin_unlock_irq(&np->lock);
1033 1045 netif_stop_queue(dev);
1034 1046 return NETDEV_TX_BUSY;
1035 1047 }
1036 1048  
1037   - np->tx_skbuff[nr] = skb;
1038   -
1039   - if (fragments) {
1040   - dprintk(KERN_DEBUG "%s: nv_start_xmit: buffer contains %d fragments\n", dev->name, fragments);
1041   - /* setup descriptors in reverse order */
1042   - for (i = fragments; i >= 1; i--) {
1043   - skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1044   - np->tx_dma[nr] = pci_map_page(np->pci_dev, frag->page, frag->page_offset, frag->size,
1045   - PCI_DMA_TODEVICE);
  1049 + /* setup the header buffer */
  1050 + do {
  1051 + bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
  1052 + nr = (nr + 1) % TX_RING;
1046 1053  
  1054 + np->tx_dma[nr] = pci_map_single(np->pci_dev, skb->data + offset, bcnt,
  1055 + PCI_DMA_TODEVICE);
  1056 + np->tx_dma_len[nr] = bcnt;
  1057 +
  1058 + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
  1059 + np->tx_ring.orig[nr].PacketBuffer = cpu_to_le32(np->tx_dma[nr]);
  1060 + np->tx_ring.orig[nr].FlagLen = cpu_to_le32((bcnt-1) | tx_flags);
  1061 + } else {
  1062 + np->tx_ring.ex[nr].PacketBufferHigh = cpu_to_le64(np->tx_dma[nr]) >> 32;
  1063 + np->tx_ring.ex[nr].PacketBufferLow = cpu_to_le64(np->tx_dma[nr]) & 0x0FFFFFFFF;
  1064 + np->tx_ring.ex[nr].FlagLen = cpu_to_le32((bcnt-1) | tx_flags);
  1065 + }
  1066 + tx_flags = np->tx_flags;
  1067 + offset += bcnt;
  1068 + size -= bcnt;
  1069 + } while(size);
  1070 +
  1071 + /* setup the fragments */
  1072 + for (i = 0; i < fragments; i++) {
  1073 + skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  1074 + u32 size = frag->size;
  1075 + offset = 0;
  1076 +
  1077 + do {
  1078 + bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
  1079 + nr = (nr + 1) % TX_RING;
  1080 +
  1081 + np->tx_dma[nr] = pci_map_page(np->pci_dev, frag->page, frag->page_offset+offset, bcnt,
  1082 + PCI_DMA_TODEVICE);
  1083 + np->tx_dma_len[nr] = bcnt;
  1084 +
1047 1085 if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
1048 1086 np->tx_ring.orig[nr].PacketBuffer = cpu_to_le32(np->tx_dma[nr]);
1049   - np->tx_ring.orig[nr].FlagLen = cpu_to_le32( (frag->size-1) | np->tx_flags | tx_flags_extra);
  1087 + np->tx_ring.orig[nr].FlagLen = cpu_to_le32((bcnt-1) | tx_flags);
1050 1088 } else {
1051 1089 np->tx_ring.ex[nr].PacketBufferHigh = cpu_to_le64(np->tx_dma[nr]) >> 32;
1052 1090 np->tx_ring.ex[nr].PacketBufferLow = cpu_to_le64(np->tx_dma[nr]) & 0x0FFFFFFFF;
1053   - np->tx_ring.ex[nr].FlagLen = cpu_to_le32( (frag->size-1) | np->tx_flags | tx_flags_extra);
  1091 + np->tx_ring.ex[nr].FlagLen = cpu_to_le32((bcnt-1) | tx_flags);
1054 1092 }
1055   -
1056   - nr = (nr - 1) % TX_RING;
  1093 + offset += bcnt;
  1094 + size -= bcnt;
  1095 + } while (size);
  1096 + }
1057 1097  
1058   - if (np->desc_ver == DESC_VER_1)
1059   - tx_flags_extra &= ~NV_TX_LASTPACKET;
1060   - else
1061   - tx_flags_extra &= ~NV_TX2_LASTPACKET;
1062   - }
  1098 + /* set last fragment flag */
  1099 + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
  1100 + np->tx_ring.orig[nr].FlagLen |= cpu_to_le32(tx_flags_extra);
  1101 + } else {
  1102 + np->tx_ring.ex[nr].FlagLen |= cpu_to_le32(tx_flags_extra);
1063 1103 }
1064 1104  
  1105 + np->tx_skbuff[nr] = skb;
  1106 +
1065 1107 #ifdef NETIF_F_TSO
1066 1108 if (skb_shinfo(skb)->tso_size)
1067   - tx_flags_extra |= NV_TX2_TSO | (skb_shinfo(skb)->tso_size << NV_TX2_TSO_SHIFT);
  1109 + tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->tso_size << NV_TX2_TSO_SHIFT);
1068 1110 else
1069 1111 #endif
1070   - tx_flags_extra |= (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0);
  1112 + tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0);
1071 1113  
1072   - np->tx_dma[nr] = pci_map_single(np->pci_dev, skb->data, skb->len-skb->data_len,
1073   - PCI_DMA_TODEVICE);
1074   -
  1114 + /* set tx flags */
1075 1115 if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
1076   - np->tx_ring.orig[nr].PacketBuffer = cpu_to_le32(np->tx_dma[nr]);
1077   - np->tx_ring.orig[nr].FlagLen = cpu_to_le32( (skb->len-skb->data_len-1) | np->tx_flags | tx_flags_extra);
  1116 + np->tx_ring.orig[start_nr].FlagLen |= cpu_to_le32(tx_flags | tx_flags_extra);
1078 1117 } else {
1079   - np->tx_ring.ex[nr].PacketBufferHigh = cpu_to_le64(np->tx_dma[nr]) >> 32;
1080   - np->tx_ring.ex[nr].PacketBufferLow = cpu_to_le64(np->tx_dma[nr]) & 0x0FFFFFFFF;
1081   - np->tx_ring.ex[nr].FlagLen = cpu_to_le32( (skb->len-skb->data_len-1) | np->tx_flags | tx_flags_extra);
  1118 + np->tx_ring.ex[start_nr].FlagLen |= cpu_to_le32(tx_flags | tx_flags_extra);
1082 1119 }
1083 1120  
1084   - dprintk(KERN_DEBUG "%s: nv_start_xmit: packet packet %d queued for transmission. tx_flags_extra: %x\n",
1085   - dev->name, np->next_tx, tx_flags_extra);
  1121 + dprintk(KERN_DEBUG "%s: nv_start_xmit: packet %d (entries %d) queued for transmission. tx_flags_extra: %x\n",
  1122 + dev->name, np->next_tx, entries, tx_flags_extra);
1086 1123 {
1087 1124 int j;
1088 1125 for (j=0; j<64; j++) {
... ... @@ -1093,7 +1130,7 @@
1093 1130 dprintk("\n");
1094 1131 }
1095 1132  
1096   - np->next_tx += 1 + fragments;
  1133 + np->next_tx += entries;
1097 1134  
1098 1135 dev->trans_start = jiffies;
1099 1136 spin_unlock_irq(&np->lock);
... ... @@ -1140,7 +1177,6 @@
1140 1177 np->stats.tx_packets++;
1141 1178 np->stats.tx_bytes += skb->len;
1142 1179 }
1143   - nv_release_txskb(dev, i);
1144 1180 }
1145 1181 } else {
1146 1182 if (Flags & NV_TX2_LASTPACKET) {
... ... @@ -1156,9 +1192,9 @@
1156 1192 np->stats.tx_packets++;
1157 1193 np->stats.tx_bytes += skb->len;
1158 1194 }
1159   - nv_release_txskb(dev, i);
1160 1195 }
1161 1196 }
  1197 + nv_release_txskb(dev, i);
1162 1198 np->nic_tx++;
1163 1199 }
1164 1200 if (np->next_tx - np->nic_tx < TX_LIMIT_START)
... ... @@ -2456,7 +2492,7 @@
2456 2492 np->txrxctl_bits |= NVREG_TXRXCTL_RXCHECK;
2457 2493 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
2458 2494 #ifdef NETIF_F_TSO
2459   - /* disabled dev->features |= NETIF_F_TSO; */
  2495 + dev->features |= NETIF_F_TSO;
2460 2496 #endif
2461 2497 }
2462 2498