Commit 5640f7685831e088fe6c2e1f863a6805962f8e81

Authored by Eric Dumazet
Committed by David S. Miller
1 parent b98b8babd6

net: use a per task frag allocator

We currently use a per socket order-0 page cache for tcp_sendmsg()
operations.

This page is used to build fragments for skbs.

This is done to increase the probability of coalescing small write()s into
single segments in skbs still in the write queue (not yet sent).

But it wastes a lot of memory for applications handling many mostly
idle sockets, since each socket holds one page in sk->sk_sndmsg_page.

It is also quite inefficient to build 64KB TSO packets, because we need
about 16 pages per skb on arches where PAGE_SIZE = 4096, so we hit the
page allocator more often than wanted.

This patch adds a per task frag allocator and uses bigger pages if
available. An automatic fallback to lower-order pages is done in case
of memory pressure.

(up to 32768 bytes per frag; with PAGE_SIZE = 4096 that is 8 pages,
i.e. an order-3 allocation on x86)

This increases TCP stream performance by 20% on the loopback device,
and also benefits other network devices, since 8x fewer frags are
mapped on transmit and unmapped on tx completion. Alexander Duyck
mentioned a probable performance win on systems with IOMMU enabled.

It is possible some SG-enabled hardware can't cope with bigger
fragments, but its ndo_start_xmit() should already handle this by
splitting a fragment into sub-fragments, since some arches have
PAGE_SIZE = 65536.

Successfully tested on various ethernet devices.
(ixgbe, igb, bnx2x, tg3, mellanox mlx4)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Cc: Vijay Subramanian <subramanian.vijay@gmail.com>
Cc: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Vijay Subramanian <subramanian.vijay@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
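
The hunks below all follow the same three-step pattern: pick the right
page_frag with sk_page_frag(), top it up with sk_page_frag_refill(),
then copy into the page and advance the offset. A minimal sketch of
that pattern, for orientation only (copy_to_sk_frag() is a made-up
helper name, not part of the patch):

	#include <net/sock.h>

	/* Illustrative sketch of the caller pattern introduced by this
	 * patch; error handling and skb bookkeeping are simplified.
	 */
	static int copy_to_sk_frag(struct sock *sk, const void *data, int len)
	{
		/* Per-task frag if sk->sk_allocation allows sleeping,
		 * otherwise the per-socket one.
		 */
		struct page_frag *pfrag = sk_page_frag(sk);

		/* Refill tries order-3 first for __GFP_WAIT users,
		 * falling back towards order-0 under memory pressure.
		 */
		if (!sk_page_frag_refill(sk, pfrag))
			return -ENOMEM;

		/* Never copy past the end of the current frag. */
		len = min_t(int, len, pfrag->size - pfrag->offset);

		memcpy(page_address(pfrag->page) + pfrag->offset, data, len);

		/* Consume the space; refill resets the offset once the
		 * page is no longer shared with in-flight skbs.
		 */
		pfrag->offset += len;
		return len;
	}

Callers that attach the copied bytes to an skb additionally take a page
reference and record page/offset in a frag descriptor, as the
ip_output.c and tcp.c hunks show.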

Showing 13 changed files with 164 additions and 200 deletions

include/linux/sched.h
... ... @@ -1530,6 +1530,9 @@
1530 1530 * cache last used pipe for splice
1531 1531 */
1532 1532 struct pipe_inode_info *splice_pipe;
  1533 +
  1534 + struct page_frag task_frag;
  1535 +
1533 1536 #ifdef CONFIG_TASK_DELAY_ACCT
1534 1537 struct task_delay_info *delays;
1535 1538 #endif
include/net/inet_sock.h
... ... @@ -101,10 +101,8 @@
101 101 __be32 addr;
102 102 struct ip_options *opt;
103 103 unsigned int fragsize;
104   - struct dst_entry *dst;
105 104 int length; /* Total length of all frames */
106   - struct page *page;
107   - u32 off;
  105 + struct dst_entry *dst;
108 106 u8 tx_flags;
109 107 };
110 108  
include/net/sock.h
... ... @@ -247,8 +247,7 @@
247 247 * @sk_stamp: time stamp of last packet received
248 248 * @sk_socket: Identd and reporting IO signals
249 249 * @sk_user_data: RPC layer private data
250   - * @sk_sndmsg_page: cached page for sendmsg
251   - * @sk_sndmsg_off: cached offset for sendmsg
  250 + * @sk_frag: cached page frag
252 251 * @sk_peek_off: current peek_offset value
253 252 * @sk_send_head: front of stuff to transmit
254 253 * @sk_security: used by security modules
... ... @@ -362,9 +361,8 @@
362 361 ktime_t sk_stamp;
363 362 struct socket *sk_socket;
364 363 void *sk_user_data;
365   - struct page *sk_sndmsg_page;
  364 + struct page_frag sk_frag;
366 365 struct sk_buff *sk_send_head;
367   - __u32 sk_sndmsg_off;
368 366 __s32 sk_peek_off;
369 367 int sk_write_pending;
370 368 #ifdef CONFIG_SECURITY
... ... @@ -2034,17 +2032,22 @@
2034 2032  
2035 2033 struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp);
2036 2034  
2037   -static inline struct page *sk_stream_alloc_page(struct sock *sk)
  2035 +/**
  2036 + * sk_page_frag - return an appropriate page_frag
  2037 + * @sk: socket
  2038 + *
  2039 + * If socket allocation mode allows current thread to sleep, it means it's
  2040 + * safe to use the per task page_frag instead of the per socket one.
  2041 + */
  2042 +static inline struct page_frag *sk_page_frag(struct sock *sk)
2038 2043 {
2039   - struct page *page = NULL;
  2044 + if (sk->sk_allocation & __GFP_WAIT)
  2045 + return &current->task_frag;
2040 2046  
2041   - page = alloc_pages(sk->sk_allocation, 0);
2042   - if (!page) {
2043   - sk_enter_memory_pressure(sk);
2044   - sk_stream_moderate_sndbuf(sk);
2045   - }
2046   - return page;
  2047 + return &sk->sk_frag;
2047 2048 }
  2049 +
  2050 +extern bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
2048 2051  
2049 2052 /*
2050 2053 * Default write policy as shown to user space via poll/select/SIGIO
kernel/exit.c
... ... @@ -1046,6 +1046,9 @@
1046 1046 if (tsk->splice_pipe)
1047 1047 __free_pipe_info(tsk->splice_pipe);
1048 1048  
  1049 + if (tsk->task_frag.page)
  1050 + put_page(tsk->task_frag.page);
  1051 +
1049 1052 validate_creds_for_do_exit(tsk);
1050 1053  
1051 1054 preempt_disable();
kernel/fork.c
... ... @@ -330,6 +330,7 @@
330 330 tsk->btrace_seq = 0;
331 331 #endif
332 332 tsk->splice_pipe = NULL;
  333 + tsk->task_frag.page = NULL;
333 334  
334 335 account_kernel_stack(ti, 1);
335 336  
net/core/skbuff.c
... ... @@ -1655,38 +1655,19 @@
1655 1655 unsigned int *offset,
1656 1656 struct sk_buff *skb, struct sock *sk)
1657 1657 {
1658   - struct page *p = sk->sk_sndmsg_page;
1659   - unsigned int off;
  1658 + struct page_frag *pfrag = sk_page_frag(sk);
1660 1659  
1661   - if (!p) {
1662   -new_page:
1663   - p = sk->sk_sndmsg_page = alloc_pages(sk->sk_allocation, 0);
1664   - if (!p)
1665   - return NULL;
  1660 + if (!sk_page_frag_refill(sk, pfrag))
  1661 + return NULL;
1666 1662  
1667   - off = sk->sk_sndmsg_off = 0;
1668   - /* hold one ref to this page until it's full */
1669   - } else {
1670   - unsigned int mlen;
  1663 + *len = min_t(unsigned int, *len, pfrag->size - pfrag->offset);
1671 1664  
1672   - /* If we are the only user of the page, we can reset offset */
1673   - if (page_count(p) == 1)
1674   - sk->sk_sndmsg_off = 0;
1675   - off = sk->sk_sndmsg_off;
1676   - mlen = PAGE_SIZE - off;
1677   - if (mlen < 64 && mlen < *len) {
1678   - put_page(p);
1679   - goto new_page;
1680   - }
  1665 + memcpy(page_address(pfrag->page) + pfrag->offset,
  1666 + page_address(page) + *offset, *len);
  1667 + *offset = pfrag->offset;
  1668 + pfrag->offset += *len;
1681 1669  
1682   - *len = min_t(unsigned int, *len, mlen);
1683   - }
1684   -
1685   - memcpy(page_address(p) + off, page_address(page) + *offset, *len);
1686   - sk->sk_sndmsg_off += *len;
1687   - *offset = off;
1688   -
1689   - return p;
  1670 + return pfrag->page;
1690 1671 }
1691 1672  
1692 1673 static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
net/core/sock.c
... ... @@ -1744,6 +1744,45 @@
1744 1744 }
1745 1745 EXPORT_SYMBOL(sock_alloc_send_skb);
1746 1746  
  1747 +/* On 32bit arches, an skb frag is limited to 2^15 */
  1748 +#define SKB_FRAG_PAGE_ORDER get_order(32768)
  1749 +
  1750 +bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
  1751 +{
  1752 + int order;
  1753 +
  1754 + if (pfrag->page) {
  1755 + if (atomic_read(&pfrag->page->_count) == 1) {
  1756 + pfrag->offset = 0;
  1757 + return true;
  1758 + }
  1759 + if (pfrag->offset < pfrag->size)
  1760 + return true;
  1761 + put_page(pfrag->page);
  1762 + }
  1763 +
  1764 + /* We restrict high order allocations to users that can afford to wait */
  1765 + order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
  1766 +
  1767 + do {
  1768 + gfp_t gfp = sk->sk_allocation;
  1769 +
  1770 + if (order)
  1771 + gfp |= __GFP_COMP | __GFP_NOWARN;
  1772 + pfrag->page = alloc_pages(gfp, order);
  1773 + if (likely(pfrag->page)) {
  1774 + pfrag->offset = 0;
  1775 + pfrag->size = PAGE_SIZE << order;
  1776 + return true;
  1777 + }
  1778 + } while (--order >= 0);
  1779 +
  1780 + sk_enter_memory_pressure(sk);
  1781 + sk_stream_moderate_sndbuf(sk);
  1782 + return false;
  1783 +}
  1784 +EXPORT_SYMBOL(sk_page_frag_refill);
  1785 +
1747 1786 static void __lock_sock(struct sock *sk)
1748 1787 __releases(&sk->sk_lock.slock)
1749 1788 __acquires(&sk->sk_lock.slock)
... ... @@ -2173,8 +2212,8 @@
2173 2212 sk->sk_error_report = sock_def_error_report;
2174 2213 sk->sk_destruct = sock_def_destruct;
2175 2214  
2176   - sk->sk_sndmsg_page = NULL;
2177   - sk->sk_sndmsg_off = 0;
  2215 + sk->sk_frag.page = NULL;
  2216 + sk->sk_frag.offset = 0;
2178 2217 sk->sk_peek_off = -1;
2179 2218  
2180 2219 sk->sk_peer_pid = NULL;
... ... @@ -2417,6 +2456,12 @@
2417 2456 xfrm_sk_free_policy(sk);
2418 2457  
2419 2458 sk_refcnt_debug_release(sk);
  2459 +
  2460 + if (sk->sk_frag.page) {
  2461 + put_page(sk->sk_frag.page);
  2462 + sk->sk_frag.page = NULL;
  2463 + }
  2464 +
2420 2465 sock_put(sk);
2421 2466 }
2422 2467 EXPORT_SYMBOL(sk_common_release);
net/ipv4/ip_output.c
... ... @@ -793,6 +793,7 @@
793 793 struct flowi4 *fl4,
794 794 struct sk_buff_head *queue,
795 795 struct inet_cork *cork,
  796 + struct page_frag *pfrag,
796 797 int getfrag(void *from, char *to, int offset,
797 798 int len, int odd, struct sk_buff *skb),
798 799 void *from, int length, int transhdrlen,
... ... @@ -987,47 +988,30 @@
987 988 }
988 989 } else {
989 990 int i = skb_shinfo(skb)->nr_frags;
990   - skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
991   - struct page *page = cork->page;
992   - int off = cork->off;
993   - unsigned int left;
994 991  
995   - if (page && (left = PAGE_SIZE - off) > 0) {
996   - if (copy >= left)
997   - copy = left;
998   - if (page != skb_frag_page(frag)) {
999   - if (i == MAX_SKB_FRAGS) {
1000   - err = -EMSGSIZE;
1001   - goto error;
1002   - }
1003   - skb_fill_page_desc(skb, i, page, off, 0);
1004   - skb_frag_ref(skb, i);
1005   - frag = &skb_shinfo(skb)->frags[i];
1006   - }
1007   - } else if (i < MAX_SKB_FRAGS) {
1008   - if (copy > PAGE_SIZE)
1009   - copy = PAGE_SIZE;
1010   - page = alloc_pages(sk->sk_allocation, 0);
1011   - if (page == NULL) {
1012   - err = -ENOMEM;
1013   - goto error;
1014   - }
1015   - cork->page = page;
1016   - cork->off = 0;
  992 + err = -ENOMEM;
  993 + if (!sk_page_frag_refill(sk, pfrag))
  994 + goto error;
1017 995  
1018   - skb_fill_page_desc(skb, i, page, 0, 0);
1019   - frag = &skb_shinfo(skb)->frags[i];
1020   - } else {
  996 + if (!skb_can_coalesce(skb, i, pfrag->page,
  997 + pfrag->offset)) {
1021 998 err = -EMSGSIZE;
1022   - goto error;
  999 + if (i == MAX_SKB_FRAGS)
  1000 + goto error;
  1001 +
  1002 + __skb_fill_page_desc(skb, i, pfrag->page,
  1003 + pfrag->offset, 0);
  1004 + skb_shinfo(skb)->nr_frags = ++i;
  1005 + get_page(pfrag->page);
1023 1006 }
1024   - if (getfrag(from, skb_frag_address(frag)+skb_frag_size(frag),
1025   - offset, copy, skb->len, skb) < 0) {
1026   - err = -EFAULT;
1027   - goto error;
1028   - }
1029   - cork->off += copy;
1030   - skb_frag_size_add(frag, copy);
  1007 + copy = min_t(int, copy, pfrag->size - pfrag->offset);
  1008 + if (getfrag(from,
  1009 + page_address(pfrag->page) + pfrag->offset,
  1010 + offset, copy, skb->len, skb) < 0)
  1011 + goto error_efault;
  1012 +
  1013 + pfrag->offset += copy;
  1014 + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1031 1015 skb->len += copy;
1032 1016 skb->data_len += copy;
1033 1017 skb->truesize += copy;
... ... @@ -1039,6 +1023,8 @@
1039 1023  
1040 1024 return 0;
1041 1025  
  1026 +error_efault:
  1027 + err = -EFAULT;
1042 1028 error:
1043 1029 cork->length -= length;
1044 1030 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
... ... @@ -1079,8 +1065,6 @@
1079 1065 cork->dst = &rt->dst;
1080 1066 cork->length = 0;
1081 1067 cork->tx_flags = ipc->tx_flags;
1082   - cork->page = NULL;
1083   - cork->off = 0;
1084 1068  
1085 1069 return 0;
1086 1070 }
... ... @@ -1117,7 +1101,8 @@
1117 1101 transhdrlen = 0;
1118 1102 }
1119 1103  
1120   - return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, getfrag,
  1104 + return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base,
  1105 + sk_page_frag(sk), getfrag,
1121 1106 from, length, transhdrlen, flags);
1122 1107 }
1123 1108  
... ... @@ -1439,7 +1424,8 @@
1439 1424 if (err)
1440 1425 return ERR_PTR(err);
1441 1426  
1442   - err = __ip_append_data(sk, fl4, &queue, &cork, getfrag,
  1427 + err = __ip_append_data(sk, fl4, &queue, &cork,
  1428 + &current->task_frag, getfrag,
1443 1429 from, length, transhdrlen, flags);
1444 1430 if (err) {
1445 1431 __ip_flush_pending_frames(sk, &queue, &cork);
net/ipv4/raw.c
... ... @@ -131,18 +131,20 @@
131 131 * 0 - deliver
132 132 * 1 - block
133 133 */
134   -static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
  134 +static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
135 135 {
136   - int type;
  136 + struct icmphdr _hdr;
  137 + const struct icmphdr *hdr;
137 138  
138   - if (!pskb_may_pull(skb, sizeof(struct icmphdr)))
  139 + hdr = skb_header_pointer(skb, skb_transport_offset(skb),
  140 + sizeof(_hdr), &_hdr);
  141 + if (!hdr)
139 142 return 1;
140 143  
141   - type = icmp_hdr(skb)->type;
142   - if (type < 32) {
  144 + if (hdr->type < 32) {
143 145 __u32 data = raw_sk(sk)->filter.data;
144 146  
145   - return ((1 << type) & data) != 0;
  147 + return ((1U << hdr->type) & data) != 0;
146 148 }
147 149  
148 150 /* Do not block unknown ICMP types */
net/ipv4/tcp.c
... ... @@ -1150,78 +1150,43 @@
1150 1150 if (err)
1151 1151 goto do_fault;
1152 1152 } else {
1153   - bool merge = false;
  1153 + bool merge = true;
1154 1154 int i = skb_shinfo(skb)->nr_frags;
1155   - struct page *page = sk->sk_sndmsg_page;
1156   - int off;
  1155 + struct page_frag *pfrag = sk_page_frag(sk);
1157 1156  
1158   - if (page && page_count(page) == 1)
1159   - sk->sk_sndmsg_off = 0;
  1157 + if (!sk_page_frag_refill(sk, pfrag))
  1158 + goto wait_for_memory;
1160 1159  
1161   - off = sk->sk_sndmsg_off;
1162   -
1163   - if (skb_can_coalesce(skb, i, page, off) &&
1164   - off != PAGE_SIZE) {
1165   - /* We can extend the last page
1166   - * fragment. */
1167   - merge = true;
1168   - } else if (i == MAX_SKB_FRAGS || !sg) {
1169   - /* Need to add new fragment and cannot
1170   - * do this because interface is non-SG,
1171   - * or because all the page slots are
1172   - * busy. */
1173   - tcp_mark_push(tp, skb);
1174   - goto new_segment;
1175   - } else if (page) {
1176   - if (off == PAGE_SIZE) {
1177   - put_page(page);
1178   - sk->sk_sndmsg_page = page = NULL;
1179   - off = 0;
  1160 + if (!skb_can_coalesce(skb, i, pfrag->page,
  1161 + pfrag->offset)) {
  1162 + if (i == MAX_SKB_FRAGS || !sg) {
  1163 + tcp_mark_push(tp, skb);
  1164 + goto new_segment;
1180 1165 }
1181   - } else
1182   - off = 0;
  1166 + merge = false;
  1167 + }
1183 1168  
1184   - if (copy > PAGE_SIZE - off)
1185   - copy = PAGE_SIZE - off;
  1169 + copy = min_t(int, copy, pfrag->size - pfrag->offset);
1186 1170  
1187 1171 if (!sk_wmem_schedule(sk, copy))
1188 1172 goto wait_for_memory;
1189 1173  
1190   - if (!page) {
1191   - /* Allocate new cache page. */
1192   - if (!(page = sk_stream_alloc_page(sk)))
1193   - goto wait_for_memory;
1194   - }
1195   -
1196   - /* Time to copy data. We are close to
1197   - * the end! */
1198 1174 err = skb_copy_to_page_nocache(sk, from, skb,
1199   - page, off, copy);
1200   - if (err) {
1201   - /* If this page was new, give it to the
1202   - * socket so it does not get leaked.
1203   - */
1204   - if (!sk->sk_sndmsg_page) {
1205   - sk->sk_sndmsg_page = page;
1206   - sk->sk_sndmsg_off = 0;
1207   - }
  1175 + pfrag->page,
  1176 + pfrag->offset,
  1177 + copy);
  1178 + if (err)
1208 1179 goto do_error;
1209   - }
1210 1180  
1211 1181 /* Update the skb. */
1212 1182 if (merge) {
1213 1183 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1214 1184 } else {
1215   - skb_fill_page_desc(skb, i, page, off, copy);
1216   - if (sk->sk_sndmsg_page) {
1217   - get_page(page);
1218   - } else if (off + copy < PAGE_SIZE) {
1219   - get_page(page);
1220   - sk->sk_sndmsg_page = page;
1221   - }
  1185 + skb_fill_page_desc(skb, i, pfrag->page,
  1186 + pfrag->offset, copy);
  1187 + get_page(pfrag->page);
1222 1188 }
1223   -
1224   - sk->sk_sndmsg_off = off + copy;
  1189 + pfrag->offset += copy;
1225 1190 }
1226 1191  
1227 1192 if (!copied)
net/ipv4/tcp_ipv4.c
... ... @@ -2200,14 +2200,6 @@
2200 2200 if (inet_csk(sk)->icsk_bind_hash)
2201 2201 inet_put_port(sk);
2202 2202  
2203   - /*
2204   - * If sendmsg cached page exists, toss it.
2205   - */
2206   - if (sk->sk_sndmsg_page) {
2207   - __free_page(sk->sk_sndmsg_page);
2208   - sk->sk_sndmsg_page = NULL;
2209   - }
2210   -
2211 2203 /* TCP Cookie Transactions */
2212 2204 if (tp->cookie_values != NULL) {
2213 2205 kref_put(&tp->cookie_values->kref,
net/ipv6/ip6_output.c
... ... @@ -1279,8 +1279,6 @@
1279 1279 if (dst_allfrag(rt->dst.path))
1280 1280 cork->flags |= IPCORK_ALLFRAG;
1281 1281 cork->length = 0;
1282   - sk->sk_sndmsg_page = NULL;
1283   - sk->sk_sndmsg_off = 0;
1284 1282 exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
1285 1283 length += exthdrlen;
1286 1284 transhdrlen += exthdrlen;
... ... @@ -1504,48 +1502,31 @@
1504 1502 }
1505 1503 } else {
1506 1504 int i = skb_shinfo(skb)->nr_frags;
1507   - skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1508   - struct page *page = sk->sk_sndmsg_page;
1509   - int off = sk->sk_sndmsg_off;
1510   - unsigned int left;
  1505 + struct page_frag *pfrag = sk_page_frag(sk);
1511 1506  
1512   - if (page && (left = PAGE_SIZE - off) > 0) {
1513   - if (copy >= left)
1514   - copy = left;
1515   - if (page != skb_frag_page(frag)) {
1516   - if (i == MAX_SKB_FRAGS) {
1517   - err = -EMSGSIZE;
1518   - goto error;
1519   - }
1520   - skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1521   - skb_frag_ref(skb, i);
1522   - frag = &skb_shinfo(skb)->frags[i];
1523   - }
1524   - } else if(i < MAX_SKB_FRAGS) {
1525   - if (copy > PAGE_SIZE)
1526   - copy = PAGE_SIZE;
1527   - page = alloc_pages(sk->sk_allocation, 0);
1528   - if (page == NULL) {
1529   - err = -ENOMEM;
1530   - goto error;
1531   - }
1532   - sk->sk_sndmsg_page = page;
1533   - sk->sk_sndmsg_off = 0;
  1507 + err = -ENOMEM;
  1508 + if (!sk_page_frag_refill(sk, pfrag))
  1509 + goto error;
1534 1510  
1535   - skb_fill_page_desc(skb, i, page, 0, 0);
1536   - frag = &skb_shinfo(skb)->frags[i];
1537   - } else {
  1511 + if (!skb_can_coalesce(skb, i, pfrag->page,
  1512 + pfrag->offset)) {
1538 1513 err = -EMSGSIZE;
1539   - goto error;
  1514 + if (i == MAX_SKB_FRAGS)
  1515 + goto error;
  1516 +
  1517 + __skb_fill_page_desc(skb, i, pfrag->page,
  1518 + pfrag->offset, 0);
  1519 + skb_shinfo(skb)->nr_frags = ++i;
  1520 + get_page(pfrag->page);
1540 1521 }
  1522 + copy = min_t(int, copy, pfrag->size - pfrag->offset);
1541 1523 if (getfrag(from,
1542   - skb_frag_address(frag) + skb_frag_size(frag),
1543   - offset, copy, skb->len, skb) < 0) {
1544   - err = -EFAULT;
1545   - goto error;
1546   - }
1547   - sk->sk_sndmsg_off += copy;
1548   - skb_frag_size_add(frag, copy);
  1524 + page_address(pfrag->page) + pfrag->offset,
  1525 + offset, copy, skb->len, skb) < 0)
  1526 + goto error_efault;
  1527 +
  1528 + pfrag->offset += copy;
  1529 + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1549 1530 skb->len += copy;
1550 1531 skb->data_len += copy;
1551 1532 skb->truesize += copy;
... ... @@ -1554,7 +1535,11 @@
1554 1535 offset += copy;
1555 1536 length -= copy;
1556 1537 }
  1538 +
1557 1539 return 0;
  1540 +
  1541 +error_efault:
  1542 + err = -EFAULT;
1558 1543 error:
1559 1544 cork->length -= length;
1560 1545 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
net/sched/em_meta.c
... ... @@ -461,7 +461,7 @@
461 461 META_COLLECTOR(int_sk_sendmsg_off)
462 462 {
463 463 SKIP_NONLOCAL(skb);
464   - dst->value = skb->sk->sk_sndmsg_off;
  464 + dst->value = skb->sk->sk_frag.offset;
465 465 }
466 466  
467 467 META_COLLECTOR(int_sk_write_pend)