Commit bd7ea31b9e8a342be76e0fe8d638343886c2d8c5

Authored by Tom Tucker
Committed by Trond Myklebust
1 parent b064eca2cf

RPCRDMA: Fix to XDR page base interpretation in marshalling logic.

The RPCRDMA marshalling logic assumed that xdr->page_base was an
offset into the first page of xdr->page_list. It is in fact an
offset into the xdr->page_list itself; that is, it selects both the
first page in the page_list and the offset within that page.
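
In other words, the upper bits of page_base select the starting entry
in the pages[] array and the lower bits give the byte offset within
that page. A minimal, self-contained userspace sketch of that
decomposition (4 KiB pages and the value 9000 are hypothetical, but the
shift and mask expressions match the ones the patch introduces below):

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define PAGE_MASK  (~(PAGE_SIZE - 1))

    int main(void)
    {
            /* hypothetical page_base: 2 full pages plus 808 bytes */
            unsigned long page_base = 9000;

            /* which entry of pages[] the data starts in ... */
            unsigned long first_page = page_base >> PAGE_SHIFT;
            /* ... and how far into that page it starts */
            unsigned long offset = page_base & ~PAGE_MASK;

            printf("page_base %lu -> pages[%lu] + %lu\n",
                   page_base, first_page, offset); /* pages[2] + 808 */
            return 0;
    }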

The symptom depended in part on the rpc_memreg_strategy: if it was
FRMR or some other one-shot mapping mode, the connection would get
torn down on a base-and-bounds error. When the badly marshalled RPC
was retransmitted, it would reconnect, hit the error, and tear down the
connection again, looping forever. This resulted in a hung mount. For
the other modes, the bug caused silent data corruption. It is most
easily reproduced by writing more data than the filesystem has
space for.

This fix corrects the page_base assumption and otherwise simplifies
the iov mapping logic.
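
The resulting mapping loop computes the starting page and intra-page
offset once, then walks the pages[] array, resetting the offset to zero
after the first page. A hedged userspace sketch of that pattern
(hypothetical lengths, 4 KiB pages assumed; the real code fills
rpcrdma_mr_seg entries rather than printing):

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define PAGE_MASK  (~(PAGE_SIZE - 1))

    int main(void)
    {
            unsigned long xdr_page_base = 9000;  /* hypothetical */
            unsigned long page_len = 10000;      /* hypothetical */
            unsigned long first = xdr_page_base >> PAGE_SHIFT;
            unsigned long page_base = xdr_page_base & ~PAGE_MASK;
            unsigned long len = page_len;
            unsigned long p = 0;

            while (len) {
                    /* only the first page starts at a nonzero offset */
                    unsigned long curlen = PAGE_SIZE - page_base;
                    if (curlen > len)
                            curlen = len;
                    printf("seg: pages[%lu] offset %lu len %lu\n",
                           first + p, page_base, curlen);
                    len -= curlen;
                    ++p;
                    page_base = 0; /* offset applies only to first page */
            }
            return 0;
    }

With page_base 9000 and page_len 10000 this yields pages[2]+808 for
3288 bytes, then pages[3] and pages[4] from offset 0.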

Signed-off-by: Tom Tucker <tom@ogc.us>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

1 file changed, 42 insertions(+), 44 deletions(-)

net/sunrpc/xprtrdma/rpc_rdma.c
@@ -87,6 +87,8 @@
         enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs)
 {
        int len, n = 0, p;
+       int page_base;
+       struct page **ppages;
 
        if (pos == 0 && xdrbuf->head[0].iov_len) {
                seg[n].mr_page = NULL;
93 95  
94 96  
95 97  
@@ -95,34 +97,32 @@
                ++n;
        }
 
-       if (xdrbuf->page_len && (xdrbuf->pages[0] != NULL)) {
-               if (n == nsegs)
-                       return 0;
-               seg[n].mr_page = xdrbuf->pages[0];
-               seg[n].mr_offset = (void *)(unsigned long) xdrbuf->page_base;
-               seg[n].mr_len = min_t(u32,
-                       PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len);
-               len = xdrbuf->page_len - seg[n].mr_len;
+       len = xdrbuf->page_len;
+       ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
+       page_base = xdrbuf->page_base & ~PAGE_MASK;
+       p = 0;
+       while (len && n < nsegs) {
+               seg[n].mr_page = ppages[p];
+               seg[n].mr_offset = (void *)(unsigned long) page_base;
+               seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len);
+               BUG_ON(seg[n].mr_len > PAGE_SIZE);
+               len -= seg[n].mr_len;
                ++n;
-               p = 1;
-               while (len > 0) {
-                       if (n == nsegs)
-                               return 0;
-                       seg[n].mr_page = xdrbuf->pages[p];
-                       seg[n].mr_offset = NULL;
-                       seg[n].mr_len = min_t(u32, PAGE_SIZE, len);
-                       len -= seg[n].mr_len;
-                       ++n;
-                       ++p;
-               }
+               ++p;
+               page_base = 0;  /* page offset only applies to first page */
        }
 
+       /* Message overflows the seg array */
+       if (len && n == nsegs)
+               return 0;
+
        if (xdrbuf->tail[0].iov_len) {
                /* the rpcrdma protocol allows us to omit any trailing
                 * xdr pad bytes, saving the server an RDMA operation. */
                if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
                        return n;
                if (n == nsegs)
+                       /* Tail remains, but we're out of segments */
                        return 0;
                seg[n].mr_page = NULL;
                seg[n].mr_offset = xdrbuf->tail[0].iov_base;
@@ -296,6 +296,8 @@
        int copy_len;
        unsigned char *srcp, *destp;
        struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+       int page_base;
+       struct page **ppages;
 
        destp = rqst->rq_svec[0].iov_base;
        curlen = rqst->rq_svec[0].iov_len;
@@ -324,28 +326,25 @@
                        __func__, destp + copy_len, curlen);
                rqst->rq_svec[0].iov_len += curlen;
        }
-
        r_xprt->rx_stats.pullup_copy_count += copy_len;
-       npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT;
+
+       page_base = rqst->rq_snd_buf.page_base;
+       ppages = rqst->rq_snd_buf.pages + (page_base >> PAGE_SHIFT);
+       page_base &= ~PAGE_MASK;
+       npages = PAGE_ALIGN(page_base+copy_len) >> PAGE_SHIFT;
        for (i = 0; copy_len && i < npages; i++) {
-               if (i == 0)
-                       curlen = PAGE_SIZE - rqst->rq_snd_buf.page_base;
-               else
-                       curlen = PAGE_SIZE;
+               curlen = PAGE_SIZE - page_base;
                if (curlen > copy_len)
                        curlen = copy_len;
                dprintk("RPC:       %s: page %d destp 0x%p len %d curlen %d\n",
                        __func__, i, destp, copy_len, curlen);
-               srcp = kmap_atomic(rqst->rq_snd_buf.pages[i],
-                       KM_SKB_SUNRPC_DATA);
-               if (i == 0)
-                       memcpy(destp, srcp+rqst->rq_snd_buf.page_base, curlen);
-               else
-                       memcpy(destp, srcp, curlen);
+               srcp = kmap_atomic(ppages[i], KM_SKB_SUNRPC_DATA);
+               memcpy(destp, srcp+page_base, curlen);
                kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA);
                rqst->rq_svec[0].iov_len += curlen;
                destp += curlen;
                copy_len -= curlen;
+               page_base = 0;
        }
        /* header now contains entire send message */
        return pad;
@@ -606,6 +605,8 @@
 {
        int i, npages, curlen, olen;
        char *destp;
+       struct page **ppages;
+       int page_base;
 
        curlen = rqst->rq_rcv_buf.head[0].iov_len;
        if (curlen > copy_len) {        /* write chunk header fixup */
@@ -624,32 +625,29 @@
        olen = copy_len;
        i = 0;
        rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen;
+       page_base = rqst->rq_rcv_buf.page_base;
+       ppages = rqst->rq_rcv_buf.pages + (page_base >> PAGE_SHIFT);
+       page_base &= ~PAGE_MASK;
+
        if (copy_len && rqst->rq_rcv_buf.page_len) {
-               npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base +
+               npages = PAGE_ALIGN(page_base +
                        rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT;
                for (; i < npages; i++) {
-                       if (i == 0)
-                               curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base;
-                       else
-                               curlen = PAGE_SIZE;
+                       curlen = PAGE_SIZE - page_base;
                        if (curlen > copy_len)
                                curlen = copy_len;
                        dprintk("RPC:       %s: page %d"
                                " srcp 0x%p len %d curlen %d\n",
                                __func__, i, srcp, copy_len, curlen);
-                       destp = kmap_atomic(rqst->rq_rcv_buf.pages[i],
-                               KM_SKB_SUNRPC_DATA);
-                       if (i == 0)
-                               memcpy(destp + rqst->rq_rcv_buf.page_base,
-                                      srcp, curlen);
-                       else
-                               memcpy(destp, srcp, curlen);
-                       flush_dcache_page(rqst->rq_rcv_buf.pages[i]);
+                       destp = kmap_atomic(ppages[i], KM_SKB_SUNRPC_DATA);
+                       memcpy(destp + page_base, srcp, curlen);
+                       flush_dcache_page(ppages[i]);
                        kunmap_atomic(destp, KM_SKB_SUNRPC_DATA);
                        srcp += curlen;
                        copy_len -= curlen;
                        if (copy_len == 0)
                                break;
+                       page_base = 0;
                }
                rqst->rq_rcv_buf.page_len = olen - copy_len;
        } else