Commit d521b63b27e3a397e0ef7ca86b6e813861083c83
Committed by: David S. Miller
Parent: 0514f8a9c0
Exists in: master and 7 other branches
RDS/IB+IW: Move recv processing to a tasklet
Move receive processing from the event handler to a tasklet. This should help prevent the hangcheck timer from going off when RDS is under heavy load.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 6 changed files with 52 additions and 12 deletions
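The change itself is simple: the completion-event handler, which runs in interrupt context, now does nothing but tasklet_schedule(), and the CQ draining moves into a tasklet that runs later in softirq context, where a long polling loop no longer stalls interrupt handling. For readers unfamiliar with the API, here is a minimal, self-contained sketch of the pattern, using the tasklet interface of kernels of this era; everything other than the tasklet_* calls (my_conn, my_recv_tasklet_fn, the demo module) is hypothetical, not code from this patch:

#include <linux/module.h>
#include <linux/interrupt.h>

/* Hypothetical per-connection state, standing in for rds_ib_connection. */
struct my_conn {
	struct tasklet_struct recv_tasklet;
};

static struct my_conn conn;

/* Runs later in softirq context, where heavier work is acceptable. */
static void my_recv_tasklet_fn(unsigned long data)
{
	struct my_conn *c = (struct my_conn *) data;

	pr_info("tasklet running for conn %p\n", c);
}

static int __init demo_init(void)
{
	/* Bind the handler and its argument once, at setup time. */
	tasklet_init(&conn.recv_tasklet, my_recv_tasklet_fn,
		     (unsigned long) &conn);

	/* An interrupt handler would do only this and return immediately. */
	tasklet_schedule(&conn.recv_tasklet);
	return 0;
}

static void __exit demo_exit(void)
{
	/* Wait for any scheduled run to finish before unloading. */
	tasklet_kill(&conn.recv_tasklet);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");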
net/rds/ib.h
@@ -98,6 +98,7 @@
 	struct rds_ib_send_work *i_sends;
 
 	/* rx */
+	struct tasklet_struct i_recv_tasklet;
 	struct mutex i_recv_mutex;
 	struct rds_ib_work_ring i_recv_ring;
 	struct rds_ib_incoming *i_ibinc;
@@ -303,6 +304,7 @@
 int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
 			size_t size);
 void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context);
+void rds_ib_recv_tasklet_fn(unsigned long data);
 void rds_ib_recv_init_ring(struct rds_ib_connection *ic);
 void rds_ib_recv_clear_ring(struct rds_ib_connection *ic);
 void rds_ib_recv_init_ack(struct rds_ib_connection *ic);
net/rds/ib_cm.c
@@ -694,6 +694,8 @@
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&ic->ib_node);
+	tasklet_init(&ic->i_recv_tasklet, rds_ib_recv_tasklet_fn,
+		     (unsigned long) ic);
 	mutex_init(&ic->i_recv_mutex);
 #ifndef KERNEL_HAS_ATOMIC64
 	spin_lock_init(&ic->i_ack_lock);
net/rds/ib_recv.c
@@ -825,17 +825,22 @@
 {
 	struct rds_connection *conn = context;
 	struct rds_ib_connection *ic = conn->c_transport_data;
-	struct ib_wc wc;
-	struct rds_ib_ack_state state = { 0, };
-	struct rds_ib_recv_work *recv;
 
 	rdsdebug("conn %p cq %p\n", conn, cq);
 
 	rds_ib_stats_inc(s_ib_rx_cq_call);
 
-	ib_req_notify_cq(cq, IB_CQ_SOLICITED);
+	tasklet_schedule(&ic->i_recv_tasklet);
+}
 
-	while (ib_poll_cq(cq, 1, &wc) > 0) {
+static inline void rds_poll_cq(struct rds_ib_connection *ic,
+			       struct rds_ib_ack_state *state)
+{
+	struct rds_connection *conn = ic->conn;
+	struct ib_wc wc;
+	struct rds_ib_recv_work *recv;
+
+	while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) {
 		rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
 			 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
 			 be32_to_cpu(wc.ex.imm_data));
@@ -853,7 +858,7 @@
 		if (rds_conn_up(conn) || rds_conn_connecting(conn)) {
 			/* We expect errors as the qp is drained during shutdown */
 			if (wc.status == IB_WC_SUCCESS) {
-				rds_ib_process_recv(conn, recv, wc.byte_len, &state);
+				rds_ib_process_recv(conn, recv, wc.byte_len, state);
 			} else {
 				rds_ib_conn_error(conn, "recv completion on "
 				       "%pI4 had status %u, disconnecting and "
@@ -864,6 +869,17 @@
 
 		rds_ib_ring_free(&ic->i_recv_ring, 1);
 	}
+}
+
+void rds_ib_recv_tasklet_fn(unsigned long data)
+{
+	struct rds_ib_connection *ic = (struct rds_ib_connection *) data;
+	struct rds_connection *conn = ic->conn;
+	struct rds_ib_ack_state state = { 0, };
+
+	rds_poll_cq(ic, &state);
+	ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
+	rds_poll_cq(ic, &state);
 
 	if (state.ack_next_valid)
 		rds_ib_set_ack(ic, state.ack_next, state.ack_required);
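Note the order of operations in the new rds_ib_recv_tasklet_fn(): poll, re-arm, poll again. ib_req_notify_cq() only arms the CQ for completions that arrive after the call, so a completion landing between the first drain and the re-arm would raise no event and could sit unprocessed until the next one. The second rds_poll_cq() closes that window. The same three lines from the diff, annotated (comments added here; the ack handling that follows in the function is elided):

	rds_poll_cq(ic, &state);                          /* 1. drain everything already queued */
	ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); /* 2. arm for future completions only */
	rds_poll_cq(ic, &state);                          /* 3. catch completions that slipped in
	                                                        between steps 1 and 2 */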
net/rds/iw.h
@@ -119,6 +119,7 @@
 	struct rds_iw_send_work *i_sends;
 
 	/* rx */
+	struct tasklet_struct i_recv_tasklet;
 	struct mutex i_recv_mutex;
 	struct rds_iw_work_ring i_recv_ring;
 	struct rds_iw_incoming *i_iwinc;
@@ -330,6 +331,7 @@
 int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
 			size_t size);
 void rds_iw_recv_cq_comp_handler(struct ib_cq *cq, void *context);
+void rds_iw_recv_tasklet_fn(unsigned long data);
 void rds_iw_recv_init_ring(struct rds_iw_connection *ic);
 void rds_iw_recv_clear_ring(struct rds_iw_connection *ic);
 void rds_iw_recv_init_ack(struct rds_iw_connection *ic);
net/rds/iw_cm.c
@@ -696,6 +696,8 @@
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&ic->iw_node);
+	tasklet_init(&ic->i_recv_tasklet, rds_iw_recv_tasklet_fn,
+		     (unsigned long) ic);
 	mutex_init(&ic->i_recv_mutex);
 #ifndef KERNEL_HAS_ATOMIC64
 	spin_lock_init(&ic->i_ack_lock);
net/rds/iw_recv.c
@@ -784,17 +784,22 @@
 {
 	struct rds_connection *conn = context;
 	struct rds_iw_connection *ic = conn->c_transport_data;
-	struct ib_wc wc;
-	struct rds_iw_ack_state state = { 0, };
-	struct rds_iw_recv_work *recv;
 
 	rdsdebug("conn %p cq %p\n", conn, cq);
 
 	rds_iw_stats_inc(s_iw_rx_cq_call);
 
-	ib_req_notify_cq(cq, IB_CQ_SOLICITED);
+	tasklet_schedule(&ic->i_recv_tasklet);
+}
 
-	while (ib_poll_cq(cq, 1, &wc) > 0) {
+static inline void rds_poll_cq(struct rds_iw_connection *ic,
+			       struct rds_iw_ack_state *state)
+{
+	struct rds_connection *conn = ic->conn;
+	struct ib_wc wc;
+	struct rds_iw_recv_work *recv;
+
+	while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) {
 		rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
 			 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
 			 be32_to_cpu(wc.ex.imm_data));
@@ -812,7 +817,7 @@
 		if (rds_conn_up(conn) || rds_conn_connecting(conn)) {
 			/* We expect errors as the qp is drained during shutdown */
 			if (wc.status == IB_WC_SUCCESS) {
-				rds_iw_process_recv(conn, recv, wc.byte_len, &state);
+				rds_iw_process_recv(conn, recv, wc.byte_len, state);
 			} else {
 				rds_iw_conn_error(conn, "recv completion on "
 				       "%pI4 had status %u, disconnecting and "
@@ -823,6 +828,17 @@
 
 		rds_iw_ring_free(&ic->i_recv_ring, 1);
 	}
+}
+
+void rds_iw_recv_tasklet_fn(unsigned long data)
+{
+	struct rds_iw_connection *ic = (struct rds_iw_connection *) data;
+	struct rds_connection *conn = ic->conn;
+	struct rds_iw_ack_state state = { 0, };
+
+	rds_poll_cq(ic, &state);
+	ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
+	rds_poll_cq(ic, &state);
 
 	if (state.ack_next_valid)
 		rds_iw_set_ack(ic, state.ack_next, state.ack_required);
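One last observation that applies to both transports: the drain loop polls one work completion at a time with ib_poll_cq(ic->i_recv_cq, 1, &wc). The same verb accepts an array and a capacity and returns how many entries it filled, so a batched drain is also possible. A hedged sketch of that variant, not something this commit does (drain_cq_batched and WC_BATCH are made up for illustration):

#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

#define WC_BATCH 16

static void drain_cq_batched(struct ib_cq *cq)
{
	struct ib_wc wcs[WC_BATCH];
	int n, i;

	/* ib_poll_cq() returns the number of completions copied out. */
	while ((n = ib_poll_cq(cq, WC_BATCH, wcs)) > 0)
		for (i = 0; i < n; i++)
			pr_debug("wr_id 0x%llx status %u\n",
				 (unsigned long long) wcs[i].wr_id,
				 wcs[i].status);
}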