Commit 2f19a40ab952cf99324a11e0f790e102ae9d6a76
Committed by
Greg Kroah-Hartman
1 parent
367b490b53
sctp: fix possible seqlock deadlock in sctp_packet_transmit()
[ Upstream commit 757efd32d5ce31f67193cc0e6a56e4dffcc42fb1 ] Dave reported following splat, caused by improper use of IP_INC_STATS_BH() in process context. BUG: using __this_cpu_add() in preemptible [00000000] code: trinity-c117/14551 caller is __this_cpu_preempt_check+0x13/0x20 CPU: 3 PID: 14551 Comm: trinity-c117 Not tainted 3.16.0+ #33 ffffffff9ec898f0 0000000047ea7e23 ffff88022d32f7f0 ffffffff9e7ee207 0000000000000003 ffff88022d32f818 ffffffff9e397eaa ffff88023ee70b40 ffff88022d32f970 ffff8801c026d580 ffff88022d32f828 ffffffff9e397ee3 Call Trace: [<ffffffff9e7ee207>] dump_stack+0x4e/0x7a [<ffffffff9e397eaa>] check_preemption_disabled+0xfa/0x100 [<ffffffff9e397ee3>] __this_cpu_preempt_check+0x13/0x20 [<ffffffffc0839872>] sctp_packet_transmit+0x692/0x710 [sctp] [<ffffffffc082a7f2>] sctp_outq_flush+0x2a2/0xc30 [sctp] [<ffffffff9e0d985c>] ? mark_held_locks+0x7c/0xb0 [<ffffffff9e7f8c6d>] ? _raw_spin_unlock_irqrestore+0x5d/0x80 [<ffffffffc082b99a>] sctp_outq_uncork+0x1a/0x20 [sctp] [<ffffffffc081e112>] sctp_cmd_interpreter.isra.23+0x1142/0x13f0 [sctp] [<ffffffffc081c86b>] sctp_do_sm+0xdb/0x330 [sctp] [<ffffffff9e0b8f1b>] ? preempt_count_sub+0xab/0x100 [<ffffffffc083b350>] ? sctp_cname+0x70/0x70 [sctp] [<ffffffffc08389ca>] sctp_primitive_ASSOCIATE+0x3a/0x50 [sctp] [<ffffffffc083358f>] sctp_sendmsg+0x88f/0xe30 [sctp] [<ffffffff9e0d673a>] ? lock_release_holdtime.part.28+0x9a/0x160 [<ffffffff9e0d62ce>] ? put_lock_stats.isra.27+0xe/0x30 [<ffffffff9e73b624>] inet_sendmsg+0x104/0x220 [<ffffffff9e73b525>] ? inet_sendmsg+0x5/0x220 [<ffffffff9e68ac4e>] sock_sendmsg+0x9e/0xe0 [<ffffffff9e1c0c09>] ? might_fault+0xb9/0xc0 [<ffffffff9e1c0bae>] ? might_fault+0x5e/0xc0 [<ffffffff9e68b234>] SYSC_sendto+0x124/0x1c0 [<ffffffff9e0136b0>] ? 
syscall_trace_enter+0x250/0x330 [<ffffffff9e68c3ce>] SyS_sendto+0xe/0x10 [<ffffffff9e7f9be4>] tracesys+0xdd/0xe2 This is a followup of commits f1d8cba61c3c4b ("inet: fix possible seqlock deadlocks") and 7f88c6b23afbd315 ("ipv6: fix possible seqlock deadlock in ip6_finish_output2") Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Hannes Frederic Sowa <hannes@stressinduktion.org> Reported-by: Dave Jones <davej@redhat.com> Acked-by: Neil Horman <nhorman@tuxdriver.com> Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Showing 1 changed file with 1 additions and 1 deletions Inline Diff
net/sctp/output.c
1 | /* SCTP kernel implementation | 1 | /* SCTP kernel implementation |
2 | * (C) Copyright IBM Corp. 2001, 2004 | 2 | * (C) Copyright IBM Corp. 2001, 2004 |
3 | * Copyright (c) 1999-2000 Cisco, Inc. | 3 | * Copyright (c) 1999-2000 Cisco, Inc. |
4 | * Copyright (c) 1999-2001 Motorola, Inc. | 4 | * Copyright (c) 1999-2001 Motorola, Inc. |
5 | * | 5 | * |
6 | * This file is part of the SCTP kernel implementation | 6 | * This file is part of the SCTP kernel implementation |
7 | * | 7 | * |
8 | * These functions handle output processing. | 8 | * These functions handle output processing. |
9 | * | 9 | * |
10 | * This SCTP implementation is free software; | 10 | * This SCTP implementation is free software; |
11 | * you can redistribute it and/or modify it under the terms of | 11 | * you can redistribute it and/or modify it under the terms of |
12 | * the GNU General Public License as published by | 12 | * the GNU General Public License as published by |
13 | * the Free Software Foundation; either version 2, or (at your option) | 13 | * the Free Software Foundation; either version 2, or (at your option) |
14 | * any later version. | 14 | * any later version. |
15 | * | 15 | * |
16 | * This SCTP implementation is distributed in the hope that it | 16 | * This SCTP implementation is distributed in the hope that it |
17 | * will be useful, but WITHOUT ANY WARRANTY; without even the implied | 17 | * will be useful, but WITHOUT ANY WARRANTY; without even the implied |
18 | * ************************ | 18 | * ************************ |
19 | * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | 19 | * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
20 | * See the GNU General Public License for more details. | 20 | * See the GNU General Public License for more details. |
21 | * | 21 | * |
22 | * You should have received a copy of the GNU General Public License | 22 | * You should have received a copy of the GNU General Public License |
23 | * along with GNU CC; see the file COPYING. If not, see | 23 | * along with GNU CC; see the file COPYING. If not, see |
24 | * <http://www.gnu.org/licenses/>. | 24 | * <http://www.gnu.org/licenses/>. |
25 | * | 25 | * |
26 | * Please send any bug reports or fixes you make to the | 26 | * Please send any bug reports or fixes you make to the |
27 | * email address(es): | 27 | * email address(es): |
28 | * lksctp developers <linux-sctp@vger.kernel.org> | 28 | * lksctp developers <linux-sctp@vger.kernel.org> |
29 | * | 29 | * |
30 | * Written or modified by: | 30 | * Written or modified by: |
31 | * La Monte H.P. Yarroll <piggy@acm.org> | 31 | * La Monte H.P. Yarroll <piggy@acm.org> |
32 | * Karl Knutson <karl@athena.chicago.il.us> | 32 | * Karl Knutson <karl@athena.chicago.il.us> |
33 | * Jon Grimm <jgrimm@austin.ibm.com> | 33 | * Jon Grimm <jgrimm@austin.ibm.com> |
34 | * Sridhar Samudrala <sri@us.ibm.com> | 34 | * Sridhar Samudrala <sri@us.ibm.com> |
35 | */ | 35 | */ |
36 | 36 | ||
37 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | 37 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
38 | 38 | ||
39 | #include <linux/types.h> | 39 | #include <linux/types.h> |
40 | #include <linux/kernel.h> | 40 | #include <linux/kernel.h> |
41 | #include <linux/wait.h> | 41 | #include <linux/wait.h> |
42 | #include <linux/time.h> | 42 | #include <linux/time.h> |
43 | #include <linux/ip.h> | 43 | #include <linux/ip.h> |
44 | #include <linux/ipv6.h> | 44 | #include <linux/ipv6.h> |
45 | #include <linux/init.h> | 45 | #include <linux/init.h> |
46 | #include <linux/slab.h> | 46 | #include <linux/slab.h> |
47 | #include <net/inet_ecn.h> | 47 | #include <net/inet_ecn.h> |
48 | #include <net/ip.h> | 48 | #include <net/ip.h> |
49 | #include <net/icmp.h> | 49 | #include <net/icmp.h> |
50 | #include <net/net_namespace.h> | 50 | #include <net/net_namespace.h> |
51 | 51 | ||
52 | #include <linux/socket.h> /* for sa_family_t */ | 52 | #include <linux/socket.h> /* for sa_family_t */ |
53 | #include <net/sock.h> | 53 | #include <net/sock.h> |
54 | 54 | ||
55 | #include <net/sctp/sctp.h> | 55 | #include <net/sctp/sctp.h> |
56 | #include <net/sctp/sm.h> | 56 | #include <net/sctp/sm.h> |
57 | #include <net/sctp/checksum.h> | 57 | #include <net/sctp/checksum.h> |
58 | 58 | ||
59 | /* Forward declarations for private helpers. */ | 59 | /* Forward declarations for private helpers. */ |
60 | static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, | 60 | static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, |
61 | struct sctp_chunk *chunk); | 61 | struct sctp_chunk *chunk); |
62 | static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, | 62 | static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, |
63 | struct sctp_chunk *chunk); | 63 | struct sctp_chunk *chunk); |
64 | static void sctp_packet_append_data(struct sctp_packet *packet, | 64 | static void sctp_packet_append_data(struct sctp_packet *packet, |
65 | struct sctp_chunk *chunk); | 65 | struct sctp_chunk *chunk); |
66 | static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, | 66 | static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, |
67 | struct sctp_chunk *chunk, | 67 | struct sctp_chunk *chunk, |
68 | u16 chunk_len); | 68 | u16 chunk_len); |
69 | 69 | ||
70 | static void sctp_packet_reset(struct sctp_packet *packet) | 70 | static void sctp_packet_reset(struct sctp_packet *packet) |
71 | { | 71 | { |
72 | packet->size = packet->overhead; | 72 | packet->size = packet->overhead; |
73 | packet->has_cookie_echo = 0; | 73 | packet->has_cookie_echo = 0; |
74 | packet->has_sack = 0; | 74 | packet->has_sack = 0; |
75 | packet->has_data = 0; | 75 | packet->has_data = 0; |
76 | packet->has_auth = 0; | 76 | packet->has_auth = 0; |
77 | packet->ipfragok = 0; | 77 | packet->ipfragok = 0; |
78 | packet->auth = NULL; | 78 | packet->auth = NULL; |
79 | } | 79 | } |
80 | 80 | ||
81 | /* Config a packet. | 81 | /* Config a packet. |
82 | * This appears to be a followup set of initializations. | 82 | * This appears to be a followup set of initializations. |
83 | */ | 83 | */ |
84 | struct sctp_packet *sctp_packet_config(struct sctp_packet *packet, | 84 | struct sctp_packet *sctp_packet_config(struct sctp_packet *packet, |
85 | __u32 vtag, int ecn_capable) | 85 | __u32 vtag, int ecn_capable) |
86 | { | 86 | { |
87 | struct sctp_chunk *chunk = NULL; | 87 | struct sctp_chunk *chunk = NULL; |
88 | 88 | ||
89 | pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); | 89 | pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); |
90 | 90 | ||
91 | packet->vtag = vtag; | 91 | packet->vtag = vtag; |
92 | 92 | ||
93 | if (ecn_capable && sctp_packet_empty(packet)) { | 93 | if (ecn_capable && sctp_packet_empty(packet)) { |
94 | chunk = sctp_get_ecne_prepend(packet->transport->asoc); | 94 | chunk = sctp_get_ecne_prepend(packet->transport->asoc); |
95 | 95 | ||
96 | /* If there a is a prepend chunk stick it on the list before | 96 | /* If there a is a prepend chunk stick it on the list before |
97 | * any other chunks get appended. | 97 | * any other chunks get appended. |
98 | */ | 98 | */ |
99 | if (chunk) | 99 | if (chunk) |
100 | sctp_packet_append_chunk(packet, chunk); | 100 | sctp_packet_append_chunk(packet, chunk); |
101 | } | 101 | } |
102 | 102 | ||
103 | return packet; | 103 | return packet; |
104 | } | 104 | } |
105 | 105 | ||
106 | /* Initialize the packet structure. */ | 106 | /* Initialize the packet structure. */ |
107 | struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, | 107 | struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, |
108 | struct sctp_transport *transport, | 108 | struct sctp_transport *transport, |
109 | __u16 sport, __u16 dport) | 109 | __u16 sport, __u16 dport) |
110 | { | 110 | { |
111 | struct sctp_association *asoc = transport->asoc; | 111 | struct sctp_association *asoc = transport->asoc; |
112 | size_t overhead; | 112 | size_t overhead; |
113 | 113 | ||
114 | pr_debug("%s: packet:%p transport:%p\n", __func__, packet, transport); | 114 | pr_debug("%s: packet:%p transport:%p\n", __func__, packet, transport); |
115 | 115 | ||
116 | packet->transport = transport; | 116 | packet->transport = transport; |
117 | packet->source_port = sport; | 117 | packet->source_port = sport; |
118 | packet->destination_port = dport; | 118 | packet->destination_port = dport; |
119 | INIT_LIST_HEAD(&packet->chunk_list); | 119 | INIT_LIST_HEAD(&packet->chunk_list); |
120 | if (asoc) { | 120 | if (asoc) { |
121 | struct sctp_sock *sp = sctp_sk(asoc->base.sk); | 121 | struct sctp_sock *sp = sctp_sk(asoc->base.sk); |
122 | overhead = sp->pf->af->net_header_len; | 122 | overhead = sp->pf->af->net_header_len; |
123 | } else { | 123 | } else { |
124 | overhead = sizeof(struct ipv6hdr); | 124 | overhead = sizeof(struct ipv6hdr); |
125 | } | 125 | } |
126 | overhead += sizeof(struct sctphdr); | 126 | overhead += sizeof(struct sctphdr); |
127 | packet->overhead = overhead; | 127 | packet->overhead = overhead; |
128 | sctp_packet_reset(packet); | 128 | sctp_packet_reset(packet); |
129 | packet->vtag = 0; | 129 | packet->vtag = 0; |
130 | 130 | ||
131 | return packet; | 131 | return packet; |
132 | } | 132 | } |
133 | 133 | ||
134 | /* Free a packet. */ | 134 | /* Free a packet. */ |
135 | void sctp_packet_free(struct sctp_packet *packet) | 135 | void sctp_packet_free(struct sctp_packet *packet) |
136 | { | 136 | { |
137 | struct sctp_chunk *chunk, *tmp; | 137 | struct sctp_chunk *chunk, *tmp; |
138 | 138 | ||
139 | pr_debug("%s: packet:%p\n", __func__, packet); | 139 | pr_debug("%s: packet:%p\n", __func__, packet); |
140 | 140 | ||
141 | list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { | 141 | list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { |
142 | list_del_init(&chunk->list); | 142 | list_del_init(&chunk->list); |
143 | sctp_chunk_free(chunk); | 143 | sctp_chunk_free(chunk); |
144 | } | 144 | } |
145 | } | 145 | } |
146 | 146 | ||
147 | /* This routine tries to append the chunk to the offered packet. If adding | 147 | /* This routine tries to append the chunk to the offered packet. If adding |
148 | * the chunk causes the packet to exceed the path MTU and COOKIE_ECHO chunk | 148 | * the chunk causes the packet to exceed the path MTU and COOKIE_ECHO chunk |
149 | * is not present in the packet, it transmits the input packet. | 149 | * is not present in the packet, it transmits the input packet. |
150 | * Data can be bundled with a packet containing a COOKIE_ECHO chunk as long | 150 | * Data can be bundled with a packet containing a COOKIE_ECHO chunk as long |
151 | * as it can fit in the packet, but any more data that does not fit in this | 151 | * as it can fit in the packet, but any more data that does not fit in this |
152 | * packet can be sent only after receiving the COOKIE_ACK. | 152 | * packet can be sent only after receiving the COOKIE_ACK. |
153 | */ | 153 | */ |
154 | sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, | 154 | sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, |
155 | struct sctp_chunk *chunk, | 155 | struct sctp_chunk *chunk, |
156 | int one_packet) | 156 | int one_packet) |
157 | { | 157 | { |
158 | sctp_xmit_t retval; | 158 | sctp_xmit_t retval; |
159 | int error = 0; | 159 | int error = 0; |
160 | 160 | ||
161 | pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk); | 161 | pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk); |
162 | 162 | ||
163 | switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) { | 163 | switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) { |
164 | case SCTP_XMIT_PMTU_FULL: | 164 | case SCTP_XMIT_PMTU_FULL: |
165 | if (!packet->has_cookie_echo) { | 165 | if (!packet->has_cookie_echo) { |
166 | error = sctp_packet_transmit(packet); | 166 | error = sctp_packet_transmit(packet); |
167 | if (error < 0) | 167 | if (error < 0) |
168 | chunk->skb->sk->sk_err = -error; | 168 | chunk->skb->sk->sk_err = -error; |
169 | 169 | ||
170 | /* If we have an empty packet, then we can NOT ever | 170 | /* If we have an empty packet, then we can NOT ever |
171 | * return PMTU_FULL. | 171 | * return PMTU_FULL. |
172 | */ | 172 | */ |
173 | if (!one_packet) | 173 | if (!one_packet) |
174 | retval = sctp_packet_append_chunk(packet, | 174 | retval = sctp_packet_append_chunk(packet, |
175 | chunk); | 175 | chunk); |
176 | } | 176 | } |
177 | break; | 177 | break; |
178 | 178 | ||
179 | case SCTP_XMIT_RWND_FULL: | 179 | case SCTP_XMIT_RWND_FULL: |
180 | case SCTP_XMIT_OK: | 180 | case SCTP_XMIT_OK: |
181 | case SCTP_XMIT_NAGLE_DELAY: | 181 | case SCTP_XMIT_NAGLE_DELAY: |
182 | break; | 182 | break; |
183 | } | 183 | } |
184 | 184 | ||
185 | return retval; | 185 | return retval; |
186 | } | 186 | } |
187 | 187 | ||
188 | /* Try to bundle an auth chunk into the packet. */ | 188 | /* Try to bundle an auth chunk into the packet. */ |
189 | static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt, | 189 | static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt, |
190 | struct sctp_chunk *chunk) | 190 | struct sctp_chunk *chunk) |
191 | { | 191 | { |
192 | struct sctp_association *asoc = pkt->transport->asoc; | 192 | struct sctp_association *asoc = pkt->transport->asoc; |
193 | struct sctp_chunk *auth; | 193 | struct sctp_chunk *auth; |
194 | sctp_xmit_t retval = SCTP_XMIT_OK; | 194 | sctp_xmit_t retval = SCTP_XMIT_OK; |
195 | 195 | ||
196 | /* if we don't have an association, we can't do authentication */ | 196 | /* if we don't have an association, we can't do authentication */ |
197 | if (!asoc) | 197 | if (!asoc) |
198 | return retval; | 198 | return retval; |
199 | 199 | ||
200 | /* See if this is an auth chunk we are bundling or if | 200 | /* See if this is an auth chunk we are bundling or if |
201 | * auth is already bundled. | 201 | * auth is already bundled. |
202 | */ | 202 | */ |
203 | if (chunk->chunk_hdr->type == SCTP_CID_AUTH || pkt->has_auth) | 203 | if (chunk->chunk_hdr->type == SCTP_CID_AUTH || pkt->has_auth) |
204 | return retval; | 204 | return retval; |
205 | 205 | ||
206 | /* if the peer did not request this chunk to be authenticated, | 206 | /* if the peer did not request this chunk to be authenticated, |
207 | * don't do it | 207 | * don't do it |
208 | */ | 208 | */ |
209 | if (!chunk->auth) | 209 | if (!chunk->auth) |
210 | return retval; | 210 | return retval; |
211 | 211 | ||
212 | auth = sctp_make_auth(asoc); | 212 | auth = sctp_make_auth(asoc); |
213 | if (!auth) | 213 | if (!auth) |
214 | return retval; | 214 | return retval; |
215 | 215 | ||
216 | retval = __sctp_packet_append_chunk(pkt, auth); | 216 | retval = __sctp_packet_append_chunk(pkt, auth); |
217 | 217 | ||
218 | if (retval != SCTP_XMIT_OK) | 218 | if (retval != SCTP_XMIT_OK) |
219 | sctp_chunk_free(auth); | 219 | sctp_chunk_free(auth); |
220 | 220 | ||
221 | return retval; | 221 | return retval; |
222 | } | 222 | } |
223 | 223 | ||
224 | /* Try to bundle a SACK with the packet. */ | 224 | /* Try to bundle a SACK with the packet. */ |
225 | static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt, | 225 | static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt, |
226 | struct sctp_chunk *chunk) | 226 | struct sctp_chunk *chunk) |
227 | { | 227 | { |
228 | sctp_xmit_t retval = SCTP_XMIT_OK; | 228 | sctp_xmit_t retval = SCTP_XMIT_OK; |
229 | 229 | ||
230 | /* If sending DATA and haven't aleady bundled a SACK, try to | 230 | /* If sending DATA and haven't aleady bundled a SACK, try to |
231 | * bundle one in to the packet. | 231 | * bundle one in to the packet. |
232 | */ | 232 | */ |
233 | if (sctp_chunk_is_data(chunk) && !pkt->has_sack && | 233 | if (sctp_chunk_is_data(chunk) && !pkt->has_sack && |
234 | !pkt->has_cookie_echo) { | 234 | !pkt->has_cookie_echo) { |
235 | struct sctp_association *asoc; | 235 | struct sctp_association *asoc; |
236 | struct timer_list *timer; | 236 | struct timer_list *timer; |
237 | asoc = pkt->transport->asoc; | 237 | asoc = pkt->transport->asoc; |
238 | timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK]; | 238 | timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK]; |
239 | 239 | ||
240 | /* If the SACK timer is running, we have a pending SACK */ | 240 | /* If the SACK timer is running, we have a pending SACK */ |
241 | if (timer_pending(timer)) { | 241 | if (timer_pending(timer)) { |
242 | struct sctp_chunk *sack; | 242 | struct sctp_chunk *sack; |
243 | 243 | ||
244 | if (pkt->transport->sack_generation != | 244 | if (pkt->transport->sack_generation != |
245 | pkt->transport->asoc->peer.sack_generation) | 245 | pkt->transport->asoc->peer.sack_generation) |
246 | return retval; | 246 | return retval; |
247 | 247 | ||
248 | asoc->a_rwnd = asoc->rwnd; | 248 | asoc->a_rwnd = asoc->rwnd; |
249 | sack = sctp_make_sack(asoc); | 249 | sack = sctp_make_sack(asoc); |
250 | if (sack) { | 250 | if (sack) { |
251 | retval = __sctp_packet_append_chunk(pkt, sack); | 251 | retval = __sctp_packet_append_chunk(pkt, sack); |
252 | if (retval != SCTP_XMIT_OK) { | 252 | if (retval != SCTP_XMIT_OK) { |
253 | sctp_chunk_free(sack); | 253 | sctp_chunk_free(sack); |
254 | goto out; | 254 | goto out; |
255 | } | 255 | } |
256 | asoc->peer.sack_needed = 0; | 256 | asoc->peer.sack_needed = 0; |
257 | if (del_timer(timer)) | 257 | if (del_timer(timer)) |
258 | sctp_association_put(asoc); | 258 | sctp_association_put(asoc); |
259 | } | 259 | } |
260 | } | 260 | } |
261 | } | 261 | } |
262 | out: | 262 | out: |
263 | return retval; | 263 | return retval; |
264 | } | 264 | } |
265 | 265 | ||
266 | 266 | ||
267 | /* Append a chunk to the offered packet reporting back any inability to do | 267 | /* Append a chunk to the offered packet reporting back any inability to do |
268 | * so. | 268 | * so. |
269 | */ | 269 | */ |
270 | static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, | 270 | static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, |
271 | struct sctp_chunk *chunk) | 271 | struct sctp_chunk *chunk) |
272 | { | 272 | { |
273 | sctp_xmit_t retval = SCTP_XMIT_OK; | 273 | sctp_xmit_t retval = SCTP_XMIT_OK; |
274 | __u16 chunk_len = WORD_ROUND(ntohs(chunk->chunk_hdr->length)); | 274 | __u16 chunk_len = WORD_ROUND(ntohs(chunk->chunk_hdr->length)); |
275 | 275 | ||
276 | /* Check to see if this chunk will fit into the packet */ | 276 | /* Check to see if this chunk will fit into the packet */ |
277 | retval = sctp_packet_will_fit(packet, chunk, chunk_len); | 277 | retval = sctp_packet_will_fit(packet, chunk, chunk_len); |
278 | if (retval != SCTP_XMIT_OK) | 278 | if (retval != SCTP_XMIT_OK) |
279 | goto finish; | 279 | goto finish; |
280 | 280 | ||
281 | /* We believe that this chunk is OK to add to the packet */ | 281 | /* We believe that this chunk is OK to add to the packet */ |
282 | switch (chunk->chunk_hdr->type) { | 282 | switch (chunk->chunk_hdr->type) { |
283 | case SCTP_CID_DATA: | 283 | case SCTP_CID_DATA: |
284 | /* Account for the data being in the packet */ | 284 | /* Account for the data being in the packet */ |
285 | sctp_packet_append_data(packet, chunk); | 285 | sctp_packet_append_data(packet, chunk); |
286 | /* Disallow SACK bundling after DATA. */ | 286 | /* Disallow SACK bundling after DATA. */ |
287 | packet->has_sack = 1; | 287 | packet->has_sack = 1; |
288 | /* Disallow AUTH bundling after DATA */ | 288 | /* Disallow AUTH bundling after DATA */ |
289 | packet->has_auth = 1; | 289 | packet->has_auth = 1; |
290 | /* Let it be knows that packet has DATA in it */ | 290 | /* Let it be knows that packet has DATA in it */ |
291 | packet->has_data = 1; | 291 | packet->has_data = 1; |
292 | /* timestamp the chunk for rtx purposes */ | 292 | /* timestamp the chunk for rtx purposes */ |
293 | chunk->sent_at = jiffies; | 293 | chunk->sent_at = jiffies; |
294 | break; | 294 | break; |
295 | case SCTP_CID_COOKIE_ECHO: | 295 | case SCTP_CID_COOKIE_ECHO: |
296 | packet->has_cookie_echo = 1; | 296 | packet->has_cookie_echo = 1; |
297 | break; | 297 | break; |
298 | 298 | ||
299 | case SCTP_CID_SACK: | 299 | case SCTP_CID_SACK: |
300 | packet->has_sack = 1; | 300 | packet->has_sack = 1; |
301 | if (chunk->asoc) | 301 | if (chunk->asoc) |
302 | chunk->asoc->stats.osacks++; | 302 | chunk->asoc->stats.osacks++; |
303 | break; | 303 | break; |
304 | 304 | ||
305 | case SCTP_CID_AUTH: | 305 | case SCTP_CID_AUTH: |
306 | packet->has_auth = 1; | 306 | packet->has_auth = 1; |
307 | packet->auth = chunk; | 307 | packet->auth = chunk; |
308 | break; | 308 | break; |
309 | } | 309 | } |
310 | 310 | ||
311 | /* It is OK to send this chunk. */ | 311 | /* It is OK to send this chunk. */ |
312 | list_add_tail(&chunk->list, &packet->chunk_list); | 312 | list_add_tail(&chunk->list, &packet->chunk_list); |
313 | packet->size += chunk_len; | 313 | packet->size += chunk_len; |
314 | chunk->transport = packet->transport; | 314 | chunk->transport = packet->transport; |
315 | finish: | 315 | finish: |
316 | return retval; | 316 | return retval; |
317 | } | 317 | } |
318 | 318 | ||
319 | /* Append a chunk to the offered packet reporting back any inability to do | 319 | /* Append a chunk to the offered packet reporting back any inability to do |
320 | * so. | 320 | * so. |
321 | */ | 321 | */ |
322 | sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, | 322 | sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, |
323 | struct sctp_chunk *chunk) | 323 | struct sctp_chunk *chunk) |
324 | { | 324 | { |
325 | sctp_xmit_t retval = SCTP_XMIT_OK; | 325 | sctp_xmit_t retval = SCTP_XMIT_OK; |
326 | 326 | ||
327 | pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk); | 327 | pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk); |
328 | 328 | ||
329 | /* Data chunks are special. Before seeing what else we can | 329 | /* Data chunks are special. Before seeing what else we can |
330 | * bundle into this packet, check to see if we are allowed to | 330 | * bundle into this packet, check to see if we are allowed to |
331 | * send this DATA. | 331 | * send this DATA. |
332 | */ | 332 | */ |
333 | if (sctp_chunk_is_data(chunk)) { | 333 | if (sctp_chunk_is_data(chunk)) { |
334 | retval = sctp_packet_can_append_data(packet, chunk); | 334 | retval = sctp_packet_can_append_data(packet, chunk); |
335 | if (retval != SCTP_XMIT_OK) | 335 | if (retval != SCTP_XMIT_OK) |
336 | goto finish; | 336 | goto finish; |
337 | } | 337 | } |
338 | 338 | ||
339 | /* Try to bundle AUTH chunk */ | 339 | /* Try to bundle AUTH chunk */ |
340 | retval = sctp_packet_bundle_auth(packet, chunk); | 340 | retval = sctp_packet_bundle_auth(packet, chunk); |
341 | if (retval != SCTP_XMIT_OK) | 341 | if (retval != SCTP_XMIT_OK) |
342 | goto finish; | 342 | goto finish; |
343 | 343 | ||
344 | /* Try to bundle SACK chunk */ | 344 | /* Try to bundle SACK chunk */ |
345 | retval = sctp_packet_bundle_sack(packet, chunk); | 345 | retval = sctp_packet_bundle_sack(packet, chunk); |
346 | if (retval != SCTP_XMIT_OK) | 346 | if (retval != SCTP_XMIT_OK) |
347 | goto finish; | 347 | goto finish; |
348 | 348 | ||
349 | retval = __sctp_packet_append_chunk(packet, chunk); | 349 | retval = __sctp_packet_append_chunk(packet, chunk); |
350 | 350 | ||
351 | finish: | 351 | finish: |
352 | return retval; | 352 | return retval; |
353 | } | 353 | } |
354 | 354 | ||
355 | static void sctp_packet_release_owner(struct sk_buff *skb) | 355 | static void sctp_packet_release_owner(struct sk_buff *skb) |
356 | { | 356 | { |
357 | sk_free(skb->sk); | 357 | sk_free(skb->sk); |
358 | } | 358 | } |
359 | 359 | ||
360 | static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk) | 360 | static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk) |
361 | { | 361 | { |
362 | skb_orphan(skb); | 362 | skb_orphan(skb); |
363 | skb->sk = sk; | 363 | skb->sk = sk; |
364 | skb->destructor = sctp_packet_release_owner; | 364 | skb->destructor = sctp_packet_release_owner; |
365 | 365 | ||
366 | /* | 366 | /* |
367 | * The data chunks have already been accounted for in sctp_sendmsg(), | 367 | * The data chunks have already been accounted for in sctp_sendmsg(), |
368 | * therefore only reserve a single byte to keep socket around until | 368 | * therefore only reserve a single byte to keep socket around until |
369 | * the packet has been transmitted. | 369 | * the packet has been transmitted. |
370 | */ | 370 | */ |
371 | atomic_inc(&sk->sk_wmem_alloc); | 371 | atomic_inc(&sk->sk_wmem_alloc); |
372 | } | 372 | } |
373 | 373 | ||
374 | /* All packets are sent to the network through this function from | 374 | /* All packets are sent to the network through this function from |
375 | * sctp_outq_tail(). | 375 | * sctp_outq_tail(). |
376 | * | 376 | * |
377 | * The return value is a normal kernel error return value. | 377 | * The return value is a normal kernel error return value. |
378 | */ | 378 | */ |
379 | int sctp_packet_transmit(struct sctp_packet *packet) | 379 | int sctp_packet_transmit(struct sctp_packet *packet) |
380 | { | 380 | { |
381 | struct sctp_transport *tp = packet->transport; | 381 | struct sctp_transport *tp = packet->transport; |
382 | struct sctp_association *asoc = tp->asoc; | 382 | struct sctp_association *asoc = tp->asoc; |
383 | struct sctphdr *sh; | 383 | struct sctphdr *sh; |
384 | struct sk_buff *nskb; | 384 | struct sk_buff *nskb; |
385 | struct sctp_chunk *chunk, *tmp; | 385 | struct sctp_chunk *chunk, *tmp; |
386 | struct sock *sk; | 386 | struct sock *sk; |
387 | int err = 0; | 387 | int err = 0; |
388 | int padding; /* How much padding do we need? */ | 388 | int padding; /* How much padding do we need? */ |
389 | __u8 has_data = 0; | 389 | __u8 has_data = 0; |
390 | struct dst_entry *dst; | 390 | struct dst_entry *dst; |
391 | unsigned char *auth = NULL; /* pointer to auth in skb data */ | 391 | unsigned char *auth = NULL; /* pointer to auth in skb data */ |
392 | 392 | ||
393 | pr_debug("%s: packet:%p\n", __func__, packet); | 393 | pr_debug("%s: packet:%p\n", __func__, packet); |
394 | 394 | ||
395 | /* Do NOT generate a chunkless packet. */ | 395 | /* Do NOT generate a chunkless packet. */ |
396 | if (list_empty(&packet->chunk_list)) | 396 | if (list_empty(&packet->chunk_list)) |
397 | return err; | 397 | return err; |
398 | 398 | ||
399 | /* Set up convenience variables... */ | 399 | /* Set up convenience variables... */ |
400 | chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list); | 400 | chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list); |
401 | sk = chunk->skb->sk; | 401 | sk = chunk->skb->sk; |
402 | 402 | ||
403 | /* Allocate the new skb. */ | 403 | /* Allocate the new skb. */ |
404 | nskb = alloc_skb(packet->size + LL_MAX_HEADER, GFP_ATOMIC); | 404 | nskb = alloc_skb(packet->size + LL_MAX_HEADER, GFP_ATOMIC); |
405 | if (!nskb) | 405 | if (!nskb) |
406 | goto nomem; | 406 | goto nomem; |
407 | 407 | ||
408 | /* Make sure the outbound skb has enough header room reserved. */ | 408 | /* Make sure the outbound skb has enough header room reserved. */ |
409 | skb_reserve(nskb, packet->overhead + LL_MAX_HEADER); | 409 | skb_reserve(nskb, packet->overhead + LL_MAX_HEADER); |
410 | 410 | ||
411 | /* Set the owning socket so that we know where to get the | 411 | /* Set the owning socket so that we know where to get the |
412 | * destination IP address. | 412 | * destination IP address. |
413 | */ | 413 | */ |
414 | sctp_packet_set_owner_w(nskb, sk); | 414 | sctp_packet_set_owner_w(nskb, sk); |
415 | 415 | ||
416 | if (!sctp_transport_dst_check(tp)) { | 416 | if (!sctp_transport_dst_check(tp)) { |
417 | sctp_transport_route(tp, NULL, sctp_sk(sk)); | 417 | sctp_transport_route(tp, NULL, sctp_sk(sk)); |
418 | if (asoc && (asoc->param_flags & SPP_PMTUD_ENABLE)) { | 418 | if (asoc && (asoc->param_flags & SPP_PMTUD_ENABLE)) { |
419 | sctp_assoc_sync_pmtu(sk, asoc); | 419 | sctp_assoc_sync_pmtu(sk, asoc); |
420 | } | 420 | } |
421 | } | 421 | } |
422 | dst = dst_clone(tp->dst); | 422 | dst = dst_clone(tp->dst); |
423 | if (!dst) | 423 | if (!dst) |
424 | goto no_route; | 424 | goto no_route; |
425 | skb_dst_set(nskb, dst); | 425 | skb_dst_set(nskb, dst); |
426 | 426 | ||
427 | /* Build the SCTP header. */ | 427 | /* Build the SCTP header. */ |
428 | sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr)); | 428 | sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr)); |
429 | skb_reset_transport_header(nskb); | 429 | skb_reset_transport_header(nskb); |
430 | sh->source = htons(packet->source_port); | 430 | sh->source = htons(packet->source_port); |
431 | sh->dest = htons(packet->destination_port); | 431 | sh->dest = htons(packet->destination_port); |
432 | 432 | ||
433 | /* From 6.8 Adler-32 Checksum Calculation: | 433 | /* From 6.8 Adler-32 Checksum Calculation: |
434 | * After the packet is constructed (containing the SCTP common | 434 | * After the packet is constructed (containing the SCTP common |
435 | * header and one or more control or DATA chunks), the | 435 | * header and one or more control or DATA chunks), the |
436 | * transmitter shall: | 436 | * transmitter shall: |
437 | * | 437 | * |
438 | * 1) Fill in the proper Verification Tag in the SCTP common | 438 | * 1) Fill in the proper Verification Tag in the SCTP common |
439 | * header and initialize the checksum field to 0's. | 439 | * header and initialize the checksum field to 0's. |
440 | */ | 440 | */ |
441 | sh->vtag = htonl(packet->vtag); | 441 | sh->vtag = htonl(packet->vtag); |
442 | sh->checksum = 0; | 442 | sh->checksum = 0; |
443 | 443 | ||
444 | /** | 444 | /** |
445 | * 6.10 Bundling | 445 | * 6.10 Bundling |
446 | * | 446 | * |
447 | * An endpoint bundles chunks by simply including multiple | 447 | * An endpoint bundles chunks by simply including multiple |
448 | * chunks in one outbound SCTP packet. ... | 448 | * chunks in one outbound SCTP packet. ... |
449 | */ | 449 | */ |
450 | 450 | ||
451 | /** | 451 | /** |
452 | * 3.2 Chunk Field Descriptions | 452 | * 3.2 Chunk Field Descriptions |
453 | * | 453 | * |
454 | * The total length of a chunk (including Type, Length and | 454 | * The total length of a chunk (including Type, Length and |
455 | * Value fields) MUST be a multiple of 4 bytes. If the length | 455 | * Value fields) MUST be a multiple of 4 bytes. If the length |
456 | * of the chunk is not a multiple of 4 bytes, the sender MUST | 456 | * of the chunk is not a multiple of 4 bytes, the sender MUST |
457 | * pad the chunk with all zero bytes and this padding is not | 457 | * pad the chunk with all zero bytes and this padding is not |
458 | * included in the chunk length field. The sender should | 458 | * included in the chunk length field. The sender should |
459 | * never pad with more than 3 bytes. | 459 | * never pad with more than 3 bytes. |
460 | * | 460 | * |
461 | * [This whole comment explains WORD_ROUND() below.] | 461 | * [This whole comment explains WORD_ROUND() below.] |
462 | */ | 462 | */ |
463 | 463 | ||
464 | pr_debug("***sctp_transmit_packet***\n"); | 464 | pr_debug("***sctp_transmit_packet***\n"); |
465 | 465 | ||
466 | list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { | 466 | list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { |
467 | list_del_init(&chunk->list); | 467 | list_del_init(&chunk->list); |
468 | if (sctp_chunk_is_data(chunk)) { | 468 | if (sctp_chunk_is_data(chunk)) { |
469 | /* 6.3.1 C4) When data is in flight and when allowed | 469 | /* 6.3.1 C4) When data is in flight and when allowed |
470 | * by rule C5, a new RTT measurement MUST be made each | 470 | * by rule C5, a new RTT measurement MUST be made each |
471 | * round trip. Furthermore, new RTT measurements | 471 | * round trip. Furthermore, new RTT measurements |
472 | * SHOULD be made no more than once per round-trip | 472 | * SHOULD be made no more than once per round-trip |
473 | * for a given destination transport address. | 473 | * for a given destination transport address. |
474 | */ | 474 | */ |
475 | 475 | ||
476 | if (!chunk->resent && !tp->rto_pending) { | 476 | if (!chunk->resent && !tp->rto_pending) { |
477 | chunk->rtt_in_progress = 1; | 477 | chunk->rtt_in_progress = 1; |
478 | tp->rto_pending = 1; | 478 | tp->rto_pending = 1; |
479 | } | 479 | } |
480 | 480 | ||
481 | has_data = 1; | 481 | has_data = 1; |
482 | } | 482 | } |
483 | 483 | ||
484 | padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len; | 484 | padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len; |
485 | if (padding) | 485 | if (padding) |
486 | memset(skb_put(chunk->skb, padding), 0, padding); | 486 | memset(skb_put(chunk->skb, padding), 0, padding); |
487 | 487 | ||
488 | /* if this is the auth chunk that we are adding, | 488 | /* if this is the auth chunk that we are adding, |
489 | * store pointer where it will be added and put | 489 | * store pointer where it will be added and put |
490 | * the auth into the packet. | 490 | * the auth into the packet. |
491 | */ | 491 | */ |
492 | if (chunk == packet->auth) | 492 | if (chunk == packet->auth) |
493 | auth = skb_tail_pointer(nskb); | 493 | auth = skb_tail_pointer(nskb); |
494 | 494 | ||
495 | memcpy(skb_put(nskb, chunk->skb->len), | 495 | memcpy(skb_put(nskb, chunk->skb->len), |
496 | chunk->skb->data, chunk->skb->len); | 496 | chunk->skb->data, chunk->skb->len); |
497 | 497 | ||
498 | pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, " | 498 | pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, " |
499 | "rtt_in_progress:%d\n", chunk, | 499 | "rtt_in_progress:%d\n", chunk, |
500 | sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), | 500 | sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), |
501 | chunk->has_tsn ? "TSN" : "No TSN", | 501 | chunk->has_tsn ? "TSN" : "No TSN", |
502 | chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0, | 502 | chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0, |
503 | ntohs(chunk->chunk_hdr->length), chunk->skb->len, | 503 | ntohs(chunk->chunk_hdr->length), chunk->skb->len, |
504 | chunk->rtt_in_progress); | 504 | chunk->rtt_in_progress); |
505 | 505 | ||
506 | /* | 506 | /* |
507 | * If this is a control chunk, this is our last | 507 | * If this is a control chunk, this is our last |
508 | * reference. Free data chunks after they've been | 508 | * reference. Free data chunks after they've been |
509 | * acknowledged or have failed. | 509 | * acknowledged or have failed. |
510 | */ | 510 | */ |
511 | if (!sctp_chunk_is_data(chunk)) | 511 | if (!sctp_chunk_is_data(chunk)) |
512 | sctp_chunk_free(chunk); | 512 | sctp_chunk_free(chunk); |
513 | } | 513 | } |
514 | 514 | ||
515 | /* SCTP-AUTH, Section 6.2 | 515 | /* SCTP-AUTH, Section 6.2 |
516 | * The sender MUST calculate the MAC as described in RFC2104 [2] | 516 | * The sender MUST calculate the MAC as described in RFC2104 [2] |
517 | * using the hash function H as described by the MAC Identifier and | 517 | * using the hash function H as described by the MAC Identifier and |
518 | * the shared association key K based on the endpoint pair shared key | 518 | * the shared association key K based on the endpoint pair shared key |
519 | * described by the shared key identifier. The 'data' used for the | 519 | * described by the shared key identifier. The 'data' used for the |
520 | * computation of the AUTH-chunk is given by the AUTH chunk with its | 520 | * computation of the AUTH-chunk is given by the AUTH chunk with its |
521 | * HMAC field set to zero (as shown in Figure 6) followed by all | 521 | * HMAC field set to zero (as shown in Figure 6) followed by all |
522 | * chunks that are placed after the AUTH chunk in the SCTP packet. | 522 | * chunks that are placed after the AUTH chunk in the SCTP packet. |
523 | */ | 523 | */ |
524 | if (auth) | 524 | if (auth) |
525 | sctp_auth_calculate_hmac(asoc, nskb, | 525 | sctp_auth_calculate_hmac(asoc, nskb, |
526 | (struct sctp_auth_chunk *)auth, | 526 | (struct sctp_auth_chunk *)auth, |
527 | GFP_ATOMIC); | 527 | GFP_ATOMIC); |
528 | 528 | ||
529 | /* 2) Calculate the Adler-32 checksum of the whole packet, | 529 | /* 2) Calculate the Adler-32 checksum of the whole packet, |
530 | * including the SCTP common header and all the | 530 | * including the SCTP common header and all the |
531 | * chunks. | 531 | * chunks. |
532 | * | 532 | * |
533 | * Note: Adler-32 is no longer applicable, as has been replaced | 533 | * Note: Adler-32 is no longer applicable, as has been replaced |
534 | * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. | 534 | * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. |
535 | */ | 535 | */ |
536 | if (!sctp_checksum_disable) { | 536 | if (!sctp_checksum_disable) { |
537 | if (!(dst->dev->features & NETIF_F_SCTP_CSUM) || | 537 | if (!(dst->dev->features & NETIF_F_SCTP_CSUM) || |
538 | (dst_xfrm(dst) != NULL) || packet->ipfragok) { | 538 | (dst_xfrm(dst) != NULL) || packet->ipfragok) { |
539 | sh->checksum = sctp_compute_cksum(nskb, 0); | 539 | sh->checksum = sctp_compute_cksum(nskb, 0); |
540 | } else { | 540 | } else { |
541 | /* no need to seed pseudo checksum for SCTP */ | 541 | /* no need to seed pseudo checksum for SCTP */ |
542 | nskb->ip_summed = CHECKSUM_PARTIAL; | 542 | nskb->ip_summed = CHECKSUM_PARTIAL; |
543 | nskb->csum_start = skb_transport_header(nskb) - nskb->head; | 543 | nskb->csum_start = skb_transport_header(nskb) - nskb->head; |
544 | nskb->csum_offset = offsetof(struct sctphdr, checksum); | 544 | nskb->csum_offset = offsetof(struct sctphdr, checksum); |
545 | } | 545 | } |
546 | } | 546 | } |
547 | 547 | ||
548 | /* IP layer ECN support | 548 | /* IP layer ECN support |
549 | * From RFC 2481 | 549 | * From RFC 2481 |
550 | * "The ECN-Capable Transport (ECT) bit would be set by the | 550 | * "The ECN-Capable Transport (ECT) bit would be set by the |
551 | * data sender to indicate that the end-points of the | 551 | * data sender to indicate that the end-points of the |
552 | * transport protocol are ECN-capable." | 552 | * transport protocol are ECN-capable." |
553 | * | 553 | * |
554 | * Now setting the ECT bit all the time, as it should not cause | 554 | * Now setting the ECT bit all the time, as it should not cause |
555 | * any problems protocol-wise even if our peer ignores it. | 555 | * any problems protocol-wise even if our peer ignores it. |
556 | * | 556 | * |
557 | * Note: The works for IPv6 layer checks this bit too later | 557 | * Note: The works for IPv6 layer checks this bit too later |
558 | * in transmission. See IP6_ECN_flow_xmit(). | 558 | * in transmission. See IP6_ECN_flow_xmit(). |
559 | */ | 559 | */ |
560 | tp->af_specific->ecn_capable(nskb->sk); | 560 | tp->af_specific->ecn_capable(nskb->sk); |
561 | 561 | ||
562 | /* Set up the IP options. */ | 562 | /* Set up the IP options. */ |
563 | /* BUG: not implemented | 563 | /* BUG: not implemented |
564 | * For v4 this all lives somewhere in sk->sk_opt... | 564 | * For v4 this all lives somewhere in sk->sk_opt... |
565 | */ | 565 | */ |
566 | 566 | ||
567 | /* Dump that on IP! */ | 567 | /* Dump that on IP! */ |
568 | if (asoc) { | 568 | if (asoc) { |
569 | asoc->stats.opackets++; | 569 | asoc->stats.opackets++; |
570 | if (asoc->peer.last_sent_to != tp) | 570 | if (asoc->peer.last_sent_to != tp) |
571 | /* Considering the multiple CPU scenario, this is a | 571 | /* Considering the multiple CPU scenario, this is a |
572 | * "correcter" place for last_sent_to. --xguo | 572 | * "correcter" place for last_sent_to. --xguo |
573 | */ | 573 | */ |
574 | asoc->peer.last_sent_to = tp; | 574 | asoc->peer.last_sent_to = tp; |
575 | } | 575 | } |
576 | 576 | ||
577 | if (has_data) { | 577 | if (has_data) { |
578 | struct timer_list *timer; | 578 | struct timer_list *timer; |
579 | unsigned long timeout; | 579 | unsigned long timeout; |
580 | 580 | ||
581 | /* Restart the AUTOCLOSE timer when sending data. */ | 581 | /* Restart the AUTOCLOSE timer when sending data. */ |
582 | if (sctp_state(asoc, ESTABLISHED) && | 582 | if (sctp_state(asoc, ESTABLISHED) && |
583 | asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { | 583 | asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { |
584 | timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; | 584 | timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; |
585 | timeout = asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; | 585 | timeout = asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; |
586 | 586 | ||
587 | if (!mod_timer(timer, jiffies + timeout)) | 587 | if (!mod_timer(timer, jiffies + timeout)) |
588 | sctp_association_hold(asoc); | 588 | sctp_association_hold(asoc); |
589 | } | 589 | } |
590 | } | 590 | } |
591 | 591 | ||
592 | pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len); | 592 | pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len); |
593 | 593 | ||
594 | nskb->local_df = packet->ipfragok; | 594 | nskb->local_df = packet->ipfragok; |
595 | tp->af_specific->sctp_xmit(nskb, tp); | 595 | tp->af_specific->sctp_xmit(nskb, tp); |
596 | 596 | ||
597 | out: | 597 | out: |
598 | sctp_packet_reset(packet); | 598 | sctp_packet_reset(packet); |
599 | return err; | 599 | return err; |
600 | no_route: | 600 | no_route: |
601 | kfree_skb(nskb); | 601 | kfree_skb(nskb); |
602 | IP_INC_STATS_BH(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); | 602 | IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); |
603 | 603 | ||
604 | /* FIXME: Returning the 'err' will effect all the associations | 604 | /* FIXME: Returning the 'err' will effect all the associations |
605 | * associated with a socket, although only one of the paths of the | 605 | * associated with a socket, although only one of the paths of the |
606 | * association is unreachable. | 606 | * association is unreachable. |
607 | * The real failure of a transport or association can be passed on | 607 | * The real failure of a transport or association can be passed on |
608 | * to the user via notifications. So setting this error may not be | 608 | * to the user via notifications. So setting this error may not be |
609 | * required. | 609 | * required. |
610 | */ | 610 | */ |
611 | /* err = -EHOSTUNREACH; */ | 611 | /* err = -EHOSTUNREACH; */ |
612 | err: | 612 | err: |
613 | /* Control chunks are unreliable so just drop them. DATA chunks | 613 | /* Control chunks are unreliable so just drop them. DATA chunks |
614 | * will get resent or dropped later. | 614 | * will get resent or dropped later. |
615 | */ | 615 | */ |
616 | 616 | ||
617 | list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { | 617 | list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { |
618 | list_del_init(&chunk->list); | 618 | list_del_init(&chunk->list); |
619 | if (!sctp_chunk_is_data(chunk)) | 619 | if (!sctp_chunk_is_data(chunk)) |
620 | sctp_chunk_free(chunk); | 620 | sctp_chunk_free(chunk); |
621 | } | 621 | } |
622 | goto out; | 622 | goto out; |
623 | nomem: | 623 | nomem: |
624 | err = -ENOMEM; | 624 | err = -ENOMEM; |
625 | goto err; | 625 | goto err; |
626 | } | 626 | } |
627 | 627 | ||
628 | /******************************************************************** | 628 | /******************************************************************** |
629 | * 2nd Level Abstractions | 629 | * 2nd Level Abstractions |
630 | ********************************************************************/ | 630 | ********************************************************************/ |
631 | 631 | ||
632 | /* This private function check to see if a chunk can be added */ | 632 | /* This private function check to see if a chunk can be added */ |
633 | static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, | 633 | static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, |
634 | struct sctp_chunk *chunk) | 634 | struct sctp_chunk *chunk) |
635 | { | 635 | { |
636 | sctp_xmit_t retval = SCTP_XMIT_OK; | 636 | sctp_xmit_t retval = SCTP_XMIT_OK; |
637 | size_t datasize, rwnd, inflight, flight_size; | 637 | size_t datasize, rwnd, inflight, flight_size; |
638 | struct sctp_transport *transport = packet->transport; | 638 | struct sctp_transport *transport = packet->transport; |
639 | struct sctp_association *asoc = transport->asoc; | 639 | struct sctp_association *asoc = transport->asoc; |
640 | struct sctp_outq *q = &asoc->outqueue; | 640 | struct sctp_outq *q = &asoc->outqueue; |
641 | 641 | ||
642 | /* RFC 2960 6.1 Transmission of DATA Chunks | 642 | /* RFC 2960 6.1 Transmission of DATA Chunks |
643 | * | 643 | * |
644 | * A) At any given time, the data sender MUST NOT transmit new data to | 644 | * A) At any given time, the data sender MUST NOT transmit new data to |
645 | * any destination transport address if its peer's rwnd indicates | 645 | * any destination transport address if its peer's rwnd indicates |
646 | * that the peer has no buffer space (i.e. rwnd is 0, see Section | 646 | * that the peer has no buffer space (i.e. rwnd is 0, see Section |
647 | * 6.2.1). However, regardless of the value of rwnd (including if it | 647 | * 6.2.1). However, regardless of the value of rwnd (including if it |
648 | * is 0), the data sender can always have one DATA chunk in flight to | 648 | * is 0), the data sender can always have one DATA chunk in flight to |
649 | * the receiver if allowed by cwnd (see rule B below). This rule | 649 | * the receiver if allowed by cwnd (see rule B below). This rule |
650 | * allows the sender to probe for a change in rwnd that the sender | 650 | * allows the sender to probe for a change in rwnd that the sender |
651 | * missed due to the SACK having been lost in transit from the data | 651 | * missed due to the SACK having been lost in transit from the data |
652 | * receiver to the data sender. | 652 | * receiver to the data sender. |
653 | */ | 653 | */ |
654 | 654 | ||
655 | rwnd = asoc->peer.rwnd; | 655 | rwnd = asoc->peer.rwnd; |
656 | inflight = q->outstanding_bytes; | 656 | inflight = q->outstanding_bytes; |
657 | flight_size = transport->flight_size; | 657 | flight_size = transport->flight_size; |
658 | 658 | ||
659 | datasize = sctp_data_size(chunk); | 659 | datasize = sctp_data_size(chunk); |
660 | 660 | ||
661 | if (datasize > rwnd) { | 661 | if (datasize > rwnd) { |
662 | if (inflight > 0) { | 662 | if (inflight > 0) { |
663 | /* We have (at least) one data chunk in flight, | 663 | /* We have (at least) one data chunk in flight, |
664 | * so we can't fall back to rule 6.1 B). | 664 | * so we can't fall back to rule 6.1 B). |
665 | */ | 665 | */ |
666 | retval = SCTP_XMIT_RWND_FULL; | 666 | retval = SCTP_XMIT_RWND_FULL; |
667 | goto finish; | 667 | goto finish; |
668 | } | 668 | } |
669 | } | 669 | } |
670 | 670 | ||
671 | /* RFC 2960 6.1 Transmission of DATA Chunks | 671 | /* RFC 2960 6.1 Transmission of DATA Chunks |
672 | * | 672 | * |
673 | * B) At any given time, the sender MUST NOT transmit new data | 673 | * B) At any given time, the sender MUST NOT transmit new data |
674 | * to a given transport address if it has cwnd or more bytes | 674 | * to a given transport address if it has cwnd or more bytes |
675 | * of data outstanding to that transport address. | 675 | * of data outstanding to that transport address. |
676 | */ | 676 | */ |
677 | /* RFC 7.2.4 & the Implementers Guide 2.8. | 677 | /* RFC 7.2.4 & the Implementers Guide 2.8. |
678 | * | 678 | * |
679 | * 3) ... | 679 | * 3) ... |
680 | * When a Fast Retransmit is being performed the sender SHOULD | 680 | * When a Fast Retransmit is being performed the sender SHOULD |
681 | * ignore the value of cwnd and SHOULD NOT delay retransmission. | 681 | * ignore the value of cwnd and SHOULD NOT delay retransmission. |
682 | */ | 682 | */ |
683 | if (chunk->fast_retransmit != SCTP_NEED_FRTX) | 683 | if (chunk->fast_retransmit != SCTP_NEED_FRTX) |
684 | if (flight_size >= transport->cwnd) { | 684 | if (flight_size >= transport->cwnd) { |
685 | retval = SCTP_XMIT_RWND_FULL; | 685 | retval = SCTP_XMIT_RWND_FULL; |
686 | goto finish; | 686 | goto finish; |
687 | } | 687 | } |
688 | 688 | ||
689 | /* Nagle's algorithm to solve small-packet problem: | 689 | /* Nagle's algorithm to solve small-packet problem: |
690 | * Inhibit the sending of new chunks when new outgoing data arrives | 690 | * Inhibit the sending of new chunks when new outgoing data arrives |
691 | * if any previously transmitted data on the connection remains | 691 | * if any previously transmitted data on the connection remains |
692 | * unacknowledged. | 692 | * unacknowledged. |
693 | */ | 693 | */ |
694 | if (!sctp_sk(asoc->base.sk)->nodelay && sctp_packet_empty(packet) && | 694 | if (!sctp_sk(asoc->base.sk)->nodelay && sctp_packet_empty(packet) && |
695 | inflight && sctp_state(asoc, ESTABLISHED)) { | 695 | inflight && sctp_state(asoc, ESTABLISHED)) { |
696 | unsigned int max = transport->pathmtu - packet->overhead; | 696 | unsigned int max = transport->pathmtu - packet->overhead; |
697 | unsigned int len = chunk->skb->len + q->out_qlen; | 697 | unsigned int len = chunk->skb->len + q->out_qlen; |
698 | 698 | ||
699 | /* Check whether this chunk and all the rest of pending | 699 | /* Check whether this chunk and all the rest of pending |
700 | * data will fit or delay in hopes of bundling a full | 700 | * data will fit or delay in hopes of bundling a full |
701 | * sized packet. | 701 | * sized packet. |
702 | * Don't delay large message writes that may have been | 702 | * Don't delay large message writes that may have been |
703 | * fragmeneted into small peices. | 703 | * fragmeneted into small peices. |
704 | */ | 704 | */ |
705 | if ((len < max) && chunk->msg->can_delay) { | 705 | if ((len < max) && chunk->msg->can_delay) { |
706 | retval = SCTP_XMIT_NAGLE_DELAY; | 706 | retval = SCTP_XMIT_NAGLE_DELAY; |
707 | goto finish; | 707 | goto finish; |
708 | } | 708 | } |
709 | } | 709 | } |
710 | 710 | ||
711 | finish: | 711 | finish: |
712 | return retval; | 712 | return retval; |
713 | } | 713 | } |
714 | 714 | ||
715 | /* This private function does management things when adding DATA chunk */ | 715 | /* This private function does management things when adding DATA chunk */ |
716 | static void sctp_packet_append_data(struct sctp_packet *packet, | 716 | static void sctp_packet_append_data(struct sctp_packet *packet, |
717 | struct sctp_chunk *chunk) | 717 | struct sctp_chunk *chunk) |
718 | { | 718 | { |
719 | struct sctp_transport *transport = packet->transport; | 719 | struct sctp_transport *transport = packet->transport; |
720 | size_t datasize = sctp_data_size(chunk); | 720 | size_t datasize = sctp_data_size(chunk); |
721 | struct sctp_association *asoc = transport->asoc; | 721 | struct sctp_association *asoc = transport->asoc; |
722 | u32 rwnd = asoc->peer.rwnd; | 722 | u32 rwnd = asoc->peer.rwnd; |
723 | 723 | ||
724 | /* Keep track of how many bytes are in flight over this transport. */ | 724 | /* Keep track of how many bytes are in flight over this transport. */ |
725 | transport->flight_size += datasize; | 725 | transport->flight_size += datasize; |
726 | 726 | ||
727 | /* Keep track of how many bytes are in flight to the receiver. */ | 727 | /* Keep track of how many bytes are in flight to the receiver. */ |
728 | asoc->outqueue.outstanding_bytes += datasize; | 728 | asoc->outqueue.outstanding_bytes += datasize; |
729 | 729 | ||
730 | /* Update our view of the receiver's rwnd. */ | 730 | /* Update our view of the receiver's rwnd. */ |
731 | if (datasize < rwnd) | 731 | if (datasize < rwnd) |
732 | rwnd -= datasize; | 732 | rwnd -= datasize; |
733 | else | 733 | else |
734 | rwnd = 0; | 734 | rwnd = 0; |
735 | 735 | ||
736 | asoc->peer.rwnd = rwnd; | 736 | asoc->peer.rwnd = rwnd; |
737 | /* Has been accepted for transmission. */ | 737 | /* Has been accepted for transmission. */ |
738 | if (!asoc->peer.prsctp_capable) | 738 | if (!asoc->peer.prsctp_capable) |
739 | chunk->msg->can_abandon = 0; | 739 | chunk->msg->can_abandon = 0; |
740 | sctp_chunk_assign_tsn(chunk); | 740 | sctp_chunk_assign_tsn(chunk); |
741 | sctp_chunk_assign_ssn(chunk); | 741 | sctp_chunk_assign_ssn(chunk); |
742 | } | 742 | } |
743 | 743 | ||
744 | static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, | 744 | static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, |
745 | struct sctp_chunk *chunk, | 745 | struct sctp_chunk *chunk, |
746 | u16 chunk_len) | 746 | u16 chunk_len) |
747 | { | 747 | { |
748 | size_t psize; | 748 | size_t psize; |
749 | size_t pmtu; | 749 | size_t pmtu; |
750 | int too_big; | 750 | int too_big; |
751 | sctp_xmit_t retval = SCTP_XMIT_OK; | 751 | sctp_xmit_t retval = SCTP_XMIT_OK; |
752 | 752 | ||
753 | psize = packet->size; | 753 | psize = packet->size; |
754 | pmtu = ((packet->transport->asoc) ? | 754 | pmtu = ((packet->transport->asoc) ? |
755 | (packet->transport->asoc->pathmtu) : | 755 | (packet->transport->asoc->pathmtu) : |
756 | (packet->transport->pathmtu)); | 756 | (packet->transport->pathmtu)); |
757 | 757 | ||
758 | too_big = (psize + chunk_len > pmtu); | 758 | too_big = (psize + chunk_len > pmtu); |
759 | 759 | ||
760 | /* Decide if we need to fragment or resubmit later. */ | 760 | /* Decide if we need to fragment or resubmit later. */ |
761 | if (too_big) { | 761 | if (too_big) { |
762 | /* It's OK to fragmet at IP level if any one of the following | 762 | /* It's OK to fragmet at IP level if any one of the following |
763 | * is true: | 763 | * is true: |
764 | * 1. The packet is empty (meaning this chunk is greater | 764 | * 1. The packet is empty (meaning this chunk is greater |
765 | * the MTU) | 765 | * the MTU) |
766 | * 2. The chunk we are adding is a control chunk | 766 | * 2. The chunk we are adding is a control chunk |
767 | * 3. The packet doesn't have any data in it yet and data | 767 | * 3. The packet doesn't have any data in it yet and data |
768 | * requires authentication. | 768 | * requires authentication. |
769 | */ | 769 | */ |
770 | if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk) || | 770 | if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk) || |
771 | (!packet->has_data && chunk->auth)) { | 771 | (!packet->has_data && chunk->auth)) { |
772 | /* We no longer do re-fragmentation. | 772 | /* We no longer do re-fragmentation. |
773 | * Just fragment at the IP layer, if we | 773 | * Just fragment at the IP layer, if we |
774 | * actually hit this condition | 774 | * actually hit this condition |
775 | */ | 775 | */ |
776 | packet->ipfragok = 1; | 776 | packet->ipfragok = 1; |
777 | } else { | 777 | } else { |
778 | retval = SCTP_XMIT_PMTU_FULL; | 778 | retval = SCTP_XMIT_PMTU_FULL; |
779 | } | 779 | } |
780 | } | 780 | } |
781 | 781 | ||
782 | return retval; | 782 | return retval; |
783 | } | 783 | } |
784 | 784 |