Commit f5460618405eec8c3300947a499011528a115acd

Authored by David S. Miller

Merge branch 'net-next' of git://git.kernel.org/pub/scm/linux/kernel/git/vxy/lksctp-dev

Add missing linux/vmalloc.h include to net/sctp/probe.c

Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 16 changed files Side-by-side Diff

include/net/sctp/sctp.h
... ... @@ -547,7 +547,7 @@
547 547 #define WORD_ROUND(s) (((s)+3)&~3)
548 548  
549 549 /* Make a new instance of type. */
550   -#define t_new(type, flags) (type *)kmalloc(sizeof(type), flags)
  550 +#define t_new(type, flags) (type *)kzalloc(sizeof(type), flags)
551 551  
552 552 /* Compare two timevals. */
553 553 #define tv_lt(s, t) \
include/net/sctp/sm.h
... ... @@ -437,7 +437,7 @@
437 437 */
438 438 if ((!sctp_test_T_bit(chunk) &&
439 439 (ntohl(chunk->sctp_hdr->vtag) == asoc->c.my_vtag)) ||
440   - (sctp_test_T_bit(chunk) &&
  440 + (sctp_test_T_bit(chunk) && asoc->c.peer_vtag &&
441 441 (ntohl(chunk->sctp_hdr->vtag) == asoc->c.peer_vtag))) {
442 442 return 1;
443 443 }
include/net/sctp/structs.h
... ... @@ -643,17 +643,15 @@
643 643 struct sctp_datamsg {
644 644 /* Chunks waiting to be submitted to lower layer. */
645 645 struct list_head chunks;
646   - /* Chunks that have been transmitted. */
647   - size_t msg_size;
648 646 /* Reference counting. */
649 647 atomic_t refcnt;
650 648 /* When is this message no longer interesting to the peer? */
651 649 unsigned long expires_at;
652 650 /* Did the message fail to send? */
653 651 int send_error;
654   - char send_failed;
655   - /* Control whether chunks from this message can be abandoned. */
656   - char can_abandon;
  652 + u8 send_failed:1,
  653 + can_abandon:1, /* can chunks from this message be abandoned? */
  654 + can_delay; /* should this message be Nagle delayed */
657 655 };
658 656  
659 657 struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *,
... ... @@ -757,7 +755,6 @@
757 755 #define SCTP_NEED_FRTX 0x1
758 756 #define SCTP_DONT_FRTX 0x2
759 757 __u16 rtt_in_progress:1, /* This chunk used for RTT calc? */
760   - resent:1, /* Has this chunk ever been resent. */
761 758 has_tsn:1, /* Does this chunk have a TSN yet? */
762 759 has_ssn:1, /* Does this chunk have a SSN yet? */
763 760 singleton:1, /* Only chunk in the packet? */
764 761  
... ... @@ -879,8 +876,31 @@
879 876  
880 877 /* Reference counting. */
881 878 atomic_t refcnt;
882   - int dead;
  879 + int dead:1,
  880 + /* RTO-Pending : A flag used to track if one of the DATA
  881 + * chunks sent to this address is currently being
  882 + * used to compute a RTT. If this flag is 0,
  883 + * the next DATA chunk sent to this destination
  884 + * should be used to compute a RTT and this flag
  885 + * should be set. Every time the RTT
  886 + * calculation completes (i.e. the DATA chunk
  887 + * is SACK'd) clear this flag.
  888 + */
  889 + rto_pending:1,
883 890  
  891 + /*
  892 + * hb_sent : a flag that signals that we have a pending
  893 + * heartbeat.
  894 + */
  895 + hb_sent:1,
  896 +
  897 + /* Is the Path MTU update pending on this transport */
  898 + pmtu_pending:1,
  899 +
  900 + /* Is this structure kfree()able? */
  901 + malloced:1;
  902 +
  903 +
884 904 /* This is the peer's IP address and port. */
885 905 union sctp_addr ipaddr;
886 906  
... ... @@ -909,22 +929,6 @@
909 929 /* SRTT : The current smoothed round trip time. */
910 930 __u32 srtt;
911 931  
912   - /* RTO-Pending : A flag used to track if one of the DATA
913   - * chunks sent to this address is currently being
914   - * used to compute a RTT. If this flag is 0,
915   - * the next DATA chunk sent to this destination
916   - * should be used to compute a RTT and this flag
917   - * should be set. Every time the RTT
918   - * calculation completes (i.e. the DATA chunk
919   - * is SACK'd) clear this flag.
920   - * hb_sent : a flag that signals that we have a pending heartbeat.
921   - */
922   - __u8 rto_pending;
923   - __u8 hb_sent;
924   -
925   - /* Flag to track the current fast recovery state */
926   - __u8 fast_recovery;
927   -
928 932 /*
929 933 * These are the congestion stats.
930 934 */
... ... @@ -944,9 +948,6 @@
944 948  
945 949 __u32 burst_limited; /* Holds old cwnd when max.burst is applied */
946 950  
947   - /* TSN marking the fast recovery exit point */
948   - __u32 fast_recovery_exit;
949   -
950 951 /* Destination */
951 952 struct dst_entry *dst;
952 953 /* Source address. */
... ... @@ -977,9 +978,6 @@
977 978 */
978 979 __u16 pathmaxrxt;
979 980  
980   - /* is the Path MTU update pending on this tranport */
981   - __u8 pmtu_pending;
982   -
983 981 /* PMTU : The current known path MTU. */
984 982 __u32 pathmtu;
985 983  
... ... @@ -1023,8 +1021,6 @@
1023 1021 /* This is the list of transports that have chunks to send. */
1024 1022 struct list_head send_ready;
1025 1023  
1026   - int malloced; /* Is this structure kfree()able? */
1027   -
1028 1024 /* State information saved for SFR_CACC algorithm. The key
1029 1025 * idea in SFR_CACC is to maintain state at the sender on a
1030 1026 * per-destination basis when a changeover happens.
... ... @@ -1066,7 +1062,7 @@
1066 1062 struct sctp_sock *);
1067 1063 void sctp_transport_pmtu(struct sctp_transport *);
1068 1064 void sctp_transport_free(struct sctp_transport *);
1069   -void sctp_transport_reset_timers(struct sctp_transport *, int);
  1065 +void sctp_transport_reset_timers(struct sctp_transport *);
1070 1066 void sctp_transport_hold(struct sctp_transport *);
1071 1067 void sctp_transport_put(struct sctp_transport *);
1072 1068 void sctp_transport_update_rto(struct sctp_transport *, __u32);
... ... @@ -1719,6 +1715,12 @@
1719 1715  
1720 1716 /* Highest TSN that is acknowledged by incoming SACKs. */
1721 1717 __u32 highest_sacked;
  1718 +
  1719 + /* TSN marking the fast recovery exit point */
  1720 + __u32 fast_recovery_exit;
  1721 +
  1722 + /* Flag to track the current fast recovery state */
  1723 + __u8 fast_recovery;
1722 1724  
1723 1725 /* The number of unacknowledged data chunks. Reported through
1724 1726 * the SCTP_STATUS sockopt.
... ... @@ -37,6 +37,18 @@
37 37  
38 38 if IP_SCTP
39 39  
  40 +config NET_SCTPPROBE
  41 + tristate "SCTP: Association probing"
  42 + depends on PROC_FS && KPROBES
  43 + ---help---
  44 + This module allows for capturing the changes to SCTP association
  45 + state in response to incoming packets. It is used for debugging
  46 + SCTP congestion control algorithms. If you don't understand
  47 + what was just said, you don't need it: say N.
  48 +
  49 + To compile this code as a module, choose M here: the
  50 + module will be called sctp_probe.
  51 +
40 52 config SCTP_DBG_MSG
41 53 bool "SCTP: Debug messages"
42 54 help
... ... @@ -3,6 +3,7 @@
3 3 #
4 4  
5 5 obj-$(CONFIG_IP_SCTP) += sctp.o
  6 +obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o
6 7  
7 8 sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
8 9 protocol.o endpointola.o associola.o \
... ... @@ -10,6 +11,8 @@
10 11 inqueue.o outqueue.o ulpqueue.o command.o \
11 12 tsnmap.o bind_addr.o socket.o primitive.o \
12 13 output.o input.o debug.o ssnmap.o auth.o
  14 +
  15 +sctp_probe-y := probe.o
13 16  
14 17 sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
15 18 sctp-$(CONFIG_PROC_FS) += proc.o
net/sctp/associola.c
... ... @@ -87,9 +87,6 @@
87 87 /* Retrieve the SCTP per socket area. */
88 88 sp = sctp_sk((struct sock *)sk);
89 89  
90   - /* Init all variables to a known value. */
91   - memset(asoc, 0, sizeof(struct sctp_association));
92   -
93 90 /* Discarding const is appropriate here. */
94 91 asoc->ep = (struct sctp_endpoint *)ep;
95 92 sctp_endpoint_hold(asoc->ep);
... ... @@ -762,7 +759,8 @@
762 759 asoc->peer.retran_path = peer;
763 760 }
764 761  
765   - if (asoc->peer.active_path == asoc->peer.retran_path) {
  762 + if (asoc->peer.active_path == asoc->peer.retran_path &&
  763 + peer->state != SCTP_UNCONFIRMED) {
766 764 asoc->peer.retran_path = peer;
767 765 }
768 766  
769 767  
... ... @@ -1320,12 +1318,13 @@
1320 1318 /* Keep track of the next transport in case
1321 1319 * we don't find any active transport.
1322 1320 */
1323   - if (!next)
  1321 + if (t->state != SCTP_UNCONFIRMED && !next)
1324 1322 next = t;
1325 1323 }
1326 1324 }
1327 1325  
1328   - asoc->peer.retran_path = t;
  1326 + if (t)
  1327 + asoc->peer.retran_path = t;
1329 1328  
1330 1329 SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association"
1331 1330 " %p addr: ",
... ... @@ -1485,7 +1484,7 @@
1485 1484 if (asoc->rwnd >= len) {
1486 1485 asoc->rwnd -= len;
1487 1486 if (over) {
1488   - asoc->rwnd_press = asoc->rwnd;
  1487 + asoc->rwnd_press += asoc->rwnd;
1489 1488 asoc->rwnd = 0;
1490 1489 }
1491 1490 } else {
... ... @@ -58,9 +58,9 @@
58 58 msg->send_failed = 0;
59 59 msg->send_error = 0;
60 60 msg->can_abandon = 0;
  61 + msg->can_delay = 1;
61 62 msg->expires_at = 0;
62 63 INIT_LIST_HEAD(&msg->chunks);
63   - msg->msg_size = 0;
64 64 }
65 65  
66 66 /* Allocate and initialize datamsg. */
... ... @@ -157,7 +157,6 @@
157 157 {
158 158 sctp_datamsg_hold(msg);
159 159 chunk->msg = msg;
160   - msg->msg_size += chunk->skb->len;
161 160 }
162 161  
163 162  
... ... @@ -247,6 +246,7 @@
247 246 if (msg_len >= first_len) {
248 247 msg_len -= first_len;
249 248 whole = 1;
  249 + msg->can_delay = 0;
250 250 }
251 251  
252 252 /* How many full sized? How many bytes leftover? */
net/sctp/endpointola.c
... ... @@ -70,8 +70,6 @@
70 70 struct sctp_shared_key *null_key;
71 71 int err;
72 72  
73   - memset(ep, 0, sizeof(struct sctp_endpoint));
74   -
75 73 ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp);
76 74 if (!ep->digest)
77 75 return NULL;
... ... @@ -429,24 +429,17 @@
429 429 list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
430 430 list_del_init(&chunk->list);
431 431 if (sctp_chunk_is_data(chunk)) {
  432 + /* 6.3.1 C4) When data is in flight and when allowed
  433 + * by rule C5, a new RTT measurement MUST be made each
  434 + * round trip. Furthermore, new RTT measurements
  435 + * SHOULD be made no more than once per round-trip
  436 + * for a given destination transport address.
  437 + */
432 438  
433   - if (!chunk->resent) {
434   -
435   - /* 6.3.1 C4) When data is in flight and when allowed
436   - * by rule C5, a new RTT measurement MUST be made each
437   - * round trip. Furthermore, new RTT measurements
438   - * SHOULD be made no more than once per round-trip
439   - * for a given destination transport address.
440   - */
441   -
442   - if (!tp->rto_pending) {
443   - chunk->rtt_in_progress = 1;
444   - tp->rto_pending = 1;
445   - }
  439 + if (!tp->rto_pending) {
  440 + chunk->rtt_in_progress = 1;
  441 + tp->rto_pending = 1;
446 442 }
447   -
448   - chunk->resent = 1;
449   -
450 443 has_data = 1;
451 444 }
452 445  
... ... @@ -681,7 +674,7 @@
681 674 * Don't delay large message writes that may have been
682 675 * fragmented into small pieces.
683 676 */
684   - if ((len < max) && (chunk->msg->msg_size < max)) {
  677 + if ((len < max) && chunk->msg->can_delay) {
685 678 retval = SCTP_XMIT_NAGLE_DELAY;
686 679 goto finish;
687 680 }
... ... @@ -62,7 +62,7 @@
62 62 struct list_head *transmitted_queue,
63 63 struct sctp_transport *transport,
64 64 struct sctp_sackhdr *sack,
65   - __u32 highest_new_tsn);
  65 + __u32 *highest_new_tsn);
66 66  
67 67 static void sctp_mark_missing(struct sctp_outq *q,
68 68 struct list_head *transmitted_queue,
... ... @@ -308,7 +308,7 @@
308 308 /* If it is data, queue it up, otherwise, send it
309 309 * immediately.
310 310 */
311   - if (SCTP_CID_DATA == chunk->chunk_hdr->type) {
  311 + if (sctp_chunk_is_data(chunk)) {
312 312 /* Is it OK to queue data chunks? */
313 313 /* From 9. Termination of Association
314 314 *
315 315  
... ... @@ -598,11 +598,23 @@
598 598 if (fast_rtx && !chunk->fast_retransmit)
599 599 continue;
600 600  
  601 +redo:
601 602 /* Attempt to append this chunk to the packet. */
602 603 status = sctp_packet_append_chunk(pkt, chunk);
603 604  
604 605 switch (status) {
605 606 case SCTP_XMIT_PMTU_FULL:
  607 + if (!pkt->has_data && !pkt->has_cookie_echo) {
  608 + /* If this packet did not contain DATA then
  609 + * retransmission did not happen, so do it
  610 + * again. We'll ignore the error here since
  611 + * control chunks are already freed so there
  612 + * is nothing we can do.
  613 + */
  614 + sctp_packet_transmit(pkt);
  615 + goto redo;
  616 + }
  617 +
606 618 /* Send this packet. */
607 619 error = sctp_packet_transmit(pkt);
608 620  
... ... @@ -647,14 +659,6 @@
647 659 if (chunk->fast_retransmit == SCTP_NEED_FRTX)
648 660 chunk->fast_retransmit = SCTP_DONT_FRTX;
649 661  
650   - /* Force start T3-rtx timer when fast retransmitting
651   - * the earliest outstanding TSN
652   - */
653   - if (!timer && fast_rtx &&
654   - ntohl(chunk->subh.data_hdr->tsn) ==
655   - asoc->ctsn_ack_point + 1)
656   - timer = 2;
657   -
658 662 q->empty = 0;
659 663 break;
660 664 }
... ... @@ -854,6 +858,12 @@
854 858 if (status != SCTP_XMIT_OK) {
855 859 /* put the chunk back */
856 860 list_add(&chunk->list, &q->control_chunk_list);
  861 + } else if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) {
  862 + /* PR-SCTP C5) If a FORWARD TSN is sent, the
  863 + * sender MUST assure that at least one T3-rtx
  864 + * timer is running.
  865 + */
  866 + sctp_transport_reset_timers(transport);
857 867 }
858 868 break;
859 869  
... ... @@ -906,8 +916,7 @@
906 916 rtx_timeout, &start_timer);
907 917  
908 918 if (start_timer)
909   - sctp_transport_reset_timers(transport,
910   - start_timer-1);
  919 + sctp_transport_reset_timers(transport);
911 920  
912 921 /* This can happen on COOKIE-ECHO resend. Only
913 922 * one chunk can get bundled with a COOKIE-ECHO.
... ... @@ -1040,7 +1049,7 @@
1040 1049 list_add_tail(&chunk->transmitted_list,
1041 1050 &transport->transmitted);
1042 1051  
1043   - sctp_transport_reset_timers(transport, 0);
  1052 + sctp_transport_reset_timers(transport);
1044 1053  
1045 1054 q->empty = 0;
1046 1055  
... ... @@ -1100,32 +1109,6 @@
1100 1109 assoc->unack_data = unack_data;
1101 1110 }
1102 1111  
1103   -/* Return the highest new tsn that is acknowledged by the given SACK chunk. */
1104   -static __u32 sctp_highest_new_tsn(struct sctp_sackhdr *sack,
1105   - struct sctp_association *asoc)
1106   -{
1107   - struct sctp_transport *transport;
1108   - struct sctp_chunk *chunk;
1109   - __u32 highest_new_tsn, tsn;
1110   - struct list_head *transport_list = &asoc->peer.transport_addr_list;
1111   -
1112   - highest_new_tsn = ntohl(sack->cum_tsn_ack);
1113   -
1114   - list_for_each_entry(transport, transport_list, transports) {
1115   - list_for_each_entry(chunk, &transport->transmitted,
1116   - transmitted_list) {
1117   - tsn = ntohl(chunk->subh.data_hdr->tsn);
1118   -
1119   - if (!chunk->tsn_gap_acked &&
1120   - TSN_lt(highest_new_tsn, tsn) &&
1121   - sctp_acked(sack, tsn))
1122   - highest_new_tsn = tsn;
1123   - }
1124   - }
1125   -
1126   - return highest_new_tsn;
1127   -}
1128   -
1129 1112 /* This is where we REALLY process a SACK.
1130 1113 *
1131 1114 * Process the SACK against the outqueue. Mostly, this just frees
... ... @@ -1145,6 +1128,7 @@
1145 1128 struct sctp_transport *primary = asoc->peer.primary_path;
1146 1129 int count_of_newacks = 0;
1147 1130 int gap_ack_blocks;
  1131 + u8 accum_moved = 0;
1148 1132  
1149 1133 /* Grab the association's destination address list. */
1150 1134 transport_list = &asoc->peer.transport_addr_list;
1151 1135  
1152 1136  
1153 1137  
... ... @@ -1193,18 +1177,15 @@
1193 1177 if (gap_ack_blocks)
1194 1178 highest_tsn += ntohs(frags[gap_ack_blocks - 1].gab.end);
1195 1179  
1196   - if (TSN_lt(asoc->highest_sacked, highest_tsn)) {
1197   - highest_new_tsn = highest_tsn;
  1180 + if (TSN_lt(asoc->highest_sacked, highest_tsn))
1198 1181 asoc->highest_sacked = highest_tsn;
1199   - } else {
1200   - highest_new_tsn = sctp_highest_new_tsn(sack, asoc);
1201   - }
1202 1182  
  1183 + highest_new_tsn = sack_ctsn;
1203 1184  
1204 1185 /* Run through the retransmit queue. Credit bytes received
1205 1186 * and free those chunks that we can.
1206 1187 */
1207   - sctp_check_transmitted(q, &q->retransmit, NULL, sack, highest_new_tsn);
  1188 + sctp_check_transmitted(q, &q->retransmit, NULL, sack, &highest_new_tsn);
1208 1189  
1209 1190 /* Run through the transmitted queue.
1210 1191 * Credit bytes received and free those chunks which we can.
... ... @@ -1213,7 +1194,7 @@
1213 1194 */
1214 1195 list_for_each_entry(transport, transport_list, transports) {
1215 1196 sctp_check_transmitted(q, &transport->transmitted,
1216   - transport, sack, highest_new_tsn);
  1197 + transport, sack, &highest_new_tsn);
1217 1198 /*
1218 1199 * SFR-CACC algorithm:
1219 1200 * C) Let count_of_newacks be the number of
1220 1201  
1221 1202  
... ... @@ -1223,16 +1204,22 @@
1223 1204 count_of_newacks ++;
1224 1205 }
1225 1206  
  1207 + /* Move the Cumulative TSN Ack Point if appropriate. */
  1208 + if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn)) {
  1209 + asoc->ctsn_ack_point = sack_ctsn;
  1210 + accum_moved = 1;
  1211 + }
  1212 +
1226 1213 if (gap_ack_blocks) {
  1214 +
  1215 + if (asoc->fast_recovery && accum_moved)
  1216 + highest_new_tsn = highest_tsn;
  1217 +
1227 1218 list_for_each_entry(transport, transport_list, transports)
1228 1219 sctp_mark_missing(q, &transport->transmitted, transport,
1229 1220 highest_new_tsn, count_of_newacks);
1230 1221 }
1231 1222  
1232   - /* Move the Cumulative TSN Ack Point if appropriate. */
1233   - if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn))
1234   - asoc->ctsn_ack_point = sack_ctsn;
1235   -
1236 1223 /* Update unack_data field in the assoc. */
1237 1224 sctp_sack_update_unack_data(asoc, sack);
1238 1225  
... ... @@ -1315,7 +1302,7 @@
1315 1302 struct list_head *transmitted_queue,
1316 1303 struct sctp_transport *transport,
1317 1304 struct sctp_sackhdr *sack,
1318   - __u32 highest_new_tsn_in_sack)
  1305 + __u32 *highest_new_tsn_in_sack)
1319 1306 {
1320 1307 struct list_head *lchunk;
1321 1308 struct sctp_chunk *tchunk;
... ... @@ -1387,7 +1374,6 @@
1387 1374 * instance).
1388 1375 */
1389 1376 if (!tchunk->tsn_gap_acked &&
1390   - !tchunk->resent &&
1391 1377 tchunk->rtt_in_progress) {
1392 1378 tchunk->rtt_in_progress = 0;
1393 1379 rtt = jiffies - tchunk->sent_at;
... ... @@ -1404,6 +1390,7 @@
1404 1390 */
1405 1391 if (!tchunk->tsn_gap_acked) {
1406 1392 tchunk->tsn_gap_acked = 1;
  1393 + *highest_new_tsn_in_sack = tsn;
1407 1394 bytes_acked += sctp_data_size(tchunk);
1408 1395 if (!tchunk->transport)
1409 1396 migrate_bytes += sctp_data_size(tchunk);
... ... @@ -1677,7 +1664,8 @@
1677 1664 struct sctp_chunk *chunk;
1678 1665 __u32 tsn;
1679 1666 char do_fast_retransmit = 0;
1680   - struct sctp_transport *primary = q->asoc->peer.primary_path;
  1667 + struct sctp_association *asoc = q->asoc;
  1668 + struct sctp_transport *primary = asoc->peer.primary_path;
1681 1669  
1682 1670 list_for_each_entry(chunk, transmitted_queue, transmitted_list) {
1683 1671  
  1 +/*
  2 + * sctp_probe - Observe the SCTP flow with kprobes.
  3 + *
  4 + * The idea for this came from Werner Almesberger's umlsim
  5 + * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org>
  6 + *
  7 + * Modified for SCTP from Stephen Hemminger's code
  8 + * Copyright (C) 2010, Wei Yongjun <yjwei@cn.fujitsu.com>
  9 + *
  10 + * This program is free software; you can redistribute it and/or modify
  11 + * it under the terms of the GNU General Public License as published by
  12 + * the Free Software Foundation; either version 2 of the License, or
  13 + * (at your option) any later version.
  14 + *
  15 + * This program is distributed in the hope that it will be useful,
  16 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  18 + * GNU General Public License for more details.
  19 + *
  20 + * You should have received a copy of the GNU General Public License
  21 + * along with this program; if not, write to the Free Software
  22 + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  23 + */
  24 +
  25 +#include <linux/kernel.h>
  26 +#include <linux/kprobes.h>
  27 +#include <linux/socket.h>
  28 +#include <linux/sctp.h>
  29 +#include <linux/proc_fs.h>
  30 +#include <linux/vmalloc.h>
  31 +#include <linux/module.h>
  32 +#include <linux/kfifo.h>
  33 +#include <linux/time.h>
  34 +#include <net/net_namespace.h>
  35 +
  36 +#include <net/sctp/sctp.h>
  37 +#include <net/sctp/sm.h>
  38 +
  39 +MODULE_AUTHOR("Wei Yongjun <yjwei@cn.fujitsu.com>");
  40 +MODULE_DESCRIPTION("SCTP snooper");
  41 +MODULE_LICENSE("GPL");
  42 +
  43 +static int port __read_mostly = 0;
  44 +MODULE_PARM_DESC(port, "Port to match (0=all)");
  45 +module_param(port, int, 0);
  46 +
  47 +static int bufsize __read_mostly = 64 * 1024;
  48 +MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
  49 +module_param(bufsize, int, 0);
  50 +
  51 +static int full __read_mostly = 1;
  52 +MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)");
  53 +module_param(full, int, 0);
  54 +
  55 +static const char procname[] = "sctpprobe";
  56 +
  57 +static struct {
  58 + struct kfifo fifo;
  59 + spinlock_t lock;
  60 + wait_queue_head_t wait;
  61 + struct timespec tstart;
  62 +} sctpw;
  63 +
  64 +static void printl(const char *fmt, ...)
  65 +{
  66 + va_list args;
  67 + int len;
  68 + char tbuf[256];
  69 +
  70 + va_start(args, fmt);
  71 + len = vscnprintf(tbuf, sizeof(tbuf), fmt, args);
  72 + va_end(args);
  73 +
  74 + kfifo_in_locked(&sctpw.fifo, tbuf, len, &sctpw.lock);
  75 + wake_up(&sctpw.wait);
  76 +}
  77 +
  78 +static int sctpprobe_open(struct inode *inode, struct file *file)
  79 +{
  80 + kfifo_reset(&sctpw.fifo);
  81 + getnstimeofday(&sctpw.tstart);
  82 +
  83 + return 0;
  84 +}
  85 +
  86 +static ssize_t sctpprobe_read(struct file *file, char __user *buf,
  87 + size_t len, loff_t *ppos)
  88 +{
  89 + int error = 0, cnt = 0;
  90 + unsigned char *tbuf;
  91 +
  92 + if (!buf)
  93 + return -EINVAL;
  94 +
  95 + if (len == 0)
  96 + return 0;
  97 +
  98 + tbuf = vmalloc(len);
  99 + if (!tbuf)
  100 + return -ENOMEM;
  101 +
  102 + error = wait_event_interruptible(sctpw.wait,
  103 + kfifo_len(&sctpw.fifo) != 0);
  104 + if (error)
  105 + goto out_free;
  106 +
  107 + cnt = kfifo_out_locked(&sctpw.fifo, tbuf, len, &sctpw.lock);
  108 + error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
  109 +
  110 +out_free:
  111 + vfree(tbuf);
  112 +
  113 + return error ? error : cnt;
  114 +}
  115 +
  116 +static const struct file_operations sctpprobe_fops = {
  117 + .owner = THIS_MODULE,
  118 + .open = sctpprobe_open,
  119 + .read = sctpprobe_read,
  120 +};
  121 +
  122 +sctp_disposition_t jsctp_sf_eat_sack(const struct sctp_endpoint *ep,
  123 + const struct sctp_association *asoc,
  124 + const sctp_subtype_t type,
  125 + void *arg,
  126 + sctp_cmd_seq_t *commands)
  127 +{
  128 + struct sctp_transport *sp;
  129 + static __u32 lcwnd = 0;
  130 + struct timespec now;
  131 +
  132 + sp = asoc->peer.primary_path;
  133 +
  134 + if ((full || sp->cwnd != lcwnd) &&
  135 + (!port || asoc->peer.port == port ||
  136 + ep->base.bind_addr.port == port)) {
  137 + lcwnd = sp->cwnd;
  138 +
  139 + getnstimeofday(&now);
  140 + now = timespec_sub(now, sctpw.tstart);
  141 +
  142 + printl("%lu.%06lu ", (unsigned long) now.tv_sec,
  143 + (unsigned long) now.tv_nsec / NSEC_PER_USEC);
  144 +
  145 + printl("%p %5d %5d %5d %8d %5d ", asoc,
  146 + ep->base.bind_addr.port, asoc->peer.port,
  147 + asoc->pathmtu, asoc->peer.rwnd, asoc->unack_data);
  148 +
  149 + list_for_each_entry(sp, &asoc->peer.transport_addr_list,
  150 + transports) {
  151 + if (sp == asoc->peer.primary_path)
  152 + printl("*");
  153 +
  154 + if (sp->ipaddr.sa.sa_family == AF_INET)
  155 + printl("%pI4 ", &sp->ipaddr.v4.sin_addr);
  156 + else
  157 + printl("%pI6 ", &sp->ipaddr.v6.sin6_addr);
  158 +
  159 + printl("%2u %8u %8u %8u %8u %8u ",
  160 + sp->state, sp->cwnd, sp->ssthresh,
  161 + sp->flight_size, sp->partial_bytes_acked,
  162 + sp->pathmtu);
  163 + }
  164 + printl("\n");
  165 + }
  166 +
  167 + jprobe_return();
  168 + return 0;
  169 +}
  170 +
  171 +static struct jprobe sctp_recv_probe = {
  172 + .kp = {
  173 + .symbol_name = "sctp_sf_eat_sack_6_2",
  174 + },
  175 + .entry = jsctp_sf_eat_sack,
  176 +};
  177 +
  178 +static __init int sctpprobe_init(void)
  179 +{
  180 + int ret = -ENOMEM;
  181 +
  182 + init_waitqueue_head(&sctpw.wait);
  183 + spin_lock_init(&sctpw.lock);
  184 + if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL))
  185 + return ret;
  186 +
  187 + if (!proc_net_fops_create(&init_net, procname, S_IRUSR,
  188 + &sctpprobe_fops))
  189 + goto free_kfifo;
  190 +
  191 + ret = register_jprobe(&sctp_recv_probe);
  192 + if (ret)
  193 + goto remove_proc;
  194 +
  195 + pr_info("SCTP probe registered (port=%d)\n", port);
  196 +
  197 + return 0;
  198 +
  199 +remove_proc:
  200 + proc_net_remove(&init_net, procname);
  201 +free_kfifo:
  202 + kfifo_free(&sctpw.fifo);
  203 + return ret;
  204 +}
  205 +
  206 +static __exit void sctpprobe_exit(void)
  207 +{
  208 + kfifo_free(&sctpw.fifo);
  209 + proc_net_remove(&init_net, procname);
  210 + unregister_jprobe(&sctp_recv_probe);
  211 +}
  212 +
  213 +module_init(sctpprobe_init);
  214 +module_exit(sctpprobe_exit);
... ... @@ -474,13 +474,17 @@
474 474  
475 475 memset(&fl, 0x0, sizeof(struct flowi));
476 476 fl.fl4_dst = daddr->v4.sin_addr.s_addr;
  477 + fl.fl_ip_dport = daddr->v4.sin_port;
477 478 fl.proto = IPPROTO_SCTP;
478 479 if (asoc) {
479 480 fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk);
480 481 fl.oif = asoc->base.sk->sk_bound_dev_if;
  482 + fl.fl_ip_sport = htons(asoc->base.bind_addr.port);
481 483 }
482   - if (saddr)
  484 + if (saddr) {
483 485 fl.fl4_src = saddr->v4.sin_addr.s_addr;
  486 + fl.fl_ip_sport = saddr->v4.sin_port;
  487 + }
484 488  
485 489 SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ",
486 490 __func__, &fl.fl4_dst, &fl.fl4_src);
... ... @@ -528,6 +532,7 @@
528 532 if ((laddr->state == SCTP_ADDR_SRC) &&
529 533 (AF_INET == laddr->a.sa.sa_family)) {
530 534 fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
  535 + fl.fl_ip_sport = laddr->a.v4.sin_port;
531 536 if (!ip_route_output_key(&init_net, &rt, &fl)) {
532 537 dst = &rt->u.dst;
533 538 goto out_unlock;
net/sctp/sm_make_chunk.c
... ... @@ -445,10 +445,17 @@
445 445 if (!retval)
446 446 goto nomem_chunk;
447 447  
448   - /* Per the advice in RFC 2960 6.4, send this reply to
449   - * the source of the INIT packet.
  448 + /* RFC 2960 6.4 Multi-homed SCTP Endpoints
  449 + *
  450 + * An endpoint SHOULD transmit reply chunks (e.g., SACK,
  451 + * HEARTBEAT ACK, etc.) to the same destination transport
  452 + * address from which it received the DATA or control chunk
  453 + * to which it is replying.
  454 + *
  455 + * [INIT ACK back to where the INIT came from.]
450 456 */
451 457 retval->transport = chunk->transport;
  458 +
452 459 retval->subh.init_hdr =
453 460 sctp_addto_chunk(retval, sizeof(initack), &initack);
454 461 retval->param_hdr.v = sctp_addto_chunk(retval, addrs_len, addrs.v);
... ... @@ -487,18 +494,6 @@
487 494 /* We need to remove the const qualifier at this point. */
488 495 retval->asoc = (struct sctp_association *) asoc;
489 496  
490   - /* RFC 2960 6.4 Multi-homed SCTP Endpoints
491   - *
492   - * An endpoint SHOULD transmit reply chunks (e.g., SACK,
493   - * HEARTBEAT ACK, * etc.) to the same destination transport
494   - * address from which it received the DATA or control chunk
495   - * to which it is replying.
496   - *
497   - * [INIT ACK back to where the INIT came from.]
498   - */
499   - if (chunk)
500   - retval->transport = chunk->transport;
501   -
502 497 nomem_chunk:
503 498 kfree(cookie);
504 499 nomem_cookie:
... ... @@ -1254,7 +1249,6 @@
1254 1249 INIT_LIST_HEAD(&retval->list);
1255 1250 retval->skb = skb;
1256 1251 retval->asoc = (struct sctp_association *)asoc;
1257   - retval->resent = 0;
1258 1252 retval->has_tsn = 0;
1259 1253 retval->has_ssn = 0;
1260 1254 retval->rtt_in_progress = 0;
net/sctp/sm_sideeffect.c
... ... @@ -697,11 +697,15 @@
697 697 {
698 698 struct sctp_transport *t;
699 699  
700   - t = sctp_assoc_choose_alter_transport(asoc,
  700 + if (chunk->transport)
  701 + t = chunk->transport;
  702 + else {
  703 + t = sctp_assoc_choose_alter_transport(asoc,
701 704 asoc->shutdown_last_sent_to);
  705 + chunk->transport = t;
  706 + }
702 707 asoc->shutdown_last_sent_to = t;
703 708 asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto;
704   - chunk->transport = t;
705 709 }
706 710  
707 711 /* Helper function to change the state of an association. */
... ... @@ -4384,7 +4384,7 @@
4384 4384 transports) {
4385 4385 memcpy(&temp, &from->ipaddr, sizeof(temp));
4386 4386 sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
4387   - addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len;
  4387 + addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
4388 4388 if (space_left < addrlen)
4389 4389 return -ENOMEM;
4390 4390 if (copy_to_user(to, &temp, addrlen))
net/sctp/transport.c
... ... @@ -64,9 +64,6 @@
64 64 /* Copy in the address. */
65 65 peer->ipaddr = *addr;
66 66 peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);
67   - peer->asoc = NULL;
68   -
69   - peer->dst = NULL;
70 67 memset(&peer->saddr, 0, sizeof(union sctp_addr));
71 68  
72 69 /* From 6.3.1 RTO Calculation:
73 70  
74 71  
75 72  
76 73  
... ... @@ -76,34 +73,21 @@
76 73 * parameter 'RTO.Initial'.
77 74 */
78 75 peer->rto = msecs_to_jiffies(sctp_rto_initial);
79   - peer->rtt = 0;
80   - peer->rttvar = 0;
81   - peer->srtt = 0;
82   - peer->rto_pending = 0;
83   - peer->hb_sent = 0;
84   - peer->fast_recovery = 0;
85 76  
86 77 peer->last_time_heard = jiffies;
87 78 peer->last_time_ecne_reduced = jiffies;
88 79  
89   - peer->init_sent_count = 0;
90   -
91 80 peer->param_flags = SPP_HB_DISABLE |
92 81 SPP_PMTUD_ENABLE |
93 82 SPP_SACKDELAY_ENABLE;
94   - peer->hbinterval = 0;
95 83  
96 84 /* Initialize the default path max_retrans. */
97 85 peer->pathmaxrxt = sctp_max_retrans_path;
98   - peer->error_count = 0;
99 86  
100 87 INIT_LIST_HEAD(&peer->transmitted);
101 88 INIT_LIST_HEAD(&peer->send_ready);
102 89 INIT_LIST_HEAD(&peer->transports);
103 90  
104   - peer->T3_rtx_timer.expires = 0;
105   - peer->hb_timer.expires = 0;
106   -
107 91 setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event,
108 92 (unsigned long)peer);
109 93 setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event,
110 94  
... ... @@ -113,16 +97,7 @@
113 97 get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));
114 98  
115 99 atomic_set(&peer->refcnt, 1);
116   - peer->dead = 0;
117 100  
118   - peer->malloced = 0;
119   -
120   - /* Initialize the state information for SFR-CACC */
121   - peer->cacc.changeover_active = 0;
122   - peer->cacc.cycling_changeover = 0;
123   - peer->cacc.next_tsn_at_change = 0;
124   - peer->cacc.cacc_saw_newack = 0;
125   -
126 101 return peer;
127 102 }
128 103  
... ... @@ -195,7 +170,7 @@
195 170 /* Start T3_rtx timer if it is not already running and update the heartbeat
196 171 * timer. This routine is called every time a DATA chunk is sent.
197 172 */
198   -void sctp_transport_reset_timers(struct sctp_transport *transport, int force)
  173 +void sctp_transport_reset_timers(struct sctp_transport *transport)
199 174 {
200 175 /* RFC 2960 6.3.2 Retransmission Timer Rules
201 176 *
... ... @@ -205,7 +180,7 @@
205 180 * address.
206 181 */
207 182  
208   - if (force || !timer_pending(&transport->T3_rtx_timer))
  183 + if (!timer_pending(&transport->T3_rtx_timer))
209 184 if (!mod_timer(&transport->T3_rtx_timer,
210 185 jiffies + transport->rto))
211 186 sctp_transport_hold(transport);
212 187  
... ... @@ -403,15 +378,16 @@
403 378 void sctp_transport_raise_cwnd(struct sctp_transport *transport,
404 379 __u32 sack_ctsn, __u32 bytes_acked)
405 380 {
  381 + struct sctp_association *asoc = transport->asoc;
406 382 __u32 cwnd, ssthresh, flight_size, pba, pmtu;
407 383  
408 384 cwnd = transport->cwnd;
409 385 flight_size = transport->flight_size;
410 386  
411 387 /* See if we need to exit Fast Recovery first */
412   - if (transport->fast_recovery &&
413   - TSN_lte(transport->fast_recovery_exit, sack_ctsn))
414   - transport->fast_recovery = 0;
  388 + if (asoc->fast_recovery &&
  389 + TSN_lte(asoc->fast_recovery_exit, sack_ctsn))
  390 + asoc->fast_recovery = 0;
415 391  
416 392 /* The appropriate cwnd increase algorithm is performed if, and only
417 393 * if the cumulative TSN would advance and the congestion window is
... ... @@ -440,7 +416,7 @@
440 416 * 2) the destination's path MTU. This upper bound protects
441 417 * against the ACK-Splitting attack outlined in [SAVAGE99].
442 418 */
443   - if (transport->fast_recovery)
  419 + if (asoc->fast_recovery)
444 420 return;
445 421  
446 422 if (bytes_acked > pmtu)
... ... @@ -491,6 +467,8 @@
491 467 void sctp_transport_lower_cwnd(struct sctp_transport *transport,
492 468 sctp_lower_cwnd_t reason)
493 469 {
  470 + struct sctp_association *asoc = transport->asoc;
  471 +
494 472 switch (reason) {
495 473 case SCTP_LOWER_CWND_T3_RTX:
496 474 /* RFC 2960 Section 7.2.3, sctpimpguide
497 475  
... ... @@ -501,11 +479,11 @@
501 479 * partial_bytes_acked = 0
502 480 */
503 481 transport->ssthresh = max(transport->cwnd/2,
504   - 4*transport->asoc->pathmtu);
505   - transport->cwnd = transport->asoc->pathmtu;
  482 + 4*asoc->pathmtu);
  483 + transport->cwnd = asoc->pathmtu;
506 484  
507   - /* T3-rtx also clears fast recovery on the transport */
508   - transport->fast_recovery = 0;
  485 + /* T3-rtx also clears fast recovery */
  486 + asoc->fast_recovery = 0;
509 487 break;
510 488  
511 489 case SCTP_LOWER_CWND_FAST_RTX:
512 490  
513 491  
... ... @@ -521,15 +499,15 @@
521 499 * cwnd = ssthresh
522 500 * partial_bytes_acked = 0
523 501 */
524   - if (transport->fast_recovery)
  502 + if (asoc->fast_recovery)
525 503 return;
526 504  
527 505 /* Mark Fast recovery */
528   - transport->fast_recovery = 1;
529   - transport->fast_recovery_exit = transport->asoc->next_tsn - 1;
  506 + asoc->fast_recovery = 1;
  507 + asoc->fast_recovery_exit = asoc->next_tsn - 1;
530 508  
531 509 transport->ssthresh = max(transport->cwnd/2,
532   - 4*transport->asoc->pathmtu);
  510 + 4*asoc->pathmtu);
533 511 transport->cwnd = transport->ssthresh;
534 512 break;
535 513  
... ... @@ -549,7 +527,7 @@
549 527 if (time_after(jiffies, transport->last_time_ecne_reduced +
550 528 transport->rtt)) {
551 529 transport->ssthresh = max(transport->cwnd/2,
552   - 4*transport->asoc->pathmtu);
  530 + 4*asoc->pathmtu);
553 531 transport->cwnd = transport->ssthresh;
554 532 transport->last_time_ecne_reduced = jiffies;
555 533 }
... ... @@ -565,7 +543,7 @@
565 543 * interval.
566 544 */
567 545 transport->cwnd = max(transport->cwnd/2,
568   - 4*transport->asoc->pathmtu);
  546 + 4*asoc->pathmtu);
569 547 break;
570 548 }
571 549  
... ... @@ -650,7 +628,6 @@
650 628 t->error_count = 0;
651 629 t->rto_pending = 0;
652 630 t->hb_sent = 0;
653   - t->fast_recovery = 0;
654 631  
655 632 /* Initialize the state information for SFR-CACC */
656 633 t->cacc.changeover_active = 0;