Commit a7868ea68d29eb2c037952aeb3b549cf05749a18
Committed by
David S. Miller
1 parent
b87d8561d8
Exists in
master
and in
4 other branches
[TCP]: Add H-TCP congestion control module.
H-TCP is a congestion control algorithm developed at the Hamilton Institute, by Douglas Leith and Robert Shorten. It extends the standard Reno algorithm with mode switching and is thus a relatively simple modification. H-TCP is defined in a layered manner as it is still a research platform. The basic form includes the modification of beta according to the ratio of maxRTT to min RTT and the alpha=2*factor*(1-beta) relation, where factor is dependent on the time since last congestion. The other layers improve convergence by adding appropriate factors to alpha. The following patch implements the H-TCP algorithm in its basic form. Signed-Off-By: Baruch Even <baruch@ev-en.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 3 changed files with 303 additions and 0 deletions Side-by-side Diff
net/ipv4/Kconfig
| ... | ... | @@ -467,6 +467,18 @@ |
| 467 | 467 | TCP Westwood+ significantly increases fairness wrt TCP Reno in |
| 468 | 468 | wired networks and throughput over wireless links. |
| 469 | 469 | |
| 470 | +config TCP_CONG_HTCP | |
| 471 | + tristate "H-TCP" | |
| 472 | + depends on INET | |
| 473 | + default m | |
| 474 | + ---help--- | |
| 475 | + H-TCP is a send-side only modification of the TCP Reno | 
| 476 | + protocol stack that optimizes the performance of TCP | |
| 477 | + congestion control for high speed network links. It uses a | |
| 478 | + modeswitch to change the alpha and beta parameters of TCP Reno | |
| 479 | + based on network conditions and in a way so as to be fair with | |
| 480 | + other Reno and H-TCP flows. | |
| 481 | + | |
| 470 | 482 | config TCP_CONG_HSTCP |
| 471 | 483 | tristate "High Speed TCP" |
| 472 | 484 | depends on INET && EXPERIMENTAL |
| ... | ... | @@ -498,6 +510,7 @@ |
| 498 | 510 | adjusts the sending rate by modifying the congestion |
| 499 | 511 | window. TCP Vegas should provide less packet loss, but it is |
| 500 | 512 | not as aggressive as TCP Reno. |
| 513 | + | |
| 501 | 514 | |
| 502 | 515 | endmenu |
| 503 | 516 |
net/ipv4/Makefile
| ... | ... | @@ -35,6 +35,7 @@ |
| 35 | 35 | obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o |
| 36 | 36 | obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o |
| 37 | 37 | obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o |
| 38 | +obj-$(CONFIG_TCP_CONG_HTCP) += tcp_htcp.o | |
| 38 | 39 | obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o |
| 39 | 40 | |
| 40 | 41 | obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ |
net/ipv4/tcp_htcp.c
| 1 | +/* | |
| 2 | + * H-TCP congestion control. The algorithm is detailed in: | |
| 3 | + * R.N.Shorten, D.J.Leith: | |
| 4 | + * "H-TCP: TCP for high-speed and long-distance networks" | |
| 5 | + * Proc. PFLDnet, Argonne, 2004. | |
| 6 | + * http://www.hamilton.ie/net/htcp3.pdf | |
| 7 | + */ | |
| 8 | + | |
| 9 | +#include <linux/config.h> | |
| 10 | +#include <linux/mm.h> | |
| 11 | +#include <linux/module.h> | |
| 12 | +#include <net/tcp.h> | |
| 13 | + | |
/* alpha and beta use fixed-point arithmetic with 7 fractional bits (<<7). */
#define ALPHA_BASE (1<<7)	/* 1.0 with shift << 7 */
#define BETA_MIN (1<<6)		/* 0.5 with shift << 7 */
#define BETA_MAX 102		/* 0.8 with shift << 7 */

/* Enable scaling of the additive-increase factor by RTT (layered option). */
static int use_rtt_scaling = 1;
module_param(use_rtt_scaling, int, 0644);
MODULE_PARM_DESC(use_rtt_scaling, "turn on/off RTT scaling");

/* Enable resetting beta when the measured bandwidth shifts noticeably. */
static int use_bandwidth_switch = 1;
module_param(use_bandwidth_switch, int, 0644);
MODULE_PARM_DESC(use_bandwidth_switch, "turn on/off bandwidth switcher");
| 25 | + | |
/* Per-connection H-TCP state, kept in the tcp_sock's congestion-control
 * private area (size checked against TCP_CA_PRIV_SIZE at registration).
 */
struct htcp {
	u16 alpha;	/* Fixed point arith, << 7 */
	u8 beta;	/* Fixed point arith, << 7 */
	u8 modeswitch;	/* Delay modeswitch until we had at least one congestion event */
	u8 ccount;	/* Number of RTTs since last congestion event */
	u8 undo_ccount;	/* ccount snapshot taken at htcp_reset() for undo */
	u16 packetcount;	/* packets acked in the current throughput sample */
	u32 minRTT;	/* smallest smoothed RTT observed (jiffies) */
	u32 maxRTT;	/* largest recent smoothed RTT (jiffies, slowly faded) */
	u32 snd_cwnd_cnt2;	/* acks since ccount was last incremented */

	u32 undo_maxRTT;	/* maxRTT snapshot taken at htcp_reset() for undo */
	u32 undo_old_maxB;	/* old_maxB snapshot taken at htcp_reset() for undo */

	/* Bandwidth estimation */
	u32 minB;	/* minimum of the smoothed throughput estimate */
	u32 maxB;	/* maximum of the smoothed throughput estimate */
	u32 old_maxB;	/* maxB as of the previous beta update */
	u32 Bi;		/* smoothed throughput estimate (packets/sec) */
	u32 lasttime;	/* start time of current throughput sample (jiffies) */
};
| 47 | + | |
| 48 | +static inline void htcp_reset(struct htcp *ca) | |
| 49 | +{ | |
| 50 | + ca->undo_ccount = ca->ccount; | |
| 51 | + ca->undo_maxRTT = ca->maxRTT; | |
| 52 | + ca->undo_old_maxB = ca->old_maxB; | |
| 53 | + | |
| 54 | + ca->ccount = 0; | |
| 55 | + ca->snd_cwnd_cnt2 = 0; | |
| 56 | +} | |
| 57 | + | |
| 58 | +static u32 htcp_cwnd_undo(struct tcp_sock *tp) | |
| 59 | +{ | |
| 60 | + struct htcp *ca = tcp_ca(tp); | |
| 61 | + ca->ccount = ca->undo_ccount; | |
| 62 | + ca->maxRTT = ca->undo_maxRTT; | |
| 63 | + ca->old_maxB = ca->undo_old_maxB; | |
| 64 | + return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta); | |
| 65 | +} | |
| 66 | + | |
/* Track the per-connection minimum and maximum smoothed RTT.
 * tp->srtt is stored <<3 (eighths of a jiffy), so shift down to jiffies.
 */
static inline void measure_rtt(struct tcp_sock *tp)
{
	struct htcp *ca = tcp_ca(tp);
	u32 srtt = tp->srtt>>3;

	/* keep track of minimum RTT seen so far, minRTT is zero at first */
	if (ca->minRTT > srtt || !ca->minRTT)
		ca->minRTT = srtt;

	/* max RTT: only sample outside recovery (TCP_CA_Open), after
	 * slow start has ended (ssthresh lowered from its initial huge
	 * value), and a few RTTs after the last backoff (ccount > 3) so
	 * queues have settled.
	 */
	if (tp->ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) {
		if (ca->maxRTT < ca->minRTT)
			ca->maxRTT = ca->minRTT;
		/* Let maxRTT grow by at most HZ/50 (20ms) per sample to
		 * filter out one-off outliers. */
		if (ca->maxRTT < srtt && srtt <= ca->maxRTT+HZ/50)
			ca->maxRTT = srtt;
	}
}
| 84 | + | |
| 85 | +static void measure_achieved_throughput(struct tcp_sock *tp, u32 pkts_acked) | |
| 86 | +{ | |
| 87 | + struct htcp *ca = tcp_ca(tp); | |
| 88 | + u32 now = tcp_time_stamp; | |
| 89 | + | |
| 90 | + /* achieved throughput calculations */ | |
| 91 | + if (tp->ca_state != TCP_CA_Open && tp->ca_state != TCP_CA_Disorder) { | |
| 92 | + ca->packetcount = 0; | |
| 93 | + ca->lasttime = now; | |
| 94 | + return; | |
| 95 | + } | |
| 96 | + | |
| 97 | + ca->packetcount += pkts_acked; | |
| 98 | + | |
| 99 | + if (ca->packetcount >= tp->snd_cwnd - (ca->alpha>>7? : 1) | |
| 100 | + && now - ca->lasttime >= ca->minRTT | |
| 101 | + && ca->minRTT > 0) { | |
| 102 | + __u32 cur_Bi = ca->packetcount*HZ/(now - ca->lasttime); | |
| 103 | + if (ca->ccount <= 3) { | |
| 104 | + /* just after backoff */ | |
| 105 | + ca->minB = ca->maxB = ca->Bi = cur_Bi; | |
| 106 | + } else { | |
| 107 | + ca->Bi = (3*ca->Bi + cur_Bi)/4; | |
| 108 | + if (ca->Bi > ca->maxB) | |
| 109 | + ca->maxB = ca->Bi; | |
| 110 | + if (ca->minB > ca->maxB) | |
| 111 | + ca->minB = ca->maxB; | |
| 112 | + } | |
| 113 | + ca->packetcount = 0; | |
| 114 | + ca->lasttime = now; | |
| 115 | + } | |
| 116 | +} | |
| 117 | + | |
/* Recompute the multiplicative-decrease factor beta (fixed point, <<7)
 * from the observed RTT ratio, optionally gated by the bandwidth switch.
 */
static inline void htcp_beta_update(struct htcp *ca, u32 minRTT, u32 maxRTT)
{
	if (use_bandwidth_switch) {
		u32 maxB = ca->maxB;
		u32 old_maxB = ca->old_maxB;
		ca->old_maxB = ca->maxB;

		/* If maxB moved outside +/-20% of its previous value
		 * (5*maxB not in [4*old, 6*old]), conditions changed:
		 * fall back to conservative beta and re-arm modeswitch. */
		if (!between(5*maxB, 4*old_maxB, 6*old_maxB)) {
			ca->beta = BETA_MIN;
			ca->modeswitch = 0;
			return;
		}
	}

	/* Adaptive backoff: beta = minRTT/maxRTT (<<7), clamped to
	 * [BETA_MIN, BETA_MAX]. Only once we have seen a congestion
	 * event (modeswitch set) and minRTT exceeds ~10ms. */
	if (ca->modeswitch && minRTT > max(HZ/100, 1) && maxRTT) {
		ca->beta = (minRTT<<7)/maxRTT;
		if (ca->beta < BETA_MIN)
			ca->beta = BETA_MIN;
		else if (ca->beta > BETA_MAX)
			ca->beta = BETA_MAX;
	} else {
		ca->beta = BETA_MIN;
		ca->modeswitch = 1;
	}
}
| 143 | + | |
/* Recompute the additive-increase factor alpha (fixed point, <<7) from
 * the time elapsed since the last backoff, per the H-TCP relation
 * alpha = 2 * factor * (1 - beta).
 */
static inline void htcp_alpha_update(struct htcp *ca)
{
	u32 minRTT = ca->minRTT;
	u32 factor = 1;
	u32 diff = ca->ccount * minRTT; /* time since last backoff */

	/* After the first second in high-speed mode, grow the factor
	 * quadratically: factor = 1 + 10*t + (t/2)^2 with t = diff-1s
	 * expressed in seconds (computed in jiffies below). */
	if (diff > HZ) {
		diff -= HZ;
		factor = 1+ ( 10*diff + ((diff/2)*(diff/2)/HZ) )/HZ;
	}

	if (use_rtt_scaling && minRTT) {
		/* Scale by RTT relative to 100ms; scale is <<3 and
		 * clamped so the ratio stays within [0.5, 10]. */
		u32 scale = (HZ<<3)/(10*minRTT);
		scale = min(max(scale, 1U<<2), 10U<<3); /* clamping ratio to interval [0.5,10]<<3 */
		factor = (factor<<3)/scale;
		if (!factor)
			factor = 1;
	}

	/* (1<<7) - beta is (1 - beta) in <<7 fixed point. */
	ca->alpha = 2*factor*((1<<7)-ca->beta);
	if (!ca->alpha)
		ca->alpha = ALPHA_BASE;
}
| 167 | + | |
| 168 | +/* After we have the rtt data to calculate beta, we'd still prefer to wait one | |
| 169 | + * rtt before we adjust our beta to ensure we are working from a consistent | |
| 170 | + * data. | |
| 171 | + * | |
| 172 | + * This function should be called when we hit a congestion event since only at | |
| 173 | + * that point do we really have a real sense of maxRTT (the queues en route | |
| 174 | + * were getting just too full now). | |
| 175 | + */ | |
| 176 | +static void htcp_param_update(struct tcp_sock *tp) | |
| 177 | +{ | |
| 178 | + struct htcp *ca = tcp_ca(tp); | |
| 179 | + u32 minRTT = ca->minRTT; | |
| 180 | + u32 maxRTT = ca->maxRTT; | |
| 181 | + | |
| 182 | + htcp_beta_update(ca, minRTT, maxRTT); | |
| 183 | + htcp_alpha_update(ca); | |
| 184 | + | |
| 185 | + /* add slowly fading memory for maxRTT to accommodate routing changes etc */ | |
| 186 | + if (minRTT > 0 && maxRTT > minRTT) | |
| 187 | + ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100; | |
| 188 | +} | |
| 189 | + | |
| 190 | +static u32 htcp_recalc_ssthresh(struct tcp_sock *tp) | |
| 191 | +{ | |
| 192 | + struct htcp *ca = tcp_ca(tp); | |
| 193 | + htcp_param_update(tp); | |
| 194 | + return max((tp->snd_cwnd * ca->beta) >> 7, 2U); | |
| 195 | +} | |
| 196 | + | |
/* Main congestion-avoidance hook, invoked on incoming ACKs.
 * Standard slow start below ssthresh; above it, H-TCP's scaled
 * additive increase of alpha/cwnd per ack (alpha is <<7 fixed point).
 */
static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
		u32 in_flight, int data_acked)
{
	struct htcp *ca = tcp_ca(tp);

	/* Only grow cwnd while we are actually cwnd-limited. */
	if (in_flight < tp->snd_cwnd)
		return;

	if (tp->snd_cwnd <= tp->snd_ssthresh) {
		/* In "safe" area, increase. */
		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
			tp->snd_cwnd++;
	} else {
		measure_rtt(tp);

		/* keep track of number of round-trip times since last backoff event */
		if (ca->snd_cwnd_cnt2++ > tp->snd_cwnd) {
			ca->ccount++;
			ca->snd_cwnd_cnt2 = 0;
			htcp_alpha_update(ca);
		}

		/* In dangerous area, increase slowly.
		 * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
		 * Note the post-increment: snd_cwnd_cnt advances on every
		 * ack regardless of whether the threshold was reached.
		 */
		if ((tp->snd_cwnd_cnt++ * ca->alpha)>>7 >= tp->snd_cwnd) {
			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
				tp->snd_cwnd++;
			tp->snd_cwnd_cnt = 0;
			/* NOTE(review): ccount is also bumped on each cwnd
			 * increment here, in addition to the per-RTT bump
			 * above — looks intentional but verify against the
			 * H-TCP spec. */
			ca->ccount++;
		}
	}
}
| 230 | + | |
| 231 | +/* Lower bound on congestion window. */ | |
| 232 | +static u32 htcp_min_cwnd(struct tcp_sock *tp) | |
| 233 | +{ | |
| 234 | + return tp->snd_ssthresh; | |
| 235 | +} | |
| 236 | + | |
| 237 | + | |
| 238 | +static void htcp_init(struct tcp_sock *tp) | |
| 239 | +{ | |
| 240 | + struct htcp *ca = tcp_ca(tp); | |
| 241 | + | |
| 242 | + memset(ca, 0, sizeof(struct htcp)); | |
| 243 | + ca->alpha = ALPHA_BASE; | |
| 244 | + ca->beta = BETA_MIN; | |
| 245 | +} | |
| 246 | + | |
| 247 | +static void htcp_state(struct tcp_sock *tp, u8 new_state) | |
| 248 | +{ | |
| 249 | + switch (new_state) { | |
| 250 | + case TCP_CA_CWR: | |
| 251 | + case TCP_CA_Recovery: | |
| 252 | + case TCP_CA_Loss: | |
| 253 | + htcp_reset(tcp_ca(tp)); | |
| 254 | + break; | |
| 255 | + } | |
| 256 | +} | |
| 257 | + | |
/* Hook table registering H-TCP with the pluggable congestion-control
 * framework. pkts_acked is cleared at registration time when the
 * bandwidth switch is disabled via module parameter.
 */
static struct tcp_congestion_ops htcp = {
	.init		= htcp_init,
	.ssthresh	= htcp_recalc_ssthresh,
	.min_cwnd	= htcp_min_cwnd,
	.cong_avoid	= htcp_cong_avoid,
	.set_state	= htcp_state,
	.undo_cwnd	= htcp_cwnd_undo,
	.pkts_acked	= measure_achieved_throughput,
	.owner		= THIS_MODULE,
	.name		= "htcp",
};
| 269 | + | |
| 270 | +static int __init htcp_register(void) | |
| 271 | +{ | |
| 272 | + BUG_ON(sizeof(struct htcp) > TCP_CA_PRIV_SIZE); | |
| 273 | + BUILD_BUG_ON(BETA_MIN >= BETA_MAX); | |
| 274 | + if (!use_bandwidth_switch) | |
| 275 | + htcp.pkts_acked = NULL; | |
| 276 | + return tcp_register_congestion_control(&htcp); | |
| 277 | +} | |
| 278 | + | |
/* Module unload: remove H-TCP from the list of available algorithms. */
static void __exit htcp_unregister(void)
{
	tcp_unregister_congestion_control(&htcp);
}
| 283 | + | |
/* Module entry/exit points and metadata. */
module_init(htcp_register);
module_exit(htcp_unregister);

MODULE_AUTHOR("Baruch Even");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("H-TCP");