Blame view

net/sched/sch_plug.c 6.4 KB
2874c5fd2   Thomas Gleixner   treewide: Replace...
1
  // SPDX-License-Identifier: GPL-2.0-or-later
c3059be16   Shriram Rajagopalan   net/sched: sch_pl...
2
3
4
  /*
   * sch_plug.c Queue traffic until an explicit release command
   *
c3059be16   Shriram Rajagopalan   net/sched: sch_pl...
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
   * There are two ways to use this qdisc:
   * 1. A simple "instantaneous" plug/unplug operation, by issuing an alternating
   *    sequence of TCQ_PLUG_BUFFER & TCQ_PLUG_RELEASE_INDEFINITE commands.
   *
   * 2. For network output buffering (a.k.a output commit) functionality.
   *    Output commit property is commonly used by applications using checkpoint
   *    based fault-tolerance to ensure that the checkpoint from which a system
   *    is being restored is consistent w.r.t outside world.
   *
   *    Consider for e.g. Remus - a Virtual Machine checkpointing system,
   *    wherein a VM is checkpointed, say every 50ms. The checkpoint is replicated
   *    asynchronously to the backup host, while the VM continues executing the
   *    next epoch speculatively.
   *
   *    The following is a typical sequence of output buffer operations:
   *       1.At epoch i, start_buffer(i)
   *       2. At end of epoch i (i.e. after 50ms):
   *          2.1 Stop VM and take checkpoint(i).
   *          2.2 start_buffer(i+1) and Resume VM
   *       3. While speculatively executing epoch(i+1), asynchronously replicate
   *          checkpoint(i) to backup host.
   *       4. When checkpoint_ack(i) is received from backup, release_buffer(i)
   *    Thus, this Qdisc would receive the following sequence of commands:
   *       TCQ_PLUG_BUFFER (epoch i)
   *       .. TCQ_PLUG_BUFFER (epoch i+1)
   *       ....TCQ_PLUG_RELEASE_ONE (epoch i)
   *       ......TCQ_PLUG_BUFFER (epoch i+2)
   *       ........
   */
  
  #include <linux/module.h>
  #include <linux/types.h>
  #include <linux/kernel.h>
  #include <linux/errno.h>
  #include <linux/netdevice.h>
  #include <linux/skbuff.h>
  #include <net/pkt_sched.h>
  
  /*
   * State of the queue, when used for network output buffering:
   *
   *                 plug(i+1)            plug(i)          head
   * ------------------+--------------------+---------------->
   *                   |                    |
   *                   |                    |
   * pkts_current_epoch| pkts_last_epoch    |pkts_to_release
   * ----------------->|<--------+--------->|+--------------->
   *                   v                    v
   *
   */
  
/* Per-qdisc private state for the plug scheduler. */
struct plug_sched_data {
	/* If true, the dequeue function releases all packets
	 * from head to end of the queue. The queue turns into
	 * a pass-through queue for newly arriving packets.
	 */
	bool unplug_indefinite;

	/* While true, plug_dequeue() returns NULL. Set when the
	 * release budget is exhausted; cleared by the release
	 * commands in plug_change() before rescheduling the queue.
	 */
	bool throttled;

	/* Queue Limit in bytes */
	u32 limit;

	/* Number of packets (output) from the current speculatively
	 * executing epoch.
	 */
	u32 pkts_current_epoch;

	/* Number of packets corresponding to the recently finished
	 * epoch. These will be released when we receive a
	 * TCQ_PLUG_RELEASE_ONE command. This command is typically
	 * issued after committing a checkpoint at the target.
	 */
	u32 pkts_last_epoch;

	/*
	 * Number of packets from the head of the queue, that can
	 * be released (committed checkpoint).
	 */
	u32 pkts_to_release;
};
520ac30f4   Eric Dumazet   net_sched: drop p...
84
85
  static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch,
  			struct sk_buff **to_free)
c3059be16   Shriram Rajagopalan   net/sched: sch_pl...
86
87
88
89
90
91
92
93
  {
  	struct plug_sched_data *q = qdisc_priv(sch);
  
  	if (likely(sch->qstats.backlog + skb->len <= q->limit)) {
  		if (!q->unplug_indefinite)
  			q->pkts_current_epoch++;
  		return qdisc_enqueue_tail(skb, sch);
  	}
520ac30f4   Eric Dumazet   net_sched: drop p...
94
  	return qdisc_drop(skb, sch, to_free);
c3059be16   Shriram Rajagopalan   net/sched: sch_pl...
95
96
97
98
99
  }
  
  static struct sk_buff *plug_dequeue(struct Qdisc *sch)
  {
  	struct plug_sched_data *q = qdisc_priv(sch);
8fe6a79fb   Eric Dumazet   net_sched: sch_pl...
100
  	if (q->throttled)
c3059be16   Shriram Rajagopalan   net/sched: sch_pl...
101
102
103
104
105
106
107
  		return NULL;
  
  	if (!q->unplug_indefinite) {
  		if (!q->pkts_to_release) {
  			/* No more packets to dequeue. Block the queue
  			 * and wait for the next release command.
  			 */
8fe6a79fb   Eric Dumazet   net_sched: sch_pl...
108
  			q->throttled = true;
c3059be16   Shriram Rajagopalan   net/sched: sch_pl...
109
110
111
112
113
114
115
  			return NULL;
  		}
  		q->pkts_to_release--;
  	}
  
  	return qdisc_dequeue_head(sch);
  }
e63d7dfd2   Alexander Aring   net: sched: sch: ...
116
117
  static int plug_init(struct Qdisc *sch, struct nlattr *opt,
  		     struct netlink_ext_ack *extack)
c3059be16   Shriram Rajagopalan   net/sched: sch_pl...
118
119
120
121
122
123
124
125
126
  {
  	struct plug_sched_data *q = qdisc_priv(sch);
  
  	q->pkts_current_epoch = 0;
  	q->pkts_last_epoch = 0;
  	q->pkts_to_release = 0;
  	q->unplug_indefinite = false;
  
  	if (opt == NULL) {
348e3435c   Phil Sutter   net: sched: drop ...
127
128
  		q->limit = qdisc_dev(sch)->tx_queue_len
  		           * psched_mtu(qdisc_dev(sch));
c3059be16   Shriram Rajagopalan   net/sched: sch_pl...
129
130
131
132
133
134
135
136
  	} else {
  		struct tc_plug_qopt *ctl = nla_data(opt);
  
  		if (nla_len(opt) < sizeof(*ctl))
  			return -EINVAL;
  
  		q->limit = ctl->limit;
  	}
8fe6a79fb   Eric Dumazet   net_sched: sch_pl...
137
  	q->throttled = true;
c3059be16   Shriram Rajagopalan   net/sched: sch_pl...
138
139
140
141
142
143
144
145
146
147
148
149
150
  	return 0;
  }
  
  /* Receives 4 types of messages:
   * TCQ_PLUG_BUFFER: Inset a plug into the queue and
   *  buffer any incoming packets
   * TCQ_PLUG_RELEASE_ONE: Dequeue packets from queue head
   *   to beginning of the next plug.
   * TCQ_PLUG_RELEASE_INDEFINITE: Dequeue all packets from queue.
   *   Stop buffering packets until the next TCQ_PLUG_BUFFER
   *   command is received (just act as a pass-thru queue).
   * TCQ_PLUG_LIMIT: Increase/decrease queue size
   */
2030721cc   Alexander Aring   net: sched: sch: ...
151
152
  static int plug_change(struct Qdisc *sch, struct nlattr *opt,
  		       struct netlink_ext_ack *extack)
c3059be16   Shriram Rajagopalan   net/sched: sch_pl...
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
  {
  	struct plug_sched_data *q = qdisc_priv(sch);
  	struct tc_plug_qopt *msg;
  
  	if (opt == NULL)
  		return -EINVAL;
  
  	msg = nla_data(opt);
  	if (nla_len(opt) < sizeof(*msg))
  		return -EINVAL;
  
  	switch (msg->action) {
  	case TCQ_PLUG_BUFFER:
  		/* Save size of the current buffer */
  		q->pkts_last_epoch = q->pkts_current_epoch;
  		q->pkts_current_epoch = 0;
  		if (q->unplug_indefinite)
8fe6a79fb   Eric Dumazet   net_sched: sch_pl...
170
  			q->throttled = true;
c3059be16   Shriram Rajagopalan   net/sched: sch_pl...
171
172
173
174
175
176
177
178
  		q->unplug_indefinite = false;
  		break;
  	case TCQ_PLUG_RELEASE_ONE:
  		/* Add packets from the last complete buffer to the
  		 * packets to be released set.
  		 */
  		q->pkts_to_release += q->pkts_last_epoch;
  		q->pkts_last_epoch = 0;
8fe6a79fb   Eric Dumazet   net_sched: sch_pl...
179
  		q->throttled = false;
c3059be16   Shriram Rajagopalan   net/sched: sch_pl...
180
181
182
183
184
185
186
  		netif_schedule_queue(sch->dev_queue);
  		break;
  	case TCQ_PLUG_RELEASE_INDEFINITE:
  		q->unplug_indefinite = true;
  		q->pkts_to_release = 0;
  		q->pkts_last_epoch = 0;
  		q->pkts_current_epoch = 0;
8fe6a79fb   Eric Dumazet   net_sched: sch_pl...
187
  		q->throttled = false;
c3059be16   Shriram Rajagopalan   net/sched: sch_pl...
188
189
190
191
192
193
194
195
196
197
198
199
  		netif_schedule_queue(sch->dev_queue);
  		break;
  	case TCQ_PLUG_LIMIT:
  		/* Limit is supplied in bytes */
  		q->limit = msg->limit;
  		break;
  	default:
  		return -EINVAL;
  	}
  
  	return 0;
  }
2132cf643   Eric Dumazet   net_sched: sch_pl...
200
/* Qdisc operations table: no .destroy/.dump needed; .reset reuses
 * the generic queue purge and .peek the generic head peek.
 */
static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
	.id          =       "plug",
	.priv_size   =       sizeof(struct plug_sched_data),
	.enqueue     =       plug_enqueue,
	.dequeue     =       plug_dequeue,
	.peek        =       qdisc_peek_head,
	.init        =       plug_init,
	.change      =       plug_change,
	.reset       =	     qdisc_reset_queue,
	.owner       =       THIS_MODULE,
};
  
/* Register the "plug" qdisc with the packet scheduler core. */
static int __init plug_module_init(void)
{
	return register_qdisc(&plug_qdisc_ops);
}

/* Unregister the qdisc on module unload. */
static void __exit plug_module_exit(void)
{
	unregister_qdisc(&plug_qdisc_ops);
}
module_init(plug_module_init)
module_exit(plug_module_exit)
MODULE_LICENSE("GPL");