  /*
   * net/sched/sch_qfq.c         Quick Fair Queueing Plus Scheduler.
   *
   * Copyright (c) 2009 Fabio Checconi, Luigi Rizzo, and Paolo Valente.
   * Copyright (c) 2012 Paolo Valente.
   *
   * This program is free software; you can redistribute it and/or
   * modify it under the terms of the GNU General Public License
   * version 2 as published by the Free Software Foundation.
   */
  
  #include <linux/module.h>
  #include <linux/init.h>
  #include <linux/bitops.h>
  #include <linux/errno.h>
  #include <linux/netdevice.h>
  #include <linux/pkt_sched.h>
  #include <net/sch_generic.h>
  #include <net/pkt_sched.h>
  #include <net/pkt_cls.h>
  /*  Quick Fair Queueing Plus
      ========================
  
      Sources:
      [1] Paolo Valente,
      "Reducing the Execution Time of Fair-Queueing Schedulers."
      http://algo.ing.unimo.it/people/paolo/agg-sched/agg-sched.pdf
  
      Sources for QFQ:
  
      [2] Fabio Checconi, Luigi Rizzo, and Paolo Valente: "QFQ: Efficient
      Packet Scheduling with Tight Bandwidth Distribution Guarantees."
  
      See also:
      http://retis.sssup.it/~fabio/linux/qfq/
   */
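/*
 * For illustration only: a typical way to use this scheduler from user
 * space is through the tc tool; the device name, class ids and parameter
 * values below are just placeholders.
 *
 *	tc qdisc add dev eth0 root handle 1: qfq
 *	tc class add dev eth0 parent 1: classid 1:1 qfq weight 100 maxpkt 1514
 *	tc filter add dev eth0 parent 1: protocol ip u32 \
 *		match ip dport 80 0xffff flowid 1:1
 *
 * "weight" and "maxpkt" correspond to TCA_QFQ_WEIGHT and TCA_QFQ_LMAX,
 * which are parsed in qfq_change_class() below.
 */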
  
  /*
    QFQ+ divides classes into aggregates of at most MAX_AGG_CLASSES
    classes. Each aggregate is timestamped with a virtual start time S
    and a virtual finish time F, and scheduled according to its
    timestamps. S and F are computed as a function of a system virtual
    time function V. The classes within each aggregate are instead
    scheduled with DRR.
  
    To speed up operations, QFQ+ divides also aggregates into a limited
    number of groups. Which group a class belongs to depends on the
    ratio between the maximum packet length for the class and the weight
    of the class. Groups have their own S and F. In the end, QFQ+
    schedules groups, then aggregates within groups, then classes within
    aggregates. See [1] and [2] for a full description.
    Virtual time computations.
  
    S, F and V are all computed in fixed point arithmetic with
  FRAC_BITS fractional bits.
  
    QFQ_MAX_INDEX is the maximum index allowed for a group. We need
  	one bit per index.
    QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.
  
    The layout of the bits is as below:
  
                     [ MTU_SHIFT ][      FRAC_BITS    ]
                     [ MAX_INDEX    ][ MIN_SLOT_SHIFT ]
  				 ^.__grp->index = 0
  				 *.__grp->slot_shift
  
    where MIN_SLOT_SHIFT is derived by difference from the others.
  
    The max group index corresponds to Lmax/w_min, where
    Lmax=1<<MTU_SHIFT, w_min = 1 .
    From this, and knowing how many groups (MAX_INDEX) we want,
    we can derive the shift corresponding to each group.
  
    Because we often need to compute
  	F = S + len/w_i  and V = V + len/wsum
    instead of storing w_i store the value
  	inv_w = (1<<FRAC_BITS)/w_i
  so we can do F = S + len * inv_w (and, similarly, V = V + len * iwsum).
    We use W_TOT in the formulas so we can easily move between
    static and adaptive weight sum.
  
    The per-scheduler-instance data contain all the data structures
    for the scheduler: bitmaps and bucket lists.
  
   */
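/*
 * Worked example of the fixed-point arithmetic above: with FRAC_BITS = 30,
 * ONE_FP = 1<<30.  A class with weight w_i = 4 gets inv_w = ONE_FP/4 = 1<<28,
 * so a 1024-byte packet contributes len * inv_w = 1024 << 28 = 256 << 30,
 * i.e. 256 units of virtual time, which matches len/w_i = 1024/4 = 256.
 */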
  
  /*
   * Maximum number of consecutive slots occupied by backlogged classes
   * inside a group.
   */
  #define QFQ_MAX_SLOTS	32
  
  /*
   * Shifts used for aggregate<->group mapping.  We allow class weights that are
   * in the range [1, 2^MAX_WSHIFT], and we try to map each aggregate i to the
   * group with the smallest index that can support the L_i / r_i configured
   * for the classes in the aggregate.
   *
   * grp->index is the index of the group; and grp->slot_shift
   * is the shift for the corresponding (scaled) sigma_i.
   */
  #define QFQ_MAX_INDEX		24
  #define QFQ_MAX_WSHIFT		10

  #define	QFQ_MAX_WEIGHT		(1<<QFQ_MAX_WSHIFT) /* see qfq_slot_insert */
  #define QFQ_MAX_WSUM		(64*QFQ_MAX_WEIGHT)
  
  #define FRAC_BITS		30	/* fixed point arithmetic */
  #define ONE_FP			(1UL << FRAC_BITS)

  #define QFQ_MTU_SHIFT		16	/* to support TSO/GSO */
  #define QFQ_MIN_LMAX		512	/* see qfq_slot_insert */
  
  #define QFQ_MAX_AGG_CLASSES	8 /* max num classes per aggregate allowed */
  
  /*
   * Possible group states.  These values are used as indexes for the bitmaps
 * array of struct qfq_sched.
   */
  enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
  
  struct qfq_group;
  struct qfq_aggregate;
  struct qfq_class {
  	struct Qdisc_class_common common;
  
  	unsigned int refcnt;
  	unsigned int filter_cnt;
  
  	struct gnet_stats_basic_packed bstats;
  	struct gnet_stats_queue qstats;
  	struct gnet_stats_rate_est64 rate_est;
  	struct Qdisc *qdisc;
  	struct list_head alist;		/* Link for active-classes list. */
  	struct qfq_aggregate *agg;	/* Parent aggregate. */
  	int deficit;			/* DRR deficit counter. */
  };

  struct qfq_aggregate {
  	struct hlist_node next;	/* Link for the slot list. */
  	u64 S, F;		/* flow timestamps (exact) */
  
  	/* group we belong to. In principle we would need the index,
  	 * which is log_2(lmax/weight), but we never reference it
  	 * directly, only the group.
  	 */
  	struct qfq_group *grp;
  
  	/* these are copied from the flowset. */
  	u32	class_weight; /* Weight of each class in this aggregate. */
  	/* Max pkt size for the classes in this aggregate, DRR quantum. */
  	int	lmax;
  
  	u32	inv_w;	    /* ONE_FP/(sum of weights of classes in aggr.). */
  	u32	budgetmax;  /* Max budget for this aggregate. */
  	u32	initial_budget, budget;     /* Initial and current budget. */
  
  	int		  num_classes;	/* Number of classes in this aggr. */
  	struct list_head  active;	/* DRR queue of active classes. */
  
  	struct hlist_node nonfull_next;	/* See nonfull_aggs in qfq_sched. */
  };
  
  struct qfq_group {
  	u64 S, F;			/* group timestamps (approx). */
  	unsigned int slot_shift;	/* Slot shift. */
  	unsigned int index;		/* Group index. */
  	unsigned int front;		/* Index of the front slot. */
  	unsigned long full_slots;	/* non-empty slots */
  	/* Array of RR lists of active aggregates. */
  	struct hlist_head slots[QFQ_MAX_SLOTS];
  };
  
  struct qfq_sched {
  	struct tcf_proto *filter_list;
  	struct Qdisc_class_hash clhash;
  	u64			oldV, V;	/* Precise virtual times. */
  	struct qfq_aggregate	*in_serv_agg;   /* Aggregate being served. */
  	u32			num_active_agg; /* Num. of active aggregates */
  	u32			wsum;		/* weight sum */
  	u32			iwsum;		/* inverse weight sum */
  
  	unsigned long bitmaps[QFQ_MAX_STATE];	    /* Group bitmaps. */
  	struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */
  	u32 min_slot_shift;	/* Index of the group-0 bit in the bitmaps. */
  
  	u32 max_agg_classes;		/* Max number of classes per aggr. */
  	struct hlist_head nonfull_aggs; /* Aggs with room for more classes. */
  };
  /*
   * Possible reasons why the timestamps of an aggregate are updated
 * enqueue: the aggregate switches from idle to active and must be scheduled
   *	    for service
   * requeue: the aggregate finishes its budget, so it stops being served and
   *	    must be rescheduled for service
   */
  enum update_reason {enqueue, requeue};
  static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
  {
  	struct qfq_sched *q = qdisc_priv(sch);
  	struct Qdisc_class_common *clc;
  
  	clc = qdisc_class_find(&q->clhash, classid);
  	if (clc == NULL)
  		return NULL;
  	return container_of(clc, struct qfq_class, common);
  }
  
  static void qfq_purge_queue(struct qfq_class *cl)
  {
  	unsigned int len = cl->qdisc->q.qlen;
  
  	qdisc_reset(cl->qdisc);
  	qdisc_tree_decrease_qlen(cl->qdisc, len);
  }
  
  static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
  	[TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
  	[TCA_QFQ_LMAX] = { .type = NLA_U32 },
  };
  
  /*
   * Calculate a flow index, given its weight and maximum packet length.
   * index = log_2(maxlen/weight) but we need to apply the scaling.
   * This is used only once at flow creation.
   */
  static int qfq_calc_index(u32 inv_w, unsigned int maxlen, u32 min_slot_shift)
  {
  	u64 slot_size = (u64)maxlen * inv_w;
  	unsigned long size_map;
  	int index = 0;
  	size_map = slot_size >> min_slot_shift;
  	if (!size_map)
  		goto out;
  
  	index = __fls(size_map) + 1;	/* basically a log_2 */
  	index -= !(slot_size - (1ULL << (index + min_slot_shift - 1)));
  
  	if (index < 0)
  		index = 0;
  out:
  	pr_debug("qfq calc_index: W = %lu, L = %u, I = %d
  ",
  		 (unsigned long) ONE_FP/inv_w, maxlen, index);
  
  	return index;
  }
  static void qfq_deactivate_agg(struct qfq_sched *, struct qfq_aggregate *);
  static void qfq_activate_agg(struct qfq_sched *, struct qfq_aggregate *,
  			     enum update_reason);
  
  static void qfq_init_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
  			 u32 lmax, u32 weight)
  {
  	INIT_LIST_HEAD(&agg->active);
  	hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
  
  	agg->lmax = lmax;
  	agg->class_weight = weight;
  }
  
  static struct qfq_aggregate *qfq_find_agg(struct qfq_sched *q,
  					  u32 lmax, u32 weight)
  {
  	struct qfq_aggregate *agg;

  	hlist_for_each_entry(agg, &q->nonfull_aggs, nonfull_next)
  		if (agg->lmax == lmax && agg->class_weight == weight)
  			return agg;
  
  	return NULL;
  }

  /* Update aggregate as a function of the new number of classes. */
  static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
  			   int new_num_classes)
  {
  	u32 new_agg_weight;
  
  	if (new_num_classes == q->max_agg_classes)
  		hlist_del_init(&agg->nonfull_next);
  
  	if (agg->num_classes > new_num_classes &&
  	    new_num_classes == q->max_agg_classes - 1) /* agg no more full */
  		hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
  	/* The next assignment may let
  	 * agg->initial_budget > agg->budgetmax
  	 * hold, we will take it into account in charge_actual_service().
  	 */
  	agg->budgetmax = new_num_classes * agg->lmax;
  	new_agg_weight = agg->class_weight * new_num_classes;
  	agg->inv_w = ONE_FP/new_agg_weight;
  
  	if (agg->grp == NULL) {
  		int i = qfq_calc_index(agg->inv_w, agg->budgetmax,
  				       q->min_slot_shift);
  		agg->grp = &q->groups[i];
  	}
  
  	q->wsum +=
  		(int) agg->class_weight * (new_num_classes - agg->num_classes);
  	q->iwsum = ONE_FP / q->wsum;
  
  	agg->num_classes = new_num_classes;
  }
  
  /* Add class to aggregate. */
  static void qfq_add_to_agg(struct qfq_sched *q,
  			   struct qfq_aggregate *agg,
  			   struct qfq_class *cl)
  {
  	cl->agg = agg;
  
  	qfq_update_agg(q, agg, agg->num_classes+1);
  	if (cl->qdisc->q.qlen > 0) { /* adding an active class */
  		list_add_tail(&cl->alist, &agg->active);
  		if (list_first_entry(&agg->active, struct qfq_class, alist) ==
  		    cl && q->in_serv_agg != agg) /* agg was inactive */
  			qfq_activate_agg(q, agg, enqueue); /* schedule agg */
  	}
  }
  static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *);

  static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
  {
  	if (!hlist_unhashed(&agg->nonfull_next))
  		hlist_del_init(&agg->nonfull_next);
  	q->wsum -= agg->class_weight;
  	if (q->wsum != 0)
  		q->iwsum = ONE_FP / q->wsum;
  	if (q->in_serv_agg == agg)
  		q->in_serv_agg = qfq_choose_next_agg(q);
  	kfree(agg);
  }

  /* Deschedule class from within its parent aggregate. */
  static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
  {
  	struct qfq_aggregate *agg = cl->agg;


  	list_del(&cl->alist); /* remove from RR queue of the aggregate */
  	if (list_empty(&agg->active)) /* agg is now inactive */
  		qfq_deactivate_agg(q, agg);
  }
  /* Remove class from its parent aggregate. */
  static void qfq_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
  {
  	struct qfq_aggregate *agg = cl->agg;

  	cl->agg = NULL;
  	if (agg->num_classes == 1) { /* agg being emptied, destroy it */
  		qfq_destroy_agg(q, agg);
  		return;
  	}
  	qfq_update_agg(q, agg, agg->num_classes-1);
  }

  /* Deschedule class and remove it from its parent aggregate. */
  static void qfq_deact_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
  {
  	if (cl->qdisc->q.qlen > 0) /* class is active */
  		qfq_deactivate_class(q, cl);

  	qfq_rm_from_agg(q, cl);
  }
  /* Move class to a new aggregate, matching the new class weight and/or lmax */
  static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
  			   u32 lmax)
  {
  	struct qfq_sched *q = qdisc_priv(sch);
  	struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight);
  
  	if (new_agg == NULL) { /* create new aggregate */
  		new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC);
  		if (new_agg == NULL)
  			return -ENOBUFS;
  		qfq_init_agg(q, new_agg, lmax, weight);
  	}
  	qfq_deact_rm_from_agg(q, cl);
  	qfq_add_to_agg(q, new_agg, cl);
  
  	return 0;
  }

  static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
  			    struct nlattr **tca, unsigned long *arg)
  {
  	struct qfq_sched *q = qdisc_priv(sch);
  	struct qfq_class *cl = (struct qfq_class *)*arg;
  	bool existing = false;
  	struct nlattr *tb[TCA_QFQ_MAX + 1];
  	struct qfq_aggregate *new_agg = NULL;
  	u32 weight, lmax, inv_w;
  	int err;
  	int delta_w;
  
  	if (tca[TCA_OPTIONS] == NULL) {
  		pr_notice("qfq: no options
  ");
  		return -EINVAL;
  	}
  
  	err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy);
  	if (err < 0)
  		return err;
  
  	if (tb[TCA_QFQ_WEIGHT]) {
  		weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
  		if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) {
  			pr_notice("qfq: invalid weight %u
  ", weight);
  			return -EINVAL;
  		}
  	} else
  		weight = 1;
  	if (tb[TCA_QFQ_LMAX]) {
  		lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
  		if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
  			pr_notice("qfq: invalid max length %u
  ", lmax);
  			return -EINVAL;
  		}
  	} else
  		lmax = psched_mtu(qdisc_dev(sch));

  	inv_w = ONE_FP / weight;
  	weight = ONE_FP / inv_w;
  
  	if (cl != NULL &&
  	    lmax == cl->agg->lmax &&
  	    weight == cl->agg->class_weight)
  		return 0; /* nothing to change */
  
  	delta_w = weight - (cl ? cl->agg->class_weight : 0);
  
  	if (q->wsum + delta_w > QFQ_MAX_WSUM) {
  		pr_notice("qfq: total weight out of range (%d + %u)
  ",
  			  delta_w, q->wsum);
  		return -EINVAL;
  	}
  
  	if (cl != NULL) { /* modify existing class */
  		if (tca[TCA_RATE]) {
  			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
  						    qdisc_root_sleeping_lock(sch),
  						    tca[TCA_RATE]);
  			if (err)
  				return err;
  		}
  		existing = true;
  		goto set_change_agg;
  	}
  	/* create and init new class */
  	cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL);
  	if (cl == NULL)
  		return -ENOBUFS;
  
  	cl->refcnt = 1;
  	cl->common.classid = classid;
  	cl->deficit = lmax;
  
  	cl->qdisc = qdisc_create_dflt(sch->dev_queue,
  				      &pfifo_qdisc_ops, classid);
  	if (cl->qdisc == NULL)
  		cl->qdisc = &noop_qdisc;
  
  	if (tca[TCA_RATE]) {
  		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
  					qdisc_root_sleeping_lock(sch),
  					tca[TCA_RATE]);
  		if (err)
  			goto destroy_class;
  	}
  
  	sch_tree_lock(sch);
  	qdisc_class_hash_insert(&q->clhash, &cl->common);
  	sch_tree_unlock(sch);
  
  	qdisc_class_hash_grow(sch, &q->clhash);
  set_change_agg:
  	sch_tree_lock(sch);
  	new_agg = qfq_find_agg(q, lmax, weight);
  	if (new_agg == NULL) { /* create new aggregate */
  		sch_tree_unlock(sch);
  		new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL);
  		if (new_agg == NULL) {
  			err = -ENOBUFS;
  			gen_kill_estimator(&cl->bstats, &cl->rate_est);
  			goto destroy_class;
  		}
  		sch_tree_lock(sch);
  		qfq_init_agg(q, new_agg, lmax, weight);
  	}
  	if (existing)
  		qfq_deact_rm_from_agg(q, cl);
  	qfq_add_to_agg(q, new_agg, cl);
  	sch_tree_unlock(sch);
  	*arg = (unsigned long)cl;
  	return 0;
  
  destroy_class:
  	qdisc_destroy(cl->qdisc);
  	kfree(cl);
  	return err;
  }
  
  static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
  {
  	struct qfq_sched *q = qdisc_priv(sch);
  	qfq_rm_from_agg(q, cl);
  	gen_kill_estimator(&cl->bstats, &cl->rate_est);
  	qdisc_destroy(cl->qdisc);
  	kfree(cl);
  }
  
  static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
  {
  	struct qfq_sched *q = qdisc_priv(sch);
  	struct qfq_class *cl = (struct qfq_class *)arg;
  
  	if (cl->filter_cnt > 0)
  		return -EBUSY;
  
  	sch_tree_lock(sch);
  
  	qfq_purge_queue(cl);
  	qdisc_class_hash_remove(&q->clhash, &cl->common);
  
  	BUG_ON(--cl->refcnt == 0);
  	/*
  	 * This shouldn't happen: we "hold" one cops->get() when called
  	 * from tc_ctl_tclass; the destroy method is done from cops->put().
  	 */
  
  	sch_tree_unlock(sch);
  	return 0;
  }
  
  static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid)
  {
  	struct qfq_class *cl = qfq_find_class(sch, classid);
  
  	if (cl != NULL)
  		cl->refcnt++;
  
  	return (unsigned long)cl;
  }
  
  static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
  {
  	struct qfq_class *cl = (struct qfq_class *)arg;
  
  	if (--cl->refcnt == 0)
  		qfq_destroy_class(sch, cl);
  }
  
  static struct tcf_proto **qfq_tcf_chain(struct Qdisc *sch, unsigned long cl)
  {
  	struct qfq_sched *q = qdisc_priv(sch);
  
  	if (cl)
  		return NULL;
  
  	return &q->filter_list;
  }
  
  static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
  				  u32 classid)
  {
  	struct qfq_class *cl = qfq_find_class(sch, classid);
  
  	if (cl != NULL)
  		cl->filter_cnt++;
  
  	return (unsigned long)cl;
  }
  
  static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
  {
  	struct qfq_class *cl = (struct qfq_class *)arg;
  
  	cl->filter_cnt--;
  }
  
  static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
  			   struct Qdisc *new, struct Qdisc **old)
  {
  	struct qfq_class *cl = (struct qfq_class *)arg;
  
  	if (new == NULL) {
  		new = qdisc_create_dflt(sch->dev_queue,
  					&pfifo_qdisc_ops, cl->common.classid);
  		if (new == NULL)
  			new = &noop_qdisc;
  	}
  
  	sch_tree_lock(sch);
  	qfq_purge_queue(cl);
  	*old = cl->qdisc;
  	cl->qdisc = new;
  	sch_tree_unlock(sch);
  	return 0;
  }
  
  static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg)
  {
  	struct qfq_class *cl = (struct qfq_class *)arg;
  
  	return cl->qdisc;
  }
  
  static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
  			  struct sk_buff *skb, struct tcmsg *tcm)
  {
  	struct qfq_class *cl = (struct qfq_class *)arg;
  	struct nlattr *nest;
  
  	tcm->tcm_parent	= TC_H_ROOT;
  	tcm->tcm_handle	= cl->common.classid;
  	tcm->tcm_info	= cl->qdisc->handle;
  
  	nest = nla_nest_start(skb, TCA_OPTIONS);
  	if (nest == NULL)
  		goto nla_put_failure;
  	if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) ||
  	    nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax))
  		goto nla_put_failure;
  	return nla_nest_end(skb, nest);
  
  nla_put_failure:
  	nla_nest_cancel(skb, nest);
  	return -EMSGSIZE;
  }
  
  static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
  				struct gnet_dump *d)
  {
  	struct qfq_class *cl = (struct qfq_class *)arg;
  	struct tc_qfq_stats xstats;
  
  	memset(&xstats, 0, sizeof(xstats));
  	cl->qdisc->qstats.qlen = cl->qdisc->q.qlen;
  	xstats.weight = cl->agg->class_weight;
  	xstats.lmax = cl->agg->lmax;
  
  	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
  	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
  	    gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0)
  		return -1;
  
  	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
  }
  
  static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
  {
  	struct qfq_sched *q = qdisc_priv(sch);
  	struct qfq_class *cl;
  	unsigned int i;
  
  	if (arg->stop)
  		return;
  
  	for (i = 0; i < q->clhash.hashsize; i++) {
  		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
  			if (arg->count < arg->skip) {
  				arg->count++;
  				continue;
  			}
  			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
  				arg->stop = 1;
  				return;
  			}
  			arg->count++;
  		}
  	}
  }
  
  static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
  				      int *qerr)
  {
  	struct qfq_sched *q = qdisc_priv(sch);
  	struct qfq_class *cl;
  	struct tcf_result res;
  	int result;
  
  	if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
  		pr_debug("qfq_classify: found %d
  ", skb->priority);
  		cl = qfq_find_class(sch, skb->priority);
  		if (cl != NULL)
  			return cl;
  	}
  
  	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
  	result = tc_classify(skb, q->filter_list, &res);
  	if (result >= 0) {
  #ifdef CONFIG_NET_CLS_ACT
  		switch (result) {
  		case TC_ACT_QUEUED:
  		case TC_ACT_STOLEN:
  			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
  		case TC_ACT_SHOT:
  			return NULL;
  		}
  #endif
  		cl = (struct qfq_class *)res.class;
  		if (cl == NULL)
  			cl = qfq_find_class(sch, res.classid);
  		return cl;
  	}
  
  	return NULL;
  }
  
  /* Generic comparison function, handling wraparound. */
  static inline int qfq_gt(u64 a, u64 b)
  {
  	return (s64)(a - b) > 0;
  }
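/*
 * Example: with timestamps near the 64-bit wrap point, a = 5 and
 * b = 0xfffffffffffffff0 give a - b = 21 (mod 2^64), so (s64)(a - b) > 0
 * and a is correctly treated as the later time, even though a < b as
 * plain unsigned values.
 */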
  
  /* Round a precise timestamp to its slotted value. */
  static inline u64 qfq_round_down(u64 ts, unsigned int shift)
  {
  	return ts & ~((1ULL << shift) - 1);
  }
  
  /* return the pointer to the group with lowest index in the bitmap */
  static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
  					unsigned long bitmap)
  {
  	int index = __ffs(bitmap);
  	return &q->groups[index];
  }
  /* Calculate a mask to mimic what would be ffs_from(). */
  static inline unsigned long mask_from(unsigned long bitmap, int from)
  {
  	return bitmap & ~((1UL << from) - 1);
  }
  
  /*
   * The state computation relies on ER=0, IR=1, EB=2, IB=3
   * First compute eligibility comparing grp->S, q->V,
   * then check if someone is blocking us and possibly add EB
   */
  static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp)
  {
  	/* if S > V we are not eligible */
  	unsigned int state = qfq_gt(grp->S, q->V);
  	unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
  	struct qfq_group *next;
  
  	if (mask) {
  		next = qfq_ffs(q, mask);
  		if (qfq_gt(grp->F, next->F))
  			state |= EB;
  	}
  
  	return state;
  }
  
  
  /*
   * In principle
   *	q->bitmaps[dst] |= q->bitmaps[src] & mask;
   *	q->bitmaps[src] &= ~mask;
   * but we should make sure that src != dst
   */
  static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask,
  				   int src, int dst)
  {
  	q->bitmaps[dst] |= q->bitmaps[src] & mask;
  	q->bitmaps[src] &= ~mask;
  }
  
  static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F)
  {
  	unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
  	struct qfq_group *next;
  
  	if (mask) {
  		next = qfq_ffs(q, mask);
  		if (!qfq_gt(next->F, old_F))
  			return;
  	}
  
  	mask = (1UL << index) - 1;
  	qfq_move_groups(q, mask, EB, ER);
  	qfq_move_groups(q, mask, IB, IR);
  }
  
  /*
   * perhaps
   *
  	old_V ^= q->V;
  	old_V >>= q->min_slot_shift;
  	if (old_V) {
  		...
  	}
   *
   */
  static void qfq_make_eligible(struct qfq_sched *q)
  {
  	unsigned long vslot = q->V >> q->min_slot_shift;
  	unsigned long old_vslot = q->oldV >> q->min_slot_shift;
  
  	if (vslot != old_vslot) {
  		unsigned long mask;
  		int last_flip_pos = fls(vslot ^ old_vslot);
  
  		if (last_flip_pos > 31) /* higher than the number of groups */
  			mask = ~0UL;    /* make all groups eligible */
  		else
  			mask = (1UL << last_flip_pos) - 1;
  		qfq_move_groups(q, mask, IR, ER);
  		qfq_move_groups(q, mask, IB, EB);
  	}
  }
  /*
   * The index of the slot in which the input aggregate agg is to be
   * inserted must not be higher than QFQ_MAX_SLOTS-2. There is a '-2'
   * and not a '-1' because the start time of the group may be moved
   * backward by one slot after the aggregate has been inserted, and
   * this would cause non-empty slots to be right-shifted by one
   * position.
   *
   * QFQ+ fully satisfies this bound to the slot index if the parameters
   * of the classes are not changed dynamically, and if QFQ+ never
   * happens to postpone the service of agg unjustly, i.e., it never
   * happens that the aggregate becomes backlogged and eligible, or just
   * eligible, while an aggregate with a higher approximated finish time
   * is being served. In particular, in this case QFQ+ guarantees that
   * the timestamps of agg are low enough that the slot index is never
   * higher than 2. Unfortunately, QFQ+ cannot provide the same
   * guarantee if it happens to unjustly postpone the service of agg, or
   * if the parameters of some class are changed.
   *
   * As for the first event, i.e., an out-of-order service, the
   * upper bound to the slot index guaranteed by QFQ+ grows to
   * 2 +
   * QFQ_MAX_AGG_CLASSES * ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) *
   * (current_max_weight/current_wsum) <= 2 + 8 * 128 * 1.
   *
   * The following function deals with this problem by backward-shifting
   * the timestamps of agg, if needed, so as to guarantee that the slot
   * index is never higher than QFQ_MAX_SLOTS-2. This backward-shift may
   * cause the service of other aggregates to be postponed, yet the
   * worst-case guarantees of these aggregates are not violated.  In
   * fact, in case of no out-of-order service, the timestamps of agg
   * would have been even lower than they are after the backward shift,
   * because QFQ+ would have guaranteed a maximum value equal to 2 for
   * the slot index, and 2 < QFQ_MAX_SLOTS-2. Hence the aggregates whose
   * service is postponed because of the backward-shift would have
   * however waited for the service of agg before being served.
   *
   * The other event that may cause the slot index to be higher than 2
   * for agg is a recent change of the parameters of some class. If the
   * weight of a class is increased or the lmax (max_pkt_size) of the
   * class is decreased, then a new aggregate with smaller slot size
   * than the original parent aggregate of the class may happen to be
   * activated. The activation of this aggregate should be properly
   * delayed to when the service of the class has finished in the ideal
   * system tracked by QFQ+. If the activation of the aggregate is not
   * delayed to this reference time instant, then this aggregate may be
   * unjustly served before other aggregates waiting for service. This
   * may cause the above bound to the slot index to be violated for some
   * of these unlucky aggregates.
   *
   * Instead of delaying the activation of the new aggregate, which is
   * quite complex, the above-discussed capping of the slot index is
   * used to handle also the consequences of a change of the parameters
   * of a class.
   */
  static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg,
  			    u64 roundedS)
  {
  	u64 slot = (roundedS - grp->S) >> grp->slot_shift;
  	unsigned int i; /* slot index in the bucket list */
  
  	if (unlikely(slot > QFQ_MAX_SLOTS - 2)) {
  		u64 deltaS = roundedS - grp->S -
  			((u64)(QFQ_MAX_SLOTS - 2)<<grp->slot_shift);
  		agg->S -= deltaS;
  		agg->F -= deltaS;
  		slot = QFQ_MAX_SLOTS - 2;
  	}
  
  	i = (grp->front + slot) % QFQ_MAX_SLOTS;

  	hlist_add_head(&agg->next, &grp->slots[i]);
  	__set_bit(slot, &grp->full_slots);
  }
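/*
 * For instance, if roundedS - grp->S == 3ULL << grp->slot_shift, the
 * aggregate goes three slots past the front, i.e. into bucket
 * i = (grp->front + 3) % QFQ_MAX_SLOTS, and bit 3 of full_slots is set;
 * the capping above only kicks in when that relative slot would exceed
 * QFQ_MAX_SLOTS - 2.
 */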
  
  /* Maybe introduce hlist_first_entry?? */
  static struct qfq_aggregate *qfq_slot_head(struct qfq_group *grp)
  {
  	return hlist_entry(grp->slots[grp->front].first,
  			   struct qfq_aggregate, next);
  }
  
  /*
   * remove the entry from the slot
   */
  static void qfq_front_slot_remove(struct qfq_group *grp)
  {
  	struct qfq_aggregate *agg = qfq_slot_head(grp);

  	BUG_ON(!agg);
  	hlist_del(&agg->next);
  	if (hlist_empty(&grp->slots[grp->front]))
  		__clear_bit(0, &grp->full_slots);
  }
  
  /*
   * Returns the first aggregate in the first non-empty bucket of the
   * group. As a side effect, adjusts the bucket list so the first
   * non-empty bucket is at position 0 in full_slots.
   */
  static struct qfq_aggregate *qfq_slot_scan(struct qfq_group *grp)
  {
  	unsigned int i;
  
  	pr_debug("qfq slot_scan: grp %u full %#lx
  ",
  		 grp->index, grp->full_slots);
  
  	if (grp->full_slots == 0)
  		return NULL;
  
  	i = __ffs(grp->full_slots);  /* zero based */
  	if (i > 0) {
  		grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
  		grp->full_slots >>= i;
  	}
  
  	return qfq_slot_head(grp);
  }
  
  /*
   * adjust the bucket list. When the start time of a group decreases,
   * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to
   * move the objects. The mask of occupied slots must be shifted
   * because we use ffs() to find the first non-empty slot.
   * This covers decreases in the group's start time, but what about
   * increases of the start time ?
   * Here too we should make sure that i is less than 32
   */
  static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS)
  {
  	unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
  
  	grp->full_slots <<= i;
  	grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
  }
  static void qfq_update_eligible(struct qfq_sched *q)
  {
  	struct qfq_group *grp;
  	unsigned long ineligible;
  
  	ineligible = q->bitmaps[IR] | q->bitmaps[IB];
  	if (ineligible) {
  		if (!q->bitmaps[ER]) {
  			grp = qfq_ffs(q, ineligible);
  			if (qfq_gt(grp->S, q->V))
  				q->V = grp->S;
  		}
  		qfq_make_eligible(q);
  	}
  }
  /* Dequeue head packet of the head class in the DRR queue of the aggregate. */
  static void agg_dequeue(struct qfq_aggregate *agg,
  			struct qfq_class *cl, unsigned int len)
  {
  	qdisc_dequeue_peeked(cl->qdisc);

  	cl->deficit -= (int) len;

  	if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */
  		list_del(&cl->alist);
  	else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) {
  		cl->deficit += agg->lmax;
  		list_move_tail(&cl->alist, &agg->active);
  	}
  }
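/*
 * DRR example: a class whose aggregate has lmax = 1500 starts a round with
 * deficit = 1500.  After a 1000-byte packet is dequeued the deficit is 500;
 * if the next head packet is 800 bytes, the deficit is refilled to 2000 and
 * the class is moved to the tail of agg->active, so the other classes of
 * the aggregate are served first.
 */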
  
  static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
  					   struct qfq_class **cl,
  					   unsigned int *len)
  {
  	struct sk_buff *skb;

  	*cl = list_first_entry(&agg->active, struct qfq_class, alist);
  	skb = (*cl)->qdisc->ops->peek((*cl)->qdisc);
  	if (skb == NULL)
		WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n");
  	else
  		*len = qdisc_pkt_len(skb);
  
  	return skb;
  }
  
  /* Update F according to the actual service received by the aggregate. */
  static inline void charge_actual_service(struct qfq_aggregate *agg)
  {
  	/* Compute the service received by the aggregate, taking into
  	 * account that, after decreasing the number of classes in
  	 * agg, it may happen that
	 * agg->initial_budget - agg->budget > agg->budgetmax
  	 */
  	u32 service_received = min(agg->budgetmax,
  				   agg->initial_budget - agg->budget);
  
  	agg->F = agg->S + (u64)service_received * agg->inv_w;
  }
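/*
 * For example, with initial_budget = budgetmax = 3000 and budget = 800 left
 * when the aggregate stops being served, service_received = min(3000,
 * 3000 - 800) = 2200, and F advances by 2200 * inv_w, i.e. by the actual
 * service received divided by the aggregate weight, in fixed point.
 */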
  /* Assign a reasonable start time for a new aggregate in group i.
   * Admissible values for \hat(F) are multiples of \sigma_i
   * no greater than V+\sigma_i . Larger values mean that
   * we had a wraparound so we consider the timestamp to be stale.
   *
   * If F is not stale and F >= V then we set S = F.
   * Otherwise we should assign S = V, but this may violate
   * the ordering in EB (see [2]). So, if we have groups in ER,
   * set S to the F_j of the first group j which would be blocking us.
   * We are guaranteed not to move S backward because
   * otherwise our group i would still be blocked.
   */
  static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg)
  {
  	unsigned long mask;
  	u64 limit, roundedF;
  	int slot_shift = agg->grp->slot_shift;
  
  	roundedF = qfq_round_down(agg->F, slot_shift);
  	limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);
  
  	if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) {
  		/* timestamp was stale */
  		mask = mask_from(q->bitmaps[ER], agg->grp->index);
  		if (mask) {
  			struct qfq_group *next = qfq_ffs(q, mask);
  			if (qfq_gt(roundedF, next->F)) {
  				if (qfq_gt(limit, next->F))
  					agg->S = next->F;
  				else /* preserve timestamp correctness */
  					agg->S = limit;
  				return;
  			}
  		}
  		agg->S = q->V;
  	} else  /* timestamp is not stale */
  		agg->S = agg->F;
  }
  
  /* Update the timestamps of agg before scheduling/rescheduling it for
   * service.  In particular, assign to agg->F its maximum possible
   * value, i.e., the virtual finish time with which the aggregate
   * should be labeled if it used all its budget once in service.
   */
  static inline void
  qfq_update_agg_ts(struct qfq_sched *q,
  		    struct qfq_aggregate *agg, enum update_reason reason)
  {
  	if (reason != requeue)
  		qfq_update_start(q, agg);
  	else /* just charge agg for the service received */
  		agg->S = agg->F;
  
  	agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w;
  }
  
  static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg);
  static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
  {
  	struct qfq_sched *q = qdisc_priv(sch);
  	struct qfq_aggregate *in_serv_agg = q->in_serv_agg;
  	struct qfq_class *cl;
  	struct sk_buff *skb = NULL;
  	/* next-packet len, 0 means no more active classes in in-service agg */
  	unsigned int len = 0;

  	if (in_serv_agg == NULL)
  		return NULL;
  	if (!list_empty(&in_serv_agg->active))
  		skb = qfq_peek_skb(in_serv_agg, &cl, &len);

  	/*
  	 * If there are no active classes in the in-service aggregate,
  	 * or if the aggregate has not enough budget to serve its next
  	 * class, then choose the next aggregate to serve.
  	 */
  	if (len == 0 || in_serv_agg->budget < len) {
  		charge_actual_service(in_serv_agg);
  
  		/* recharge the budget of the aggregate */
  		in_serv_agg->initial_budget = in_serv_agg->budget =
  			in_serv_agg->budgetmax;
  		if (!list_empty(&in_serv_agg->active)) {
  			/*
  			 * Still active: reschedule for
  			 * service. Possible optimization: if no other
  			 * aggregate is active, then there is no point
  			 * in rescheduling this aggregate, and we can
  			 * just keep it as the in-service one. This
  			 * should be however a corner case, and to
  			 * handle it, we would need to maintain an
  			 * extra num_active_aggs field.
  			*/
  			qfq_update_agg_ts(q, in_serv_agg, requeue);
  			qfq_schedule_agg(q, in_serv_agg);
  		} else if (sch->q.qlen == 0) { /* no aggregate to serve */
  			q->in_serv_agg = NULL;
  			return NULL;
  		}
  
  		/*
  		 * If we get here, there are other aggregates queued:
  		 * choose the new aggregate to serve.
  		 */
  		in_serv_agg = q->in_serv_agg = qfq_choose_next_agg(q);
  		skb = qfq_peek_skb(in_serv_agg, &cl, &len);
  	}
  	if (!skb)
  		return NULL;
  
  	sch->q.qlen--;
  	qdisc_bstats_update(sch, skb);
  	agg_dequeue(in_serv_agg, cl, len);
  	/* If lmax is lowered, through qfq_change_class, for a class
  	 * owning pending packets with larger size than the new value
  	 * of lmax, then the following condition may hold.
  	 */
  	if (unlikely(in_serv_agg->budget < len))
  		in_serv_agg->budget = 0;
  	else
  		in_serv_agg->budget -= len;
  	q->V += (u64)len * q->iwsum;
  	pr_debug("qfq dequeue: len %u F %lld now %lld
  ",
  		 len, (unsigned long long) in_serv_agg->F,
  		 (unsigned long long) q->V);

  	return skb;
  }

  static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q)
  {
  	struct qfq_group *grp;
  	struct qfq_aggregate *agg, *new_front_agg;
  	u64 old_F;

  	qfq_update_eligible(q);
  	q->oldV = q->V;
  
  	if (!q->bitmaps[ER])
  		return NULL;
  
  	grp = qfq_ffs(q, q->bitmaps[ER]);
  	old_F = grp->F;
  
  	agg = qfq_slot_head(grp);

  	/* agg starts to be served, remove it from schedule */
  	qfq_front_slot_remove(grp);
  
  	new_front_agg = qfq_slot_scan(grp);
  
  	if (new_front_agg == NULL) /* group is now inactive, remove from ER */
  		__clear_bit(grp->index, &q->bitmaps[ER]);
  	else {
  		u64 roundedS = qfq_round_down(new_front_agg->S,
  					      grp->slot_shift);
  		unsigned int s;
  
  		if (grp->S == roundedS)
  			return agg;
  		grp->S = roundedS;
  		grp->F = roundedS + (2ULL << grp->slot_shift);
  		__clear_bit(grp->index, &q->bitmaps[ER]);
  		s = qfq_calc_state(q, grp);
  		__set_bit(grp->index, &q->bitmaps[s]);
  	}
  	qfq_unblock_groups(q, grp->index, old_F);

  	return agg;
  }
  static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
  {
  	struct qfq_sched *q = qdisc_priv(sch);
  	struct qfq_class *cl;
  	struct qfq_aggregate *agg;
  	int err = 0;
  
  	cl = qfq_classify(skb, sch, &err);
  	if (cl == NULL) {
  		if (err & __NET_XMIT_BYPASS)
  			sch->qstats.drops++;
  		kfree_skb(skb);
  		return err;
  	}
  	pr_debug("qfq_enqueue: cl = %x
  ", cl->common.classid);
  	if (unlikely(cl->agg->lmax < qdisc_pkt_len(skb))) {
  		pr_debug("qfq: increasing maxpkt from %u to %u for class %u",
  			 cl->agg->lmax, qdisc_pkt_len(skb), cl->common.classid);
  		err = qfq_change_agg(sch, cl, cl->agg->class_weight,
  				     qdisc_pkt_len(skb));
  		if (err)
  			return err;
  	}
  	err = qdisc_enqueue(skb, cl->qdisc);
  	if (unlikely(err != NET_XMIT_SUCCESS)) {
  		pr_debug("qfq_enqueue: enqueue failed %d
  ", err);
  		if (net_xmit_drop_count(err)) {
  			cl->qstats.drops++;
  			sch->qstats.drops++;
  		}
  		return err;
  	}
  
  	bstats_update(&cl->bstats, skb);
  	++sch->q.qlen;
  	agg = cl->agg;
  	/* if the queue was not empty, then done here */
  	if (cl->qdisc->q.qlen != 1) {
  		if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) &&
  		    list_first_entry(&agg->active, struct qfq_class, alist)
  		    == cl && cl->deficit < qdisc_pkt_len(skb))
  			list_move_tail(&cl->alist, &agg->active);
  		return err;
  	}
  
  	/* schedule class for service within the aggregate */
  	cl->deficit = agg->lmax;
  	list_add_tail(&cl->alist, &agg->active);

  	if (list_first_entry(&agg->active, struct qfq_class, alist) != cl ||
  	    q->in_serv_agg == agg)
  		return err; /* non-empty or in service, nothing else to do */

  	qfq_activate_agg(q, agg, enqueue);
  
  	return err;
  }
  
  /*
   * Schedule aggregate according to its timestamps.
   */
  static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
  {
  	struct qfq_group *grp = agg->grp;
  	u64 roundedS;
  	int s;
  	roundedS = qfq_round_down(agg->S, grp->slot_shift);
  
  	/*
  	 * Insert agg in the correct bucket.
  	 * If agg->S >= grp->S we don't need to adjust the
  	 * bucket list and simply go to the insertion phase.
  	 * Otherwise grp->S is decreasing, we must make room
  	 * in the bucket list, and also recompute the group state.
  	 * Finally, if there were no flows in this group and nobody
  	 * was in ER make sure to adjust V.
  	 */
  	if (grp->full_slots) {
  		if (!qfq_gt(grp->S, agg->S))
  			goto skip_update;
  		/* create a slot for this agg->S */
  		qfq_slot_rotate(grp, roundedS);
  		/* group was surely ineligible, remove */
  		__clear_bit(grp->index, &q->bitmaps[IR]);
  		__clear_bit(grp->index, &q->bitmaps[IB]);
  	} else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V) &&
  		   q->in_serv_agg == NULL)
  		q->V = roundedS;
  
  	grp->S = roundedS;
  	grp->F = roundedS + (2ULL << grp->slot_shift);
  	s = qfq_calc_state(q, grp);
  	__set_bit(grp->index, &q->bitmaps[s]);
  
  	pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld
  ",
  		 s, q->bitmaps[s],
  		 (unsigned long long) agg->S,
  		 (unsigned long long) agg->F,
  		 (unsigned long long) q->V);
  
  skip_update:
  	qfq_slot_insert(grp, agg, roundedS);
  }
  /* Update agg ts and schedule agg for service */
  static void qfq_activate_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
  			     enum update_reason reason)
  {
  	agg->initial_budget = agg->budget = agg->budgetmax; /* recharge budg. */
  	qfq_update_agg_ts(q, agg, reason);
  	if (q->in_serv_agg == NULL) { /* no aggr. in service or scheduled */
  		q->in_serv_agg = agg; /* start serving this aggregate */
  		 /* update V: to be in service, agg must be eligible */
  		q->oldV = q->V = agg->S;
  	} else if (agg != q->in_serv_agg)
  		qfq_schedule_agg(q, agg);
  }
  static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
  			    struct qfq_aggregate *agg)
  {
  	unsigned int i, offset;
  	u64 roundedS;
  	roundedS = qfq_round_down(agg->S, grp->slot_shift);
  	offset = (roundedS - grp->S) >> grp->slot_shift;

  	i = (grp->front + offset) % QFQ_MAX_SLOTS;
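  	/*
  	 * Added illustration (hypothetical values): with grp->slot_shift
  	 * == 10, grp->S == 0x2c00 and agg->S == 0x3a10, roundedS == 0x3800
  	 * and offset == (0x3800 - 0x2c00) >> 10 == 3, so the aggregate is
  	 * found three buckets after the one at grp->front, modulo
  	 * QFQ_MAX_SLOTS.
  	 */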
  	hlist_del(&agg->next);
  	if (hlist_empty(&grp->slots[i]))
  		__clear_bit(offset, &grp->full_slots);
  }
  
  /*
   * Called to forcibly deschedule an aggregate.  If the aggregate is
   * not in the front bucket, or if the front bucket still contains
   * other aggregates after the removal, we can simply remove the
   * aggregate with no other side effects.
   * Otherwise we must propagate the event up.
   */
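  /*
   * Added note: the body below distinguishes three cases: (1) the
   * aggregate is currently in service, so its actual service is charged
   * and a new in-service aggregate is chosen; (2) the removal empties
   * the whole group, so the group is cleared from the state bitmaps and
   * groups blocked behind it may be unblocked; (3) the removal empties
   * only the front slot, so the group timestamps are recomputed from
   * the next occupied slot and the group state is re-evaluated.
   */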
  static void qfq_deactivate_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
  {
  	struct qfq_group *grp = agg->grp;
  	unsigned long mask;
  	u64 roundedS;
  	int s;
  	if (agg == q->in_serv_agg) {
  		charge_actual_service(agg);
  		q->in_serv_agg = qfq_choose_next_agg(q);
  		return;
  	}
  
  	agg->F = agg->S;
  	qfq_slot_remove(q, grp, agg);
  
  	if (!grp->full_slots) {
  		__clear_bit(grp->index, &q->bitmaps[IR]);
  		__clear_bit(grp->index, &q->bitmaps[EB]);
  		__clear_bit(grp->index, &q->bitmaps[IB]);
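  		/*
  		 * Added note: grp is now empty.  If it was in ER, groups
  		 * that were held in the blocked states (EB/IB) may be
  		 * allowed to advance again: the mask computed below selects
  		 * the group indexes concerned, and qfq_move_groups()
  		 * promotes them from EB to ER and from IB to IR (see [1]
  		 * and [2] for the exact state-transition rules).
  		 */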
  
  		if (test_bit(grp->index, &q->bitmaps[ER]) &&
  		    !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
  			mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
  			if (mask)
  				mask = ~((1UL << __fls(mask)) - 1);
  			else
  				mask = ~0UL;
  			qfq_move_groups(q, mask, EB, ER);
  			qfq_move_groups(q, mask, IB, IR);
  		}
  		__clear_bit(grp->index, &q->bitmaps[ER]);
  	} else if (hlist_empty(&grp->slots[grp->front])) {
  		agg = qfq_slot_scan(grp);
  		roundedS = qfq_round_down(agg->S, grp->slot_shift);
  		if (grp->S != roundedS) {
  			__clear_bit(grp->index, &q->bitmaps[ER]);
  			__clear_bit(grp->index, &q->bitmaps[IR]);
  			__clear_bit(grp->index, &q->bitmaps[EB]);
  			__clear_bit(grp->index, &q->bitmaps[IB]);
  			grp->S = roundedS;
  			grp->F = roundedS + (2ULL << grp->slot_shift);
  			s = qfq_calc_state(q, grp);
  			__set_bit(grp->index, &q->bitmaps[s]);
  		}
  	}
  }
  
  static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
  {
  	struct qfq_sched *q = qdisc_priv(sch);
  	struct qfq_class *cl = (struct qfq_class *)arg;
  
  	if (cl->qdisc->q.qlen == 0)
  		qfq_deactivate_class(q, cl);
  }
  static unsigned int qfq_drop_from_slot(struct qfq_sched *q,
  				       struct hlist_head *slot)
  {
  	struct qfq_aggregate *agg;
  	struct qfq_class *cl;
  	unsigned int len;
  	hlist_for_each_entry(agg, slot, next) {
  		list_for_each_entry(cl, &agg->active, alist) {
  
  			if (!cl->qdisc->ops->drop)
  				continue;
  
  			len = cl->qdisc->ops->drop(cl->qdisc);
  			if (len > 0) {
  				if (cl->qdisc->q.qlen == 0)
  					qfq_deactivate_class(q, cl);
  
  				return len;
  			}
  		}
  	}
  	return 0;
  }
  static unsigned int qfq_drop(struct Qdisc *sch)
  {
  	struct qfq_sched *q = qdisc_priv(sch);
  	struct qfq_group *grp;
  	unsigned int i, j, len;
  
  	for (i = 0; i <= QFQ_MAX_INDEX; i++) {
  		grp = &q->groups[i];
  		for (j = 0; j < QFQ_MAX_SLOTS; j++) {
  			len = qfq_drop_from_slot(q, &grp->slots[j]);
  			if (len > 0) {
  				sch->q.qlen--;
  				return len;
  			}
  		}
  	}
  
  	return 0;
  }
  
  static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
  {
  	struct qfq_sched *q = qdisc_priv(sch);
  	struct qfq_group *grp;
  	int i, j, err;
  	u32 max_cl_shift, maxbudg_shift, max_classes;
  
  	err = qdisc_class_hash_init(&q->clhash);
  	if (err < 0)
  		return err;
  	if (qdisc_dev(sch)->tx_queue_len + 1 > QFQ_MAX_AGG_CLASSES)
  		max_classes = QFQ_MAX_AGG_CLASSES;
  	else
  		max_classes = qdisc_dev(sch)->tx_queue_len + 1;
  	/* max_cl_shift = floor(log_2(max_classes)) */
  	max_cl_shift = __fls(max_classes);
  	q->max_agg_classes = 1<<max_cl_shift;
  
  	/* maxbudg_shift = log2(max_len * max_classes_per_agg) */
  	maxbudg_shift = QFQ_MTU_SHIFT + max_cl_shift;
  	q->min_slot_shift = FRAC_BITS + maxbudg_shift - QFQ_MAX_INDEX;
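  	/*
  	 * Worked example (added; the constants are assumed from the top of
  	 * this file: QFQ_MAX_AGG_CLASSES == 8, QFQ_MTU_SHIFT == 16,
  	 * FRAC_BITS == 30, QFQ_MAX_INDEX == 24): with tx_queue_len == 1000,
  	 * max_classes is capped at 8, hence max_cl_shift == 3,
  	 * maxbudg_shift == 16 + 3 == 19 and
  	 * min_slot_shift == 30 + 19 - 24 == 25.
  	 */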
  	for (i = 0; i <= QFQ_MAX_INDEX; i++) {
  		grp = &q->groups[i];
  		grp->index = i;
  		grp->slot_shift = q->min_slot_shift + i;
  		for (j = 0; j < QFQ_MAX_SLOTS; j++)
  			INIT_HLIST_HEAD(&grp->slots[j]);
  	}
  	INIT_HLIST_HEAD(&q->nonfull_aggs);
  	return 0;
  }
  
  static void qfq_reset_qdisc(struct Qdisc *sch)
  {
  	struct qfq_sched *q = qdisc_priv(sch);
  	struct qfq_class *cl;
  	unsigned int i;

  	for (i = 0; i < q->clhash.hashsize; i++) {
  		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
  			if (cl->qdisc->q.qlen > 0)
  				qfq_deactivate_class(q, cl);

  			qdisc_reset(cl->qdisc);
  		}
  	}
  	sch->q.qlen = 0;
  }
  
  static void qfq_destroy_qdisc(struct Qdisc *sch)
  {
  	struct qfq_sched *q = qdisc_priv(sch);
  	struct qfq_class *cl;
  	struct hlist_node *next;
  	unsigned int i;
  
  	tcf_destroy_chain(&q->filter_list);
  
  	for (i = 0; i < q->clhash.hashsize; i++) {
  		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
  					  common.hnode) {
  			qfq_destroy_class(sch, cl);
  		}
  	}
  	qdisc_class_hash_destroy(&q->clhash);
  }
  
  static const struct Qdisc_class_ops qfq_class_ops = {
  	.change		= qfq_change_class,
  	.delete		= qfq_delete_class,
  	.get		= qfq_get_class,
  	.put		= qfq_put_class,
  	.tcf_chain	= qfq_tcf_chain,
  	.bind_tcf	= qfq_bind_tcf,
  	.unbind_tcf	= qfq_unbind_tcf,
  	.graft		= qfq_graft_class,
  	.leaf		= qfq_class_leaf,
  	.qlen_notify	= qfq_qlen_notify,
  	.dump		= qfq_dump_class,
  	.dump_stats	= qfq_dump_class_stats,
  	.walk		= qfq_walk,
  };
  
  static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
  	.cl_ops		= &qfq_class_ops,
  	.id		= "qfq",
  	.priv_size	= sizeof(struct qfq_sched),
  	.enqueue	= qfq_enqueue,
  	.dequeue	= qfq_dequeue,
  	.peek		= qdisc_peek_dequeued,
  	.drop		= qfq_drop,
  	.init		= qfq_init_qdisc,
  	.reset		= qfq_reset_qdisc,
  	.destroy	= qfq_destroy_qdisc,
  	.owner		= THIS_MODULE,
  };
  
  static int __init qfq_init(void)
  {
  	return register_qdisc(&qfq_qdisc_ops);
  }
  
  static void __exit qfq_exit(void)
  {
  	unregister_qdisc(&qfq_qdisc_ops);
  }
  
  module_init(qfq_init);
  module_exit(qfq_exit);
  MODULE_LICENSE("GPL");