Blame view

lib/proportions.c 9.3 KB
145ca25eb   Peter Zijlstra   lib: floating pro...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
  /*
   * Floating proportions
   *
   *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
   *
   * Description:
   *
   * The floating proportion is a time derivative with an exponentially decaying
   * history:
   *
   *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
   *
   * Where j is an element from {prop_local}, x_{j} is j's number of events,
   * and i the time period over which the differential is taken. So d/dt_{-i} is
   * the differential over the i-th last period.
   *
   * The decaying history gives smooth transitions. The time differential carries
   * the notion of speed.
   *
   * The denominator is 2^(1+i) because we want the series to be normalised, ie.
   *
   *   \Sum_{i=0} 1/2^(1+i) = 1
   *
   * Furthermore, if we measure time (t) in the same events as x, so that:
   *
   *   t = \Sum_{j} x_{j}
   *
   * we get that:
   *
   *   \Sum_{j} p_{j} = 1
   *
   * Writing this in an iterative fashion we get (dropping the 'd's):
   *
   *   if (++x_{j}, ++t > period)
   *     t /= 2;
   *     for_each (j)
   *       x_{j} /= 2;
   *
   * so that:
   *
   *   p_{j} = x_{j} / t;
   *
   * We optimize away the '/= 2' for the global time delta by noting that:
   *
   *   if (++t > period) t /= 2:
   *
   * Can be approximated by:
   *
   *   period/2 + (++t % period/2)
   *
   * [ Furthermore, when we choose period to be 2^n it can be written in terms of
   *   binary operations and wraparound artefacts disappear. ]
   *
   * Also note that this yields a natural counter of the elapsed periods:
   *
   *   c = t / (period/2)
   *
   * [ Its monotonic increasing property can be applied to mitigate the wrap-
   *   around issue. ]
   *
   * This allows us to do away with the loop over all prop_locals on each period
   * expiration. By remembering the period count under which it was last accessed
   * as c_{j}, we can obtain the number of 'missed' cycles from:
   *
   *   c - c_{j}
   *
   * We can then lazily catch up to the global period count every time we are
   * going to use x_{j}, by doing:
   *
   *   x_{j} /= 2^(c - c_{j}), c_{j} = c
   */
  
  #include <linux/proportions.h>
  #include <linux/rcupdate.h>
145ca25eb   Peter Zijlstra   lib: floating pro...
75
76
77
78
79
80
81
82
83
84
  /*
   * Initialise a proportion descriptor.
   *
   * @pd:    descriptor to set up
   * @shift: requested period shift; values above PROP_MAX_SHIFT are clamped
   *
   * pg[0] becomes the active copy (index 0) and receives the clamped shift.
   * Both per-cpu event counters are initialised to zero.
   *
   * Returns the result of percpu_counter_init(); a non-zero value means
   * failure. If the second counter cannot be initialised the first one is
   * destroyed again before returning.
   *
   * NOTE(review): other functions in this file compute 1UL << (shift - 1),
   * so callers presumably pass shift >= 1 - confirm against callers.
   */
  int prop_descriptor_init(struct prop_descriptor *pd, int shift)
  {
  	int ret;

  	pd->index = 0;
  	pd->pg[0].shift = (shift > PROP_MAX_SHIFT) ? PROP_MAX_SHIFT : shift;
  	mutex_init(&pd->mutex);

  	ret = percpu_counter_init(&pd->pg[0].events, 0);
  	if (ret)
  		return ret;

  	ret = percpu_counter_init(&pd->pg[1].events, 0);
  	if (ret)
  		percpu_counter_destroy(&pd->pg[0].events);

  	return ret;
  }
  
  /*
   * We have two copies, and flip between them to make it seem like an atomic
   * update. The update is not really atomic wrt the events counter, but
   * it is internally consistent with the bit layout depending on shift.
   *
   * We copy the events count, move the bits around and flip the index.
   */
  void prop_change_shift(struct prop_descriptor *pd, int shift)
  {
  	int index;
  	int offset;
  	u64 events;
  	unsigned long flags;
  
  	if (shift > PROP_MAX_SHIFT)
  		shift = PROP_MAX_SHIFT;
  
  	mutex_lock(&pd->mutex);
  
  	index = pd->index ^ 1;
  	offset = pd->pg[pd->index].shift - shift;
  	if (!offset)
  		goto out;
  
  	pd->pg[index].shift = shift;
  
  	local_irq_save(flags);
  	events = percpu_counter_sum(&pd->pg[pd->index].events);
  	if (offset < 0)
  		events <<= -offset;
  	else
  		events >>= offset;
  	percpu_counter_set(&pd->pg[index].events, events);
  
  	/*
  	 * ensure the new pg is fully written before the switch
  	 */
  	smp_wmb();
  	pd->index = index;
  	local_irq_restore(flags);
  
  	synchronize_rcu();
  
  out:
  	mutex_unlock(&pd->mutex);
  }
  
  /*
   * wrap the access to the data in an rcu_read_lock() section;
   * this is used to track the active references.
   */
  static struct prop_global *prop_get_global(struct prop_descriptor *pd)
300796774   Harvey Harrison   lib: proportions....
148
  __acquires(RCU)
145ca25eb   Peter Zijlstra   lib: floating pro...
149
150
151
152
153
154
155
156
157
158
159
160
161
  {
  	int index;
  
  	rcu_read_lock();
  	index = pd->index;
  	/*
  	 * match the wmb from vcd_flip()
  	 */
  	smp_rmb();
  	return &pd->pg[index];
  }
  
  static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
300796774   Harvey Harrison   lib: proportions....
162
  __releases(RCU)
145ca25eb   Peter Zijlstra   lib: floating pro...
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
  {
  	rcu_read_unlock();
  }
  
  /*
   * Bring a local's period counter in line with a new shift.
   *
   * @pl_shift:  shift the local period is currently expressed in (updated)
   * @pl_period: local period value (rescaled in place)
   * @new_shift: shift to convert to
   *
   * The period is shifted left when the shift grows and right when it
   * shrinks; nothing is written when the shift is unchanged.
   */
  static void
  prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
  {
  	int delta = new_shift - *pl_shift;

  	if (delta == 0)
  		return;

  	if (delta > 0)
  		*pl_period <<= delta;
  	else
  		*pl_period >>= -delta;

  	*pl_shift = new_shift;
  }
  
  /*
   * PERCPU
   */
f16b34aa1   Peter Zijlstra   lib: proportion: ...
186
  /*
   * Batch size handed to __percpu_counter_add() for the per-local event
   * counters. It scales with log2 of nr_cpu_ids, and prop_norm_percpu()
   * uses nr_cpu_ids * PROP_BATCH as the threshold below which it takes an
   * exact percpu_counter_sum() instead of the approximate read.
   */
  #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
145ca25eb   Peter Zijlstra   lib: floating pro...
187
188
189
190
191
  /*
   * Initialise a per-cpu local: lock, zero shift, zero period and a zeroed
   * per-cpu event counter.
   *
   * Returns whatever percpu_counter_init() returns for the event counter.
   */
  int prop_local_init_percpu(struct prop_local_percpu *pl)
  {
  	pl->period = 0;
  	pl->shift = 0;
  	spin_lock_init(&pl->lock);

  	return percpu_counter_init(&pl->events, 0);
  }
  
  /*
   * Tear down a per-cpu local by destroying its event counter, the
   * counterpart of the percpu_counter_init() in prop_local_init_percpu().
   */
  void prop_local_destroy_percpu(struct prop_local_percpu *pl)
  {
  	percpu_counter_destroy(&pl->events);
  }
  
  /*
   * Catch up with missed period expirations.
   *
   *   until (c_{j} == c)
   *     x_{j} -= x_{j}/2;
   *     c_{j}++;
   *
   * @pg: active global state (shift and global event counter)
   * @pl: per-cpu local whose event count is brought up to date
   */
  static
  void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
  {
  	unsigned long half = 1UL << (pg->shift - 1);
  	unsigned long global_period;
  	unsigned long missed;
  	unsigned long flags;

  	/* round the global event count down to a multiple of period/2 */
  	global_period = percpu_counter_read(&pg->events);
  	global_period &= ~(half - 1);

  	/*
  	 * Fast path - check if the local and global period count still match
  	 * outside of the lock.
  	 */
  	if (pl->period == global_period)
  		return;

  	spin_lock_irqsave(&pl->lock, flags);
  	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);

  	/*
  	 * For each missed period, we halve the local counter.
  	 * basically:
  	 *   pl->events >> (global_period - pl->period);
  	 */
  	missed = (global_period - pl->period) >> (pg->shift - 1);
  	if (missed >= BITS_PER_LONG) {
  		/* shifting by that much would leave nothing anyway */
  		percpu_counter_set(&pl->events, 0);
  	} else {
  		s64 val = percpu_counter_read(&pl->events);

  		/*
  		 * Below this threshold the approximate read is not used;
  		 * take the exact sum instead.
  		 */
  		if (val < (nr_cpu_ids * PROP_BATCH))
  			val = percpu_counter_sum(&pl->events);

  		/* subtract the part that decayed: val - (val >> missed) */
  		__percpu_counter_add(&pl->events, -val + (val >> missed),
  					PROP_BATCH);
  	}
  	pl->period = global_period;
  	spin_unlock_irqrestore(&pl->lock, flags);
  }
  
  /*
   *   ++x_{j}, ++t
   */
  void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
  {
  	struct prop_global *pg = prop_get_global(pd);
  
  	prop_norm_percpu(pg, pl);
f16b34aa1   Peter Zijlstra   lib: proportion: ...
256
  	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
145ca25eb   Peter Zijlstra   lib: floating pro...
257
258
259
260
261
  	percpu_counter_add(&pg->events, 1);
  	prop_put_global(pd, pg);
  }
  
  /*
a42dde041   Peter Zijlstra   mm: bdi: allow se...
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
   * identical to __prop_inc_percpu, except that it limits this pl's fraction to
   * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
   */
  void __prop_inc_percpu_max(struct prop_descriptor *pd,
  			   struct prop_local_percpu *pl, long frac)
  {
  	struct prop_global *pg = prop_get_global(pd);
  
  	prop_norm_percpu(pg, pl);
  
  	if (unlikely(frac != PROP_FRAC_BASE)) {
  		unsigned long period_2 = 1UL << (pg->shift - 1);
  		unsigned long counter_mask = period_2 - 1;
  		unsigned long global_count;
  		long numerator, denominator;
  
  		numerator = percpu_counter_read_positive(&pl->events);
  		global_count = percpu_counter_read(&pg->events);
  		denominator = period_2 + (global_count & counter_mask);
  
  		if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
  			goto out_put;
  	}
  
  	percpu_counter_add(&pl->events, 1);
  	percpu_counter_add(&pg->events, 1);
  
  out_put:
  	prop_put_global(pd, pg);
  }
  
  /*
   * Obtain a fraction of this proportion
   *
   *   p_{j} = x_{j} / (period/2 + t % period/2)
   *
   * @pl is normalised first; the result is handed back through @numerator
   * and @denominator.
   */
  void prop_fraction_percpu(struct prop_descriptor *pd,
  		struct prop_local_percpu *pl,
  		long *numerator, long *denominator)
  {
  	struct prop_global *global = prop_get_global(pd);
  	unsigned long half = 1UL << (global->shift - 1);
  	unsigned long elapsed;

  	prop_norm_percpu(global, pl);
  	*numerator = percpu_counter_read_positive(&pl->events);

  	/* events seen so far within the current half period */
  	elapsed = percpu_counter_read(&global->events);
  	elapsed &= half - 1;
  	*denominator = half + elapsed;

  	prop_put_global(pd, global);
  }
  
  /*
   * SINGLE
   */
  
  /*
   * Initialise a single (non per-cpu) local: lock plus zeroed shift, period
   * and event count. Always returns 0.
   */
  int prop_local_init_single(struct prop_local_single *pl)
  {
  	pl->events = 0;
  	pl->period = 0;
  	pl->shift = 0;
  	spin_lock_init(&pl->lock);

  	return 0;
  }
  
  /*
   * Counterpart of prop_local_init_single(); intentionally empty, as the
   * init function acquires nothing that would need releasing.
   */
  void prop_local_destroy_single(struct prop_local_single *pl)
  {
  }
  
  /*
   * Catch up with missed period expirations.
   *
   * @pg: active global state (shift and global event counter)
   * @pl: single local whose event count is brought up to date
   */
  static
  void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
  {
  	unsigned long half = 1UL << (pg->shift - 1);
  	unsigned long global_period;
  	unsigned long missed;
  	unsigned long flags;

  	/* round the global event count down to a multiple of period/2 */
  	global_period = percpu_counter_read(&pg->events);
  	global_period &= ~(half - 1);

  	/*
  	 * Fast path - check if the local and global period count still match
  	 * outside of the lock.
  	 */
  	if (pl->period == global_period)
  		return;

  	spin_lock_irqsave(&pl->lock, flags);
  	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
  	/*
  	 * For each missed period, we halve the local counter.
  	 */
  	missed = (global_period - pl->period) >> (pg->shift - 1);
  	if (unlikely(missed >= BITS_PER_LONG))
  		pl->events = 0;
  	else
  		pl->events >>= missed;
  	pl->period = global_period;
  	spin_unlock_irqrestore(&pl->lock, flags);
  }
  
  /*
   *   ++x_{j}, ++t
   *
   * Account one event: normalise @pl against the active global state, then
   * bump the local event count and the global counter.
   */
  void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
  {
  	struct prop_global *global = prop_get_global(pd);

  	prop_norm_single(global, pl);

  	pl->events++;
  	percpu_counter_add(&global->events, 1);

  	prop_put_global(pd, global);
  }
  
  /*
   * Obtain a fraction of this proportion
   *
   *   p_{j} = x_{j} / (period/2 + t % period/2)
   *
   * @pl is normalised first; the result is handed back through @numerator
   * and @denominator.
   */
  void prop_fraction_single(struct prop_descriptor *pd,
  		struct prop_local_single *pl,
  		long *numerator, long *denominator)
  {
  	struct prop_global *global = prop_get_global(pd);
  	unsigned long half = 1UL << (global->shift - 1);
  	unsigned long elapsed;

  	prop_norm_single(global, pl);
  	*numerator = pl->events;

  	/* events seen so far within the current half period */
  	elapsed = percpu_counter_read(&global->events);
  	elapsed &= half - 1;
  	*denominator = half + elapsed;

  	prop_put_global(pd, global);
  }