Commit 641b9e0e8b7f96425da6ce98f3361e3af0baee29

Authored by Patrick McHardy
Committed by David S. Miller
1 parent ddc7b8e32b

[NET_SCHED]: Use ktime as clocksource

Get rid of the manual clock source selection mess and use ktime. Also
use a scalar representation, which allows cleaning up pkt_sched.h a bit
more and results in fewer ktime_to_ns() calls in most cases.

The PSCHED_US2JIFFIE/PSCHED_JIFFIE2US macros are implemented quite
inefficiently by this patch; following patches will convert all qdiscs
to hrtimers and get rid of them entirely.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 5 changed files with 19 additions and 315 deletions Side-by-side Diff

include/net/pkt_sched.h
... ... @@ -2,6 +2,7 @@
2 2 #define __NET_PKT_SCHED_H
3 3  
4 4 #include <linux/jiffies.h>
  5 +#include <linux/ktime.h>
5 6 #include <net/sch_generic.h>
6 7  
7 8 struct qdisc_walker
8 9  
9 10  
10 11  
11 12  
12 13  
13 14  
14 15  
15 16  
... ... @@ -37,175 +38,31 @@
37 38 The things are not so bad, because we may use artifical
38 39 clock evaluated by integration of network data flow
39 40 in the most critical places.
40   -
41   - Note: we do not use fastgettimeofday.
42   - The reason is that, when it is not the same thing as
43   - gettimeofday, it returns invalid timestamp, which is
44   - not updated, when net_bh is active.
45 41 */
46 42  
47   -/* General note about internal clock.
48   -
49   - Any clock source returns time intervals, measured in units
50   - close to 1usec. With source CONFIG_NET_SCH_CLK_GETTIMEOFDAY it is precisely
51   - microseconds, otherwise something close but different chosen to minimize
52   - arithmetic cost. Ratio usec/internal untis in form nominator/denominator
53   - may be read from /proc/net/psched.
54   - */
55   -
56   -
57   -#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
58   -
59   -typedef struct timeval psched_time_t;
60   -typedef long psched_tdiff_t;
61   -
62   -#define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp))
63   -#define PSCHED_US2JIFFIE(usecs) usecs_to_jiffies(usecs)
64   -#define PSCHED_JIFFIE2US(delay) jiffies_to_usecs(delay)
65   -
66   -#else /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
67   -
68 43 typedef u64 psched_time_t;
69 44 typedef long psched_tdiff_t;
70 45  
71   -#ifdef CONFIG_NET_SCH_CLK_JIFFIES
  46 +/* Avoid doing 64 bit divide by 1000 */
  47 +#define PSCHED_US2NS(x) ((s64)(x) << 10)
  48 +#define PSCHED_NS2US(x) ((x) >> 10)
72 49  
73   -#if HZ < 96
74   -#define PSCHED_JSCALE 14
75   -#elif HZ >= 96 && HZ < 192
76   -#define PSCHED_JSCALE 13
77   -#elif HZ >= 192 && HZ < 384
78   -#define PSCHED_JSCALE 12
79   -#elif HZ >= 384 && HZ < 768
80   -#define PSCHED_JSCALE 11
81   -#elif HZ >= 768
82   -#define PSCHED_JSCALE 10
83   -#endif
  50 +#define PSCHED_TICKS_PER_SEC PSCHED_NS2US(NSEC_PER_SEC)
  51 +#define PSCHED_GET_TIME(stamp) \
  52 + ((stamp) = PSCHED_NS2US(ktime_to_ns(ktime_get())))
84 53  
85   -#define PSCHED_GET_TIME(stamp) ((stamp) = (get_jiffies_64()<<PSCHED_JSCALE))
86   -#define PSCHED_US2JIFFIE(delay) (((delay)+(1<<PSCHED_JSCALE)-1)>>PSCHED_JSCALE)
87   -#define PSCHED_JIFFIE2US(delay) ((delay)<<PSCHED_JSCALE)
  54 +#define PSCHED_US2JIFFIE(usecs) usecs_to_jiffies(PSCHED_US2NS((usecs)) / NSEC_PER_USEC)
  55 +#define PSCHED_JIFFIE2US(delay) PSCHED_NS2US(jiffies_to_usecs((delay)) * NSEC_PER_USEC)
88 56  
89   -#endif /* CONFIG_NET_SCH_CLK_JIFFIES */
90   -#ifdef CONFIG_NET_SCH_CLK_CPU
91   -#include <asm/timex.h>
92   -
93   -extern psched_tdiff_t psched_clock_per_hz;
94   -extern int psched_clock_scale;
95   -extern psched_time_t psched_time_base;
96   -extern cycles_t psched_time_mark;
97   -
98   -#define PSCHED_GET_TIME(stamp) \
99   -do { \
100   - cycles_t cur = get_cycles(); \
101   - if (sizeof(cycles_t) == sizeof(u32)) { \
102   - if (cur <= psched_time_mark) \
103   - psched_time_base += 0x100000000ULL; \
104   - psched_time_mark = cur; \
105   - (stamp) = (psched_time_base + cur)>>psched_clock_scale; \
106   - } else { \
107   - (stamp) = cur>>psched_clock_scale; \
108   - } \
109   -} while (0)
110   -#define PSCHED_US2JIFFIE(delay) (((delay)+psched_clock_per_hz-1)/psched_clock_per_hz)
111   -#define PSCHED_JIFFIE2US(delay) ((delay)*psched_clock_per_hz)
112   -
113   -#endif /* CONFIG_NET_SCH_CLK_CPU */
114   -
115   -#endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
116   -
117   -#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
118   -#define PSCHED_TDIFF(tv1, tv2) \
119   -({ \
120   - int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
121   - int __delta = (tv1).tv_usec - (tv2).tv_usec; \
122   - if (__delta_sec) { \
123   - switch (__delta_sec) { \
124   - default: \
125   - __delta = 0; \
126   - case 2: \
127   - __delta += USEC_PER_SEC; \
128   - case 1: \
129   - __delta += USEC_PER_SEC; \
130   - } \
131   - } \
132   - __delta; \
133   -})
134   -
135   -static inline int
136   -psched_tod_diff(int delta_sec, int bound)
137   -{
138   - int delta;
139   -
140   - if (bound <= USEC_PER_SEC || delta_sec > (0x7FFFFFFF/USEC_PER_SEC)-1)
141   - return bound;
142   - delta = delta_sec * USEC_PER_SEC;
143   - if (delta > bound || delta < 0)
144   - delta = bound;
145   - return delta;
146   -}
147   -
  57 +#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2))
148 58 #define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
149   -({ \
150   - int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
151   - int __delta = (tv1).tv_usec - (tv2).tv_usec; \
152   - switch (__delta_sec) { \
153   - default: \
154   - __delta = psched_tod_diff(__delta_sec, bound); break; \
155   - case 2: \
156   - __delta += USEC_PER_SEC; \
157   - case 1: \
158   - __delta += USEC_PER_SEC; \
159   - case 0: \
160   - if (__delta > bound || __delta < 0) \
161   - __delta = bound; \
162   - } \
163   - __delta; \
164   -})
165   -
166   -#define PSCHED_TLESS(tv1, tv2) (((tv1).tv_usec < (tv2).tv_usec && \
167   - (tv1).tv_sec <= (tv2).tv_sec) || \
168   - (tv1).tv_sec < (tv2).tv_sec)
169   -
170   -#define PSCHED_TADD2(tv, delta, tv_res) \
171   -({ \
172   - int __delta = (tv).tv_usec + (delta); \
173   - (tv_res).tv_sec = (tv).tv_sec; \
174   - while (__delta >= USEC_PER_SEC) { (tv_res).tv_sec++; __delta -= USEC_PER_SEC; } \
175   - (tv_res).tv_usec = __delta; \
176   -})
177   -
178   -#define PSCHED_TADD(tv, delta) \
179   -({ \
180   - (tv).tv_usec += (delta); \
181   - while ((tv).tv_usec >= USEC_PER_SEC) { (tv).tv_sec++; \
182   - (tv).tv_usec -= USEC_PER_SEC; } \
183   -})
184   -
185   -/* Set/check that time is in the "past perfect";
186   - it depends on concrete representation of system time
187   - */
188   -
189   -#define PSCHED_SET_PASTPERFECT(t) ((t).tv_sec = 0)
190   -#define PSCHED_IS_PASTPERFECT(t) ((t).tv_sec == 0)
191   -
192   -#define PSCHED_AUDIT_TDIFF(t) ({ if ((t) > 2000000) (t) = 2000000; })
193   -
194   -#else /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
195   -
196   -#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2))
197   -#define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
198   - min_t(long long, (tv1) - (tv2), bound)
199   -
200   -
201   -#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2))
  59 + min_t(long long, (tv1) - (tv2), bound)
  60 +#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2))
202 61 #define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta))
203   -#define PSCHED_TADD(tv, delta) ((tv) += (delta))
  62 +#define PSCHED_TADD(tv, delta) ((tv) += (delta))
204 63 #define PSCHED_SET_PASTPERFECT(t) ((t) = 0)
205 64 #define PSCHED_IS_PASTPERFECT(t) ((t) == 0)
206 65 #define PSCHED_AUDIT_TDIFF(t)
207   -
208   -#endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
209 66  
210 67 extern struct Qdisc_ops pfifo_qdisc_ops;
211 68 extern struct Qdisc_ops bfifo_qdisc_ops;
... ... @@ -59,6 +59,7 @@
59 59  
60 60 return timespec_to_ktime(now);
61 61 }
  62 +EXPORT_SYMBOL_GPL(ktime_get);
62 63  
63 64 /**
64 65 * ktime_get_real - get the real (wall-) time in ktime_t format
... ... @@ -46,62 +46,6 @@
46 46  
47 47 if NET_SCHED
48 48  
49   -choice
50   - prompt "Packet scheduler clock source"
51   - default NET_SCH_CLK_GETTIMEOFDAY
52   - ---help---
53   - Packet schedulers need a monotonic clock that increments at a static
54   - rate. The kernel provides several suitable interfaces, each with
55   - different properties:
56   -
57   - - high resolution (us or better)
58   - - fast to read (minimal locking, no i/o access)
59   - - synchronized on all processors
60   - - handles cpu clock frequency changes
61   -
62   - but nothing provides all of the above.
63   -
64   -config NET_SCH_CLK_JIFFIES
65   - bool "Timer interrupt"
66   - ---help---
67   - Say Y here if you want to use the timer interrupt (jiffies) as clock
68   - source. This clock source is fast, synchronized on all processors and
69   - handles cpu clock frequency changes, but its resolution is too low
70   - for accurate shaping except at very low speed.
71   -
72   -config NET_SCH_CLK_GETTIMEOFDAY
73   - bool "gettimeofday"
74   - ---help---
75   - Say Y here if you want to use gettimeofday as clock source. This clock
76   - source has high resolution, is synchronized on all processors and
77   - handles cpu clock frequency changes, but it is slow.
78   -
79   - Choose this if you need a high resolution clock source but can't use
80   - the CPU's cycle counter.
81   -
82   -# don't allow on SMP x86 because they can have unsynchronized TSCs.
83   -# gettimeofday is a good alternative
84   -config NET_SCH_CLK_CPU
85   - bool "CPU cycle counter"
86   - depends on ((X86_TSC || X86_64) && !SMP) || ALPHA || SPARC64 || PPC64 || IA64
87   - ---help---
88   - Say Y here if you want to use the CPU's cycle counter as clock source.
89   - This is a cheap and high resolution clock source, but on some
90   - architectures it is not synchronized on all processors and doesn't
91   - handle cpu clock frequency changes.
92   -
93   - The useable cycle counters are:
94   -
95   - x86/x86_64 - Timestamp Counter
96   - alpha - Cycle Counter
97   - sparc64 - %ticks register
98   - ppc64 - Time base
99   - ia64 - Interval Time Counter
100   -
101   - Choose this if your CPU's cycle counter is working properly.
102   -
103   -endchoice
104   -
105 49 comment "Queueing/Scheduling"
106 50  
107 51 config NET_SCH_CBQ
... ... @@ -1175,15 +1175,12 @@
1175 1175 return -1;
1176 1176 }
1177 1177  
1178   -static int psched_us_per_tick = 1;
1179   -static int psched_tick_per_us = 1;
1180   -
1181 1178 #ifdef CONFIG_PROC_FS
1182 1179 static int psched_show(struct seq_file *seq, void *v)
1183 1180 {
1184 1181 seq_printf(seq, "%08x %08x %08x %08x\n",
1185   - psched_tick_per_us, psched_us_per_tick,
1186   - 1000000, HZ);
  1182 + (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
  1183 + 1000000, HZ);
1187 1184  
1188 1185 return 0;
1189 1186 }
1190 1187  
... ... @@ -1202,79 +1199,9 @@
1202 1199 };
1203 1200 #endif
1204 1201  
1205   -#ifdef CONFIG_NET_SCH_CLK_CPU
1206   -psched_tdiff_t psched_clock_per_hz;
1207   -int psched_clock_scale;
1208   -EXPORT_SYMBOL(psched_clock_per_hz);
1209   -EXPORT_SYMBOL(psched_clock_scale);
1210   -
1211   -psched_time_t psched_time_base;
1212   -cycles_t psched_time_mark;
1213   -EXPORT_SYMBOL(psched_time_mark);
1214   -EXPORT_SYMBOL(psched_time_base);
1215   -
1216   -/*
1217   - * Periodically adjust psched_time_base to avoid overflow
1218   - * with 32-bit get_cycles(). Safe up to 4GHz CPU.
1219   - */
1220   -static void psched_tick(unsigned long);
1221   -static DEFINE_TIMER(psched_timer, psched_tick, 0, 0);
1222   -
1223   -static void psched_tick(unsigned long dummy)
1224   -{
1225   - if (sizeof(cycles_t) == sizeof(u32)) {
1226   - psched_time_t dummy_stamp;
1227   - PSCHED_GET_TIME(dummy_stamp);
1228   - psched_timer.expires = jiffies + 1*HZ;
1229   - add_timer(&psched_timer);
1230   - }
1231   -}
1232   -
1233   -int __init psched_calibrate_clock(void)
1234   -{
1235   - psched_time_t stamp, stamp1;
1236   - struct timeval tv, tv1;
1237   - psched_tdiff_t delay;
1238   - long rdelay;
1239   - unsigned long stop;
1240   -
1241   - psched_tick(0);
1242   - stop = jiffies + HZ/10;
1243   - PSCHED_GET_TIME(stamp);
1244   - do_gettimeofday(&tv);
1245   - while (time_before(jiffies, stop)) {
1246   - barrier();
1247   - cpu_relax();
1248   - }
1249   - PSCHED_GET_TIME(stamp1);
1250   - do_gettimeofday(&tv1);
1251   -
1252   - delay = PSCHED_TDIFF(stamp1, stamp);
1253   - rdelay = tv1.tv_usec - tv.tv_usec;
1254   - rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;
1255   - if (rdelay > delay)
1256   - return -1;
1257   - delay /= rdelay;
1258   - psched_tick_per_us = delay;
1259   - while ((delay>>=1) != 0)
1260   - psched_clock_scale++;
1261   - psched_us_per_tick = 1<<psched_clock_scale;
1262   - psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
1263   - return 0;
1264   -}
1265   -#endif
1266   -
1267 1202 static int __init pktsched_init(void)
1268 1203 {
1269 1204 struct rtnetlink_link *link_p;
1270   -
1271   -#ifdef CONFIG_NET_SCH_CLK_CPU
1272   - if (psched_calibrate_clock() < 0)
1273   - return -1;
1274   -#elif defined(CONFIG_NET_SCH_CLK_JIFFIES)
1275   - psched_tick_per_us = HZ<<PSCHED_JSCALE;
1276   - psched_us_per_tick = 1000000;
1277   -#endif
1278 1205  
1279 1206 link_p = rtnetlink_links[PF_UNSPEC];
1280 1207  
net/sched/sch_hfsc.c
... ... @@ -195,20 +195,6 @@
195 195 struct timer_list wd_timer; /* watchdog timer */
196 196 };
197 197  
198   -/*
199   - * macros
200   - */
201   -#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
202   -#include <linux/time.h>
203   -#undef PSCHED_GET_TIME
204   -#define PSCHED_GET_TIME(stamp) \
205   -do { \
206   - struct timeval tv; \
207   - do_gettimeofday(&tv); \
208   - (stamp) = 1ULL * USEC_PER_SEC * tv.tv_sec + tv.tv_usec; \
209   -} while (0)
210   -#endif
211   -
212 198 #define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */
213 199  
214 200  
215 201  
216 202  
217 203  
... ... @@ -394,28 +380,17 @@
394 380 * ism: (psched_us/byte) << ISM_SHIFT
395 381 * dx: psched_us
396 382 *
397   - * Clock source resolution (CONFIG_NET_SCH_CLK_*)
398   - * JIFFIES: for 48<=HZ<=1534 resolution is between 0.63us and 1.27us.
399   - * CPU: resolution is between 0.5us and 1us.
400   - * GETTIMEOFDAY: resolution is exactly 1us.
  383 + * The clock source resolution with ktime is 1.024us.
401 384 *
402 385 * sm and ism are scaled in order to keep effective digits.
403 386 * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective
404 387 * digits in decimal using the following table.
405 388 *
406   - * Note: We can afford the additional accuracy (altq hfsc keeps at most
407   - * 3 effective digits) thanks to the fact that linux clock is bounded
408   - * much more tightly.
409   - *
410 389 * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps
411 390 * ------------+-------------------------------------------------------
412   - * bytes/0.5us 6.25e-3 62.5e-3 625e-3 6250e-e 62500e-3
413   - * bytes/us 12.5e-3 125e-3 1250e-3 12500e-3 125000e-3
414   - * bytes/1.27us 15.875e-3 158.75e-3 1587.5e-3 15875e-3 158750e-3
  391 + * bytes/1.024us 12.8e-3 128e-3 1280e-3 12800e-3 128000e-3
415 392 *
416   - * 0.5us/byte 160 16 1.6 0.16 0.016
417   - * us/byte 80 8 0.8 0.08 0.008
418   - * 1.27us/byte 63 6.3 0.63 0.063 0.0063
  393 + * 1.024us/byte 78.125 7.8125 0.78125 0.078125 0.0078125
419 394 */
420 395 #define SM_SHIFT 20
421 396 #define ISM_SHIFT 18