calibrate: retry with wider bounds when converge seems to fail

Systems with unmaskable interrupts such as SMIs may massively underestimate loops_per_jiffy, and fail to converge anywhere near the real value. A case seen on x86_64 was an initial estimate of 256<<12, which converged to 511<<12 where the real value should have been over 630<<12. This admittedly requires bypassing the TSC calibration (lpj_fine), and a failure to settle in the direct calibration too, but is physically possible. This failure does not depend on my previous calibration optimisation, but by luck is easy to fix with the optimisation in place with a trivial retry loop. In the context of the optimised converging method, as we can no longer trust the starting estimate, enlarge the search bounds exponentially so that the number of retries is logarithmically bounded. [akpm@linux-foundation.org: mention x86_64 SMIs in comment] Signed-off-by: Phil Carmody <ext-phil.2.carmody@nokia.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Tested-by: Stephen Boyd <sboyd@codeaurora.org> Cc: Greg KH <greg@kroah.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

calibrate: retry with wider bounds when converge seems to fail
Systems with unmaskable interrupts such as SMIs may massively underestimate loops_per_jiffy, and fail to converge anywhere near the real value. A case seen on x86_64 was an initial estimate of 256<<12, which converged to 511<<12 where the real value should have been over 630<<12. This admittedly requires bypassing the TSC calibration (lpj_fine), and a failure to settle in the direct calibration too, but is physically possible. This failure does not depend on my previous calibration optimisation, but by luck is easy to fix with the optimisation in place with a trivial retry loop. In the context of the optimised converging method, as we can no longer trust the starting estimate, enlarge the search bounds exponentially so that the number of retries is logarithmically bounded. [akpm@linux-foundation.org: mention x86_64 SMIs in comment] Signed-off-by: Phil Carmody <ext-phil.2.carmody@nokia.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Tested-by: Stephen Boyd <sboyd@codeaurora.org> Cc: Greg KH <greg@kroah.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Phil Carmody · Linus Torvalds
1 parent 191e56880a
Showing 1 changed file with 18 additions and 4 deletions Side-by-side Diff
init/calibrate.c
@@ -122,7 +122,7 @@
 static unsigned long __cpuinit calibrate_delay_converge(void)
 {
 	/* First stage - slowly accelerate to find initial bounds */
-	unsigned long lpj, ticks, loopadd, chop_limit;
+	unsigned long lpj, lpj_base, ticks, loopadd, loopadd_base, chop_limit;
 	int trials = 0, band = 0, trial_in_band = 0;
  
 	lpj = (1<<12);
  
  
@@ -146,14 +146,18 @@
 	 * the largest likely undershoot. This defines our chop bounds.
 	 */
 	trials -= band;
-	loopadd = lpj * band;
-	lpj *= trials;
-	chop_limit = lpj >> (LPS_PREC + 1);
+	loopadd_base = lpj * band;
+	lpj_base = lpj * trials;
  
+recalibrate:
+	lpj = lpj_base;
+	loopadd = loopadd_base;
+
 	/*
 	 * Do a binary approximation to get lpj set to
 	 * equal one clock (up to LPS_PREC bits)
 	 */
+	chop_limit = lpj >> LPS_PREC;
 	while (loopadd > chop_limit) {
 		lpj += loopadd;
 		ticks = jiffies;
@@ -164,6 +168,16 @@
 		if (jiffies != ticks)	/* longer than 1 tick */
 			lpj -= loopadd;
 		loopadd >>= 1;
+	}
+	/*
+	 * If we incremented every single time possible, presume we've
+	 * massively underestimated initially, and retry with a higher
+	 * start, and larger range. (Only seen on x86_64, due to SMIs)
+	 */
+	if (lpj + loopadd * 2 == lpj_base + loopadd_base * 2) {
+		lpj_base = lpj;
+		loopadd_base <<= 2;
+		goto recalibrate;
 	}
  
 	return lpj;
...	...	@@ -122,7 +122,7 @@
122	122	static unsigned long __cpuinit calibrate_delay_converge(void)
123	123	{
124	124	/* First stage - slowly accelerate to find initial bounds */
125		- unsigned long lpj, ticks, loopadd, chop_limit;
	125	+ unsigned long lpj, lpj_base, ticks, loopadd, loopadd_base, chop_limit;
126	126	int trials = 0, band = 0, trial_in_band = 0;
127	127
128	128	lpj = (1<<12);
129	129
130	130
...	...	@@ -146,14 +146,18 @@
146	146	* the largest likely undershoot. This defines our chop bounds.
147	147	*/
148	148	trials -= band;
149		- loopadd = lpj * band;
150		- lpj *= trials;
151		- chop_limit = lpj >> (LPS_PREC + 1);
	149	+ loopadd_base = lpj * band;
	150	+ lpj_base = lpj * trials;
152	151
	152	+recalibrate:
	153	+ lpj = lpj_base;
	154	+ loopadd = loopadd_base;
	155	+
153	156	/*
154	157	* Do a binary approximation to get lpj set to
155	158	* equal one clock (up to LPS_PREC bits)
156	159	*/
	160	+ chop_limit = lpj >> LPS_PREC;
157	161	while (loopadd > chop_limit) {
158	162	lpj += loopadd;
159	163	ticks = jiffies;
...	...	@@ -164,6 +168,16 @@
164	168	if (jiffies != ticks) /* longer than 1 tick */
165	169	lpj -= loopadd;
166	170	loopadd >>= 1;
	171	+ }
	172	+ /*
	173	+ * If we incremented every single time possible, presume we've
	174	+ * massively underestimated initially, and retry with a higher
	175	+ * start, and larger range. (Only seen on x86_64, due to SMIs)
	176	+ */
	177	+ if (lpj + loopadd * 2 == lpj_base + loopadd_base * 2) {
	178	+ lpj_base = lpj;
	179	+ loopadd_base <<= 2;
	180	+ goto recalibrate;
167	181	}
168	182
169	183	return lpj;