Commit 2359172a75986359ce9cf041a9aca6a32cdf8779
Committed by
Linus Torvalds
1 parent
e49317d415
Exists in
smarc-l5.0.0_1.0.0-ga
and in
5 other branches
lib: vsprintf: optimize division by 10000
The same multiply-by-inverse technique can be used to convert division by 10000 to a 32x32->64-bit multiply. Signed-off-by: George Spelvin <linux@horizon.com> Cc: Denys Vlasenko <vda.linux@googlemail.com> Cc: Michal Nazarewicz <mina86@mina86.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 1 changed file with 33 additions and 27 deletions Side-by-side Diff
lib/vsprintf.c
... | ... | @@ -245,19 +245,34 @@ |
245 | 245 | |
246 | 246 | /* See comment in put_dec_full9 for choice of constants */ |
247 | 247 | static noinline_for_stack |
248 | -char *put_dec_full4(char *buf, unsigned q) | |
248 | +void put_dec_full4(char *buf, unsigned q) | |
249 | 249 | { |
250 | 250 | unsigned r; |
251 | 251 | r = (q * 0xccd) >> 15; |
252 | - *buf++ = (q - 10 * r) + '0'; | |
252 | + buf[0] = (q - 10 * r) + '0'; | |
253 | 253 | q = (r * 0xcd) >> 11; |
254 | - *buf++ = (r - 10 * q) + '0'; | |
254 | + buf[1] = (r - 10 * q) + '0'; | |
255 | 255 | r = (q * 0xcd) >> 11; |
256 | - *buf++ = (q - 10 * r) + '0'; | |
257 | - *buf++ = r + '0'; | |
258 | - return buf; | |
256 | + buf[2] = (q - 10 * r) + '0'; | |
257 | + buf[3] = r + '0'; | |
259 | 258 | } |
260 | 259 | |
260 | +/* | |
261 | + * Call put_dec_full4 on x % 10000, return x / 10000. | |
262 | + * The approximation x/10000 == (x * 0x346DC5D7) >> 43 | |
263 | + * holds for all x < 1,128,869,999. The largest value this | |
264 | + * helper will ever be asked to convert is 1,125,520,955. | |
265 | + * (d1 in the put_dec code, assuming n is all-ones). | |
266 | + */ | |
267 | +static | |
268 | +unsigned put_dec_helper4(char *buf, unsigned x) | |
269 | +{ | |
270 | + uint32_t q = (x * (uint64_t)0x346DC5D7) >> 43; | |
271 | + | |
272 | + put_dec_full4(buf, x - q * 10000); | |
273 | + return q; | |
274 | +} | |
275 | + | |
261 | 276 | /* Based on code by Douglas W. Jones found at |
262 | 277 | * <http://www.cs.uiowa.edu/~jones/bcd/decimal.html#sixtyfour> |
263 | 278 | * (with permission from the author). |
264 | 279 | |
265 | 280 | |
266 | 281 | |
... | ... | @@ -277,28 +292,19 @@ |
277 | 292 | d3 = (h >> 16); /* implicit "& 0xffff" */ |
278 | 293 | |
279 | 294 | q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff); |
295 | + q = put_dec_helper4(buf, q); | |
280 | 296 | |
281 | - buf = put_dec_full4(buf, q % 10000); | |
282 | - q = q / 10000; | |
297 | + q += 7671 * d3 + 9496 * d2 + 6 * d1; | |
298 | + q = put_dec_helper4(buf+4, q); | |
283 | 299 | |
284 | - d1 = q + 7671 * d3 + 9496 * d2 + 6 * d1; | |
285 | - buf = put_dec_full4(buf, d1 % 10000); | |
286 | - q = d1 / 10000; | |
300 | + q += 4749 * d3 + 42 * d2; | |
301 | + q = put_dec_helper4(buf+8, q); | |
287 | 302 | |
288 | - d2 = q + 4749 * d3 + 42 * d2; | |
289 | - buf = put_dec_full4(buf, d2 % 10000); | |
290 | - q = d2 / 10000; | |
291 | - | |
292 | - d3 = q + 281 * d3; | |
293 | - if (!d3) | |
294 | - goto done; | |
295 | - buf = put_dec_full4(buf, d3 % 10000); | |
296 | - q = d3 / 10000; | |
297 | - if (!q) | |
298 | - goto done; | |
299 | - buf = put_dec_full4(buf, q); | |
300 | - done: | |
301 | - while (buf[-1] == '0') | |
303 | + q += 281 * d3; | |
304 | + buf += 12; | |
305 | + if (q) | |
306 | + buf = put_dec_trunc8(buf, q); | |
307 | + else while (buf[-1] == '0') | |
302 | 308 | --buf; |
303 | 309 | |
304 | 310 | return buf; |