Commit e69eae65523b457a3ac4262a66cfff57f2c924a9
Committed by
Linus Torvalds
1 parent
5ceaa2f39b
Exists in
master
and in
4 other branches
zlib: make new optimized inflate endian independent
Commit 6846ee5ca68d81e6baccf0d56221d7a00c1be18b ("zlib: Fix build of powerpc boot wrapper") made the new optimized inflate available only on architectures that define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS. This patch re-enables the optimization for all architectures by defining our own endian-independent version of unaligned access. As an added bonus, architectures that define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS do a plain load instead. Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se> Cc: Anton Blanchard <anton@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Kumar Gala <galak@kernel.crashing.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 1 changed file with 30 additions and 40 deletions Side-by-side Diff
lib/zlib_inflate/inffast.c
... | ... | @@ -8,21 +8,6 @@ |
8 | 8 | #include "inflate.h" |
9 | 9 | #include "inffast.h" |
10 | 10 | |
11 | -/* Only do the unaligned "Faster" variant when | |
12 | - * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is set | |
13 | - * | |
14 | - * On powerpc, it won't be as we don't include autoconf.h | |
15 | - * automatically for the boot wrapper, which is intended as | |
16 | - * we run in an environment where we may not be able to deal | |
17 | - * with (even rare) alignment faults. In addition, we do not | |
18 | - * define __KERNEL__ for arch/powerpc/boot unlike x86 | |
19 | - */ | |
20 | - | |
21 | -#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS | |
22 | -#include <asm/unaligned.h> | |
23 | -#include <asm/byteorder.h> | |
24 | -#endif | |
25 | - | |
26 | 11 | #ifndef ASMINF |
27 | 12 | |
28 | 13 | /* Allow machine dependent optimization for post-increment or pre-increment. |
29 | 14 | |
30 | 15 | |
... | ... | @@ -36,14 +21,31 @@ |
36 | 21 | - Pentium III (Anderson) |
37 | 22 | - M68060 (Nikl) |
38 | 23 | */ |
24 | +union uu { | |
25 | + unsigned short us; | |
26 | + unsigned char b[2]; | |
27 | +}; | |
28 | + | |
29 | +/* Endian independent version */ | |
30 | +static inline unsigned short | |
31 | +get_unaligned16(const unsigned short *p) | |
32 | +{ | |
33 | + union uu mm; | |
34 | + unsigned char *b = (unsigned char *)p; | |
35 | + | |
36 | + mm.b[0] = b[0]; | |
37 | + mm.b[1] = b[1]; | |
38 | + return mm.us; | |
39 | +} | |
40 | + | |
39 | 41 | #ifdef POSTINC |
40 | 42 | # define OFF 0 |
41 | 43 | # define PUP(a) *(a)++ |
42 | -# define UP_UNALIGNED(a) get_unaligned((a)++) | |
44 | +# define UP_UNALIGNED(a) get_unaligned16((a)++) | |
43 | 45 | #else |
44 | 46 | # define OFF 1 |
45 | 47 | # define PUP(a) *++(a) |
46 | -# define UP_UNALIGNED(a) get_unaligned(++(a)) | |
48 | +# define UP_UNALIGNED(a) get_unaligned16(++(a)) | |
47 | 49 | #endif |
48 | 50 | |
49 | 51 | /* |
... | ... | @@ -256,7 +258,6 @@ |
256 | 258 | } |
257 | 259 | } |
258 | 260 | else { |
259 | -#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS | |
260 | 261 | unsigned short *sout; |
261 | 262 | unsigned long loops; |
262 | 263 | |
263 | 264 | |
... | ... | @@ -274,7 +275,11 @@ |
274 | 275 | sfrom = (unsigned short *)(from - OFF); |
275 | 276 | loops = len >> 1; |
276 | 277 | do |
278 | +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS | |
279 | + PUP(sout) = PUP(sfrom); | |
280 | +#else | |
277 | 281 | PUP(sout) = UP_UNALIGNED(sfrom); |
282 | +#endif | |
278 | 283 | while (--loops); |
279 | 284 | out = (unsigned char *)sout + OFF; |
280 | 285 | from = (unsigned char *)sfrom + OFF; |
... | ... | @@ -282,14 +287,13 @@ |
282 | 287 | unsigned short pat16; |
283 | 288 | |
284 | 289 | pat16 = *(sout-2+2*OFF); |
285 | - if (dist == 1) | |
286 | -#if defined(__BIG_ENDIAN) | |
287 | - pat16 = (pat16 & 0xff) | ((pat16 & 0xff) << 8); | |
288 | -#elif defined(__LITTLE_ENDIAN) | |
289 | - pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00) >> 8); | |
290 | -#else | |
291 | -#error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined | |
292 | -#endif | |
290 | + if (dist == 1) { | |
291 | + union uu mm; | |
292 | + /* copy one char pattern to both bytes */ | |
293 | + mm.us = pat16; | |
294 | + mm.b[0] = mm.b[1]; | |
295 | + pat16 = mm.us; | |
296 | + } | |
293 | 297 | loops = len >> 1; |
294 | 298 | do |
295 | 299 | PUP(sout) = pat16; |
... | ... | @@ -298,20 +302,6 @@ |
298 | 302 | } |
299 | 303 | if (len & 1) |
300 | 304 | PUP(out) = PUP(from); |
301 | -#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ | |
302 | - from = out - dist; /* copy direct from output */ | |
303 | - do { /* minimum length is three */ | |
304 | - PUP(out) = PUP(from); | |
305 | - PUP(out) = PUP(from); | |
306 | - PUP(out) = PUP(from); | |
307 | - len -= 3; | |
308 | - } while (len > 2); | |
309 | - if (len) { | |
310 | - PUP(out) = PUP(from); | |
311 | - if (len > 1) | |
312 | - PUP(out) = PUP(from); | |
313 | - } | |
314 | -#endif /* !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ | |
315 | 305 | } |
316 | 306 | } |
317 | 307 | else if ((op & 64) == 0) { /* 2nd level distance code */ |