Commit ac4c2a3bbe5db5fc570b1d0ee1e474db7cb22585

Authored by Joakim Tjernlund
Committed by Linus Torvalds
1 parent 129182e562

zlib: optimize inffast when copying direct from output

JFFS2 uses a lesser compression ratio, and inflate always ends up in the
"copy direct from output" case.

This patch tries to optimize the direct copy procedure.  It uses
get_unaligned(), but only in one place.

The copy loop just above this one can also use this optimization, but I
haven't done so as I have not tested if it is a win there too.

On my MPC8321 this is about 17% faster on my JFFS2 root FS than the
original.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Cc: Roel Kluin <roel.kluin@gmail.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 2 changed files with 47 additions and 12 deletions Side-by-side Diff

arch/powerpc/boot/Makefile
... ... @@ -20,7 +20,7 @@
20 20 all: $(obj)/zImage
21 21  
22 22 BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
23   - -fno-strict-aliasing -Os -msoft-float -pipe \
  23 + -fno-strict-aliasing -Os -msoft-float -pipe -D__KERNEL__\
24 24 -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
25 25 -isystem $(shell $(CROSS32CC) -print-file-name=include)
26 26 BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
... ... @@ -34,6 +34,8 @@
34 34 endif
35 35  
36 36 BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj)
  37 +BOOTCFLAGS += -include include/linux/autoconf.h -Iarch/powerpc/include
  38 +BOOTCFLAGS += -Iinclude
37 39  
38 40 DTS_FLAGS ?= -p 1024
39 41  
lib/zlib_inflate/inffast.c
... ... @@ -4,6 +4,8 @@
4 4 */
5 5  
6 6 #include <linux/zutil.h>
  7 +#include <asm/unaligned.h>
  8 +#include <asm/byteorder.h>
7 9 #include "inftrees.h"
8 10 #include "inflate.h"
9 11 #include "inffast.h"
10 12  
... ... @@ -24,9 +26,11 @@
24 26 #ifdef POSTINC
25 27 # define OFF 0
26 28 # define PUP(a) *(a)++
  29 +# define UP_UNALIGNED(a) get_unaligned((a)++)
27 30 #else
28 31 # define OFF 1
29 32 # define PUP(a) *++(a)
  33 +# define UP_UNALIGNED(a) get_unaligned(++(a))
30 34 #endif
31 35  
32 36 /*
33 37  
... ... @@ -239,18 +243,47 @@
239 243 }
240 244 }
241 245 else {
  246 + unsigned short *sout;
  247 + unsigned long loops;
  248 +
242 249 from = out - dist; /* copy direct from output */
243   - do { /* minimum length is three */
244   - PUP(out) = PUP(from);
245   - PUP(out) = PUP(from);
246   - PUP(out) = PUP(from);
247   - len -= 3;
248   - } while (len > 2);
249   - if (len) {
250   - PUP(out) = PUP(from);
251   - if (len > 1)
252   - PUP(out) = PUP(from);
253   - }
  250 + /* minimum length is three */
  251 + /* Align out addr */
  252 + if (!((long)(out - 1 + OFF) & 1)) {
  253 + PUP(out) = PUP(from);
  254 + len--;
  255 + }
  256 + sout = (unsigned short *)(out - OFF);
  257 + if (dist > 2) {
  258 + unsigned short *sfrom;
  259 +
  260 + sfrom = (unsigned short *)(from - OFF);
  261 + loops = len >> 1;
  262 + do
  263 + PUP(sout) = UP_UNALIGNED(sfrom);
  264 + while (--loops);
  265 + out = (unsigned char *)sout + OFF;
  266 + from = (unsigned char *)sfrom + OFF;
  267 + } else { /* dist == 1 or dist == 2 */
  268 + unsigned short pat16;
  269 +
  270 + pat16 = *(sout-2+2*OFF);
  271 + if (dist == 1)
  272 +#if defined(__BIG_ENDIAN)
  273 + pat16 = (pat16 & 0xff) | ((pat16 & 0xff) << 8);
  274 +#elif defined(__LITTLE_ENDIAN)
  275 + pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00) >> 8);
  276 +#else
  277 +#error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined
  278 +#endif
  279 + loops = len >> 1;
  280 + do
  281 + PUP(sout) = pat16;
  282 + while (--loops);
  283 + out = (unsigned char *)sout + OFF;
  284 + }
  285 + if (len & 1)
  286 + PUP(out) = PUP(from);
254 287 }
255 288 }
256 289 else if ((op & 64) == 0) { /* 2nd level distance code */