Commit 8b975bd3f9089f8ee5d7bbfd798537b992bbc7e7

Authored by Markus F.X.J. Oberhumer
1 parent b6bec26cea

lib/lzo: Update LZO compression to current upstream version

This commit updates the kernel LZO code to the current upstream version,
which features a significant speed improvement: benchmarking the Calgary
and Silesia test corpora typically shows doubled performance in
both compression and decompression on modern i386/x86_64/powerpc machines.

Signed-off-by: Markus F.X.J. Oberhumer <markus@oberhumer.com>

Showing 4 changed files with 395 additions and 343 deletions Side-by-side Diff

... ... @@ -4,28 +4,28 @@
4 4 * LZO Public Kernel Interface
5 5 * A mini subset of the LZO real-time data compression library
6 6 *
7   - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
  7 + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
8 8 *
9 9 * The full LZO package can be found at:
10 10 * http://www.oberhumer.com/opensource/lzo/
11 11 *
12   - * Changed for kernel use by:
  12 + * Changed for Linux kernel use by:
13 13 * Nitin Gupta <nitingupta910@gmail.com>
14 14 * Richard Purdie <rpurdie@openedhand.com>
15 15 */
16 16  
17   -#define LZO1X_MEM_COMPRESS (16384 * sizeof(unsigned char *))
18   -#define LZO1X_1_MEM_COMPRESS LZO1X_MEM_COMPRESS
  17 +#define LZO1X_1_MEM_COMPRESS (8192 * sizeof(unsigned short))
  18 +#define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS
19 19  
20 20 #define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3)
21 21  
22   -/* This requires 'workmem' of size LZO1X_1_MEM_COMPRESS */
  22 +/* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */
23 23 int lzo1x_1_compress(const unsigned char *src, size_t src_len,
24   - unsigned char *dst, size_t *dst_len, void *wrkmem);
  24 + unsigned char *dst, size_t *dst_len, void *wrkmem);
25 25  
26 26 /* safe decompression with overrun testing */
27 27 int lzo1x_decompress_safe(const unsigned char *src, size_t src_len,
28   - unsigned char *dst, size_t *dst_len);
  28 + unsigned char *dst, size_t *dst_len);
29 29  
30 30 /*
31 31 * Return values (< 0 = Error)
... ... @@ -40,6 +40,7 @@
40 40 #define LZO_E_EOF_NOT_FOUND (-7)
41 41 #define LZO_E_INPUT_NOT_CONSUMED (-8)
42 42 #define LZO_E_NOT_YET_IMPLEMENTED (-9)
  43 +#define LZO_E_INVALID_ARGUMENT (-10)
43 44  
44 45 #endif
lib/lzo/lzo1x_compress.c
1 1 /*
2   - * LZO1X Compressor from MiniLZO
  2 + * LZO1X Compressor from LZO
3 3 *
4   - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
  4 + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
5 5 *
6 6 * The full LZO package can be found at:
7 7 * http://www.oberhumer.com/opensource/lzo/
8 8 *
9   - * Changed for kernel use by:
  9 + * Changed for Linux kernel use by:
10 10 * Nitin Gupta <nitingupta910@gmail.com>
11 11 * Richard Purdie <rpurdie@openedhand.com>
12 12 */
13 13  
14 14 #include <linux/module.h>
15 15 #include <linux/kernel.h>
16   -#include <linux/lzo.h>
17 16 #include <asm/unaligned.h>
  17 +#include <linux/lzo.h>
18 18 #include "lzodefs.h"
19 19  
20 20 static noinline size_t
21   -_lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
22   - unsigned char *out, size_t *out_len, void *wrkmem)
  21 +lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
  22 + unsigned char *out, size_t *out_len,
  23 + size_t ti, void *wrkmem)
23 24 {
  25 + const unsigned char *ip;
  26 + unsigned char *op;
24 27 const unsigned char * const in_end = in + in_len;
25   - const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5;
26   - const unsigned char ** const dict = wrkmem;
27   - const unsigned char *ip = in, *ii = ip;
28   - const unsigned char *end, *m, *m_pos;
29   - size_t m_off, m_len, dindex;
30   - unsigned char *op = out;
  28 + const unsigned char * const ip_end = in + in_len - 20;
  29 + const unsigned char *ii;
  30 + lzo_dict_t * const dict = (lzo_dict_t *) wrkmem;
31 31  
32   - ip += 4;
  32 + op = out;
  33 + ip = in;
  34 + ii = ip;
  35 + ip += ti < 4 ? 4 - ti : 0;
33 36  
34 37 for (;;) {
35   - dindex = ((size_t)(0x21 * DX3(ip, 5, 5, 6)) >> 5) & D_MASK;
36   - m_pos = dict[dindex];
37   -
38   - if (m_pos < in)
39   - goto literal;
40   -
41   - if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET))
42   - goto literal;
43   -
44   - m_off = ip - m_pos;
45   - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
46   - goto try_match;
47   -
48   - dindex = (dindex & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f);
49   - m_pos = dict[dindex];
50   -
51   - if (m_pos < in)
52   - goto literal;
53   -
54   - if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET))
55   - goto literal;
56   -
57   - m_off = ip - m_pos;
58   - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
59   - goto try_match;
60   -
61   - goto literal;
62   -
63   -try_match:
64   - if (get_unaligned((const unsigned short *)m_pos)
65   - == get_unaligned((const unsigned short *)ip)) {
66   - if (likely(m_pos[2] == ip[2]))
67   - goto match;
68   - }
69   -
  38 + const unsigned char *m_pos;
  39 + size_t t, m_len, m_off;
  40 + u32 dv;
70 41 literal:
71   - dict[dindex] = ip;
72   - ++ip;
  42 + ip += 1 + ((ip - ii) >> 5);
  43 +next:
73 44 if (unlikely(ip >= ip_end))
74 45 break;
75   - continue;
  46 + dv = get_unaligned_le32(ip);
  47 + t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK;
  48 + m_pos = in + dict[t];
  49 + dict[t] = (lzo_dict_t) (ip - in);
  50 + if (unlikely(dv != get_unaligned_le32(m_pos)))
  51 + goto literal;
76 52  
77   -match:
78   - dict[dindex] = ip;
79   - if (ip != ii) {
80   - size_t t = ip - ii;
81   -
  53 + ii -= ti;
  54 + ti = 0;
  55 + t = ip - ii;
  56 + if (t != 0) {
82 57 if (t <= 3) {
83 58 op[-2] |= t;
84   - } else if (t <= 18) {
  59 + COPY4(op, ii);
  60 + op += t;
  61 + } else if (t <= 16) {
85 62 *op++ = (t - 3);
  63 + COPY8(op, ii);
  64 + COPY8(op + 8, ii + 8);
  65 + op += t;
86 66 } else {
87   - size_t tt = t - 18;
88   -
89   - *op++ = 0;
90   - while (tt > 255) {
91   - tt -= 255;
  67 + if (t <= 18) {
  68 + *op++ = (t - 3);
  69 + } else {
  70 + size_t tt = t - 18;
92 71 *op++ = 0;
  72 + while (unlikely(tt > 255)) {
  73 + tt -= 255;
  74 + *op++ = 0;
  75 + }
  76 + *op++ = tt;
93 77 }
94   - *op++ = tt;
  78 + do {
  79 + COPY8(op, ii);
  80 + COPY8(op + 8, ii + 8);
  81 + op += 16;
  82 + ii += 16;
  83 + t -= 16;
  84 + } while (t >= 16);
  85 + if (t > 0) do {
  86 + *op++ = *ii++;
  87 + } while (--t > 0);
95 88 }
  89 + }
  90 +
  91 + m_len = 4;
  92 + {
  93 +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64)
  94 + u64 v;
  95 + v = get_unaligned((const u64 *) (ip + m_len)) ^
  96 + get_unaligned((const u64 *) (m_pos + m_len));
  97 + if (unlikely(v == 0)) {
96 98 do {
97   - *op++ = *ii++;
98   - } while (--t > 0);
  99 + m_len += 8;
  100 + v = get_unaligned((const u64 *) (ip + m_len)) ^
  101 + get_unaligned((const u64 *) (m_pos + m_len));
  102 + if (unlikely(ip + m_len >= ip_end))
  103 + goto m_len_done;
  104 + } while (v == 0);
99 105 }
  106 +# if defined(__LITTLE_ENDIAN)
  107 + m_len += (unsigned) __builtin_ctzll(v) / 8;
  108 +# elif defined(__BIG_ENDIAN)
  109 + m_len += (unsigned) __builtin_clzll(v) / 8;
  110 +# else
  111 +# error "missing endian definition"
  112 +# endif
  113 +#elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ32)
  114 + u32 v;
  115 + v = get_unaligned((const u32 *) (ip + m_len)) ^
  116 + get_unaligned((const u32 *) (m_pos + m_len));
  117 + if (unlikely(v == 0)) {
  118 + do {
  119 + m_len += 4;
  120 + v = get_unaligned((const u32 *) (ip + m_len)) ^
  121 + get_unaligned((const u32 *) (m_pos + m_len));
  122 + if (v != 0)
  123 + break;
  124 + m_len += 4;
  125 + v = get_unaligned((const u32 *) (ip + m_len)) ^
  126 + get_unaligned((const u32 *) (m_pos + m_len));
  127 + if (unlikely(ip + m_len >= ip_end))
  128 + goto m_len_done;
  129 + } while (v == 0);
  130 + }
  131 +# if defined(__LITTLE_ENDIAN)
  132 + m_len += (unsigned) __builtin_ctz(v) / 8;
  133 +# elif defined(__BIG_ENDIAN)
  134 + m_len += (unsigned) __builtin_clz(v) / 8;
  135 +# else
  136 +# error "missing endian definition"
  137 +# endif
  138 +#else
  139 + if (unlikely(ip[m_len] == m_pos[m_len])) {
  140 + do {
  141 + m_len += 1;
  142 + if (ip[m_len] != m_pos[m_len])
  143 + break;
  144 + m_len += 1;
  145 + if (ip[m_len] != m_pos[m_len])
  146 + break;
  147 + m_len += 1;
  148 + if (ip[m_len] != m_pos[m_len])
  149 + break;
  150 + m_len += 1;
  151 + if (ip[m_len] != m_pos[m_len])
  152 + break;
  153 + m_len += 1;
  154 + if (ip[m_len] != m_pos[m_len])
  155 + break;
  156 + m_len += 1;
  157 + if (ip[m_len] != m_pos[m_len])
  158 + break;
  159 + m_len += 1;
  160 + if (ip[m_len] != m_pos[m_len])
  161 + break;
  162 + m_len += 1;
  163 + if (unlikely(ip + m_len >= ip_end))
  164 + goto m_len_done;
  165 + } while (ip[m_len] == m_pos[m_len]);
  166 + }
  167 +#endif
  168 + }
  169 +m_len_done:
100 170  
101   - ip += 3;
102   - if (m_pos[3] != *ip++ || m_pos[4] != *ip++
103   - || m_pos[5] != *ip++ || m_pos[6] != *ip++
104   - || m_pos[7] != *ip++ || m_pos[8] != *ip++) {
105   - --ip;
106   - m_len = ip - ii;
107   -
108   - if (m_off <= M2_MAX_OFFSET) {
109   - m_off -= 1;
110   - *op++ = (((m_len - 1) << 5)
111   - | ((m_off & 7) << 2));
112   - *op++ = (m_off >> 3);
113   - } else if (m_off <= M3_MAX_OFFSET) {
114   - m_off -= 1;
  171 + m_off = ip - m_pos;
  172 + ip += m_len;
  173 + ii = ip;
  174 + if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) {
  175 + m_off -= 1;
  176 + *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2));
  177 + *op++ = (m_off >> 3);
  178 + } else if (m_off <= M3_MAX_OFFSET) {
  179 + m_off -= 1;
  180 + if (m_len <= M3_MAX_LEN)
115 181 *op++ = (M3_MARKER | (m_len - 2));
116   - goto m3_m4_offset;
117   - } else {
118   - m_off -= 0x4000;
119   -
120   - *op++ = (M4_MARKER | ((m_off & 0x4000) >> 11)
121   - | (m_len - 2));
122   - goto m3_m4_offset;
  182 + else {
  183 + m_len -= M3_MAX_LEN;
  184 + *op++ = M3_MARKER | 0;
  185 + while (unlikely(m_len > 255)) {
  186 + m_len -= 255;
  187 + *op++ = 0;
  188 + }
  189 + *op++ = (m_len);
123 190 }
  191 + *op++ = (m_off << 2);
  192 + *op++ = (m_off >> 6);
124 193 } else {
125   - end = in_end;
126   - m = m_pos + M2_MAX_LEN + 1;
127   -
128   - while (ip < end && *m == *ip) {
129   - m++;
130   - ip++;
131   - }
132   - m_len = ip - ii;
133   -
134   - if (m_off <= M3_MAX_OFFSET) {
135   - m_off -= 1;
136   - if (m_len <= 33) {
137   - *op++ = (M3_MARKER | (m_len - 2));
138   - } else {
139   - m_len -= 33;
140   - *op++ = M3_MARKER | 0;
141   - goto m3_m4_len;
142   - }
143   - } else {
144   - m_off -= 0x4000;
145   - if (m_len <= M4_MAX_LEN) {
146   - *op++ = (M4_MARKER
147   - | ((m_off & 0x4000) >> 11)
  194 + m_off -= 0x4000;
  195 + if (m_len <= M4_MAX_LEN)
  196 + *op++ = (M4_MARKER | ((m_off >> 11) & 8)
148 197 | (m_len - 2));
149   - } else {
150   - m_len -= M4_MAX_LEN;
151   - *op++ = (M4_MARKER
152   - | ((m_off & 0x4000) >> 11));
153   -m3_m4_len:
154   - while (m_len > 255) {
155   - m_len -= 255;
156   - *op++ = 0;
157   - }
158   -
159   - *op++ = (m_len);
  198 + else {
  199 + m_len -= M4_MAX_LEN;
  200 + *op++ = (M4_MARKER | ((m_off >> 11) & 8));
  201 + while (unlikely(m_len > 255)) {
  202 + m_len -= 255;
  203 + *op++ = 0;
160 204 }
  205 + *op++ = (m_len);
161 206 }
162   -m3_m4_offset:
163   - *op++ = ((m_off & 63) << 2);
  207 + *op++ = (m_off << 2);
164 208 *op++ = (m_off >> 6);
165 209 }
166   -
167   - ii = ip;
168   - if (unlikely(ip >= ip_end))
169   - break;
  210 + goto next;
170 211 }
171   -
172 212 *out_len = op - out;
173   - return in_end - ii;
  213 + return in_end - (ii - ti);
174 214 }
175 215  
176   -int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out,
177   - size_t *out_len, void *wrkmem)
  216 +int lzo1x_1_compress(const unsigned char *in, size_t in_len,
  217 + unsigned char *out, size_t *out_len,
  218 + void *wrkmem)
178 219 {
179   - const unsigned char *ii;
  220 + const unsigned char *ip = in;
180 221 unsigned char *op = out;
181   - size_t t;
  222 + size_t l = in_len;
  223 + size_t t = 0;
182 224  
183   - if (unlikely(in_len <= M2_MAX_LEN + 5)) {
184   - t = in_len;
185   - } else {
186   - t = _lzo1x_1_do_compress(in, in_len, op, out_len, wrkmem);
  225 + while (l > 20) {
  226 + size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1);
  227 + uintptr_t ll_end = (uintptr_t) ip + ll;
  228 + if ((ll_end + ((t + ll) >> 5)) <= ll_end)
  229 + break;
  230 + BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS);
  231 + memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t));
  232 + t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem);
  233 + ip += ll;
187 234 op += *out_len;
  235 + l -= ll;
188 236 }
  237 + t += l;
189 238  
190 239 if (t > 0) {
191   - ii = in + in_len - t;
  240 + const unsigned char *ii = in + in_len - t;
192 241  
193 242 if (op == out && t <= 238) {
194 243 *op++ = (17 + t);
195 244  
196 245  
... ... @@ -198,16 +247,21 @@
198 247 *op++ = (t - 3);
199 248 } else {
200 249 size_t tt = t - 18;
201   -
202 250 *op++ = 0;
203 251 while (tt > 255) {
204 252 tt -= 255;
205 253 *op++ = 0;
206 254 }
207   -
208 255 *op++ = tt;
209 256 }
210   - do {
  257 + if (t >= 16) do {
  258 + COPY8(op, ii);
  259 + COPY8(op + 8, ii + 8);
  260 + op += 16;
  261 + ii += 16;
  262 + t -= 16;
  263 + } while (t >= 16);
  264 + if (t > 0) do {
211 265 *op++ = *ii++;
212 266 } while (--t > 0);
213 267 }
lib/lzo/lzo1x_decompress_safe.c
1 1 /*
2   - * LZO1X Decompressor from MiniLZO
  2 + * LZO1X Decompressor from LZO
3 3 *
4   - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
  4 + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
5 5 *
6 6 * The full LZO package can be found at:
7 7 * http://www.oberhumer.com/opensource/lzo/
8 8 *
9   - * Changed for kernel use by:
  9 + * Changed for Linux kernel use by:
10 10 * Nitin Gupta <nitingupta910@gmail.com>
11 11 * Richard Purdie <rpurdie@openedhand.com>
12 12 */
13 13  
14 14  
15 15  
16 16  
17 17  
18 18  
19 19  
20 20  
21 21  
22 22  
23 23  
24 24  
25 25  
26 26  
27 27  
28 28  
29 29  
30 30  
31 31  
32 32  
33 33  
34 34  
35 35  
36 36  
37 37  
38 38  
... ... @@ -15,225 +15,207 @@
15 15 #include <linux/module.h>
16 16 #include <linux/kernel.h>
17 17 #endif
18   -
19 18 #include <asm/unaligned.h>
20 19 #include <linux/lzo.h>
21 20 #include "lzodefs.h"
22 21  
23   -#define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x))
24   -#define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x))
25   -#define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op)
  22 +#define HAVE_IP(x) ((size_t)(ip_end - ip) >= (size_t)(x))
  23 +#define HAVE_OP(x) ((size_t)(op_end - op) >= (size_t)(x))
  24 +#define NEED_IP(x) if (!HAVE_IP(x)) goto input_overrun
  25 +#define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun
  26 +#define TEST_LB(m_pos) if ((m_pos) < out) goto lookbehind_overrun
26 27  
27   -#define COPY4(dst, src) \
28   - put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst))
29   -
30 28 int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
31   - unsigned char *out, size_t *out_len)
  29 + unsigned char *out, size_t *out_len)
32 30 {
  31 + unsigned char *op;
  32 + const unsigned char *ip;
  33 + size_t t, next;
  34 + size_t state = 0;
  35 + const unsigned char *m_pos;
33 36 const unsigned char * const ip_end = in + in_len;
34 37 unsigned char * const op_end = out + *out_len;
35   - const unsigned char *ip = in, *m_pos;
36   - unsigned char *op = out;
37   - size_t t;
38 38  
39   - *out_len = 0;
  39 + op = out;
  40 + ip = in;
40 41  
  42 + if (unlikely(in_len < 3))
  43 + goto input_overrun;
41 44 if (*ip > 17) {
42 45 t = *ip++ - 17;
43   - if (t < 4)
  46 + if (t < 4) {
  47 + next = t;
44 48 goto match_next;
45   - if (HAVE_OP(t, op_end, op))
46   - goto output_overrun;
47   - if (HAVE_IP(t + 1, ip_end, ip))
48   - goto input_overrun;
49   - do {
50   - *op++ = *ip++;
51   - } while (--t > 0);
52   - goto first_literal_run;
  49 + }
  50 + goto copy_literal_run;
53 51 }
54 52  
55   - while ((ip < ip_end)) {
  53 + for (;;) {
56 54 t = *ip++;
57   - if (t >= 16)
58   - goto match;
59   - if (t == 0) {
60   - if (HAVE_IP(1, ip_end, ip))
61   - goto input_overrun;
62   - while (*ip == 0) {
63   - t += 255;
64   - ip++;
65   - if (HAVE_IP(1, ip_end, ip))
66   - goto input_overrun;
67   - }
68   - t += 15 + *ip++;
69   - }
70   - if (HAVE_OP(t + 3, op_end, op))
71   - goto output_overrun;
72   - if (HAVE_IP(t + 4, ip_end, ip))
73   - goto input_overrun;
74   -
75   - COPY4(op, ip);
76   - op += 4;
77   - ip += 4;
78   - if (--t > 0) {
79   - if (t >= 4) {
80   - do {
81   - COPY4(op, ip);
82   - op += 4;
83   - ip += 4;
84   - t -= 4;
85   - } while (t >= 4);
86   - if (t > 0) {
87   - do {
88   - *op++ = *ip++;
89   - } while (--t > 0);
90   - }
91   - } else {
92   - do {
93   - *op++ = *ip++;
94   - } while (--t > 0);
95   - }
96   - }
97   -
98   -first_literal_run:
99   - t = *ip++;
100   - if (t >= 16)
101   - goto match;
102   - m_pos = op - (1 + M2_MAX_OFFSET);
103   - m_pos -= t >> 2;
104   - m_pos -= *ip++ << 2;
105   -
106   - if (HAVE_LB(m_pos, out, op))
107   - goto lookbehind_overrun;
108   -
109   - if (HAVE_OP(3, op_end, op))
110   - goto output_overrun;
111   - *op++ = *m_pos++;
112   - *op++ = *m_pos++;
113   - *op++ = *m_pos;
114   -
115   - goto match_done;
116   -
117   - do {
118   -match:
119   - if (t >= 64) {
120   - m_pos = op - 1;
121   - m_pos -= (t >> 2) & 7;
122   - m_pos -= *ip++ << 3;
123   - t = (t >> 5) - 1;
124   - if (HAVE_LB(m_pos, out, op))
125   - goto lookbehind_overrun;
126   - if (HAVE_OP(t + 3 - 1, op_end, op))
127   - goto output_overrun;
128   - goto copy_match;
129   - } else if (t >= 32) {
130   - t &= 31;
131   - if (t == 0) {
132   - if (HAVE_IP(1, ip_end, ip))
133   - goto input_overrun;
134   - while (*ip == 0) {
  55 + if (t < 16) {
  56 + if (likely(state == 0)) {
  57 + if (unlikely(t == 0)) {
  58 + while (unlikely(*ip == 0)) {
135 59 t += 255;
136 60 ip++;
137   - if (HAVE_IP(1, ip_end, ip))
138   - goto input_overrun;
  61 + NEED_IP(1);
139 62 }
140   - t += 31 + *ip++;
  63 + t += 15 + *ip++;
141 64 }
142   - m_pos = op - 1;
143   - m_pos -= get_unaligned_le16(ip) >> 2;
144   - ip += 2;
145   - } else if (t >= 16) {
146   - m_pos = op;
147   - m_pos -= (t & 8) << 11;
148   -
149   - t &= 7;
150   - if (t == 0) {
151   - if (HAVE_IP(1, ip_end, ip))
152   - goto input_overrun;
153   - while (*ip == 0) {
154   - t += 255;
155   - ip++;
156   - if (HAVE_IP(1, ip_end, ip))
157   - goto input_overrun;
158   - }
159   - t += 7 + *ip++;
  65 + t += 3;
  66 +copy_literal_run:
  67 +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
  68 + if (likely(HAVE_IP(t + 15) && HAVE_OP(t + 15))) {
  69 + const unsigned char *ie = ip + t;
  70 + unsigned char *oe = op + t;
  71 + do {
  72 + COPY8(op, ip);
  73 + op += 8;
  74 + ip += 8;
  75 + COPY8(op, ip);
  76 + op += 8;
  77 + ip += 8;
  78 + } while (ip < ie);
  79 + ip = ie;
  80 + op = oe;
  81 + } else
  82 +#endif
  83 + {
  84 + NEED_OP(t);
  85 + NEED_IP(t + 3);
  86 + do {
  87 + *op++ = *ip++;
  88 + } while (--t > 0);
160 89 }
161   - m_pos -= get_unaligned_le16(ip) >> 2;
162   - ip += 2;
163   - if (m_pos == op)
164   - goto eof_found;
165   - m_pos -= 0x4000;
166   - } else {
  90 + state = 4;
  91 + continue;
  92 + } else if (state != 4) {
  93 + next = t & 3;
167 94 m_pos = op - 1;
168 95 m_pos -= t >> 2;
169 96 m_pos -= *ip++ << 2;
170   -
171   - if (HAVE_LB(m_pos, out, op))
172   - goto lookbehind_overrun;
173   - if (HAVE_OP(2, op_end, op))
174   - goto output_overrun;
175   -
176   - *op++ = *m_pos++;
177   - *op++ = *m_pos;
178   - goto match_done;
  97 + TEST_LB(m_pos);
  98 + NEED_OP(2);
  99 + op[0] = m_pos[0];
  100 + op[1] = m_pos[1];
  101 + op += 2;
  102 + goto match_next;
  103 + } else {
  104 + next = t & 3;
  105 + m_pos = op - (1 + M2_MAX_OFFSET);
  106 + m_pos -= t >> 2;
  107 + m_pos -= *ip++ << 2;
  108 + t = 3;
179 109 }
180   -
181   - if (HAVE_LB(m_pos, out, op))
182   - goto lookbehind_overrun;
183   - if (HAVE_OP(t + 3 - 1, op_end, op))
184   - goto output_overrun;
185   -
186   - if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) {
187   - COPY4(op, m_pos);
188   - op += 4;
189   - m_pos += 4;
190   - t -= 4 - (3 - 1);
  110 + } else if (t >= 64) {
  111 + next = t & 3;
  112 + m_pos = op - 1;
  113 + m_pos -= (t >> 2) & 7;
  114 + m_pos -= *ip++ << 3;
  115 + t = (t >> 5) - 1 + (3 - 1);
  116 + } else if (t >= 32) {
  117 + t = (t & 31) + (3 - 1);
  118 + if (unlikely(t == 2)) {
  119 + while (unlikely(*ip == 0)) {
  120 + t += 255;
  121 + ip++;
  122 + NEED_IP(1);
  123 + }
  124 + t += 31 + *ip++;
  125 + NEED_IP(2);
  126 + }
  127 + m_pos = op - 1;
  128 + next = get_unaligned_le16(ip);
  129 + ip += 2;
  130 + m_pos -= next >> 2;
  131 + next &= 3;
  132 + } else {
  133 + m_pos = op;
  134 + m_pos -= (t & 8) << 11;
  135 + t = (t & 7) + (3 - 1);
  136 + if (unlikely(t == 2)) {
  137 + while (unlikely(*ip == 0)) {
  138 + t += 255;
  139 + ip++;
  140 + NEED_IP(1);
  141 + }
  142 + t += 7 + *ip++;
  143 + NEED_IP(2);
  144 + }
  145 + next = get_unaligned_le16(ip);
  146 + ip += 2;
  147 + m_pos -= next >> 2;
  148 + next &= 3;
  149 + if (m_pos == op)
  150 + goto eof_found;
  151 + m_pos -= 0x4000;
  152 + }
  153 + TEST_LB(m_pos);
  154 +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
  155 + if (op - m_pos >= 8) {
  156 + unsigned char *oe = op + t;
  157 + if (likely(HAVE_OP(t + 15))) {
191 158 do {
192   - COPY4(op, m_pos);
193   - op += 4;
194   - m_pos += 4;
195   - t -= 4;
196   - } while (t >= 4);
197   - if (t > 0)
198   - do {
199   - *op++ = *m_pos++;
200   - } while (--t > 0);
  159 + COPY8(op, m_pos);
  160 + op += 8;
  161 + m_pos += 8;
  162 + COPY8(op, m_pos);
  163 + op += 8;
  164 + m_pos += 8;
  165 + } while (op < oe);
  166 + op = oe;
  167 + if (HAVE_IP(6)) {
  168 + state = next;
  169 + COPY4(op, ip);
  170 + op += next;
  171 + ip += next;
  172 + continue;
  173 + }
201 174 } else {
202   -copy_match:
203   - *op++ = *m_pos++;
204   - *op++ = *m_pos++;
  175 + NEED_OP(t);
205 176 do {
206 177 *op++ = *m_pos++;
207   - } while (--t > 0);
  178 + } while (op < oe);
208 179 }
209   -match_done:
210   - t = ip[-2] & 3;
211   - if (t == 0)
212   - break;
  180 + } else
  181 +#endif
  182 + {
  183 + unsigned char *oe = op + t;
  184 + NEED_OP(t);
  185 + op[0] = m_pos[0];
  186 + op[1] = m_pos[1];
  187 + op += 2;
  188 + m_pos += 2;
  189 + do {
  190 + *op++ = *m_pos++;
  191 + } while (op < oe);
  192 + }
213 193 match_next:
214   - if (HAVE_OP(t, op_end, op))
215   - goto output_overrun;
216   - if (HAVE_IP(t + 1, ip_end, ip))
217   - goto input_overrun;
218   -
219   - *op++ = *ip++;
220   - if (t > 1) {
  194 + state = next;
  195 + t = next;
  196 +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
  197 + if (likely(HAVE_IP(6) && HAVE_OP(4))) {
  198 + COPY4(op, ip);
  199 + op += t;
  200 + ip += t;
  201 + } else
  202 +#endif
  203 + {
  204 + NEED_IP(t + 3);
  205 + NEED_OP(t);
  206 + while (t > 0) {
221 207 *op++ = *ip++;
222   - if (t > 2)
223   - *op++ = *ip++;
  208 + t--;
224 209 }
225   -
226   - t = *ip++;
227   - } while (ip < ip_end);
  210 + }
228 211 }
229 212  
230   - *out_len = op - out;
231   - return LZO_E_EOF_NOT_FOUND;
232   -
233 213 eof_found:
234 214 *out_len = op - out;
235   - return (ip == ip_end ? LZO_E_OK :
236   - (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN));
  215 + return (t != 3 ? LZO_E_ERROR :
  216 + ip == ip_end ? LZO_E_OK :
  217 + ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN);
  218 +
237 219 input_overrun:
238 220 *out_len = op - out;
239 221 return LZO_E_INPUT_OVERRUN;
1 1 /*
2 2 * lzodefs.h -- architecture, OS and compiler specific defines
3 3 *
4   - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
  4 + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
5 5 *
6 6 * The full LZO package can be found at:
7 7 * http://www.oberhumer.com/opensource/lzo/
8 8 *
9   - * Changed for kernel use by:
  9 + * Changed for Linux kernel use by:
10 10 * Nitin Gupta <nitingupta910@gmail.com>
11 11 * Richard Purdie <rpurdie@openedhand.com>
12 12 */
13 13  
14   -#define LZO_VERSION 0x2020
15   -#define LZO_VERSION_STRING "2.02"
16   -#define LZO_VERSION_DATE "Oct 17 2005"
17 14  
  15 +#define COPY4(dst, src) \
  16 + put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst))
  17 +#if defined(__x86_64__)
  18 +#define COPY8(dst, src) \
  19 + put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst))
  20 +#else
  21 +#define COPY8(dst, src) \
  22 + COPY4(dst, src); COPY4((dst) + 4, (src) + 4)
  23 +#endif
  24 +
  25 +#if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN)
  26 +#error "conflicting endian definitions"
  27 +#elif defined(__x86_64__)
  28 +#define LZO_USE_CTZ64 1
  29 +#define LZO_USE_CTZ32 1
  30 +#elif defined(__i386__) || defined(__powerpc__)
  31 +#define LZO_USE_CTZ32 1
  32 +#elif defined(__arm__) && (__LINUX_ARM_ARCH__ >= 5)
  33 +#define LZO_USE_CTZ32 1
  34 +#endif
  35 +
18 36 #define M1_MAX_OFFSET 0x0400
19 37 #define M2_MAX_OFFSET 0x0800
20 38 #define M3_MAX_OFFSET 0x4000
21 39  
... ... @@ -34,11 +52,9 @@
34 52 #define M3_MARKER 32
35 53 #define M4_MARKER 16
36 54  
37   -#define D_BITS 14
38   -#define D_MASK ((1u << D_BITS) - 1)
  55 +#define lzo_dict_t unsigned short
  56 +#define D_BITS 13
  57 +#define D_SIZE (1u << D_BITS)
  58 +#define D_MASK (D_SIZE - 1)
39 59 #define D_HIGH ((D_MASK >> 1) + 1)
40   -
41   -#define DX2(p, s1, s2) (((((size_t)((p)[2]) << (s2)) ^ (p)[1]) \
42   - << (s1)) ^ (p)[0])
43   -#define DX3(p, s1, s2, s3) ((DX2((p)+1, s2, s3) << (s1)) ^ (p)[0])