Commit 8b975bd3f9089f8ee5d7bbfd798537b992bbc7e7
1 parent
b6bec26cea
Exists in
smarc-l5.0.0_1.0.0-ga
and in
5 other branches
lib/lzo: Update LZO compression to current upstream version
This commit updates the kernel LZO code to the current upstream version, which features a significant speed improvement - benchmarking the Calgary and Silesia test corpora typically shows doubled performance in both compression and decompression on modern i386/x86_64/powerpc machines.
Signed-off-by: Markus F.X.J. Oberhumer <markus@oberhumer.com>
Showing 4 changed files with 395 additions and 343 deletions Side-by-side Diff
include/linux/lzo.h
... | ... | @@ -4,28 +4,28 @@ |
4 | 4 | * LZO Public Kernel Interface |
5 | 5 | * A mini subset of the LZO real-time data compression library |
6 | 6 | * |
7 | - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com> | |
7 | + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com> | |
8 | 8 | * |
9 | 9 | * The full LZO package can be found at: |
10 | 10 | * http://www.oberhumer.com/opensource/lzo/ |
11 | 11 | * |
12 | - * Changed for kernel use by: | |
12 | + * Changed for Linux kernel use by: | |
13 | 13 | * Nitin Gupta <nitingupta910@gmail.com> |
14 | 14 | * Richard Purdie <rpurdie@openedhand.com> |
15 | 15 | */ |
16 | 16 | |
17 | -#define LZO1X_MEM_COMPRESS (16384 * sizeof(unsigned char *)) | |
18 | -#define LZO1X_1_MEM_COMPRESS LZO1X_MEM_COMPRESS | |
17 | +#define LZO1X_1_MEM_COMPRESS (8192 * sizeof(unsigned short)) | |
18 | +#define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS | |
19 | 19 | |
20 | 20 | #define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3) |
21 | 21 | |
22 | -/* This requires 'workmem' of size LZO1X_1_MEM_COMPRESS */ | |
22 | +/* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */ | |
23 | 23 | int lzo1x_1_compress(const unsigned char *src, size_t src_len, |
24 | - unsigned char *dst, size_t *dst_len, void *wrkmem); | |
24 | + unsigned char *dst, size_t *dst_len, void *wrkmem); | |
25 | 25 | |
26 | 26 | /* safe decompression with overrun testing */ |
27 | 27 | int lzo1x_decompress_safe(const unsigned char *src, size_t src_len, |
28 | - unsigned char *dst, size_t *dst_len); | |
28 | + unsigned char *dst, size_t *dst_len); | |
29 | 29 | |
30 | 30 | /* |
31 | 31 | * Return values (< 0 = Error) |
... | ... | @@ -40,6 +40,7 @@ |
40 | 40 | #define LZO_E_EOF_NOT_FOUND (-7) |
41 | 41 | #define LZO_E_INPUT_NOT_CONSUMED (-8) |
42 | 42 | #define LZO_E_NOT_YET_IMPLEMENTED (-9) |
43 | +#define LZO_E_INVALID_ARGUMENT (-10) | |
43 | 44 | |
44 | 45 | #endif |
lib/lzo/lzo1x_compress.c
1 | 1 | /* |
2 | - * LZO1X Compressor from MiniLZO | |
2 | + * LZO1X Compressor from LZO | |
3 | 3 | * |
4 | - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com> | |
4 | + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com> | |
5 | 5 | * |
6 | 6 | * The full LZO package can be found at: |
7 | 7 | * http://www.oberhumer.com/opensource/lzo/ |
8 | 8 | * |
9 | - * Changed for kernel use by: | |
9 | + * Changed for Linux kernel use by: | |
10 | 10 | * Nitin Gupta <nitingupta910@gmail.com> |
11 | 11 | * Richard Purdie <rpurdie@openedhand.com> |
12 | 12 | */ |
13 | 13 | |
14 | 14 | #include <linux/module.h> |
15 | 15 | #include <linux/kernel.h> |
16 | -#include <linux/lzo.h> | |
17 | 16 | #include <asm/unaligned.h> |
17 | +#include <linux/lzo.h> | |
18 | 18 | #include "lzodefs.h" |
19 | 19 | |
20 | 20 | static noinline size_t |
21 | -_lzo1x_1_do_compress(const unsigned char *in, size_t in_len, | |
22 | - unsigned char *out, size_t *out_len, void *wrkmem) | |
21 | +lzo1x_1_do_compress(const unsigned char *in, size_t in_len, | |
22 | + unsigned char *out, size_t *out_len, | |
23 | + size_t ti, void *wrkmem) | |
23 | 24 | { |
25 | + const unsigned char *ip; | |
26 | + unsigned char *op; | |
24 | 27 | const unsigned char * const in_end = in + in_len; |
25 | - const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5; | |
26 | - const unsigned char ** const dict = wrkmem; | |
27 | - const unsigned char *ip = in, *ii = ip; | |
28 | - const unsigned char *end, *m, *m_pos; | |
29 | - size_t m_off, m_len, dindex; | |
30 | - unsigned char *op = out; | |
28 | + const unsigned char * const ip_end = in + in_len - 20; | |
29 | + const unsigned char *ii; | |
30 | + lzo_dict_t * const dict = (lzo_dict_t *) wrkmem; | |
31 | 31 | |
32 | - ip += 4; | |
32 | + op = out; | |
33 | + ip = in; | |
34 | + ii = ip; | |
35 | + ip += ti < 4 ? 4 - ti : 0; | |
33 | 36 | |
34 | 37 | for (;;) { |
35 | - dindex = ((size_t)(0x21 * DX3(ip, 5, 5, 6)) >> 5) & D_MASK; | |
36 | - m_pos = dict[dindex]; | |
37 | - | |
38 | - if (m_pos < in) | |
39 | - goto literal; | |
40 | - | |
41 | - if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) | |
42 | - goto literal; | |
43 | - | |
44 | - m_off = ip - m_pos; | |
45 | - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) | |
46 | - goto try_match; | |
47 | - | |
48 | - dindex = (dindex & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f); | |
49 | - m_pos = dict[dindex]; | |
50 | - | |
51 | - if (m_pos < in) | |
52 | - goto literal; | |
53 | - | |
54 | - if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) | |
55 | - goto literal; | |
56 | - | |
57 | - m_off = ip - m_pos; | |
58 | - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) | |
59 | - goto try_match; | |
60 | - | |
61 | - goto literal; | |
62 | - | |
63 | -try_match: | |
64 | - if (get_unaligned((const unsigned short *)m_pos) | |
65 | - == get_unaligned((const unsigned short *)ip)) { | |
66 | - if (likely(m_pos[2] == ip[2])) | |
67 | - goto match; | |
68 | - } | |
69 | - | |
38 | + const unsigned char *m_pos; | |
39 | + size_t t, m_len, m_off; | |
40 | + u32 dv; | |
70 | 41 | literal: |
71 | - dict[dindex] = ip; | |
72 | - ++ip; | |
42 | + ip += 1 + ((ip - ii) >> 5); | |
43 | +next: | |
73 | 44 | if (unlikely(ip >= ip_end)) |
74 | 45 | break; |
75 | - continue; | |
46 | + dv = get_unaligned_le32(ip); | |
47 | + t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; | |
48 | + m_pos = in + dict[t]; | |
49 | + dict[t] = (lzo_dict_t) (ip - in); | |
50 | + if (unlikely(dv != get_unaligned_le32(m_pos))) | |
51 | + goto literal; | |
76 | 52 | |
77 | -match: | |
78 | - dict[dindex] = ip; | |
79 | - if (ip != ii) { | |
80 | - size_t t = ip - ii; | |
81 | - | |
53 | + ii -= ti; | |
54 | + ti = 0; | |
55 | + t = ip - ii; | |
56 | + if (t != 0) { | |
82 | 57 | if (t <= 3) { |
83 | 58 | op[-2] |= t; |
84 | - } else if (t <= 18) { | |
59 | + COPY4(op, ii); | |
60 | + op += t; | |
61 | + } else if (t <= 16) { | |
85 | 62 | *op++ = (t - 3); |
63 | + COPY8(op, ii); | |
64 | + COPY8(op + 8, ii + 8); | |
65 | + op += t; | |
86 | 66 | } else { |
87 | - size_t tt = t - 18; | |
88 | - | |
89 | - *op++ = 0; | |
90 | - while (tt > 255) { | |
91 | - tt -= 255; | |
67 | + if (t <= 18) { | |
68 | + *op++ = (t - 3); | |
69 | + } else { | |
70 | + size_t tt = t - 18; | |
92 | 71 | *op++ = 0; |
72 | + while (unlikely(tt > 255)) { | |
73 | + tt -= 255; | |
74 | + *op++ = 0; | |
75 | + } | |
76 | + *op++ = tt; | |
93 | 77 | } |
94 | - *op++ = tt; | |
78 | + do { | |
79 | + COPY8(op, ii); | |
80 | + COPY8(op + 8, ii + 8); | |
81 | + op += 16; | |
82 | + ii += 16; | |
83 | + t -= 16; | |
84 | + } while (t >= 16); | |
85 | + if (t > 0) do { | |
86 | + *op++ = *ii++; | |
87 | + } while (--t > 0); | |
95 | 88 | } |
89 | + } | |
90 | + | |
91 | + m_len = 4; | |
92 | + { | |
93 | +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64) | |
94 | + u64 v; | |
95 | + v = get_unaligned((const u64 *) (ip + m_len)) ^ | |
96 | + get_unaligned((const u64 *) (m_pos + m_len)); | |
97 | + if (unlikely(v == 0)) { | |
96 | 98 | do { |
97 | - *op++ = *ii++; | |
98 | - } while (--t > 0); | |
99 | + m_len += 8; | |
100 | + v = get_unaligned((const u64 *) (ip + m_len)) ^ | |
101 | + get_unaligned((const u64 *) (m_pos + m_len)); | |
102 | + if (unlikely(ip + m_len >= ip_end)) | |
103 | + goto m_len_done; | |
104 | + } while (v == 0); | |
99 | 105 | } |
106 | +# if defined(__LITTLE_ENDIAN) | |
107 | + m_len += (unsigned) __builtin_ctzll(v) / 8; | |
108 | +# elif defined(__BIG_ENDIAN) | |
109 | + m_len += (unsigned) __builtin_clzll(v) / 8; | |
110 | +# else | |
111 | +# error "missing endian definition" | |
112 | +# endif | |
113 | +#elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ32) | |
114 | + u32 v; | |
115 | + v = get_unaligned((const u32 *) (ip + m_len)) ^ | |
116 | + get_unaligned((const u32 *) (m_pos + m_len)); | |
117 | + if (unlikely(v == 0)) { | |
118 | + do { | |
119 | + m_len += 4; | |
120 | + v = get_unaligned((const u32 *) (ip + m_len)) ^ | |
121 | + get_unaligned((const u32 *) (m_pos + m_len)); | |
122 | + if (v != 0) | |
123 | + break; | |
124 | + m_len += 4; | |
125 | + v = get_unaligned((const u32 *) (ip + m_len)) ^ | |
126 | + get_unaligned((const u32 *) (m_pos + m_len)); | |
127 | + if (unlikely(ip + m_len >= ip_end)) | |
128 | + goto m_len_done; | |
129 | + } while (v == 0); | |
130 | + } | |
131 | +# if defined(__LITTLE_ENDIAN) | |
132 | + m_len += (unsigned) __builtin_ctz(v) / 8; | |
133 | +# elif defined(__BIG_ENDIAN) | |
134 | + m_len += (unsigned) __builtin_clz(v) / 8; | |
135 | +# else | |
136 | +# error "missing endian definition" | |
137 | +# endif | |
138 | +#else | |
139 | + if (unlikely(ip[m_len] == m_pos[m_len])) { | |
140 | + do { | |
141 | + m_len += 1; | |
142 | + if (ip[m_len] != m_pos[m_len]) | |
143 | + break; | |
144 | + m_len += 1; | |
145 | + if (ip[m_len] != m_pos[m_len]) | |
146 | + break; | |
147 | + m_len += 1; | |
148 | + if (ip[m_len] != m_pos[m_len]) | |
149 | + break; | |
150 | + m_len += 1; | |
151 | + if (ip[m_len] != m_pos[m_len]) | |
152 | + break; | |
153 | + m_len += 1; | |
154 | + if (ip[m_len] != m_pos[m_len]) | |
155 | + break; | |
156 | + m_len += 1; | |
157 | + if (ip[m_len] != m_pos[m_len]) | |
158 | + break; | |
159 | + m_len += 1; | |
160 | + if (ip[m_len] != m_pos[m_len]) | |
161 | + break; | |
162 | + m_len += 1; | |
163 | + if (unlikely(ip + m_len >= ip_end)) | |
164 | + goto m_len_done; | |
165 | + } while (ip[m_len] == m_pos[m_len]); | |
166 | + } | |
167 | +#endif | |
168 | + } | |
169 | +m_len_done: | |
100 | 170 | |
101 | - ip += 3; | |
102 | - if (m_pos[3] != *ip++ || m_pos[4] != *ip++ | |
103 | - || m_pos[5] != *ip++ || m_pos[6] != *ip++ | |
104 | - || m_pos[7] != *ip++ || m_pos[8] != *ip++) { | |
105 | - --ip; | |
106 | - m_len = ip - ii; | |
107 | - | |
108 | - if (m_off <= M2_MAX_OFFSET) { | |
109 | - m_off -= 1; | |
110 | - *op++ = (((m_len - 1) << 5) | |
111 | - | ((m_off & 7) << 2)); | |
112 | - *op++ = (m_off >> 3); | |
113 | - } else if (m_off <= M3_MAX_OFFSET) { | |
114 | - m_off -= 1; | |
171 | + m_off = ip - m_pos; | |
172 | + ip += m_len; | |
173 | + ii = ip; | |
174 | + if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { | |
175 | + m_off -= 1; | |
176 | + *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2)); | |
177 | + *op++ = (m_off >> 3); | |
178 | + } else if (m_off <= M3_MAX_OFFSET) { | |
179 | + m_off -= 1; | |
180 | + if (m_len <= M3_MAX_LEN) | |
115 | 181 | *op++ = (M3_MARKER | (m_len - 2)); |
116 | - goto m3_m4_offset; | |
117 | - } else { | |
118 | - m_off -= 0x4000; | |
119 | - | |
120 | - *op++ = (M4_MARKER | ((m_off & 0x4000) >> 11) | |
121 | - | (m_len - 2)); | |
122 | - goto m3_m4_offset; | |
182 | + else { | |
183 | + m_len -= M3_MAX_LEN; | |
184 | + *op++ = M3_MARKER | 0; | |
185 | + while (unlikely(m_len > 255)) { | |
186 | + m_len -= 255; | |
187 | + *op++ = 0; | |
188 | + } | |
189 | + *op++ = (m_len); | |
123 | 190 | } |
191 | + *op++ = (m_off << 2); | |
192 | + *op++ = (m_off >> 6); | |
124 | 193 | } else { |
125 | - end = in_end; | |
126 | - m = m_pos + M2_MAX_LEN + 1; | |
127 | - | |
128 | - while (ip < end && *m == *ip) { | |
129 | - m++; | |
130 | - ip++; | |
131 | - } | |
132 | - m_len = ip - ii; | |
133 | - | |
134 | - if (m_off <= M3_MAX_OFFSET) { | |
135 | - m_off -= 1; | |
136 | - if (m_len <= 33) { | |
137 | - *op++ = (M3_MARKER | (m_len - 2)); | |
138 | - } else { | |
139 | - m_len -= 33; | |
140 | - *op++ = M3_MARKER | 0; | |
141 | - goto m3_m4_len; | |
142 | - } | |
143 | - } else { | |
144 | - m_off -= 0x4000; | |
145 | - if (m_len <= M4_MAX_LEN) { | |
146 | - *op++ = (M4_MARKER | |
147 | - | ((m_off & 0x4000) >> 11) | |
194 | + m_off -= 0x4000; | |
195 | + if (m_len <= M4_MAX_LEN) | |
196 | + *op++ = (M4_MARKER | ((m_off >> 11) & 8) | |
148 | 197 | | (m_len - 2)); |
149 | - } else { | |
150 | - m_len -= M4_MAX_LEN; | |
151 | - *op++ = (M4_MARKER | |
152 | - | ((m_off & 0x4000) >> 11)); | |
153 | -m3_m4_len: | |
154 | - while (m_len > 255) { | |
155 | - m_len -= 255; | |
156 | - *op++ = 0; | |
157 | - } | |
158 | - | |
159 | - *op++ = (m_len); | |
198 | + else { | |
199 | + m_len -= M4_MAX_LEN; | |
200 | + *op++ = (M4_MARKER | ((m_off >> 11) & 8)); | |
201 | + while (unlikely(m_len > 255)) { | |
202 | + m_len -= 255; | |
203 | + *op++ = 0; | |
160 | 204 | } |
205 | + *op++ = (m_len); | |
161 | 206 | } |
162 | -m3_m4_offset: | |
163 | - *op++ = ((m_off & 63) << 2); | |
207 | + *op++ = (m_off << 2); | |
164 | 208 | *op++ = (m_off >> 6); |
165 | 209 | } |
166 | - | |
167 | - ii = ip; | |
168 | - if (unlikely(ip >= ip_end)) | |
169 | - break; | |
210 | + goto next; | |
170 | 211 | } |
171 | - | |
172 | 212 | *out_len = op - out; |
173 | - return in_end - ii; | |
213 | + return in_end - (ii - ti); | |
174 | 214 | } |
175 | 215 | |
176 | -int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, | |
177 | - size_t *out_len, void *wrkmem) | |
216 | +int lzo1x_1_compress(const unsigned char *in, size_t in_len, | |
217 | + unsigned char *out, size_t *out_len, | |
218 | + void *wrkmem) | |
178 | 219 | { |
179 | - const unsigned char *ii; | |
220 | + const unsigned char *ip = in; | |
180 | 221 | unsigned char *op = out; |
181 | - size_t t; | |
222 | + size_t l = in_len; | |
223 | + size_t t = 0; | |
182 | 224 | |
183 | - if (unlikely(in_len <= M2_MAX_LEN + 5)) { | |
184 | - t = in_len; | |
185 | - } else { | |
186 | - t = _lzo1x_1_do_compress(in, in_len, op, out_len, wrkmem); | |
225 | + while (l > 20) { | |
226 | + size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1); | |
227 | + uintptr_t ll_end = (uintptr_t) ip + ll; | |
228 | + if ((ll_end + ((t + ll) >> 5)) <= ll_end) | |
229 | + break; | |
230 | + BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); | |
231 | + memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); | |
232 | + t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem); | |
233 | + ip += ll; | |
187 | 234 | op += *out_len; |
235 | + l -= ll; | |
188 | 236 | } |
237 | + t += l; | |
189 | 238 | |
190 | 239 | if (t > 0) { |
191 | - ii = in + in_len - t; | |
240 | + const unsigned char *ii = in + in_len - t; | |
192 | 241 | |
193 | 242 | if (op == out && t <= 238) { |
194 | 243 | *op++ = (17 + t); |
195 | 244 | |
196 | 245 | |
... | ... | @@ -198,16 +247,21 @@ |
198 | 247 | *op++ = (t - 3); |
199 | 248 | } else { |
200 | 249 | size_t tt = t - 18; |
201 | - | |
202 | 250 | *op++ = 0; |
203 | 251 | while (tt > 255) { |
204 | 252 | tt -= 255; |
205 | 253 | *op++ = 0; |
206 | 254 | } |
207 | - | |
208 | 255 | *op++ = tt; |
209 | 256 | } |
210 | - do { | |
257 | + if (t >= 16) do { | |
258 | + COPY8(op, ii); | |
259 | + COPY8(op + 8, ii + 8); | |
260 | + op += 16; | |
261 | + ii += 16; | |
262 | + t -= 16; | |
263 | + } while (t >= 16); | |
264 | + if (t > 0) do { | |
211 | 265 | *op++ = *ii++; |
212 | 266 | } while (--t > 0); |
213 | 267 | } |
lib/lzo/lzo1x_decompress_safe.c
1 | 1 | /* |
2 | - * LZO1X Decompressor from MiniLZO | |
2 | + * LZO1X Decompressor from LZO | |
3 | 3 | * |
4 | - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com> | |
4 | + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com> | |
5 | 5 | * |
6 | 6 | * The full LZO package can be found at: |
7 | 7 | * http://www.oberhumer.com/opensource/lzo/ |
8 | 8 | * |
9 | - * Changed for kernel use by: | |
9 | + * Changed for Linux kernel use by: | |
10 | 10 | * Nitin Gupta <nitingupta910@gmail.com> |
11 | 11 | * Richard Purdie <rpurdie@openedhand.com> |
12 | 12 | */ |
13 | 13 | |
14 | 14 | |
15 | 15 | |
16 | 16 | |
17 | 17 | |
18 | 18 | |
19 | 19 | |
20 | 20 | |
21 | 21 | |
22 | 22 | |
23 | 23 | |
24 | 24 | |
25 | 25 | |
26 | 26 | |
27 | 27 | |
28 | 28 | |
29 | 29 | |
30 | 30 | |
31 | 31 | |
32 | 32 | |
33 | 33 | |
34 | 34 | |
35 | 35 | |
36 | 36 | |
37 | 37 | |
38 | 38 | |
... | ... | @@ -15,225 +15,207 @@ |
15 | 15 | #include <linux/module.h> |
16 | 16 | #include <linux/kernel.h> |
17 | 17 | #endif |
18 | - | |
19 | 18 | #include <asm/unaligned.h> |
20 | 19 | #include <linux/lzo.h> |
21 | 20 | #include "lzodefs.h" |
22 | 21 | |
23 | -#define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x)) | |
24 | -#define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x)) | |
25 | -#define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op) | |
22 | +#define HAVE_IP(x) ((size_t)(ip_end - ip) >= (size_t)(x)) | |
23 | +#define HAVE_OP(x) ((size_t)(op_end - op) >= (size_t)(x)) | |
24 | +#define NEED_IP(x) if (!HAVE_IP(x)) goto input_overrun | |
25 | +#define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun | |
26 | +#define TEST_LB(m_pos) if ((m_pos) < out) goto lookbehind_overrun | |
26 | 27 | |
27 | -#define COPY4(dst, src) \ | |
28 | - put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) | |
29 | - | |
30 | 28 | int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, |
31 | - unsigned char *out, size_t *out_len) | |
29 | + unsigned char *out, size_t *out_len) | |
32 | 30 | { |
31 | + unsigned char *op; | |
32 | + const unsigned char *ip; | |
33 | + size_t t, next; | |
34 | + size_t state = 0; | |
35 | + const unsigned char *m_pos; | |
33 | 36 | const unsigned char * const ip_end = in + in_len; |
34 | 37 | unsigned char * const op_end = out + *out_len; |
35 | - const unsigned char *ip = in, *m_pos; | |
36 | - unsigned char *op = out; | |
37 | - size_t t; | |
38 | 38 | |
39 | - *out_len = 0; | |
39 | + op = out; | |
40 | + ip = in; | |
40 | 41 | |
42 | + if (unlikely(in_len < 3)) | |
43 | + goto input_overrun; | |
41 | 44 | if (*ip > 17) { |
42 | 45 | t = *ip++ - 17; |
43 | - if (t < 4) | |
46 | + if (t < 4) { | |
47 | + next = t; | |
44 | 48 | goto match_next; |
45 | - if (HAVE_OP(t, op_end, op)) | |
46 | - goto output_overrun; | |
47 | - if (HAVE_IP(t + 1, ip_end, ip)) | |
48 | - goto input_overrun; | |
49 | - do { | |
50 | - *op++ = *ip++; | |
51 | - } while (--t > 0); | |
52 | - goto first_literal_run; | |
49 | + } | |
50 | + goto copy_literal_run; | |
53 | 51 | } |
54 | 52 | |
55 | - while ((ip < ip_end)) { | |
53 | + for (;;) { | |
56 | 54 | t = *ip++; |
57 | - if (t >= 16) | |
58 | - goto match; | |
59 | - if (t == 0) { | |
60 | - if (HAVE_IP(1, ip_end, ip)) | |
61 | - goto input_overrun; | |
62 | - while (*ip == 0) { | |
63 | - t += 255; | |
64 | - ip++; | |
65 | - if (HAVE_IP(1, ip_end, ip)) | |
66 | - goto input_overrun; | |
67 | - } | |
68 | - t += 15 + *ip++; | |
69 | - } | |
70 | - if (HAVE_OP(t + 3, op_end, op)) | |
71 | - goto output_overrun; | |
72 | - if (HAVE_IP(t + 4, ip_end, ip)) | |
73 | - goto input_overrun; | |
74 | - | |
75 | - COPY4(op, ip); | |
76 | - op += 4; | |
77 | - ip += 4; | |
78 | - if (--t > 0) { | |
79 | - if (t >= 4) { | |
80 | - do { | |
81 | - COPY4(op, ip); | |
82 | - op += 4; | |
83 | - ip += 4; | |
84 | - t -= 4; | |
85 | - } while (t >= 4); | |
86 | - if (t > 0) { | |
87 | - do { | |
88 | - *op++ = *ip++; | |
89 | - } while (--t > 0); | |
90 | - } | |
91 | - } else { | |
92 | - do { | |
93 | - *op++ = *ip++; | |
94 | - } while (--t > 0); | |
95 | - } | |
96 | - } | |
97 | - | |
98 | -first_literal_run: | |
99 | - t = *ip++; | |
100 | - if (t >= 16) | |
101 | - goto match; | |
102 | - m_pos = op - (1 + M2_MAX_OFFSET); | |
103 | - m_pos -= t >> 2; | |
104 | - m_pos -= *ip++ << 2; | |
105 | - | |
106 | - if (HAVE_LB(m_pos, out, op)) | |
107 | - goto lookbehind_overrun; | |
108 | - | |
109 | - if (HAVE_OP(3, op_end, op)) | |
110 | - goto output_overrun; | |
111 | - *op++ = *m_pos++; | |
112 | - *op++ = *m_pos++; | |
113 | - *op++ = *m_pos; | |
114 | - | |
115 | - goto match_done; | |
116 | - | |
117 | - do { | |
118 | -match: | |
119 | - if (t >= 64) { | |
120 | - m_pos = op - 1; | |
121 | - m_pos -= (t >> 2) & 7; | |
122 | - m_pos -= *ip++ << 3; | |
123 | - t = (t >> 5) - 1; | |
124 | - if (HAVE_LB(m_pos, out, op)) | |
125 | - goto lookbehind_overrun; | |
126 | - if (HAVE_OP(t + 3 - 1, op_end, op)) | |
127 | - goto output_overrun; | |
128 | - goto copy_match; | |
129 | - } else if (t >= 32) { | |
130 | - t &= 31; | |
131 | - if (t == 0) { | |
132 | - if (HAVE_IP(1, ip_end, ip)) | |
133 | - goto input_overrun; | |
134 | - while (*ip == 0) { | |
55 | + if (t < 16) { | |
56 | + if (likely(state == 0)) { | |
57 | + if (unlikely(t == 0)) { | |
58 | + while (unlikely(*ip == 0)) { | |
135 | 59 | t += 255; |
136 | 60 | ip++; |
137 | - if (HAVE_IP(1, ip_end, ip)) | |
138 | - goto input_overrun; | |
61 | + NEED_IP(1); | |
139 | 62 | } |
140 | - t += 31 + *ip++; | |
63 | + t += 15 + *ip++; | |
141 | 64 | } |
142 | - m_pos = op - 1; | |
143 | - m_pos -= get_unaligned_le16(ip) >> 2; | |
144 | - ip += 2; | |
145 | - } else if (t >= 16) { | |
146 | - m_pos = op; | |
147 | - m_pos -= (t & 8) << 11; | |
148 | - | |
149 | - t &= 7; | |
150 | - if (t == 0) { | |
151 | - if (HAVE_IP(1, ip_end, ip)) | |
152 | - goto input_overrun; | |
153 | - while (*ip == 0) { | |
154 | - t += 255; | |
155 | - ip++; | |
156 | - if (HAVE_IP(1, ip_end, ip)) | |
157 | - goto input_overrun; | |
158 | - } | |
159 | - t += 7 + *ip++; | |
65 | + t += 3; | |
66 | +copy_literal_run: | |
67 | +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) | |
68 | + if (likely(HAVE_IP(t + 15) && HAVE_OP(t + 15))) { | |
69 | + const unsigned char *ie = ip + t; | |
70 | + unsigned char *oe = op + t; | |
71 | + do { | |
72 | + COPY8(op, ip); | |
73 | + op += 8; | |
74 | + ip += 8; | |
75 | + COPY8(op, ip); | |
76 | + op += 8; | |
77 | + ip += 8; | |
78 | + } while (ip < ie); | |
79 | + ip = ie; | |
80 | + op = oe; | |
81 | + } else | |
82 | +#endif | |
83 | + { | |
84 | + NEED_OP(t); | |
85 | + NEED_IP(t + 3); | |
86 | + do { | |
87 | + *op++ = *ip++; | |
88 | + } while (--t > 0); | |
160 | 89 | } |
161 | - m_pos -= get_unaligned_le16(ip) >> 2; | |
162 | - ip += 2; | |
163 | - if (m_pos == op) | |
164 | - goto eof_found; | |
165 | - m_pos -= 0x4000; | |
166 | - } else { | |
90 | + state = 4; | |
91 | + continue; | |
92 | + } else if (state != 4) { | |
93 | + next = t & 3; | |
167 | 94 | m_pos = op - 1; |
168 | 95 | m_pos -= t >> 2; |
169 | 96 | m_pos -= *ip++ << 2; |
170 | - | |
171 | - if (HAVE_LB(m_pos, out, op)) | |
172 | - goto lookbehind_overrun; | |
173 | - if (HAVE_OP(2, op_end, op)) | |
174 | - goto output_overrun; | |
175 | - | |
176 | - *op++ = *m_pos++; | |
177 | - *op++ = *m_pos; | |
178 | - goto match_done; | |
97 | + TEST_LB(m_pos); | |
98 | + NEED_OP(2); | |
99 | + op[0] = m_pos[0]; | |
100 | + op[1] = m_pos[1]; | |
101 | + op += 2; | |
102 | + goto match_next; | |
103 | + } else { | |
104 | + next = t & 3; | |
105 | + m_pos = op - (1 + M2_MAX_OFFSET); | |
106 | + m_pos -= t >> 2; | |
107 | + m_pos -= *ip++ << 2; | |
108 | + t = 3; | |
179 | 109 | } |
180 | - | |
181 | - if (HAVE_LB(m_pos, out, op)) | |
182 | - goto lookbehind_overrun; | |
183 | - if (HAVE_OP(t + 3 - 1, op_end, op)) | |
184 | - goto output_overrun; | |
185 | - | |
186 | - if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) { | |
187 | - COPY4(op, m_pos); | |
188 | - op += 4; | |
189 | - m_pos += 4; | |
190 | - t -= 4 - (3 - 1); | |
110 | + } else if (t >= 64) { | |
111 | + next = t & 3; | |
112 | + m_pos = op - 1; | |
113 | + m_pos -= (t >> 2) & 7; | |
114 | + m_pos -= *ip++ << 3; | |
115 | + t = (t >> 5) - 1 + (3 - 1); | |
116 | + } else if (t >= 32) { | |
117 | + t = (t & 31) + (3 - 1); | |
118 | + if (unlikely(t == 2)) { | |
119 | + while (unlikely(*ip == 0)) { | |
120 | + t += 255; | |
121 | + ip++; | |
122 | + NEED_IP(1); | |
123 | + } | |
124 | + t += 31 + *ip++; | |
125 | + NEED_IP(2); | |
126 | + } | |
127 | + m_pos = op - 1; | |
128 | + next = get_unaligned_le16(ip); | |
129 | + ip += 2; | |
130 | + m_pos -= next >> 2; | |
131 | + next &= 3; | |
132 | + } else { | |
133 | + m_pos = op; | |
134 | + m_pos -= (t & 8) << 11; | |
135 | + t = (t & 7) + (3 - 1); | |
136 | + if (unlikely(t == 2)) { | |
137 | + while (unlikely(*ip == 0)) { | |
138 | + t += 255; | |
139 | + ip++; | |
140 | + NEED_IP(1); | |
141 | + } | |
142 | + t += 7 + *ip++; | |
143 | + NEED_IP(2); | |
144 | + } | |
145 | + next = get_unaligned_le16(ip); | |
146 | + ip += 2; | |
147 | + m_pos -= next >> 2; | |
148 | + next &= 3; | |
149 | + if (m_pos == op) | |
150 | + goto eof_found; | |
151 | + m_pos -= 0x4000; | |
152 | + } | |
153 | + TEST_LB(m_pos); | |
154 | +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) | |
155 | + if (op - m_pos >= 8) { | |
156 | + unsigned char *oe = op + t; | |
157 | + if (likely(HAVE_OP(t + 15))) { | |
191 | 158 | do { |
192 | - COPY4(op, m_pos); | |
193 | - op += 4; | |
194 | - m_pos += 4; | |
195 | - t -= 4; | |
196 | - } while (t >= 4); | |
197 | - if (t > 0) | |
198 | - do { | |
199 | - *op++ = *m_pos++; | |
200 | - } while (--t > 0); | |
159 | + COPY8(op, m_pos); | |
160 | + op += 8; | |
161 | + m_pos += 8; | |
162 | + COPY8(op, m_pos); | |
163 | + op += 8; | |
164 | + m_pos += 8; | |
165 | + } while (op < oe); | |
166 | + op = oe; | |
167 | + if (HAVE_IP(6)) { | |
168 | + state = next; | |
169 | + COPY4(op, ip); | |
170 | + op += next; | |
171 | + ip += next; | |
172 | + continue; | |
173 | + } | |
201 | 174 | } else { |
202 | -copy_match: | |
203 | - *op++ = *m_pos++; | |
204 | - *op++ = *m_pos++; | |
175 | + NEED_OP(t); | |
205 | 176 | do { |
206 | 177 | *op++ = *m_pos++; |
207 | - } while (--t > 0); | |
178 | + } while (op < oe); | |
208 | 179 | } |
209 | -match_done: | |
210 | - t = ip[-2] & 3; | |
211 | - if (t == 0) | |
212 | - break; | |
180 | + } else | |
181 | +#endif | |
182 | + { | |
183 | + unsigned char *oe = op + t; | |
184 | + NEED_OP(t); | |
185 | + op[0] = m_pos[0]; | |
186 | + op[1] = m_pos[1]; | |
187 | + op += 2; | |
188 | + m_pos += 2; | |
189 | + do { | |
190 | + *op++ = *m_pos++; | |
191 | + } while (op < oe); | |
192 | + } | |
213 | 193 | match_next: |
214 | - if (HAVE_OP(t, op_end, op)) | |
215 | - goto output_overrun; | |
216 | - if (HAVE_IP(t + 1, ip_end, ip)) | |
217 | - goto input_overrun; | |
218 | - | |
219 | - *op++ = *ip++; | |
220 | - if (t > 1) { | |
194 | + state = next; | |
195 | + t = next; | |
196 | +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) | |
197 | + if (likely(HAVE_IP(6) && HAVE_OP(4))) { | |
198 | + COPY4(op, ip); | |
199 | + op += t; | |
200 | + ip += t; | |
201 | + } else | |
202 | +#endif | |
203 | + { | |
204 | + NEED_IP(t + 3); | |
205 | + NEED_OP(t); | |
206 | + while (t > 0) { | |
221 | 207 | *op++ = *ip++; |
222 | - if (t > 2) | |
223 | - *op++ = *ip++; | |
208 | + t--; | |
224 | 209 | } |
225 | - | |
226 | - t = *ip++; | |
227 | - } while (ip < ip_end); | |
210 | + } | |
228 | 211 | } |
229 | 212 | |
230 | - *out_len = op - out; | |
231 | - return LZO_E_EOF_NOT_FOUND; | |
232 | - | |
233 | 213 | eof_found: |
234 | 214 | *out_len = op - out; |
235 | - return (ip == ip_end ? LZO_E_OK : | |
236 | - (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); | |
215 | + return (t != 3 ? LZO_E_ERROR : | |
216 | + ip == ip_end ? LZO_E_OK : | |
217 | + ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN); | |
218 | + | |
237 | 219 | input_overrun: |
238 | 220 | *out_len = op - out; |
239 | 221 | return LZO_E_INPUT_OVERRUN; |
lib/lzo/lzodefs.h
1 | 1 | /* |
2 | 2 | * lzodefs.h -- architecture, OS and compiler specific defines |
3 | 3 | * |
4 | - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com> | |
4 | + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com> | |
5 | 5 | * |
6 | 6 | * The full LZO package can be found at: |
7 | 7 | * http://www.oberhumer.com/opensource/lzo/ |
8 | 8 | * |
9 | - * Changed for kernel use by: | |
9 | + * Changed for Linux kernel use by: | |
10 | 10 | * Nitin Gupta <nitingupta910@gmail.com> |
11 | 11 | * Richard Purdie <rpurdie@openedhand.com> |
12 | 12 | */ |
13 | 13 | |
14 | -#define LZO_VERSION 0x2020 | |
15 | -#define LZO_VERSION_STRING "2.02" | |
16 | -#define LZO_VERSION_DATE "Oct 17 2005" | |
17 | 14 | |
15 | +#define COPY4(dst, src) \ | |
16 | + put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) | |
17 | +#if defined(__x86_64__) | |
18 | +#define COPY8(dst, src) \ | |
19 | + put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst)) | |
20 | +#else | |
21 | +#define COPY8(dst, src) \ | |
22 | + COPY4(dst, src); COPY4((dst) + 4, (src) + 4) | |
23 | +#endif | |
24 | + | |
25 | +#if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN) | |
26 | +#error "conflicting endian definitions" | |
27 | +#elif defined(__x86_64__) | |
28 | +#define LZO_USE_CTZ64 1 | |
29 | +#define LZO_USE_CTZ32 1 | |
30 | +#elif defined(__i386__) || defined(__powerpc__) | |
31 | +#define LZO_USE_CTZ32 1 | |
32 | +#elif defined(__arm__) && (__LINUX_ARM_ARCH__ >= 5) | |
33 | +#define LZO_USE_CTZ32 1 | |
34 | +#endif | |
35 | + | |
18 | 36 | #define M1_MAX_OFFSET 0x0400 |
19 | 37 | #define M2_MAX_OFFSET 0x0800 |
20 | 38 | #define M3_MAX_OFFSET 0x4000 |
21 | 39 | |
... | ... | @@ -34,11 +52,9 @@ |
34 | 52 | #define M3_MARKER 32 |
35 | 53 | #define M4_MARKER 16 |
36 | 54 | |
37 | -#define D_BITS 14 | |
38 | -#define D_MASK ((1u << D_BITS) - 1) | |
55 | +#define lzo_dict_t unsigned short | |
56 | +#define D_BITS 13 | |
57 | +#define D_SIZE (1u << D_BITS) | |
58 | +#define D_MASK (D_SIZE - 1) | |
39 | 59 | #define D_HIGH ((D_MASK >> 1) + 1) |
40 | - | |
41 | -#define DX2(p, s1, s2) (((((size_t)((p)[2]) << (s2)) ^ (p)[1]) \ | |
42 | - << (s1)) ^ (p)[0]) | |
43 | -#define DX3(p, s1, s2, s3) ((DX2((p)+1, s2, s3) << (s1)) ^ (p)[0]) |