lib/sort.c

  // SPDX-License-Identifier: GPL-2.0
  /*
   * A fast, small, non-recursive O(n log n) sort for the Linux kernel
   *
   * This performs n*log2(n) + 0.37*n + o(n) comparisons on average,
   * and 1.5*n*log2(n) + O(n) in the (very contrived) worst case.
   *
   * Glibc qsort() manages n*log2(n) - 1.26*n for random inputs (1.63*n
   * better) at the expense of stack usage and much larger code to avoid
   * quicksort's O(n^2) worst case.
   */
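
  /*
   * Worked example of the average-case bound above (illustrative only):
   * for n = 65536, n*log2(n) = 1,048,576, so this sort averages roughly
   * 1,072,800 comparisons (0.37*n ~= 24,200 extra), while glibc qsort()
   * averages roughly 966,000 (1.26*n ~= 82,600 fewer) -- a difference of
   * about 1.63*n ~= 106,800 comparisons.
   */
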
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  #include <linux/types.h>
  #include <linux/export.h>
  #include <linux/sort.h>

  /**
   * is_aligned - is this pointer & size okay for word-wide copying?
   * @base: pointer to data
   * @size: size of each element
   * @align: required alignment (typically 4 or 8)
   *
   * Returns true if elements can be copied using word loads and stores.
   * The size must be a multiple of the alignment, and the base address must
   * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
   *
   * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
   * to "if ((a | b) & mask)", so we do that by hand.
   */
  __attribute_const__ __always_inline
  static bool is_aligned(const void *base, size_t size, unsigned char align)
  {
  	unsigned char lsbits = (unsigned char)size;
  
  	(void)base;
  #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
  	lsbits |= (unsigned char)(uintptr_t)base;
  #endif
  	return (lsbits & (align - 1)) == 0;
  }
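
  /*
   * Illustrative example: for 12-byte elements at an 8-byte-aligned base,
   * is_aligned(base, 12, 8) is false (12 is not a multiple of 8), but
   * is_aligned(base, 12, 4) is true, so sort() below would fall back from
   * 64-bit swaps to 32-bit swaps for such an array.
   */
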
  /**
   * swap_words_32 - swap two elements in 32-bit chunks
   * @a: pointer to the first element to swap
   * @b: pointer to the second element to swap
   * @n: element size (must be a multiple of 4)
   *
   * Exchange the two objects in memory.  This exploits base+index addressing,
   * which basically all CPUs have, to minimize loop overhead computations.
   *
   * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
   * bottom of the loop, even though the zero flag is still valid from the
   * subtract (since the intervening mov instructions don't alter the flags).
   * Gcc 8.1.0 doesn't have that problem.
   */
  static void swap_words_32(void *a, void *b, size_t n)
  {
  	do {
  		u32 t = *(u32 *)(a + (n -= 4));
  		*(u32 *)(a + n) = *(u32 *)(b + n);
  		*(u32 *)(b + n) = t;
  	} while (n);
  }
  /**
   * swap_words_64 - swap two elements in 64-bit chunks
   * @a: pointer to the first element to swap
   * @b: pointer to the second element to swap
   * @n: element size (must be a multiple of 8)
   *
   * Exchange the two objects in memory.  This exploits base+index
   * addressing, which basically all CPUs have, to minimize loop overhead
   * computations.
   *
   * We'd like to use 64-bit loads if possible.  If they're not, emulating
   * one requires base+index+4 addressing which x86 has but most other
   * processors do not.  If CONFIG_64BIT, we definitely have 64-bit loads,
   * but it's possible to have 64-bit loads without 64-bit pointers (e.g.
   * x32 ABI).  Are there any cases the kernel needs to worry about?
   */
  static void swap_words_64(void *a, void *b, size_t n)
  {
  	do {
  #ifdef CONFIG_64BIT
  		u64 t = *(u64 *)(a + (n -= 8));
  		*(u64 *)(a + n) = *(u64 *)(b + n);
  		*(u64 *)(b + n) = t;
  #else
  		/* Use two 32-bit transfers to avoid base+index+4 addressing */
  		u32 t = *(u32 *)(a + (n -= 4));
  		*(u32 *)(a + n) = *(u32 *)(b + n);
  		*(u32 *)(b + n) = t;
  
  		t = *(u32 *)(a + (n -= 4));
  		*(u32 *)(a + n) = *(u32 *)(b + n);
  		*(u32 *)(b + n) = t;
  #endif
  	} while (n);
  }
  /**
   * swap_bytes - swap two elements a byte at a time
   * @a: pointer to the first element to swap
   * @b: pointer to the second element to swap
   * @n: element size
   *
   * This is the fallback if alignment doesn't allow using larger chunks.
   */
  static void swap_bytes(void *a, void *b, size_t n)
  {
  	do {
  		char t = ((char *)a)[--n];
  		((char *)a)[n] = ((char *)b)[n];
  		((char *)b)[n] = t;
  	} while (n);
  }
  /*
   * The values are arbitrary as long as they can't be confused with
   * a pointer, but small integers make for the smallest compare
   * instructions.
   */
  #define SWAP_WORDS_64 (swap_func_t)0
  #define SWAP_WORDS_32 (swap_func_t)1
  #define SWAP_BYTES    (swap_func_t)2
  
  /*
   * The function pointer is last to make tail calls most efficient if the
   * compiler decides not to inline this function.
   */
  static void do_swap(void *a, void *b, size_t size, swap_func_t swap_func)
  {
  	if (swap_func == SWAP_WORDS_64)
  		swap_words_64(a, b, size);
  	else if (swap_func == SWAP_WORDS_32)
  		swap_words_32(a, b, size);
  	else if (swap_func == SWAP_BYTES)
  		swap_bytes(a, b, size);
  	else
  		swap_func(a, b, (int)size);
  }
  #define _CMP_WRAPPER ((cmp_r_func_t)0L)
  static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *priv)
  {
  	if (cmp == _CMP_WRAPPER)
  		return ((cmp_func_t)(priv))(a, b);
  	return cmp(a, b, priv);
  }
  /**
   * parent - given the offset of the child, find the offset of the parent.
   * @i: the offset of the heap element whose parent is sought.  Non-zero.
   * @lsbit: a precomputed 1-bit mask, equal to "size & -size"
   * @size: size of each element
   *
   * In terms of array indexes, the parent of element j = @i/@size is simply
   * (j-1)/2.  But when working in byte offsets, we can't use implicit
   * truncation of integer divides.
   *
   * Fortunately, we only need one bit of the quotient, not the full divide.
   * @size has a least significant bit.  That bit will be clear if @i is
   * an even multiple of @size, and set if it's an odd multiple.
   *
   * Logically, we're doing "if (i & lsbit) i -= size;", but since the
   * branch is unpredictable, it's done with a bit of clever branch-free
   * code instead.
   */
  __attribute_const__ __always_inline
  static size_t parent(size_t i, unsigned int lsbit, size_t size)
  {
  	i -= size;
  	i -= size & -(i & lsbit);
  	return i / 2;
  }
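
  /*
   * Worked example (illustrative): with size == 12 (so lsbit == 4), the
   * children of the element at offset 12 (index 1) sit at offsets 36
   * (index 3) and 48 (index 4).  For i == 36: 36 - 12 == 24, 24 & 4 == 0,
   * so nothing more is subtracted and 24/2 == 12.  For i == 48:
   * 48 - 12 == 36, 36 & 4 != 0, so size is subtracted once more, and
   * 24/2 == 12.  Both children map back to their parent's offset.
   */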
  
  /**
   * sort_r - sort an array of elements
   * @base: pointer to data to sort
   * @num: number of elements
   * @size: size of each element
   * @cmp_func: pointer to comparison function
   * @swap_func: pointer to swap function or NULL
   * @priv: third argument passed to comparison function
   *
   * This function does a heapsort on the given array.  You may provide
   * a swap_func function if you need to do something more than a memory
   * copy (e.g. fix up pointers or auxiliary data), but the built-in swap
   * avoids a slow retpoline and so is significantly faster.
   *
   * Sorting time is O(n log n) both on average and worst-case. While
   * quicksort is slightly faster on average, it suffers from exploitable
   * O(n*n) worst-case behavior and extra memory requirements that make
   * it less suitable for kernel use.
   */
  void sort_r(void *base, size_t num, size_t size,
  	    cmp_r_func_t cmp_func,
  	    swap_func_t swap_func,
  	    const void *priv)
  {
  	/* pre-scale counters for performance */
  	size_t n = num * size, a = (num/2) * size;
  	const unsigned int lsbit = size & -size;  /* Used to find parent */
  
  	if (!a)		/* num < 2 || size == 0 */
  		return;

  	if (!swap_func) {
  		if (is_aligned(base, size, 8))
  			swap_func = SWAP_WORDS_64;
  		else if (is_aligned(base, size, 4))
  			swap_func = SWAP_WORDS_32;
  		else
  			swap_func = SWAP_BYTES;
  	}

  	/*
  	 * Loop invariants:
  	 * 1. elements [a,n) satisfy the heap property (compare greater than
  	 *    all of their children),
  	 * 2. elements [n,num*size) are sorted, and
  	 * 3. a <= b <= c <= d <= n (whenever they are valid).
  	 */
  	for (;;) {
  		size_t b, c, d;
  
  		if (a)			/* Building heap: sift down --a */
  			a -= size;
  		else if (n -= size)	/* Sorting: Extract root to --n */
  			do_swap(base, base + n, size, swap_func);
  		else			/* Sort complete */
  			break;
  
  		/*
  		 * Sift element at "a" down into heap.  This is the
  		 * "bottom-up" variant, which significantly reduces
  		 * calls to cmp_func(): we find the sift-down path all
  		 * the way to the leaves (one compare per level), then
  		 * backtrack to find where to insert the target element.
  		 *
  		 * Because elements tend to sift down close to the leaves,
  		 * this uses fewer compares than doing two per level
  		 * on the way down.  (A bit more than half as many on
  		 * average, 3/4 worst-case.)
  		 */
  		for (b = a; c = 2*b + size, (d = c + size) < n;)
  			b = do_cmp(base + c, base + d, cmp_func, priv) >= 0 ? c : d;
  		if (d == n)	/* Special case last leaf with no sibling */
  			b = c;
  
  		/* Now backtrack from "b" to the correct location for "a" */
  		while (b != a && do_cmp(base + a, base + b, cmp_func, priv) >= 0)
  			b = parent(b, lsbit, size);
  		c = b;			/* Where "a" belongs */
  		while (b != a) {	/* Shift it into place */
  			b = parent(b, lsbit, size);
  			do_swap(base + b, base + c, size, swap_func);
  		}
  	}
  }
  EXPORT_SYMBOL(sort_r);
  
  void sort(void *base, size_t num, size_t size,
  	  cmp_func_t cmp_func,
  	  swap_func_t swap_func)
  {
  	return sort_r(base, num, size, _CMP_WRAPPER, swap_func, cmp_func);
  }
  EXPORT_SYMBOL(sort);
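
  /*
   * Example usage (illustrative sketch; cmp_u32 and vals are hypothetical
   * names, not part of this file).  Sorting an array of u32 with the
   * built-in swap, selected by passing a NULL swap_func:
   *
   *	static int cmp_u32(const void *a, const void *b)
   *	{
   *		u32 x = *(const u32 *)a, y = *(const u32 *)b;
   *
   *		return (x > y) - (x < y);
   *	}
   *
   *	u32 vals[] = { 3, 1, 2 };
   *
   *	sort(vals, ARRAY_SIZE(vals), sizeof(vals[0]), cmp_u32, NULL);
   */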