Commit f5e70d0fe3ea990cfb3fc8d7f76a719adcb1e0b5
Committed by
David Woodhouse
1 parent
17d857be64
md: Factor out RAID6 algorithms into lib/
We'll want to use these in btrfs too.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Showing 29 changed files with 1616 additions and 1613 deletions
Side-by-side Diff
- drivers/md/Kconfig
- drivers/md/Makefile
- drivers/md/mktables.c
- drivers/md/raid6algos.c
- drivers/md/raid6altivec.uc
- drivers/md/raid6int.uc
- drivers/md/raid6mmx.c
- drivers/md/raid6recov.c
- drivers/md/raid6sse1.c
- drivers/md/raid6sse2.c
- drivers/md/raid6test/Makefile
- drivers/md/raid6test/test.c
- drivers/md/raid6x86.h
- drivers/md/unroll.pl
- lib/Kconfig
- lib/Makefile
- lib/raid6/Makefile
- lib/raid6/mktables.c
- lib/raid6/raid6algos.c
- lib/raid6/raid6altivec.uc
- lib/raid6/raid6int.uc
- lib/raid6/raid6mmx.c
- lib/raid6/raid6recov.c
- lib/raid6/raid6sse1.c
- lib/raid6/raid6sse2.c
- lib/raid6/raid6test/Makefile
- lib/raid6/raid6test/test.c
- lib/raid6/raid6x86.h
- lib/raid6/unroll.pl
drivers/md/Kconfig
... | ... | @@ -121,7 +121,7 @@ |
121 | 121 | config MD_RAID456 |
122 | 122 | tristate "RAID-4/RAID-5/RAID-6 mode" |
123 | 123 | depends on BLK_DEV_MD |
124 | - select MD_RAID6_PQ | |
124 | + select RAID6_PQ | |
125 | 125 | select ASYNC_MEMCPY |
126 | 126 | select ASYNC_XOR |
127 | 127 | select ASYNC_PQ |
128 | 128 | |
... | ... | @@ -165,12 +165,9 @@ |
165 | 165 | |
166 | 166 | If unsure, say N. |
167 | 167 | |
168 | -config MD_RAID6_PQ | |
169 | - tristate | |
170 | - | |
171 | 168 | config ASYNC_RAID6_TEST |
172 | 169 | tristate "Self test for hardware accelerated raid6 recovery" |
173 | - depends on MD_RAID6_PQ | |
170 | + depends on RAID6_PQ | |
174 | 171 | select ASYNC_RAID6_RECOV |
175 | 172 | ---help--- |
176 | 173 | This is a one-shot self test that permutes through the |
drivers/md/Makefile
... | ... | @@ -12,13 +12,6 @@ |
12 | 12 | += dm-log-userspace-base.o dm-log-userspace-transfer.o |
13 | 13 | md-mod-y += md.o bitmap.o |
14 | 14 | raid456-y += raid5.o |
15 | -raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \ | |
16 | - raid6int1.o raid6int2.o raid6int4.o \ | |
17 | - raid6int8.o raid6int16.o raid6int32.o \ | |
18 | - raid6altivec1.o raid6altivec2.o raid6altivec4.o \ | |
19 | - raid6altivec8.o \ | |
20 | - raid6mmx.o raid6sse1.o raid6sse2.o | |
21 | -hostprogs-y += mktables | |
22 | 15 | |
23 | 16 | # Note: link order is important. All raid personalities |
24 | 17 | # and must come before md.o, as they each initialise |
... | ... | @@ -29,7 +22,6 @@ |
29 | 22 | obj-$(CONFIG_MD_RAID0) += raid0.o |
30 | 23 | obj-$(CONFIG_MD_RAID1) += raid1.o |
31 | 24 | obj-$(CONFIG_MD_RAID10) += raid10.o |
32 | -obj-$(CONFIG_MD_RAID6_PQ) += raid6_pq.o | |
33 | 25 | obj-$(CONFIG_MD_RAID456) += raid456.o |
34 | 26 | obj-$(CONFIG_MD_MULTIPATH) += multipath.o |
35 | 27 | obj-$(CONFIG_MD_FAULTY) += faulty.o |
36 | 28 | |
... | ... | @@ -45,76 +37,7 @@ |
45 | 37 | obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o |
46 | 38 | obj-$(CONFIG_DM_ZERO) += dm-zero.o |
47 | 39 | |
48 | -quiet_cmd_unroll = UNROLL $@ | |
49 | - cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \ | |
50 | - < $< > $@ || ( rm -f $@ && exit 1 ) | |
51 | - | |
52 | -ifeq ($(CONFIG_ALTIVEC),y) | |
53 | -altivec_flags := -maltivec -mabi=altivec | |
54 | -endif | |
55 | - | |
56 | 40 | ifeq ($(CONFIG_DM_UEVENT),y) |
57 | 41 | dm-mod-objs += dm-uevent.o |
58 | 42 | endif |
59 | - | |
60 | -targets += raid6int1.c | |
61 | -$(obj)/raid6int1.c: UNROLL := 1 | |
62 | -$(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE | |
63 | - $(call if_changed,unroll) | |
64 | - | |
65 | -targets += raid6int2.c | |
66 | -$(obj)/raid6int2.c: UNROLL := 2 | |
67 | -$(obj)/raid6int2.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE | |
68 | - $(call if_changed,unroll) | |
69 | - | |
70 | -targets += raid6int4.c | |
71 | -$(obj)/raid6int4.c: UNROLL := 4 | |
72 | -$(obj)/raid6int4.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE | |
73 | - $(call if_changed,unroll) | |
74 | - | |
75 | -targets += raid6int8.c | |
76 | -$(obj)/raid6int8.c: UNROLL := 8 | |
77 | -$(obj)/raid6int8.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE | |
78 | - $(call if_changed,unroll) | |
79 | - | |
80 | -targets += raid6int16.c | |
81 | -$(obj)/raid6int16.c: UNROLL := 16 | |
82 | -$(obj)/raid6int16.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE | |
83 | - $(call if_changed,unroll) | |
84 | - | |
85 | -targets += raid6int32.c | |
86 | -$(obj)/raid6int32.c: UNROLL := 32 | |
87 | -$(obj)/raid6int32.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE | |
88 | - $(call if_changed,unroll) | |
89 | - | |
90 | -CFLAGS_raid6altivec1.o += $(altivec_flags) | |
91 | -targets += raid6altivec1.c | |
92 | -$(obj)/raid6altivec1.c: UNROLL := 1 | |
93 | -$(obj)/raid6altivec1.c: $(src)/raid6altivec.uc $(src)/unroll.pl FORCE | |
94 | - $(call if_changed,unroll) | |
95 | - | |
96 | -CFLAGS_raid6altivec2.o += $(altivec_flags) | |
97 | -targets += raid6altivec2.c | |
98 | -$(obj)/raid6altivec2.c: UNROLL := 2 | |
99 | -$(obj)/raid6altivec2.c: $(src)/raid6altivec.uc $(src)/unroll.pl FORCE | |
100 | - $(call if_changed,unroll) | |
101 | - | |
102 | -CFLAGS_raid6altivec4.o += $(altivec_flags) | |
103 | -targets += raid6altivec4.c | |
104 | -$(obj)/raid6altivec4.c: UNROLL := 4 | |
105 | -$(obj)/raid6altivec4.c: $(src)/raid6altivec.uc $(src)/unroll.pl FORCE | |
106 | - $(call if_changed,unroll) | |
107 | - | |
108 | -CFLAGS_raid6altivec8.o += $(altivec_flags) | |
109 | -targets += raid6altivec8.c | |
110 | -$(obj)/raid6altivec8.c: UNROLL := 8 | |
111 | -$(obj)/raid6altivec8.c: $(src)/raid6altivec.uc $(src)/unroll.pl FORCE | |
112 | - $(call if_changed,unroll) | |
113 | - | |
114 | -quiet_cmd_mktable = TABLE $@ | |
115 | - cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) | |
116 | - | |
117 | -targets += raid6tables.c | |
118 | -$(obj)/raid6tables.c: $(obj)/mktables FORCE | |
119 | - $(call if_changed,mktable) |
drivers/md/mktables.c
1 | -/* -*- linux-c -*- ------------------------------------------------------- * | |
2 | - * | |
3 | - * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved | |
4 | - * | |
5 | - * This file is part of the Linux kernel, and is made available under | |
6 | - * the terms of the GNU General Public License version 2 or (at your | |
7 | - * option) any later version; incorporated herein by reference. | |
8 | - * | |
9 | - * ----------------------------------------------------------------------- */ | |
10 | - | |
11 | -/* | |
12 | - * mktables.c | |
13 | - * | |
14 | - * Make RAID-6 tables. This is a host user space program to be run at | |
15 | - * compile time. | |
16 | - */ | |
17 | - | |
18 | -#include <stdio.h> | |
19 | -#include <string.h> | |
20 | -#include <inttypes.h> | |
21 | -#include <stdlib.h> | |
22 | -#include <time.h> | |
23 | - | |
24 | -static uint8_t gfmul(uint8_t a, uint8_t b) | |
25 | -{ | |
26 | - uint8_t v = 0; | |
27 | - | |
28 | - while (b) { | |
29 | - if (b & 1) | |
30 | - v ^= a; | |
31 | - a = (a << 1) ^ (a & 0x80 ? 0x1d : 0); | |
32 | - b >>= 1; | |
33 | - } | |
34 | - | |
35 | - return v; | |
36 | -} | |
37 | - | |
38 | -static uint8_t gfpow(uint8_t a, int b) | |
39 | -{ | |
40 | - uint8_t v = 1; | |
41 | - | |
42 | - b %= 255; | |
43 | - if (b < 0) | |
44 | - b += 255; | |
45 | - | |
46 | - while (b) { | |
47 | - if (b & 1) | |
48 | - v = gfmul(v, a); | |
49 | - a = gfmul(a, a); | |
50 | - b >>= 1; | |
51 | - } | |
52 | - | |
53 | - return v; | |
54 | -} | |
55 | - | |
56 | -int main(int argc, char *argv[]) | |
57 | -{ | |
58 | - int i, j, k; | |
59 | - uint8_t v; | |
60 | - uint8_t exptbl[256], invtbl[256]; | |
61 | - | |
62 | - printf("#include <linux/raid/pq.h>\n"); | |
63 | - | |
64 | - /* Compute multiplication table */ | |
65 | - printf("\nconst u8 __attribute__((aligned(256)))\n" | |
66 | - "raid6_gfmul[256][256] =\n" | |
67 | - "{\n"); | |
68 | - for (i = 0; i < 256; i++) { | |
69 | - printf("\t{\n"); | |
70 | - for (j = 0; j < 256; j += 8) { | |
71 | - printf("\t\t"); | |
72 | - for (k = 0; k < 8; k++) | |
73 | - printf("0x%02x,%c", gfmul(i, j + k), | |
74 | - (k == 7) ? '\n' : ' '); | |
75 | - } | |
76 | - printf("\t},\n"); | |
77 | - } | |
78 | - printf("};\n"); | |
79 | - printf("#ifdef __KERNEL__\n"); | |
80 | - printf("EXPORT_SYMBOL(raid6_gfmul);\n"); | |
81 | - printf("#endif\n"); | |
82 | - | |
83 | - /* Compute power-of-2 table (exponent) */ | |
84 | - v = 1; | |
85 | - printf("\nconst u8 __attribute__((aligned(256)))\n" | |
86 | - "raid6_gfexp[256] =\n" "{\n"); | |
87 | - for (i = 0; i < 256; i += 8) { | |
88 | - printf("\t"); | |
89 | - for (j = 0; j < 8; j++) { | |
90 | - exptbl[i + j] = v; | |
91 | - printf("0x%02x,%c", v, (j == 7) ? '\n' : ' '); | |
92 | - v = gfmul(v, 2); | |
93 | - if (v == 1) | |
94 | - v = 0; /* For entry 255, not a real entry */ | |
95 | - } | |
96 | - } | |
97 | - printf("};\n"); | |
98 | - printf("#ifdef __KERNEL__\n"); | |
99 | - printf("EXPORT_SYMBOL(raid6_gfexp);\n"); | |
100 | - printf("#endif\n"); | |
101 | - | |
102 | - /* Compute inverse table x^-1 == x^254 */ | |
103 | - printf("\nconst u8 __attribute__((aligned(256)))\n" | |
104 | - "raid6_gfinv[256] =\n" "{\n"); | |
105 | - for (i = 0; i < 256; i += 8) { | |
106 | - printf("\t"); | |
107 | - for (j = 0; j < 8; j++) { | |
108 | - invtbl[i + j] = v = gfpow(i + j, 254); | |
109 | - printf("0x%02x,%c", v, (j == 7) ? '\n' : ' '); | |
110 | - } | |
111 | - } | |
112 | - printf("};\n"); | |
113 | - printf("#ifdef __KERNEL__\n"); | |
114 | - printf("EXPORT_SYMBOL(raid6_gfinv);\n"); | |
115 | - printf("#endif\n"); | |
116 | - | |
117 | - /* Compute inv(2^x + 1) (exponent-xor-inverse) table */ | |
118 | - printf("\nconst u8 __attribute__((aligned(256)))\n" | |
119 | - "raid6_gfexi[256] =\n" "{\n"); | |
120 | - for (i = 0; i < 256; i += 8) { | |
121 | - printf("\t"); | |
122 | - for (j = 0; j < 8; j++) | |
123 | - printf("0x%02x,%c", invtbl[exptbl[i + j] ^ 1], | |
124 | - (j == 7) ? '\n' : ' '); | |
125 | - } | |
126 | - printf("};\n"); | |
127 | - printf("#ifdef __KERNEL__\n"); | |
128 | - printf("EXPORT_SYMBOL(raid6_gfexi);\n"); | |
129 | - printf("#endif\n"); | |
130 | - | |
131 | - return 0; | |
132 | -} |
drivers/md/raid6algos.c
1 | -/* -*- linux-c -*- ------------------------------------------------------- * | |
2 | - * | |
3 | - * Copyright 2002 H. Peter Anvin - All Rights Reserved | |
4 | - * | |
5 | - * This program is free software; you can redistribute it and/or modify | |
6 | - * it under the terms of the GNU General Public License as published by | |
7 | - * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | |
8 | - * Boston MA 02111-1307, USA; either version 2 of the License, or | |
9 | - * (at your option) any later version; incorporated herein by reference. | |
10 | - * | |
11 | - * ----------------------------------------------------------------------- */ | |
12 | - | |
13 | -/* | |
14 | - * raid6algos.c | |
15 | - * | |
16 | - * Algorithm list and algorithm selection for RAID-6 | |
17 | - */ | |
18 | - | |
19 | -#include <linux/raid/pq.h> | |
20 | -#ifndef __KERNEL__ | |
21 | -#include <sys/mman.h> | |
22 | -#include <stdio.h> | |
23 | -#else | |
24 | -#if !RAID6_USE_EMPTY_ZERO_PAGE | |
25 | -/* In .bss so it's zeroed */ | |
26 | -const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); | |
27 | -EXPORT_SYMBOL(raid6_empty_zero_page); | |
28 | -#endif | |
29 | -#endif | |
30 | - | |
31 | -struct raid6_calls raid6_call; | |
32 | -EXPORT_SYMBOL_GPL(raid6_call); | |
33 | - | |
34 | -/* Various routine sets */ | |
35 | -extern const struct raid6_calls raid6_intx1; | |
36 | -extern const struct raid6_calls raid6_intx2; | |
37 | -extern const struct raid6_calls raid6_intx4; | |
38 | -extern const struct raid6_calls raid6_intx8; | |
39 | -extern const struct raid6_calls raid6_intx16; | |
40 | -extern const struct raid6_calls raid6_intx32; | |
41 | -extern const struct raid6_calls raid6_mmxx1; | |
42 | -extern const struct raid6_calls raid6_mmxx2; | |
43 | -extern const struct raid6_calls raid6_sse1x1; | |
44 | -extern const struct raid6_calls raid6_sse1x2; | |
45 | -extern const struct raid6_calls raid6_sse2x1; | |
46 | -extern const struct raid6_calls raid6_sse2x2; | |
47 | -extern const struct raid6_calls raid6_sse2x4; | |
48 | -extern const struct raid6_calls raid6_altivec1; | |
49 | -extern const struct raid6_calls raid6_altivec2; | |
50 | -extern const struct raid6_calls raid6_altivec4; | |
51 | -extern const struct raid6_calls raid6_altivec8; | |
52 | - | |
53 | -const struct raid6_calls * const raid6_algos[] = { | |
54 | - &raid6_intx1, | |
55 | - &raid6_intx2, | |
56 | - &raid6_intx4, | |
57 | - &raid6_intx8, | |
58 | -#if defined(__ia64__) | |
59 | - &raid6_intx16, | |
60 | - &raid6_intx32, | |
61 | -#endif | |
62 | -#if defined(__i386__) && !defined(__arch_um__) | |
63 | - &raid6_mmxx1, | |
64 | - &raid6_mmxx2, | |
65 | - &raid6_sse1x1, | |
66 | - &raid6_sse1x2, | |
67 | - &raid6_sse2x1, | |
68 | - &raid6_sse2x2, | |
69 | -#endif | |
70 | -#if defined(__x86_64__) && !defined(__arch_um__) | |
71 | - &raid6_sse2x1, | |
72 | - &raid6_sse2x2, | |
73 | - &raid6_sse2x4, | |
74 | -#endif | |
75 | -#ifdef CONFIG_ALTIVEC | |
76 | - &raid6_altivec1, | |
77 | - &raid6_altivec2, | |
78 | - &raid6_altivec4, | |
79 | - &raid6_altivec8, | |
80 | -#endif | |
81 | - NULL | |
82 | -}; | |
83 | - | |
84 | -#ifdef __KERNEL__ | |
85 | -#define RAID6_TIME_JIFFIES_LG2 4 | |
86 | -#else | |
87 | -/* Need more time to be stable in userspace */ | |
88 | -#define RAID6_TIME_JIFFIES_LG2 9 | |
89 | -#define time_before(x, y) ((x) < (y)) | |
90 | -#endif | |
91 | - | |
92 | -/* Try to pick the best algorithm */ | |
93 | -/* This code uses the gfmul table as convenient data set to abuse */ | |
94 | - | |
95 | -int __init raid6_select_algo(void) | |
96 | -{ | |
97 | - const struct raid6_calls * const * algo; | |
98 | - const struct raid6_calls * best; | |
99 | - char *syndromes; | |
100 | - void *dptrs[(65536/PAGE_SIZE)+2]; | |
101 | - int i, disks; | |
102 | - unsigned long perf, bestperf; | |
103 | - int bestprefer; | |
104 | - unsigned long j0, j1; | |
105 | - | |
106 | - disks = (65536/PAGE_SIZE)+2; | |
107 | - for ( i = 0 ; i < disks-2 ; i++ ) { | |
108 | - dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; | |
109 | - } | |
110 | - | |
111 | - /* Normal code - use a 2-page allocation to avoid D$ conflict */ | |
112 | - syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); | |
113 | - | |
114 | - if ( !syndromes ) { | |
115 | - printk("raid6: Yikes! No memory available.\n"); | |
116 | - return -ENOMEM; | |
117 | - } | |
118 | - | |
119 | - dptrs[disks-2] = syndromes; | |
120 | - dptrs[disks-1] = syndromes + PAGE_SIZE; | |
121 | - | |
122 | - bestperf = 0; bestprefer = 0; best = NULL; | |
123 | - | |
124 | - for ( algo = raid6_algos ; *algo ; algo++ ) { | |
125 | - if ( !(*algo)->valid || (*algo)->valid() ) { | |
126 | - perf = 0; | |
127 | - | |
128 | - preempt_disable(); | |
129 | - j0 = jiffies; | |
130 | - while ( (j1 = jiffies) == j0 ) | |
131 | - cpu_relax(); | |
132 | - while (time_before(jiffies, | |
133 | - j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { | |
134 | - (*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs); | |
135 | - perf++; | |
136 | - } | |
137 | - preempt_enable(); | |
138 | - | |
139 | - if ( (*algo)->prefer > bestprefer || | |
140 | - ((*algo)->prefer == bestprefer && | |
141 | - perf > bestperf) ) { | |
142 | - best = *algo; | |
143 | - bestprefer = best->prefer; | |
144 | - bestperf = perf; | |
145 | - } | |
146 | - printk("raid6: %-8s %5ld MB/s\n", (*algo)->name, | |
147 | - (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); | |
148 | - } | |
149 | - } | |
150 | - | |
151 | - if (best) { | |
152 | - printk("raid6: using algorithm %s (%ld MB/s)\n", | |
153 | - best->name, | |
154 | - (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); | |
155 | - raid6_call = *best; | |
156 | - } else | |
157 | - printk("raid6: Yikes! No algorithm found!\n"); | |
158 | - | |
159 | - free_pages((unsigned long)syndromes, 1); | |
160 | - | |
161 | - return best ? 0 : -EINVAL; | |
162 | -} | |
163 | - | |
164 | -static void raid6_exit(void) | |
165 | -{ | |
166 | - do { } while (0); | |
167 | -} | |
168 | - | |
169 | -subsys_initcall(raid6_select_algo); | |
170 | -module_exit(raid6_exit); | |
171 | -MODULE_LICENSE("GPL"); |
drivers/md/raid6altivec.uc
1 | -/* -*- linux-c -*- ------------------------------------------------------- * | |
2 | - * | |
3 | - * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved | |
4 | - * | |
5 | - * This program is free software; you can redistribute it and/or modify | |
6 | - * it under the terms of the GNU General Public License as published by | |
7 | - * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | |
8 | - * Boston MA 02111-1307, USA; either version 2 of the License, or | |
9 | - * (at your option) any later version; incorporated herein by reference. | |
10 | - * | |
11 | - * ----------------------------------------------------------------------- */ | |
12 | - | |
13 | -/* | |
14 | - * raid6altivec$#.c | |
15 | - * | |
16 | - * $#-way unrolled portable integer math RAID-6 instruction set | |
17 | - * | |
18 | - * This file is postprocessed using unroll.pl | |
19 | - * | |
20 | - * <benh> hpa: in process, | |
21 | - * you can just "steal" the vec unit with enable_kernel_altivec() (but | |
22 | - * bracked this with preempt_disable/enable or in a lock) | |
23 | - */ | |
24 | - | |
25 | -#include <linux/raid/pq.h> | |
26 | - | |
27 | -#ifdef CONFIG_ALTIVEC | |
28 | - | |
29 | -#include <altivec.h> | |
30 | -#ifdef __KERNEL__ | |
31 | -# include <asm/system.h> | |
32 | -# include <asm/cputable.h> | |
33 | -#endif | |
34 | - | |
35 | -/* | |
36 | - * This is the C data type to use. We use a vector of | |
37 | - * signed char so vec_cmpgt() will generate the right | |
38 | - * instruction. | |
39 | - */ | |
40 | - | |
41 | -typedef vector signed char unative_t; | |
42 | - | |
43 | -#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) | |
44 | -#define NSIZE sizeof(unative_t) | |
45 | - | |
46 | -/* | |
47 | - * The SHLBYTE() operation shifts each byte left by 1, *not* | |
48 | - * rolling over into the next byte | |
49 | - */ | |
50 | -static inline __attribute_const__ unative_t SHLBYTE(unative_t v) | |
51 | -{ | |
52 | - return vec_add(v,v); | |
53 | -} | |
54 | - | |
55 | -/* | |
56 | - * The MASK() operation returns 0xFF in any byte for which the high | |
57 | - * bit is 1, 0x00 for any byte for which the high bit is 0. | |
58 | - */ | |
59 | -static inline __attribute_const__ unative_t MASK(unative_t v) | |
60 | -{ | |
61 | - unative_t zv = NBYTES(0); | |
62 | - | |
63 | - /* vec_cmpgt returns a vector bool char; thus the need for the cast */ | |
64 | - return (unative_t)vec_cmpgt(zv, v); | |
65 | -} | |
66 | - | |
67 | - | |
68 | -/* This is noinline to make damned sure that gcc doesn't move any of the | |
69 | - Altivec code around the enable/disable code */ | |
70 | -static void noinline | |
71 | -raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs) | |
72 | -{ | |
73 | - u8 **dptr = (u8 **)ptrs; | |
74 | - u8 *p, *q; | |
75 | - int d, z, z0; | |
76 | - | |
77 | - unative_t wd$$, wq$$, wp$$, w1$$, w2$$; | |
78 | - unative_t x1d = NBYTES(0x1d); | |
79 | - | |
80 | - z0 = disks - 3; /* Highest data disk */ | |
81 | - p = dptr[z0+1]; /* XOR parity */ | |
82 | - q = dptr[z0+2]; /* RS syndrome */ | |
83 | - | |
84 | - for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { | |
85 | - wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; | |
86 | - for ( z = z0-1 ; z >= 0 ; z-- ) { | |
87 | - wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; | |
88 | - wp$$ = vec_xor(wp$$, wd$$); | |
89 | - w2$$ = MASK(wq$$); | |
90 | - w1$$ = SHLBYTE(wq$$); | |
91 | - w2$$ = vec_and(w2$$, x1d); | |
92 | - w1$$ = vec_xor(w1$$, w2$$); | |
93 | - wq$$ = vec_xor(w1$$, wd$$); | |
94 | - } | |
95 | - *(unative_t *)&p[d+NSIZE*$$] = wp$$; | |
96 | - *(unative_t *)&q[d+NSIZE*$$] = wq$$; | |
97 | - } | |
98 | -} | |
99 | - | |
100 | -static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
101 | -{ | |
102 | - preempt_disable(); | |
103 | - enable_kernel_altivec(); | |
104 | - | |
105 | - raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs); | |
106 | - | |
107 | - preempt_enable(); | |
108 | -} | |
109 | - | |
110 | -int raid6_have_altivec(void); | |
111 | -#if $# == 1 | |
112 | -int raid6_have_altivec(void) | |
113 | -{ | |
114 | - /* This assumes either all CPUs have Altivec or none does */ | |
115 | -# ifdef __KERNEL__ | |
116 | - return cpu_has_feature(CPU_FTR_ALTIVEC); | |
117 | -# else | |
118 | - return 1; | |
119 | -# endif | |
120 | -} | |
121 | -#endif | |
122 | - | |
123 | -const struct raid6_calls raid6_altivec$# = { | |
124 | - raid6_altivec$#_gen_syndrome, | |
125 | - raid6_have_altivec, | |
126 | - "altivecx$#", | |
127 | - 0 | |
128 | -}; | |
129 | - | |
130 | -#endif /* CONFIG_ALTIVEC */ |
drivers/md/raid6int.uc
1 | -/* -*- linux-c -*- ------------------------------------------------------- * | |
2 | - * | |
3 | - * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved | |
4 | - * | |
5 | - * This program is free software; you can redistribute it and/or modify | |
6 | - * it under the terms of the GNU General Public License as published by | |
7 | - * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | |
8 | - * Boston MA 02111-1307, USA; either version 2 of the License, or | |
9 | - * (at your option) any later version; incorporated herein by reference. | |
10 | - * | |
11 | - * ----------------------------------------------------------------------- */ | |
12 | - | |
13 | -/* | |
14 | - * raid6int$#.c | |
15 | - * | |
16 | - * $#-way unrolled portable integer math RAID-6 instruction set | |
17 | - * | |
18 | - * This file is postprocessed using unroll.pl | |
19 | - */ | |
20 | - | |
21 | -#include <linux/raid/pq.h> | |
22 | - | |
23 | -/* | |
24 | - * This is the C data type to use | |
25 | - */ | |
26 | - | |
27 | -/* Change this from BITS_PER_LONG if there is something better... */ | |
28 | -#if BITS_PER_LONG == 64 | |
29 | -# define NBYTES(x) ((x) * 0x0101010101010101UL) | |
30 | -# define NSIZE 8 | |
31 | -# define NSHIFT 3 | |
32 | -# define NSTRING "64" | |
33 | -typedef u64 unative_t; | |
34 | -#else | |
35 | -# define NBYTES(x) ((x) * 0x01010101U) | |
36 | -# define NSIZE 4 | |
37 | -# define NSHIFT 2 | |
38 | -# define NSTRING "32" | |
39 | -typedef u32 unative_t; | |
40 | -#endif | |
41 | - | |
42 | - | |
43 | - | |
44 | -/* | |
45 | - * IA-64 wants insane amounts of unrolling. On other architectures that | |
46 | - * is just a waste of space. | |
47 | - */ | |
48 | -#if ($# <= 8) || defined(__ia64__) | |
49 | - | |
50 | - | |
51 | -/* | |
52 | - * These sub-operations are separate inlines since they can sometimes be | |
53 | - * specially optimized using architecture-specific hacks. | |
54 | - */ | |
55 | - | |
56 | -/* | |
57 | - * The SHLBYTE() operation shifts each byte left by 1, *not* | |
58 | - * rolling over into the next byte | |
59 | - */ | |
60 | -static inline __attribute_const__ unative_t SHLBYTE(unative_t v) | |
61 | -{ | |
62 | - unative_t vv; | |
63 | - | |
64 | - vv = (v << 1) & NBYTES(0xfe); | |
65 | - return vv; | |
66 | -} | |
67 | - | |
68 | -/* | |
69 | - * The MASK() operation returns 0xFF in any byte for which the high | |
70 | - * bit is 1, 0x00 for any byte for which the high bit is 0. | |
71 | - */ | |
72 | -static inline __attribute_const__ unative_t MASK(unative_t v) | |
73 | -{ | |
74 | - unative_t vv; | |
75 | - | |
76 | - vv = v & NBYTES(0x80); | |
77 | - vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */ | |
78 | - return vv; | |
79 | -} | |
80 | - | |
81 | - | |
82 | -static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
83 | -{ | |
84 | - u8 **dptr = (u8 **)ptrs; | |
85 | - u8 *p, *q; | |
86 | - int d, z, z0; | |
87 | - | |
88 | - unative_t wd$$, wq$$, wp$$, w1$$, w2$$; | |
89 | - | |
90 | - z0 = disks - 3; /* Highest data disk */ | |
91 | - p = dptr[z0+1]; /* XOR parity */ | |
92 | - q = dptr[z0+2]; /* RS syndrome */ | |
93 | - | |
94 | - for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { | |
95 | - wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; | |
96 | - for ( z = z0-1 ; z >= 0 ; z-- ) { | |
97 | - wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; | |
98 | - wp$$ ^= wd$$; | |
99 | - w2$$ = MASK(wq$$); | |
100 | - w1$$ = SHLBYTE(wq$$); | |
101 | - w2$$ &= NBYTES(0x1d); | |
102 | - w1$$ ^= w2$$; | |
103 | - wq$$ = w1$$ ^ wd$$; | |
104 | - } | |
105 | - *(unative_t *)&p[d+NSIZE*$$] = wp$$; | |
106 | - *(unative_t *)&q[d+NSIZE*$$] = wq$$; | |
107 | - } | |
108 | -} | |
109 | - | |
110 | -const struct raid6_calls raid6_intx$# = { | |
111 | - raid6_int$#_gen_syndrome, | |
112 | - NULL, /* always valid */ | |
113 | - "int" NSTRING "x$#", | |
114 | - 0 | |
115 | -}; | |
116 | - | |
117 | -#endif |
drivers/md/raid6mmx.c
1 | -/* -*- linux-c -*- ------------------------------------------------------- * | |
2 | - * | |
3 | - * Copyright 2002 H. Peter Anvin - All Rights Reserved | |
4 | - * | |
5 | - * This program is free software; you can redistribute it and/or modify | |
6 | - * it under the terms of the GNU General Public License as published by | |
7 | - * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | |
8 | - * Boston MA 02111-1307, USA; either version 2 of the License, or | |
9 | - * (at your option) any later version; incorporated herein by reference. | |
10 | - * | |
11 | - * ----------------------------------------------------------------------- */ | |
12 | - | |
13 | -/* | |
14 | - * raid6mmx.c | |
15 | - * | |
16 | - * MMX implementation of RAID-6 syndrome functions | |
17 | - */ | |
18 | - | |
19 | -#if defined(__i386__) && !defined(__arch_um__) | |
20 | - | |
21 | -#include <linux/raid/pq.h> | |
22 | -#include "raid6x86.h" | |
23 | - | |
24 | -/* Shared with raid6sse1.c */ | |
25 | -const struct raid6_mmx_constants { | |
26 | - u64 x1d; | |
27 | -} raid6_mmx_constants = { | |
28 | - 0x1d1d1d1d1d1d1d1dULL, | |
29 | -}; | |
30 | - | |
31 | -static int raid6_have_mmx(void) | |
32 | -{ | |
33 | - /* Not really "boot_cpu" but "all_cpus" */ | |
34 | - return boot_cpu_has(X86_FEATURE_MMX); | |
35 | -} | |
36 | - | |
37 | -/* | |
38 | - * Plain MMX implementation | |
39 | - */ | |
40 | -static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
41 | -{ | |
42 | - u8 **dptr = (u8 **)ptrs; | |
43 | - u8 *p, *q; | |
44 | - int d, z, z0; | |
45 | - | |
46 | - z0 = disks - 3; /* Highest data disk */ | |
47 | - p = dptr[z0+1]; /* XOR parity */ | |
48 | - q = dptr[z0+2]; /* RS syndrome */ | |
49 | - | |
50 | - kernel_fpu_begin(); | |
51 | - | |
52 | - asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); | |
53 | - asm volatile("pxor %mm5,%mm5"); /* Zero temp */ | |
54 | - | |
55 | - for ( d = 0 ; d < bytes ; d += 8 ) { | |
56 | - asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ | |
57 | - asm volatile("movq %mm2,%mm4"); /* Q[0] */ | |
58 | - for ( z = z0-1 ; z >= 0 ; z-- ) { | |
59 | - asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); | |
60 | - asm volatile("pcmpgtb %mm4,%mm5"); | |
61 | - asm volatile("paddb %mm4,%mm4"); | |
62 | - asm volatile("pand %mm0,%mm5"); | |
63 | - asm volatile("pxor %mm5,%mm4"); | |
64 | - asm volatile("pxor %mm5,%mm5"); | |
65 | - asm volatile("pxor %mm6,%mm2"); | |
66 | - asm volatile("pxor %mm6,%mm4"); | |
67 | - } | |
68 | - asm volatile("movq %%mm2,%0" : "=m" (p[d])); | |
69 | - asm volatile("pxor %mm2,%mm2"); | |
70 | - asm volatile("movq %%mm4,%0" : "=m" (q[d])); | |
71 | - asm volatile("pxor %mm4,%mm4"); | |
72 | - } | |
73 | - | |
74 | - kernel_fpu_end(); | |
75 | -} | |
76 | - | |
77 | -const struct raid6_calls raid6_mmxx1 = { | |
78 | - raid6_mmx1_gen_syndrome, | |
79 | - raid6_have_mmx, | |
80 | - "mmxx1", | |
81 | - 0 | |
82 | -}; | |
83 | - | |
84 | -/* | |
85 | - * Unrolled-by-2 MMX implementation | |
86 | - */ | |
87 | -static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
88 | -{ | |
89 | - u8 **dptr = (u8 **)ptrs; | |
90 | - u8 *p, *q; | |
91 | - int d, z, z0; | |
92 | - | |
93 | - z0 = disks - 3; /* Highest data disk */ | |
94 | - p = dptr[z0+1]; /* XOR parity */ | |
95 | - q = dptr[z0+2]; /* RS syndrome */ | |
96 | - | |
97 | - kernel_fpu_begin(); | |
98 | - | |
99 | - asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); | |
100 | - asm volatile("pxor %mm5,%mm5"); /* Zero temp */ | |
101 | - asm volatile("pxor %mm7,%mm7"); /* Zero temp */ | |
102 | - | |
103 | - for ( d = 0 ; d < bytes ; d += 16 ) { | |
104 | - asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ | |
105 | - asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); | |
106 | - asm volatile("movq %mm2,%mm4"); /* Q[0] */ | |
107 | - asm volatile("movq %mm3,%mm6"); /* Q[1] */ | |
108 | - for ( z = z0-1 ; z >= 0 ; z-- ) { | |
109 | - asm volatile("pcmpgtb %mm4,%mm5"); | |
110 | - asm volatile("pcmpgtb %mm6,%mm7"); | |
111 | - asm volatile("paddb %mm4,%mm4"); | |
112 | - asm volatile("paddb %mm6,%mm6"); | |
113 | - asm volatile("pand %mm0,%mm5"); | |
114 | - asm volatile("pand %mm0,%mm7"); | |
115 | - asm volatile("pxor %mm5,%mm4"); | |
116 | - asm volatile("pxor %mm7,%mm6"); | |
117 | - asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); | |
118 | - asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); | |
119 | - asm volatile("pxor %mm5,%mm2"); | |
120 | - asm volatile("pxor %mm7,%mm3"); | |
121 | - asm volatile("pxor %mm5,%mm4"); | |
122 | - asm volatile("pxor %mm7,%mm6"); | |
123 | - asm volatile("pxor %mm5,%mm5"); | |
124 | - asm volatile("pxor %mm7,%mm7"); | |
125 | - } | |
126 | - asm volatile("movq %%mm2,%0" : "=m" (p[d])); | |
127 | - asm volatile("movq %%mm3,%0" : "=m" (p[d+8])); | |
128 | - asm volatile("movq %%mm4,%0" : "=m" (q[d])); | |
129 | - asm volatile("movq %%mm6,%0" : "=m" (q[d+8])); | |
130 | - } | |
131 | - | |
132 | - kernel_fpu_end(); | |
133 | -} | |
134 | - | |
135 | -const struct raid6_calls raid6_mmxx2 = { | |
136 | - raid6_mmx2_gen_syndrome, | |
137 | - raid6_have_mmx, | |
138 | - "mmxx2", | |
139 | - 0 | |
140 | -}; | |
141 | - | |
142 | -#endif |
drivers/md/raid6recov.c
1 | -/* -*- linux-c -*- ------------------------------------------------------- * | |
2 | - * | |
3 | - * Copyright 2002 H. Peter Anvin - All Rights Reserved | |
4 | - * | |
5 | - * This program is free software; you can redistribute it and/or modify | |
6 | - * it under the terms of the GNU General Public License as published by | |
7 | - * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | |
8 | - * Boston MA 02111-1307, USA; either version 2 of the License, or | |
9 | - * (at your option) any later version; incorporated herein by reference. | |
10 | - * | |
11 | - * ----------------------------------------------------------------------- */ | |
12 | - | |
13 | -/* | |
14 | - * raid6recov.c | |
15 | - * | |
16 | - * RAID-6 data recovery in dual failure mode. In single failure mode, | |
17 | - * use the RAID-5 algorithm (or, in the case of Q failure, just reconstruct | |
18 | - * the syndrome.) | |
19 | - */ | |
20 | - | |
21 | -#include <linux/raid/pq.h> | |
22 | - | |
23 | -/* Recover two failed data blocks. */ | |
24 | -void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, | |
25 | - void **ptrs) | |
26 | -{ | |
27 | - u8 *p, *q, *dp, *dq; | |
28 | - u8 px, qx, db; | |
29 | - const u8 *pbmul; /* P multiplier table for B data */ | |
30 | - const u8 *qmul; /* Q multiplier table (for both) */ | |
31 | - | |
32 | - p = (u8 *)ptrs[disks-2]; | |
33 | - q = (u8 *)ptrs[disks-1]; | |
34 | - | |
35 | - /* Compute syndrome with zero for the missing data pages | |
36 | - Use the dead data pages as temporary storage for | |
37 | - delta p and delta q */ | |
38 | - dp = (u8 *)ptrs[faila]; | |
39 | - ptrs[faila] = (void *)raid6_empty_zero_page; | |
40 | - ptrs[disks-2] = dp; | |
41 | - dq = (u8 *)ptrs[failb]; | |
42 | - ptrs[failb] = (void *)raid6_empty_zero_page; | |
43 | - ptrs[disks-1] = dq; | |
44 | - | |
45 | - raid6_call.gen_syndrome(disks, bytes, ptrs); | |
46 | - | |
47 | - /* Restore pointer table */ | |
48 | - ptrs[faila] = dp; | |
49 | - ptrs[failb] = dq; | |
50 | - ptrs[disks-2] = p; | |
51 | - ptrs[disks-1] = q; | |
52 | - | |
53 | - /* Now, pick the proper data tables */ | |
54 | - pbmul = raid6_gfmul[raid6_gfexi[failb-faila]]; | |
55 | - qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]]; | |
56 | - | |
57 | - /* Now do it... */ | |
58 | - while ( bytes-- ) { | |
59 | - px = *p ^ *dp; | |
60 | - qx = qmul[*q ^ *dq]; | |
61 | - *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */ | |
62 | - *dp++ = db ^ px; /* Reconstructed A */ | |
63 | - p++; q++; | |
64 | - } | |
65 | -} | |
66 | -EXPORT_SYMBOL_GPL(raid6_2data_recov); | |
67 | - | |
68 | -/* Recover failure of one data block plus the P block */ | |
69 | -void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) | |
70 | -{ | |
71 | - u8 *p, *q, *dq; | |
72 | - const u8 *qmul; /* Q multiplier table */ | |
73 | - | |
74 | - p = (u8 *)ptrs[disks-2]; | |
75 | - q = (u8 *)ptrs[disks-1]; | |
76 | - | |
77 | - /* Compute syndrome with zero for the missing data page | |
78 | - Use the dead data page as temporary storage for delta q */ | |
79 | - dq = (u8 *)ptrs[faila]; | |
80 | - ptrs[faila] = (void *)raid6_empty_zero_page; | |
81 | - ptrs[disks-1] = dq; | |
82 | - | |
83 | - raid6_call.gen_syndrome(disks, bytes, ptrs); | |
84 | - | |
85 | - /* Restore pointer table */ | |
86 | - ptrs[faila] = dq; | |
87 | - ptrs[disks-1] = q; | |
88 | - | |
89 | - /* Now, pick the proper data tables */ | |
90 | - qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]]; | |
91 | - | |
92 | - /* Now do it... */ | |
93 | - while ( bytes-- ) { | |
94 | - *p++ ^= *dq = qmul[*q ^ *dq]; | |
95 | - q++; dq++; | |
96 | - } | |
97 | -} | |
98 | -EXPORT_SYMBOL_GPL(raid6_datap_recov); | |
99 | - | |
100 | -#ifndef __KERNEL__ | |
101 | -/* Testing only */ | |
102 | - | |
103 | -/* Recover two failed blocks. */ | |
104 | -void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs) | |
105 | -{ | |
106 | - if ( faila > failb ) { | |
107 | - int tmp = faila; | |
108 | - faila = failb; | |
109 | - failb = tmp; | |
110 | - } | |
111 | - | |
112 | - if ( failb == disks-1 ) { | |
113 | - if ( faila == disks-2 ) { | |
114 | - /* P+Q failure. Just rebuild the syndrome. */ | |
115 | - raid6_call.gen_syndrome(disks, bytes, ptrs); | |
116 | - } else { | |
117 | - /* data+Q failure. Reconstruct data from P, | |
118 | - then rebuild syndrome. */ | |
119 | - /* NOT IMPLEMENTED - equivalent to RAID-5 */ | |
120 | - } | |
121 | - } else { | |
122 | - if ( failb == disks-2 ) { | |
123 | - /* data+P failure. */ | |
124 | - raid6_datap_recov(disks, bytes, faila, ptrs); | |
125 | - } else { | |
126 | - /* data+data failure. */ | |
127 | - raid6_2data_recov(disks, bytes, faila, failb, ptrs); | |
128 | - } | |
129 | - } | |
130 | -} | |
131 | - | |
132 | -#endif |
drivers/md/raid6sse1.c
1 | -/* -*- linux-c -*- ------------------------------------------------------- * | |
2 | - * | |
3 | - * Copyright 2002 H. Peter Anvin - All Rights Reserved | |
4 | - * | |
5 | - * This program is free software; you can redistribute it and/or modify | |
6 | - * it under the terms of the GNU General Public License as published by | |
7 | - * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | |
8 | - * Boston MA 02111-1307, USA; either version 2 of the License, or | |
9 | - * (at your option) any later version; incorporated herein by reference. | |
10 | - * | |
11 | - * ----------------------------------------------------------------------- */ | |
12 | - | |
13 | -/* | |
14 | - * raid6sse1.c | |
15 | - * | |
16 | - * SSE-1/MMXEXT implementation of RAID-6 syndrome functions | |
17 | - * | |
18 | - * This is really an MMX implementation, but it requires SSE-1 or | |
19 | - * AMD MMXEXT for prefetch support and a few other features. The | |
20 | - * support for nontemporal memory accesses is enough to make this | |
21 | - * worthwhile as a separate implementation. | |
22 | - */ | |
23 | - | |
24 | -#if defined(__i386__) && !defined(__arch_um__) | |
25 | - | |
26 | -#include <linux/raid/pq.h> | |
27 | -#include "raid6x86.h" | |
28 | - | |
29 | -/* Defined in raid6mmx.c */ | |
30 | -extern const struct raid6_mmx_constants { | |
31 | - u64 x1d; | |
32 | -} raid6_mmx_constants; | |
33 | - | |
34 | -static int raid6_have_sse1_or_mmxext(void) | |
35 | -{ | |
36 | - /* Not really boot_cpu but "all_cpus" */ | |
37 | - return boot_cpu_has(X86_FEATURE_MMX) && | |
38 | - (boot_cpu_has(X86_FEATURE_XMM) || | |
39 | - boot_cpu_has(X86_FEATURE_MMXEXT)); | |
40 | -} | |
41 | - | |
42 | -/* | |
43 | - * Plain SSE1 implementation | |
44 | - */ | |
45 | -static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
46 | -{ | |
47 | - u8 **dptr = (u8 **)ptrs; | |
48 | - u8 *p, *q; | |
49 | - int d, z, z0; | |
50 | - | |
51 | - z0 = disks - 3; /* Highest data disk */ | |
52 | - p = dptr[z0+1]; /* XOR parity */ | |
53 | - q = dptr[z0+2]; /* RS syndrome */ | |
54 | - | |
55 | - kernel_fpu_begin(); | |
56 | - | |
57 | - asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); | |
58 | - asm volatile("pxor %mm5,%mm5"); /* Zero temp */ | |
59 | - | |
60 | - for ( d = 0 ; d < bytes ; d += 8 ) { | |
61 | - asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); | |
62 | - asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ | |
63 | - asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); | |
64 | - asm volatile("movq %mm2,%mm4"); /* Q[0] */ | |
65 | - asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d])); | |
66 | - for ( z = z0-2 ; z >= 0 ; z-- ) { | |
67 | - asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | |
68 | - asm volatile("pcmpgtb %mm4,%mm5"); | |
69 | - asm volatile("paddb %mm4,%mm4"); | |
70 | - asm volatile("pand %mm0,%mm5"); | |
71 | - asm volatile("pxor %mm5,%mm4"); | |
72 | - asm volatile("pxor %mm5,%mm5"); | |
73 | - asm volatile("pxor %mm6,%mm2"); | |
74 | - asm volatile("pxor %mm6,%mm4"); | |
75 | - asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); | |
76 | - } | |
77 | - asm volatile("pcmpgtb %mm4,%mm5"); | |
78 | - asm volatile("paddb %mm4,%mm4"); | |
79 | - asm volatile("pand %mm0,%mm5"); | |
80 | - asm volatile("pxor %mm5,%mm4"); | |
81 | - asm volatile("pxor %mm5,%mm5"); | |
82 | - asm volatile("pxor %mm6,%mm2"); | |
83 | - asm volatile("pxor %mm6,%mm4"); | |
84 | - | |
85 | - asm volatile("movntq %%mm2,%0" : "=m" (p[d])); | |
86 | - asm volatile("movntq %%mm4,%0" : "=m" (q[d])); | |
87 | - } | |
88 | - | |
89 | - asm volatile("sfence" : : : "memory"); | |
90 | - kernel_fpu_end(); | |
91 | -} | |
92 | - | |
93 | -const struct raid6_calls raid6_sse1x1 = { | |
94 | - raid6_sse11_gen_syndrome, | |
95 | - raid6_have_sse1_or_mmxext, | |
96 | - "sse1x1", | |
97 | - 1 /* Has cache hints */ | |
98 | -}; | |
99 | - | |
100 | -/* | |
101 | - * Unrolled-by-2 SSE1 implementation | |
102 | - */ | |
103 | -static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
104 | -{ | |
105 | - u8 **dptr = (u8 **)ptrs; | |
106 | - u8 *p, *q; | |
107 | - int d, z, z0; | |
108 | - | |
109 | - z0 = disks - 3; /* Highest data disk */ | |
110 | - p = dptr[z0+1]; /* XOR parity */ | |
111 | - q = dptr[z0+2]; /* RS syndrome */ | |
112 | - | |
113 | - kernel_fpu_begin(); | |
114 | - | |
115 | - asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); | |
116 | - asm volatile("pxor %mm5,%mm5"); /* Zero temp */ | |
117 | - asm volatile("pxor %mm7,%mm7"); /* Zero temp */ | |
118 | - | |
119 | - /* We uniformly assume a single prefetch covers at least 16 bytes */ | |
120 | - for ( d = 0 ; d < bytes ; d += 16 ) { | |
121 | - asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); | |
122 | - asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ | |
123 | - asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */ | |
124 | - asm volatile("movq %mm2,%mm4"); /* Q[0] */ | |
125 | - asm volatile("movq %mm3,%mm6"); /* Q[1] */ | |
126 | - for ( z = z0-1 ; z >= 0 ; z-- ) { | |
127 | - asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | |
128 | - asm volatile("pcmpgtb %mm4,%mm5"); | |
129 | - asm volatile("pcmpgtb %mm6,%mm7"); | |
130 | - asm volatile("paddb %mm4,%mm4"); | |
131 | - asm volatile("paddb %mm6,%mm6"); | |
132 | - asm volatile("pand %mm0,%mm5"); | |
133 | - asm volatile("pand %mm0,%mm7"); | |
134 | - asm volatile("pxor %mm5,%mm4"); | |
135 | - asm volatile("pxor %mm7,%mm6"); | |
136 | - asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); | |
137 | - asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); | |
138 | - asm volatile("pxor %mm5,%mm2"); | |
139 | - asm volatile("pxor %mm7,%mm3"); | |
140 | - asm volatile("pxor %mm5,%mm4"); | |
141 | - asm volatile("pxor %mm7,%mm6"); | |
142 | - asm volatile("pxor %mm5,%mm5"); | |
143 | - asm volatile("pxor %mm7,%mm7"); | |
144 | - } | |
145 | - asm volatile("movntq %%mm2,%0" : "=m" (p[d])); | |
146 | - asm volatile("movntq %%mm3,%0" : "=m" (p[d+8])); | |
147 | - asm volatile("movntq %%mm4,%0" : "=m" (q[d])); | |
148 | - asm volatile("movntq %%mm6,%0" : "=m" (q[d+8])); | |
149 | - } | |
150 | - | |
151 | - asm volatile("sfence" : :: "memory"); | |
152 | - kernel_fpu_end(); | |
153 | -} | |
154 | - | |
155 | -const struct raid6_calls raid6_sse1x2 = { | |
156 | - raid6_sse12_gen_syndrome, | |
157 | - raid6_have_sse1_or_mmxext, | |
158 | - "sse1x2", | |
159 | - 1 /* Has cache hints */ | |
160 | -}; | |
161 | - | |
162 | -#endif |
drivers/md/raid6sse2.c
1 | -/* -*- linux-c -*- ------------------------------------------------------- * | |
2 | - * | |
3 | - * Copyright 2002 H. Peter Anvin - All Rights Reserved | |
4 | - * | |
5 | - * This program is free software; you can redistribute it and/or modify | |
6 | - * it under the terms of the GNU General Public License as published by | |
7 | - * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | |
8 | - * Boston MA 02111-1307, USA; either version 2 of the License, or | |
9 | - * (at your option) any later version; incorporated herein by reference. | |
10 | - * | |
11 | - * ----------------------------------------------------------------------- */ | |
12 | - | |
13 | -/* | |
14 | - * raid6sse2.c | |
15 | - * | |
16 | - * SSE-2 implementation of RAID-6 syndrome functions | |
17 | - * | |
18 | - */ | |
19 | - | |
20 | -#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) | |
21 | - | |
22 | -#include <linux/raid/pq.h> | |
23 | -#include "raid6x86.h" | |
24 | - | |
25 | -static const struct raid6_sse_constants { | |
26 | - u64 x1d[2]; | |
27 | -} raid6_sse_constants __attribute__((aligned(16))) = { | |
28 | - { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL }, | |
29 | -}; | |
30 | - | |
31 | -static int raid6_have_sse2(void) | |
32 | -{ | |
33 | - /* Not really boot_cpu but "all_cpus" */ | |
34 | - return boot_cpu_has(X86_FEATURE_MMX) && | |
35 | - boot_cpu_has(X86_FEATURE_FXSR) && | |
36 | - boot_cpu_has(X86_FEATURE_XMM) && | |
37 | - boot_cpu_has(X86_FEATURE_XMM2); | |
38 | -} | |
39 | - | |
40 | -/* | |
41 | - * Plain SSE2 implementation | |
42 | - */ | |
43 | -static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
44 | -{ | |
45 | - u8 **dptr = (u8 **)ptrs; | |
46 | - u8 *p, *q; | |
47 | - int d, z, z0; | |
48 | - | |
49 | - z0 = disks - 3; /* Highest data disk */ | |
50 | - p = dptr[z0+1]; /* XOR parity */ | |
51 | - q = dptr[z0+2]; /* RS syndrome */ | |
52 | - | |
53 | - kernel_fpu_begin(); | |
54 | - | |
55 | - asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); | |
56 | - asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ | |
57 | - | |
58 | - for ( d = 0 ; d < bytes ; d += 16 ) { | |
59 | - asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); | |
60 | - asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */ | |
61 | - asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); | |
62 | - asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */ | |
63 | - asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d])); | |
64 | - for ( z = z0-2 ; z >= 0 ; z-- ) { | |
65 | - asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | |
66 | - asm volatile("pcmpgtb %xmm4,%xmm5"); | |
67 | - asm volatile("paddb %xmm4,%xmm4"); | |
68 | - asm volatile("pand %xmm0,%xmm5"); | |
69 | - asm volatile("pxor %xmm5,%xmm4"); | |
70 | - asm volatile("pxor %xmm5,%xmm5"); | |
71 | - asm volatile("pxor %xmm6,%xmm2"); | |
72 | - asm volatile("pxor %xmm6,%xmm4"); | |
73 | - asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d])); | |
74 | - } | |
75 | - asm volatile("pcmpgtb %xmm4,%xmm5"); | |
76 | - asm volatile("paddb %xmm4,%xmm4"); | |
77 | - asm volatile("pand %xmm0,%xmm5"); | |
78 | - asm volatile("pxor %xmm5,%xmm4"); | |
79 | - asm volatile("pxor %xmm5,%xmm5"); | |
80 | - asm volatile("pxor %xmm6,%xmm2"); | |
81 | - asm volatile("pxor %xmm6,%xmm4"); | |
82 | - | |
83 | - asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); | |
84 | - asm volatile("pxor %xmm2,%xmm2"); | |
85 | - asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); | |
86 | - asm volatile("pxor %xmm4,%xmm4"); | |
87 | - } | |
88 | - | |
89 | - asm volatile("sfence" : : : "memory"); | |
90 | - kernel_fpu_end(); | |
91 | -} | |
92 | - | |
93 | -const struct raid6_calls raid6_sse2x1 = { | |
94 | - raid6_sse21_gen_syndrome, | |
95 | - raid6_have_sse2, | |
96 | - "sse2x1", | |
97 | - 1 /* Has cache hints */ | |
98 | -}; | |
99 | - | |
100 | -/* | |
101 | - * Unrolled-by-2 SSE2 implementation | |
102 | - */ | |
103 | -static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
104 | -{ | |
105 | - u8 **dptr = (u8 **)ptrs; | |
106 | - u8 *p, *q; | |
107 | - int d, z, z0; | |
108 | - | |
109 | - z0 = disks - 3; /* Highest data disk */ | |
110 | - p = dptr[z0+1]; /* XOR parity */ | |
111 | - q = dptr[z0+2]; /* RS syndrome */ | |
112 | - | |
113 | - kernel_fpu_begin(); | |
114 | - | |
115 | - asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); | |
116 | - asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ | |
117 | - asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */ | |
118 | - | |
119 | - /* We uniformly assume a single prefetch covers at least 32 bytes */ | |
120 | - for ( d = 0 ; d < bytes ; d += 32 ) { | |
121 | - asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); | |
122 | - asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */ | |
123 | - asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */ | |
124 | - asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */ | |
125 | - asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */ | |
126 | - for ( z = z0-1 ; z >= 0 ; z-- ) { | |
127 | - asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | |
128 | - asm volatile("pcmpgtb %xmm4,%xmm5"); | |
129 | - asm volatile("pcmpgtb %xmm6,%xmm7"); | |
130 | - asm volatile("paddb %xmm4,%xmm4"); | |
131 | - asm volatile("paddb %xmm6,%xmm6"); | |
132 | - asm volatile("pand %xmm0,%xmm5"); | |
133 | - asm volatile("pand %xmm0,%xmm7"); | |
134 | - asm volatile("pxor %xmm5,%xmm4"); | |
135 | - asm volatile("pxor %xmm7,%xmm6"); | |
136 | - asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d])); | |
137 | - asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16])); | |
138 | - asm volatile("pxor %xmm5,%xmm2"); | |
139 | - asm volatile("pxor %xmm7,%xmm3"); | |
140 | - asm volatile("pxor %xmm5,%xmm4"); | |
141 | - asm volatile("pxor %xmm7,%xmm6"); | |
142 | - asm volatile("pxor %xmm5,%xmm5"); | |
143 | - asm volatile("pxor %xmm7,%xmm7"); | |
144 | - } | |
145 | - asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); | |
146 | - asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); | |
147 | - asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); | |
148 | - asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); | |
149 | - } | |
150 | - | |
151 | - asm volatile("sfence" : : : "memory"); | |
152 | - kernel_fpu_end(); | |
153 | -} | |
154 | - | |
155 | -const struct raid6_calls raid6_sse2x2 = { | |
156 | - raid6_sse22_gen_syndrome, | |
157 | - raid6_have_sse2, | |
158 | - "sse2x2", | |
159 | - 1 /* Has cache hints */ | |
160 | -}; | |
161 | - | |
162 | -#endif | |
163 | - | |
164 | -#if defined(__x86_64__) && !defined(__arch_um__) | |
165 | - | |
166 | -/* | |
167 | - * Unrolled-by-4 SSE2 implementation | |
168 | - */ | |
169 | -static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
170 | -{ | |
171 | - u8 **dptr = (u8 **)ptrs; | |
172 | - u8 *p, *q; | |
173 | - int d, z, z0; | |
174 | - | |
175 | - z0 = disks - 3; /* Highest data disk */ | |
176 | - p = dptr[z0+1]; /* XOR parity */ | |
177 | - q = dptr[z0+2]; /* RS syndrome */ | |
178 | - | |
179 | - kernel_fpu_begin(); | |
180 | - | |
181 | - asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0])); | |
182 | - asm volatile("pxor %xmm2,%xmm2"); /* P[0] */ | |
183 | - asm volatile("pxor %xmm3,%xmm3"); /* P[1] */ | |
184 | - asm volatile("pxor %xmm4,%xmm4"); /* Q[0] */ | |
185 | - asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ | |
186 | - asm volatile("pxor %xmm6,%xmm6"); /* Q[1] */ | |
187 | - asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */ | |
188 | - asm volatile("pxor %xmm10,%xmm10"); /* P[2] */ | |
189 | - asm volatile("pxor %xmm11,%xmm11"); /* P[3] */ | |
190 | - asm volatile("pxor %xmm12,%xmm12"); /* Q[2] */ | |
191 | - asm volatile("pxor %xmm13,%xmm13"); /* Zero temp */ | |
192 | - asm volatile("pxor %xmm14,%xmm14"); /* Q[3] */ | |
193 | - asm volatile("pxor %xmm15,%xmm15"); /* Zero temp */ | |
194 | - | |
195 | - for ( d = 0 ; d < bytes ; d += 64 ) { | |
196 | - for ( z = z0 ; z >= 0 ; z-- ) { | |
197 | - /* The second prefetch seems to improve performance... */ | |
198 | - asm volatile("prefetchnta %0" :: "m" (dptr[z][d])); | |
199 | - asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32])); | |
200 | - asm volatile("pcmpgtb %xmm4,%xmm5"); | |
201 | - asm volatile("pcmpgtb %xmm6,%xmm7"); | |
202 | - asm volatile("pcmpgtb %xmm12,%xmm13"); | |
203 | - asm volatile("pcmpgtb %xmm14,%xmm15"); | |
204 | - asm volatile("paddb %xmm4,%xmm4"); | |
205 | - asm volatile("paddb %xmm6,%xmm6"); | |
206 | - asm volatile("paddb %xmm12,%xmm12"); | |
207 | - asm volatile("paddb %xmm14,%xmm14"); | |
208 | - asm volatile("pand %xmm0,%xmm5"); | |
209 | - asm volatile("pand %xmm0,%xmm7"); | |
210 | - asm volatile("pand %xmm0,%xmm13"); | |
211 | - asm volatile("pand %xmm0,%xmm15"); | |
212 | - asm volatile("pxor %xmm5,%xmm4"); | |
213 | - asm volatile("pxor %xmm7,%xmm6"); | |
214 | - asm volatile("pxor %xmm13,%xmm12"); | |
215 | - asm volatile("pxor %xmm15,%xmm14"); | |
216 | - asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); | |
217 | - asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16])); | |
218 | - asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32])); | |
219 | - asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48])); | |
220 | - asm volatile("pxor %xmm5,%xmm2"); | |
221 | - asm volatile("pxor %xmm7,%xmm3"); | |
222 | - asm volatile("pxor %xmm13,%xmm10"); | |
223 | - asm volatile("pxor %xmm15,%xmm11"); | |
224 | - asm volatile("pxor %xmm5,%xmm4"); | |
225 | - asm volatile("pxor %xmm7,%xmm6"); | |
226 | - asm volatile("pxor %xmm13,%xmm12"); | |
227 | - asm volatile("pxor %xmm15,%xmm14"); | |
228 | - asm volatile("pxor %xmm5,%xmm5"); | |
229 | - asm volatile("pxor %xmm7,%xmm7"); | |
230 | - asm volatile("pxor %xmm13,%xmm13"); | |
231 | - asm volatile("pxor %xmm15,%xmm15"); | |
232 | - } | |
233 | - asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); | |
234 | - asm volatile("pxor %xmm2,%xmm2"); | |
235 | - asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); | |
236 | - asm volatile("pxor %xmm3,%xmm3"); | |
237 | - asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32])); | |
238 | - asm volatile("pxor %xmm10,%xmm10"); | |
239 | - asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48])); | |
240 | - asm volatile("pxor %xmm11,%xmm11"); | |
241 | - asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); | |
242 | - asm volatile("pxor %xmm4,%xmm4"); | |
243 | - asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); | |
244 | - asm volatile("pxor %xmm6,%xmm6"); | |
245 | - asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32])); | |
246 | - asm volatile("pxor %xmm12,%xmm12"); | |
247 | - asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48])); | |
248 | - asm volatile("pxor %xmm14,%xmm14"); | |
249 | - } | |
250 | - | |
251 | - asm volatile("sfence" : : : "memory"); | |
252 | - kernel_fpu_end(); | |
253 | -} | |
254 | - | |
255 | -const struct raid6_calls raid6_sse2x4 = { | |
256 | - raid6_sse24_gen_syndrome, | |
257 | - raid6_have_sse2, | |
258 | - "sse2x4", | |
259 | - 1 /* Has cache hints */ | |
260 | -}; | |
261 | - | |
262 | -#endif |
drivers/md/raid6test/Makefile
1 | -# | |
2 | -# This is a simple Makefile to test some of the RAID-6 code | |
3 | -# from userspace. | |
4 | -# | |
5 | - | |
6 | -CC = gcc | |
7 | -OPTFLAGS = -O2 # Adjust as desired | |
8 | -CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS) | |
9 | -LD = ld | |
10 | -PERL = perl | |
11 | -AR = ar | |
12 | -RANLIB = ranlib | |
13 | - | |
14 | -.c.o: | |
15 | - $(CC) $(CFLAGS) -c -o $@ $< | |
16 | - | |
17 | -%.c: ../%.c | |
18 | - cp -f $< $@ | |
19 | - | |
20 | -%.uc: ../%.uc | |
21 | - cp -f $< $@ | |
22 | - | |
23 | -all: raid6.a raid6test | |
24 | - | |
25 | -raid6.a: raid6int1.o raid6int2.o raid6int4.o raid6int8.o raid6int16.o \ | |
26 | - raid6int32.o \ | |
27 | - raid6mmx.o raid6sse1.o raid6sse2.o \ | |
28 | - raid6altivec1.o raid6altivec2.o raid6altivec4.o raid6altivec8.o \ | |
29 | - raid6recov.o raid6algos.o \ | |
30 | - raid6tables.o | |
31 | - rm -f $@ | |
32 | - $(AR) cq $@ $^ | |
33 | - $(RANLIB) $@ | |
34 | - | |
35 | -raid6test: test.c raid6.a | |
36 | - $(CC) $(CFLAGS) -o raid6test $^ | |
37 | - | |
38 | -raid6altivec1.c: raid6altivec.uc ../unroll.pl | |
39 | - $(PERL) ../unroll.pl 1 < raid6altivec.uc > $@ | |
40 | - | |
41 | -raid6altivec2.c: raid6altivec.uc ../unroll.pl | |
42 | - $(PERL) ../unroll.pl 2 < raid6altivec.uc > $@ | |
43 | - | |
44 | -raid6altivec4.c: raid6altivec.uc ../unroll.pl | |
45 | - $(PERL) ../unroll.pl 4 < raid6altivec.uc > $@ | |
46 | - | |
47 | -raid6altivec8.c: raid6altivec.uc ../unroll.pl | |
48 | - $(PERL) ../unroll.pl 8 < raid6altivec.uc > $@ | |
49 | - | |
50 | -raid6int1.c: raid6int.uc ../unroll.pl | |
51 | - $(PERL) ../unroll.pl 1 < raid6int.uc > $@ | |
52 | - | |
53 | -raid6int2.c: raid6int.uc ../unroll.pl | |
54 | - $(PERL) ../unroll.pl 2 < raid6int.uc > $@ | |
55 | - | |
56 | -raid6int4.c: raid6int.uc ../unroll.pl | |
57 | - $(PERL) ../unroll.pl 4 < raid6int.uc > $@ | |
58 | - | |
59 | -raid6int8.c: raid6int.uc ../unroll.pl | |
60 | - $(PERL) ../unroll.pl 8 < raid6int.uc > $@ | |
61 | - | |
62 | -raid6int16.c: raid6int.uc ../unroll.pl | |
63 | - $(PERL) ../unroll.pl 16 < raid6int.uc > $@ | |
64 | - | |
65 | -raid6int32.c: raid6int.uc ../unroll.pl | |
66 | - $(PERL) ../unroll.pl 32 < raid6int.uc > $@ | |
67 | - | |
68 | -raid6tables.c: mktables | |
69 | - ./mktables > raid6tables.c | |
70 | - | |
71 | -clean: | |
72 | - rm -f *.o *.a mktables mktables.c raid6int.uc raid6*.c raid6test | |
73 | - | |
74 | -spotless: clean | |
75 | - rm -f *~ |
drivers/md/raid6test/test.c
1 | -/* -*- linux-c -*- ------------------------------------------------------- * | |
2 | - * | |
3 | - * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved | |
4 | - * | |
5 | - * This file is part of the Linux kernel, and is made available under | |
6 | - * the terms of the GNU General Public License version 2 or (at your | |
7 | - * option) any later version; incorporated herein by reference. | |
8 | - * | |
9 | - * ----------------------------------------------------------------------- */ | |
10 | - | |
11 | -/* | |
12 | - * raid6test.c | |
13 | - * | |
14 | - * Test RAID-6 recovery with various algorithms | |
15 | - */ | |
16 | - | |
17 | -#include <stdlib.h> | |
18 | -#include <stdio.h> | |
19 | -#include <string.h> | |
20 | -#include <linux/raid/pq.h> | |
21 | - | |
22 | -#define NDISKS 16 /* Including P and Q */ | |
23 | - | |
24 | -const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); | |
25 | -struct raid6_calls raid6_call; | |
26 | - | |
27 | -char *dataptrs[NDISKS]; | |
28 | -char data[NDISKS][PAGE_SIZE]; | |
29 | -char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; | |
30 | - | |
31 | -static void makedata(void) | |
32 | -{ | |
33 | - int i, j; | |
34 | - | |
35 | - for (i = 0; i < NDISKS; i++) { | |
36 | - for (j = 0; j < PAGE_SIZE; j++) | |
37 | - data[i][j] = rand(); | |
38 | - | |
39 | - dataptrs[i] = data[i]; | |
40 | - } | |
41 | -} | |
42 | - | |
43 | -static char disk_type(int d) | |
44 | -{ | |
45 | - switch (d) { | |
46 | - case NDISKS-2: | |
47 | - return 'P'; | |
48 | - case NDISKS-1: | |
49 | - return 'Q'; | |
50 | - default: | |
51 | - return 'D'; | |
52 | - } | |
53 | -} | |
54 | - | |
55 | -static int test_disks(int i, int j) | |
56 | -{ | |
57 | - int erra, errb; | |
58 | - | |
59 | - memset(recovi, 0xf0, PAGE_SIZE); | |
60 | - memset(recovj, 0xba, PAGE_SIZE); | |
61 | - | |
62 | - dataptrs[i] = recovi; | |
63 | - dataptrs[j] = recovj; | |
64 | - | |
65 | - raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs); | |
66 | - | |
67 | - erra = memcmp(data[i], recovi, PAGE_SIZE); | |
68 | - errb = memcmp(data[j], recovj, PAGE_SIZE); | |
69 | - | |
70 | - if (i < NDISKS-2 && j == NDISKS-1) { | |
71 | - /* We don't implement the DQ failure scenario, since it's | |
72 | - equivalent to a RAID-5 failure (XOR, then recompute Q) */ | |
73 | - erra = errb = 0; | |
74 | - } else { | |
75 | - printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n", | |
76 | - raid6_call.name, | |
77 | - i, disk_type(i), | |
78 | - j, disk_type(j), | |
79 | - (!erra && !errb) ? "OK" : | |
80 | - !erra ? "ERRB" : | |
81 | - !errb ? "ERRA" : "ERRAB"); | |
82 | - } | |
83 | - | |
84 | - dataptrs[i] = data[i]; | |
85 | - dataptrs[j] = data[j]; | |
86 | - | |
87 | - return erra || errb; | |
88 | -} | |
89 | - | |
90 | -int main(int argc, char *argv[]) | |
91 | -{ | |
92 | - const struct raid6_calls *const *algo; | |
93 | - int i, j; | |
94 | - int err = 0; | |
95 | - | |
96 | - makedata(); | |
97 | - | |
98 | - for (algo = raid6_algos; *algo; algo++) { | |
99 | - if (!(*algo)->valid || (*algo)->valid()) { | |
100 | - raid6_call = **algo; | |
101 | - | |
102 | - /* Nuke syndromes */ | |
103 | - memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); | |
104 | - | |
105 | - /* Generate assumed good syndrome */ | |
106 | - raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, | |
107 | - (void **)&dataptrs); | |
108 | - | |
109 | - for (i = 0; i < NDISKS-1; i++) | |
110 | - for (j = i+1; j < NDISKS; j++) | |
111 | - err += test_disks(i, j); | |
112 | - } | |
113 | - printf("\n"); | |
114 | - } | |
115 | - | |
116 | - printf("\n"); | |
117 | - /* Pick the best algorithm test */ | |
118 | - raid6_select_algo(); | |
119 | - | |
120 | - if (err) | |
121 | - printf("\n*** ERRORS FOUND ***\n"); | |
122 | - | |
123 | - return err; | |
124 | -} |
drivers/md/raid6x86.h
1 | -/* ----------------------------------------------------------------------- * | |
2 | - * | |
3 | - * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved | |
4 | - * | |
5 | - * This program is free software; you can redistribute it and/or modify | |
6 | - * it under the terms of the GNU General Public License as published by | |
7 | - * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | |
8 | - * Boston MA 02111-1307, USA; either version 2 of the License, or | |
9 | - * (at your option) any later version; incorporated herein by reference. | |
10 | - * | |
11 | - * ----------------------------------------------------------------------- */ | |
12 | - | |
13 | -/* | |
14 | - * raid6x86.h | |
15 | - * | |
16 | - * Definitions common to x86 and x86-64 RAID-6 code only | |
17 | - */ | |
18 | - | |
19 | -#ifndef LINUX_RAID_RAID6X86_H | |
20 | -#define LINUX_RAID_RAID6X86_H | |
21 | - | |
22 | -#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) | |
23 | - | |
24 | -#ifdef __KERNEL__ /* Real code */ | |
25 | - | |
26 | -#include <asm/i387.h> | |
27 | - | |
28 | -#else /* Dummy code for user space testing */ | |
29 | - | |
30 | -static inline void kernel_fpu_begin(void) | |
31 | -{ | |
32 | -} | |
33 | - | |
34 | -static inline void kernel_fpu_end(void) | |
35 | -{ | |
36 | -} | |
37 | - | |
38 | -#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ | |
39 | -#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions | |
40 | - * (fast save and restore) */ | |
41 | -#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ | |
42 | -#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ | |
43 | -#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ | |
44 | - | |
45 | -/* Should work well enough on modern CPUs for testing */ | |
46 | -static inline int boot_cpu_has(int flag) | |
47 | -{ | |
48 | - u32 eax = (flag >> 5) ? 0x80000001 : 1; | |
49 | - u32 edx; | |
50 | - | |
51 | - asm volatile("cpuid" | |
52 | - : "+a" (eax), "=d" (edx) | |
53 | - : : "ecx", "ebx"); | |
54 | - | |
55 | - return (edx >> (flag & 31)) & 1; | |
56 | -} | |
57 | - | |
58 | -#endif /* ndef __KERNEL__ */ | |
59 | - | |
60 | -#endif | |
61 | -#endif |
drivers/md/unroll.pl
1 | -#!/usr/bin/perl | |
2 | -# | |
3 | -# Take a piece of C code and for each line which contains the sequence $$ | |
4 | -# repeat n times with $ replaced by 0...n-1; the sequence $# is replaced | |
5 | -# by the unrolling factor, and $* with a single $ | |
6 | -# | |
7 | - | |
8 | -($n) = @ARGV; | |
9 | -$n += 0; | |
10 | - | |
11 | -while ( defined($line = <STDIN>) ) { | |
12 | - if ( $line =~ /\$\$/ ) { | |
13 | - $rep = $n; | |
14 | - } else { | |
15 | - $rep = 1; | |
16 | - } | |
17 | - for ( $i = 0 ; $i < $rep ; $i++ ) { | |
18 | - $tmp = $line; | |
19 | - $tmp =~ s/\$\$/$i/g; | |
20 | - $tmp =~ s/\$\#/$n/g; | |
21 | - $tmp =~ s/\$\*/\$/g; | |
22 | - print $tmp; | |
23 | - } | |
24 | -} |
lib/Kconfig
lib/Makefile
... | ... | @@ -65,6 +65,7 @@ |
65 | 65 | obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ |
66 | 66 | obj-$(CONFIG_LZO_COMPRESS) += lzo/ |
67 | 67 | obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ |
68 | +obj-$(CONFIG_RAID6_PQ) += raid6/ | |
68 | 69 | |
69 | 70 | lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o |
70 | 71 | lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o |
lib/raid6/Makefile
1 | +obj-$(CONFIG_RAID6_PQ) += raid6_pq.o | |
2 | + | |
3 | +raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \ | |
4 | + raid6int1.o raid6int2.o raid6int4.o \ | |
5 | + raid6int8.o raid6int16.o raid6int32.o \ | |
6 | + raid6altivec1.o raid6altivec2.o raid6altivec4.o \ | |
7 | + raid6altivec8.o \ | |
8 | + raid6mmx.o raid6sse1.o raid6sse2.o | |
9 | +hostprogs-y += mktables | |
10 | + | |
11 | +quiet_cmd_unroll = UNROLL $@ | |
12 | + cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \ | |
13 | + < $< > $@ || ( rm -f $@ && exit 1 ) | |
14 | + | |
15 | +ifeq ($(CONFIG_ALTIVEC),y) | |
16 | +altivec_flags := -maltivec -mabi=altivec | |
17 | +endif | |
18 | + | |
19 | +targets += raid6int1.c | |
20 | +$(obj)/raid6int1.c: UNROLL := 1 | |
21 | +$(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE | |
22 | + $(call if_changed,unroll) | |
23 | + | |
24 | +targets += raid6int2.c | |
25 | +$(obj)/raid6int2.c: UNROLL := 2 | |
26 | +$(obj)/raid6int2.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE | |
27 | + $(call if_changed,unroll) | |
28 | + | |
29 | +targets += raid6int4.c | |
30 | +$(obj)/raid6int4.c: UNROLL := 4 | |
31 | +$(obj)/raid6int4.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE | |
32 | + $(call if_changed,unroll) | |
33 | + | |
34 | +targets += raid6int8.c | |
35 | +$(obj)/raid6int8.c: UNROLL := 8 | |
36 | +$(obj)/raid6int8.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE | |
37 | + $(call if_changed,unroll) | |
38 | + | |
39 | +targets += raid6int16.c | |
40 | +$(obj)/raid6int16.c: UNROLL := 16 | |
41 | +$(obj)/raid6int16.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE | |
42 | + $(call if_changed,unroll) | |
43 | + | |
44 | +targets += raid6int32.c | |
45 | +$(obj)/raid6int32.c: UNROLL := 32 | |
46 | +$(obj)/raid6int32.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE | |
47 | + $(call if_changed,unroll) | |
48 | + | |
49 | +CFLAGS_raid6altivec1.o += $(altivec_flags) | |
50 | +targets += raid6altivec1.c | |
51 | +$(obj)/raid6altivec1.c: UNROLL := 1 | |
52 | +$(obj)/raid6altivec1.c: $(src)/raid6altivec.uc $(src)/unroll.pl FORCE | |
53 | + $(call if_changed,unroll) | |
54 | + | |
55 | +CFLAGS_raid6altivec2.o += $(altivec_flags) | |
56 | +targets += raid6altivec2.c | |
57 | +$(obj)/raid6altivec2.c: UNROLL := 2 | |
58 | +$(obj)/raid6altivec2.c: $(src)/raid6altivec.uc $(src)/unroll.pl FORCE | |
59 | + $(call if_changed,unroll) | |
60 | + | |
61 | +CFLAGS_raid6altivec4.o += $(altivec_flags) | |
62 | +targets += raid6altivec4.c | |
63 | +$(obj)/raid6altivec4.c: UNROLL := 4 | |
64 | +$(obj)/raid6altivec4.c: $(src)/raid6altivec.uc $(src)/unroll.pl FORCE | |
65 | + $(call if_changed,unroll) | |
66 | + | |
67 | +CFLAGS_raid6altivec8.o += $(altivec_flags) | |
68 | +targets += raid6altivec8.c | |
69 | +$(obj)/raid6altivec8.c: UNROLL := 8 | |
70 | +$(obj)/raid6altivec8.c: $(src)/raid6altivec.uc $(src)/unroll.pl FORCE | |
71 | + $(call if_changed,unroll) | |
72 | + | |
73 | +quiet_cmd_mktable = TABLE $@ | |
74 | + cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) | |
75 | + | |
76 | +targets += raid6tables.c | |
77 | +$(obj)/raid6tables.c: $(obj)/mktables FORCE | |
78 | + $(call if_changed,mktable) |
lib/raid6/mktables.c
1 | +/* -*- linux-c -*- ------------------------------------------------------- * | |
2 | + * | |
3 | + * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved | |
4 | + * | |
5 | + * This file is part of the Linux kernel, and is made available under | |
6 | + * the terms of the GNU General Public License version 2 or (at your | |
7 | + * option) any later version; incorporated herein by reference. | |
8 | + * | |
9 | + * ----------------------------------------------------------------------- */ | |
10 | + | |
11 | +/* | |
12 | + * mktables.c | |
13 | + * | |
14 | + * Make RAID-6 tables. This is a host user space program to be run at | |
15 | + * compile time. | |
16 | + */ | |
17 | + | |
18 | +#include <stdio.h> | |
19 | +#include <string.h> | |
20 | +#include <inttypes.h> | |
21 | +#include <stdlib.h> | |
22 | +#include <time.h> | |
23 | +/* Multiply a by b in the RAID-6 Galois field: shift-and-XOR over the bits of b. */ | |
24 | +static uint8_t gfmul(uint8_t a, uint8_t b) | |
25 | +{ | |
26 | + uint8_t v = 0; | |
27 | + | |
28 | + while (b) { | |
29 | + if (b & 1) | |
30 | + v ^= a; /* add (XOR) the current multiple of a */ | |
31 | + a = (a << 1) ^ (a & 0x80 ? 0x1d : 0); /* double a; fold in 0x1d when the top bit falls off */ | |
32 | + b >>= 1; | |
33 | + } | |
34 | + | |
35 | + return v; | |
36 | +} | |
37 | +/* Raise a to the power b by square-and-multiply; b is first reduced modulo 255 into the range 0..254. */ | |
38 | +static uint8_t gfpow(uint8_t a, int b) | |
39 | +{ | |
40 | + uint8_t v = 1; | |
41 | + | |
42 | + b %= 255; | |
43 | + if (b < 0) | |
44 | + b += 255; /* the remainder of a negative b is negative; bring it back into range */ | |
45 | + | |
46 | + while (b) { | |
47 | + if (b & 1) | |
48 | + v = gfmul(v, a); | |
49 | + a = gfmul(a, a); /* a, a^2, a^4, ... */ | |
50 | + b >>= 1; | |
51 | + } | |
52 | + | |
53 | + return v; | |
54 | +} | |
55 | +/* Print C source on stdout defining the raid6_gfmul, raid6_gfexp, raid6_gfinv and raid6_gfexi tables. */ | |
56 | +int main(int argc, char *argv[]) | |
57 | +{ | |
58 | + int i, j, k; | |
59 | + uint8_t v; | |
60 | + uint8_t exptbl[256], invtbl[256]; /* copies kept to build the gfexi table */ | |
61 | + | |
62 | + printf("#include <linux/raid/pq.h>\n"); | |
63 | + | |
64 | + /* Compute multiplication table: entry [i][j] is gfmul(i, j) */ | |
65 | + printf("\nconst u8 __attribute__((aligned(256)))\n" | |
66 | + "raid6_gfmul[256][256] =\n" | |
67 | + "{\n"); | |
68 | + for (i = 0; i < 256; i++) { | |
69 | + printf("\t{\n"); | |
70 | + for (j = 0; j < 256; j += 8) { /* eight values per output line */ | |
71 | + printf("\t\t"); | |
72 | + for (k = 0; k < 8; k++) | |
73 | + printf("0x%02x,%c", gfmul(i, j + k), | |
74 | + (k == 7) ? '\n' : ' '); | |
75 | + } | |
76 | + printf("\t},\n"); | |
77 | + } | |
78 | + printf("};\n"); | |
79 | + printf("#ifdef __KERNEL__\n"); /* the generated export is compiled only for kernel builds */ | |
80 | + printf("EXPORT_SYMBOL(raid6_gfmul);\n"); | |
81 | + printf("#endif\n"); | |
82 | + | |
83 | + /* Compute power-of-2 table (exponent): entry x is 2^x */ | |
84 | + v = 1; | |
85 | + printf("\nconst u8 __attribute__((aligned(256)))\n" | |
86 | + "raid6_gfexp[256] =\n" "{\n"); | |
87 | + for (i = 0; i < 256; i += 8) { | |
88 | + printf("\t"); | |
89 | + for (j = 0; j < 8; j++) { | |
90 | + exptbl[i + j] = v; | |
91 | + printf("0x%02x,%c", v, (j == 7) ? '\n' : ' '); | |
92 | + v = gfmul(v, 2); | |
93 | + if (v == 1) | |
94 | + v = 0; /* For entry 255, not a real entry */ | |
95 | + } | |
96 | + } | |
97 | + printf("};\n"); | |
98 | + printf("#ifdef __KERNEL__\n"); | |
99 | + printf("EXPORT_SYMBOL(raid6_gfexp);\n"); | |
100 | + printf("#endif\n"); | |
101 | + | |
102 | + /* Compute inverse table x^-1 == x^254 */ | |
103 | + printf("\nconst u8 __attribute__((aligned(256)))\n" | |
104 | + "raid6_gfinv[256] =\n" "{\n"); | |
105 | + for (i = 0; i < 256; i += 8) { | |
106 | + printf("\t"); | |
107 | + for (j = 0; j < 8; j++) { | |
108 | + invtbl[i + j] = v = gfpow(i + j, 254); | |
109 | + printf("0x%02x,%c", v, (j == 7) ? '\n' : ' '); | |
110 | + } | |
111 | + } | |
112 | + printf("};\n"); | |
113 | + printf("#ifdef __KERNEL__\n"); | |
114 | + printf("EXPORT_SYMBOL(raid6_gfinv);\n"); | |
115 | + printf("#endif\n"); | |
116 | + | |
117 | + /* Compute inv(2^x + 1) (exponent-xor-inverse) table */ | |
118 | + printf("\nconst u8 __attribute__((aligned(256)))\n" | |
119 | + "raid6_gfexi[256] =\n" "{\n"); | |
120 | + for (i = 0; i < 256; i += 8) { | |
121 | + printf("\t"); | |
122 | + for (j = 0; j < 8; j++) | |
123 | + printf("0x%02x,%c", invtbl[exptbl[i + j] ^ 1], /* "^ 1" is the "+ 1" of the comment above */ | |
124 | + (j == 7) ? '\n' : ' '); | |
125 | + } | |
126 | + printf("};\n"); | |
127 | + printf("#ifdef __KERNEL__\n"); | |
128 | + printf("EXPORT_SYMBOL(raid6_gfexi);\n"); | |
129 | + printf("#endif\n"); | |
130 | + | |
131 | + return 0; | |
132 | +} |
lib/raid6/raid6algos.c
1 | +/* -*- linux-c -*- ------------------------------------------------------- * | |
2 | + * | |
3 | + * Copyright 2002 H. Peter Anvin - All Rights Reserved | |
4 | + * | |
5 | + * This program is free software; you can redistribute it and/or modify | |
6 | + * it under the terms of the GNU General Public License as published by | |
7 | + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | |
8 | + * Boston MA 02111-1307, USA; either version 2 of the License, or | |
9 | + * (at your option) any later version; incorporated herein by reference. | |
10 | + * | |
11 | + * ----------------------------------------------------------------------- */ | |
12 | + | |
13 | +/* | |
14 | + * raid6algos.c | |
15 | + * | |
16 | + * Algorithm list and algorithm selection for RAID-6 | |
17 | + */ | |
18 | + | |
19 | +#include <linux/raid/pq.h> | |
20 | +#ifndef __KERNEL__ | |
21 | +#include <sys/mman.h> | |
22 | +#include <stdio.h> | |
23 | +#else | |
24 | +#if !RAID6_USE_EMPTY_ZERO_PAGE | |
25 | +/* Defined without an initializer so it sits in .bss and is therefore all zeroes */ | |
26 | +const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); | |
27 | +EXPORT_SYMBOL(raid6_empty_zero_page); | |
28 | +#endif | |
29 | +#endif | |
30 | +/* The routine set in use; raid6_select_algo() copies its chosen entry here */ | |
31 | +struct raid6_calls raid6_call; | |
32 | +EXPORT_SYMBOL_GPL(raid6_call); | |
33 | + | |
34 | +/* Various routine sets */ | |
35 | +extern const struct raid6_calls raid6_intx1; | |
36 | +extern const struct raid6_calls raid6_intx2; | |
37 | +extern const struct raid6_calls raid6_intx4; | |
38 | +extern const struct raid6_calls raid6_intx8; | |
39 | +extern const struct raid6_calls raid6_intx16; | |
40 | +extern const struct raid6_calls raid6_intx32; | |
41 | +extern const struct raid6_calls raid6_mmxx1; | |
42 | +extern const struct raid6_calls raid6_mmxx2; | |
43 | +extern const struct raid6_calls raid6_sse1x1; | |
44 | +extern const struct raid6_calls raid6_sse1x2; | |
45 | +extern const struct raid6_calls raid6_sse2x1; | |
46 | +extern const struct raid6_calls raid6_sse2x2; | |
47 | +extern const struct raid6_calls raid6_sse2x4; | |
48 | +extern const struct raid6_calls raid6_altivec1; | |
49 | +extern const struct raid6_calls raid6_altivec2; | |
50 | +extern const struct raid6_calls raid6_altivec4; | |
51 | +extern const struct raid6_calls raid6_altivec8; | |
52 | +/* NULL-terminated list of candidate routine sets; the preprocessor conditions decide which are built in */ | |
53 | +const struct raid6_calls * const raid6_algos[] = { | |
54 | + &raid6_intx1, | |
55 | + &raid6_intx2, | |
56 | + &raid6_intx4, | |
57 | + &raid6_intx8, | |
58 | +#if defined(__ia64__) | |
59 | + &raid6_intx16, | |
60 | + &raid6_intx32, | |
61 | +#endif | |
62 | +#if defined(__i386__) && !defined(__arch_um__) | |
63 | + &raid6_mmxx1, | |
64 | + &raid6_mmxx2, | |
65 | + &raid6_sse1x1, | |
66 | + &raid6_sse1x2, | |
67 | + &raid6_sse2x1, | |
68 | + &raid6_sse2x2, | |
69 | +#endif | |
70 | +#if defined(__x86_64__) && !defined(__arch_um__) | |
71 | + &raid6_sse2x1, | |
72 | + &raid6_sse2x2, | |
73 | + &raid6_sse2x4, | |
74 | +#endif | |
75 | +#ifdef CONFIG_ALTIVEC | |
76 | + &raid6_altivec1, | |
77 | + &raid6_altivec2, | |
78 | + &raid6_altivec4, | |
79 | + &raid6_altivec8, | |
80 | +#endif | |
81 | + NULL /* terminator tested by the loop in raid6_select_algo() */ | |
82 | +}; | |
83 | +/* RAID6_TIME_JIFFIES_LG2 is log2 of the number of jiffies each candidate is timed for */ | |
84 | +#ifdef __KERNEL__ | |
85 | +#define RAID6_TIME_JIFFIES_LG2 4 | |
86 | +#else | |
87 | +/* Need more time to be stable in userspace */ | |
88 | +#define RAID6_TIME_JIFFIES_LG2 9 | |
89 | +#define time_before(x, y) ((x) < (y)) | |
90 | +#endif | |
91 | + | |
92 | +/* Time every usable entry of raid6_algos and copy the winner into raid6_call. */ | |
93 | +/* The gfmul table doubles as the input data. Returns 0, or -ENOMEM / -EINVAL on failure. */ | |
94 | + | |
95 | +int __init raid6_select_algo(void) | |
96 | +{ | |
97 | + const struct raid6_calls * const * algo; | |
98 | + const struct raid6_calls * best; | |
99 | + char *syndromes; | |
100 | + void *dptrs[(65536/PAGE_SIZE)+2]; /* data pages, then P and Q */ | |
101 | + int i, disks; | |
102 | + unsigned long perf, bestperf; | |
103 | + int bestprefer; | |
104 | + unsigned long j0, j1; | |
105 | + | |
106 | + disks = (65536/PAGE_SIZE)+2; | |
107 | + for ( i = 0 ; i < disks-2 ; i++ ) { | |
108 | + dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; /* page i of the table acts as data disk i */ | |
109 | + } | |
110 | + | |
111 | + /* Normal code - use a 2-page allocation to avoid D$ conflict */ | |
112 | + syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); | |
113 | + | |
114 | + if ( !syndromes ) { | |
115 | + printk("raid6: Yikes! No memory available.\n"); | |
116 | + return -ENOMEM; | |
117 | + } | |
118 | + | |
119 | + dptrs[disks-2] = syndromes; /* P output */ | |
120 | + dptrs[disks-1] = syndromes + PAGE_SIZE; /* Q output */ | |
121 | + | |
122 | + bestperf = 0; bestprefer = 0; best = NULL; | |
123 | + | |
124 | + for ( algo = raid6_algos ; *algo ; algo++ ) { | |
125 | + if ( !(*algo)->valid || (*algo)->valid() ) { /* a missing valid() hook means always usable */ | |
126 | + perf = 0; | |
127 | + | |
128 | + preempt_disable(); | |
129 | + j0 = jiffies; | |
130 | + while ( (j1 = jiffies) == j0 ) /* wait for the next tick so timing starts on a tick edge */ | |
131 | + cpu_relax(); | |
132 | + while (time_before(jiffies, | |
133 | + j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { | |
134 | + (*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs); | |
135 | + perf++; /* counts completed passes over the data set */ | |
136 | + } | |
137 | + preempt_enable(); | |
138 | + | |
139 | + if ( (*algo)->prefer > bestprefer || /* a higher prefer value wins outright */ | |
140 | + ((*algo)->prefer == bestprefer && | |
141 | + perf > bestperf) ) { /* equal prefer: the faster one wins */ | |
142 | + best = *algo; | |
143 | + bestprefer = best->prefer; | |
144 | + bestperf = perf; | |
145 | + } | |
146 | + printk("raid6: %-8s %5ld MB/s\n", (*algo)->name, | |
147 | + (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); /* perf passes over 2^16 bytes in 2^LG2 jiffies, scaled to 2^20 bytes per second */ | |
148 | + } | |
149 | + } | |
150 | + | |
151 | + if (best) { | |
152 | + printk("raid6: using algorithm %s (%ld MB/s)\n", | |
153 | + best->name, | |
154 | + (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); | |
155 | + raid6_call = *best; /* copied by value */ | |
156 | + } else | |
157 | + printk("raid6: Yikes! No algorithm found!\n"); | |
158 | + | |
159 | + free_pages((unsigned long)syndromes, 1); /* same order as the allocation above */ | |
160 | + | |
161 | + return best ? 0 : -EINVAL; | |
162 | +} | |
163 | +/* Module exit hook; the body is an intentional no-op */ | |
164 | +static void raid6_exit(void) | |
165 | +{ | |
166 | + do { } while (0); | |
167 | +} | |
168 | + | |
169 | +subsys_initcall(raid6_select_algo); | |
170 | +module_exit(raid6_exit); | |
171 | +MODULE_LICENSE("GPL"); |
lib/raid6/raid6altivec.uc
1 | +/* -*- linux-c -*- ------------------------------------------------------- * | |
2 | + * | |
3 | + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved | |
4 | + * | |
5 | + * This program is free software; you can redistribute it and/or modify | |
6 | + * it under the terms of the GNU General Public License as published by | |
7 | + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | |
8 | + * Boston MA 02111-1307, USA; either version 2 of the License, or | |
9 | + * (at your option) any later version; incorporated herein by reference. | |
10 | + * | |
11 | + * ----------------------------------------------------------------------- */ | |
12 | + | |
13 | +/* | |
14 | + * raid6altivec$#.c | |
15 | + * | |
16 | + * $#-way unrolled Altivec RAID-6 instruction set | |
17 | + * | |
18 | + * This file is postprocessed using unroll.pl | |
19 | + * | |
20 | + * <benh> hpa: in process, | |
21 | + * you can just "steal" the vec unit with enable_kernel_altivec() (but | |
22 | + * bracket this with preempt_disable/enable or in a lock) | |
23 | + */ | |
24 | + | |
25 | +#include <linux/raid/pq.h> | |
26 | + | |
27 | +#ifdef CONFIG_ALTIVEC | |
28 | + | |
29 | +#include <altivec.h> | |
30 | +#ifdef __KERNEL__ | |
31 | +# include <asm/system.h> | |
32 | +# include <asm/cputable.h> | |
33 | +#endif | |
34 | + | |
35 | +/* | |
36 | + * This is the C data type to use. We use a vector of | |
37 | + * signed char so vec_cmpgt() will generate the right | |
38 | + * instruction. | |
39 | + */ | |
40 | + | |
41 | +typedef vector signed char unative_t; | |
42 | + | |
43 | +#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) | |
44 | +#define NSIZE sizeof(unative_t) | |
45 | + | |
46 | +/* | |
47 | + * SHLBYTE() shifts every byte left by one bit without carrying into | |
48 | + * the neighbouring byte; here that is done by adding v to itself. | |
49 | + */ | |
50 | +static inline __attribute_const__ unative_t SHLBYTE(unative_t v) | |
51 | +{ | |
52 | + return vec_add(v,v); | |
53 | +} | |
54 | + | |
55 | +/* | |
56 | + * The MASK() operation returns 0xFF in any byte for which the high | |
57 | + * bit is 1, 0x00 for any byte for which the high bit is 0. | |
58 | + */ | |
59 | +static inline __attribute_const__ unative_t MASK(unative_t v) | |
60 | +{ | |
61 | + unative_t zv = NBYTES(0); /* all-zero vector to compare against */ | |
62 | + | |
63 | + /* vec_cmpgt returns a vector bool char; thus the need for the cast */ | |
64 | + return (unative_t)vec_cmpgt(zv, v); /* 0 > v on signed bytes, i.e. the high bit is set */ | |
65 | +} | |
66 | + | |
67 | + | |
68 | +/* This is noinline to make damned sure that gcc doesn't move any of the | |
69 | + Altivec code around the enable/disable code */ | |
70 | +static void noinline | |
71 | +raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs) | |
72 | +{ | |
73 | + u8 **dptr = (u8 **)ptrs; | |
74 | + u8 *p, *q; | |
75 | + int d, z, z0; | |
76 | + | |
77 | + unative_t wd$$, wq$$, wp$$, w1$$, w2$$; | |
78 | + unative_t x1d = NBYTES(0x1d); /* 0x1d in every byte */ | |
79 | + | |
80 | + z0 = disks - 3; /* Highest data disk */ | |
81 | + p = dptr[z0+1]; /* XOR parity */ | |
82 | + q = dptr[z0+2]; /* RS syndrome */ | |
83 | + | |
84 | + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { /* step through the blocks one unrolled group of vectors at a time */ | |
85 | + wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; | |
86 | + for ( z = z0-1 ; z >= 0 ; z-- ) { /* remaining data disks, highest first */ | |
87 | + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; | |
88 | + wp$$ = vec_xor(wp$$, wd$$); | |
89 | + w2$$ = MASK(wq$$); | |
90 | + w1$$ = SHLBYTE(wq$$); | |
91 | + w2$$ = vec_and(w2$$, x1d); | |
92 | + w1$$ = vec_xor(w1$$, w2$$); | |
93 | + wq$$ = vec_xor(w1$$, wd$$); | |
94 | + } | |
95 | + *(unative_t *)&p[d+NSIZE*$$] = wp$$; | |
96 | + *(unative_t *)&q[d+NSIZE*$$] = wq$$; | |
97 | + } | |
98 | +} | |
99 | +/* Entry point stored in the descriptor below: enable the vector unit with preemption off, then run the real routine */ | |
100 | +static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
101 | +{ | |
102 | + preempt_disable(); | |
103 | + enable_kernel_altivec(); | |
104 | + | |
105 | + raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs); | |
106 | + | |
107 | + preempt_enable(); | |
108 | +} | |
109 | +/* valid() hook for every altivec variant; the body is emitted only in the 1-way unrolled file */ | |
110 | +int raid6_have_altivec(void); | |
111 | +#if $# == 1 | |
112 | +int raid6_have_altivec(void) | |
113 | +{ | |
114 | + /* This assumes either all CPUs have Altivec or none does */ | |
115 | +# ifdef __KERNEL__ | |
116 | + return cpu_has_feature(CPU_FTR_ALTIVEC); | |
117 | +# else | |
118 | + return 1; /* userspace test build: taken to be available */ | |
119 | +# endif | |
120 | +} | |
121 | +#endif | |
122 | +/* Routine-set descriptor; the fields look like gen_syndrome, valid, name, prefer - confirm the order against pq.h */ | |
123 | +const struct raid6_calls raid6_altivec$# = { | |
124 | + raid6_altivec$#_gen_syndrome, | |
125 | + raid6_have_altivec, | |
126 | + "altivecx$#", | |
127 | + 0 | |
128 | +}; | |
129 | + | |
130 | +#endif /* CONFIG_ALTIVEC */ |
lib/raid6/raid6int.uc
1 | +/* -*- linux-c -*- ------------------------------------------------------- * | |
2 | + * | |
3 | + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved | |
4 | + * | |
5 | + * This program is free software; you can redistribute it and/or modify | |
6 | + * it under the terms of the GNU General Public License as published by | |
7 | + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | |
8 | + * Boston MA 02111-1307, USA; either version 2 of the License, or | |
9 | + * (at your option) any later version; incorporated herein by reference. | |
10 | + * | |
11 | + * ----------------------------------------------------------------------- */ | |
12 | + | |
13 | +/* | |
14 | + * raid6int$#.c | |
15 | + * | |
16 | + * $#-way unrolled portable integer math RAID-6 instruction set | |
17 | + * | |
18 | + * This file is postprocessed using unroll.pl | |
19 | + */ | |
20 | + | |
21 | +#include <linux/raid/pq.h> | |
22 | + | |
23 | +/* | |
24 | + * unative_t is the machine word the loops work on; NBYTES(x) copies byte x into every byte of it | |
25 | + */ | |
26 | + | |
27 | +/* Change this from BITS_PER_LONG if there is something better... */ | |
28 | +#if BITS_PER_LONG == 64 | |
29 | +# define NBYTES(x) ((x) * 0x0101010101010101UL) | |
30 | +# define NSIZE 8 /* bytes per unative_t */ | |
31 | +# define NSHIFT 3 /* log2(NSIZE) */ | |
32 | +# define NSTRING "64" /* word width, spliced into the algorithm name */ | |
33 | +typedef u64 unative_t; | |
34 | +#else | |
35 | +# define NBYTES(x) ((x) * 0x01010101U) | |
36 | +# define NSIZE 4 | |
37 | +# define NSHIFT 2 | |
38 | +# define NSTRING "32" | |
39 | +typedef u32 unative_t; | |
40 | +#endif | |
41 | + | |
42 | + | |
43 | + | |
44 | +/* | |
45 | + * IA-64 wants insane amounts of unrolling. On other architectures that | |
46 | + * is just a waste of space. | |
47 | + */ | |
48 | +#if ($# <= 8) || defined(__ia64__) | |
49 | + | |
50 | + | |
51 | +/* | |
52 | + * These sub-operations are separate inlines since they can sometimes be | |
53 | + * specially optimized using architecture-specific hacks. | |
54 | + */ | |
55 | + | |
56 | +/* | |
57 | + * The SHLBYTE() operation shifts each byte left by 1, *not* | |
58 | + * rolling over into the next byte | |
59 | + */ | |
60 | +static inline __attribute_const__ unative_t SHLBYTE(unative_t v) | |
61 | +{ | |
62 | + unative_t vv; | |
63 | + | |
64 | + vv = (v << 1) & NBYTES(0xfe); /* clear bit 0 of each byte, where the neighbouring byte's top bit landed */ | |
65 | + return vv; | |
66 | +} | |
67 | + | |
68 | +/* | |
69 | + * The MASK() operation returns 0xFF in any byte for which the high | |
70 | + * bit is 1, 0x00 for any byte for which the high bit is 0. | |
71 | + */ | |
72 | +static inline __attribute_const__ unative_t MASK(unative_t v) | |
73 | +{ | |
74 | + unative_t vv; | |
75 | + /* keep only each byte's high bit; (bit << 1) - (bit >> 7) then widens it to a full 0xFF byte */ | |
76 | + vv = v & NBYTES(0x80); | |
77 | + vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */ | |
78 | + return vv; | |
79 | +} | |
80 | + | |
81 | +/* Build P (XOR parity) and Q (RS syndrome) from the data blocks; ptrs holds the data pointers followed by P and Q */ | |
82 | +static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
83 | +{ | |
84 | + u8 **dptr = (u8 **)ptrs; | |
85 | + u8 *p, *q; | |
86 | + int d, z, z0; | |
87 | + | |
88 | + unative_t wd$$, wq$$, wp$$, w1$$, w2$$; | |
89 | + | |
90 | + z0 = disks - 3; /* Highest data disk */ | |
91 | + p = dptr[z0+1]; /* XOR parity */ | |
92 | + q = dptr[z0+2]; /* RS syndrome */ | |
93 | + | |
94 | + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { /* step through the blocks one unrolled group of words at a time */ | |
95 | + wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; | |
96 | + for ( z = z0-1 ; z >= 0 ; z-- ) { /* remaining data disks, highest first */ | |
97 | + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; | |
98 | + wp$$ ^= wd$$; | |
99 | + w2$$ = MASK(wq$$); | |
100 | + w1$$ = SHLBYTE(wq$$); | |
101 | + w2$$ &= NBYTES(0x1d); | |
102 | + w1$$ ^= w2$$; | |
103 | + wq$$ = w1$$ ^ wd$$; | |
104 | + } | |
105 | + *(unative_t *)&p[d+NSIZE*$$] = wp$$; | |
106 | + *(unative_t *)&q[d+NSIZE*$$] = wq$$; | |
107 | + } | |
108 | +} | |
109 | +/* Routine-set descriptor; the fields look like gen_syndrome, valid, name, prefer - confirm the order against pq.h */ | |
110 | +const struct raid6_calls raid6_intx$# = { | |
111 | + raid6_int$#_gen_syndrome, | |
112 | + NULL, /* always valid */ | |
113 | + "int" NSTRING "x$#", | |
114 | + 0 | |
115 | +}; | |
116 | + | |
117 | +#endif |
lib/raid6/raid6mmx.c
1 | +/* -*- linux-c -*- ------------------------------------------------------- * | |
2 | + * | |
3 | + * Copyright 2002 H. Peter Anvin - All Rights Reserved | |
4 | + * | |
5 | + * This program is free software; you can redistribute it and/or modify | |
6 | + * it under the terms of the GNU General Public License as published by | |
7 | + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | |
8 | + * Boston MA 02111-1307, USA; either version 2 of the License, or | |
9 | + * (at your option) any later version; incorporated herein by reference. | |
10 | + * | |
11 | + * ----------------------------------------------------------------------- */ | |
12 | + | |
13 | +/* | |
14 | + * raid6mmx.c | |
15 | + * | |
16 | + * MMX implementation of RAID-6 syndrome functions | |
17 | + */ | |
18 | + | |
19 | +#if defined(__i386__) && !defined(__arch_um__) | |
20 | + | |
21 | +#include <linux/raid/pq.h> | |
22 | +#include "raid6x86.h" | |
23 | + | |
24 | +/* Shared with raid6sse1.c; x1d is the byte 0x1d repeated eight times, loaded into %mm0 by the syndrome loops */ | |
25 | +const struct raid6_mmx_constants { | |
26 | + u64 x1d; | |
27 | +} raid6_mmx_constants = { | |
28 | + 0x1d1d1d1d1d1d1d1dULL, | |
29 | +}; | |
30 | +/* valid() hook for the MMX routine sets: nonzero when the boot CPU has MMX */ | |
31 | +static int raid6_have_mmx(void) | |
32 | +{ | |
33 | + /* Not really "boot_cpu" but "all_cpus" */ | |
34 | + return boot_cpu_has(X86_FEATURE_MMX); | |
35 | +} | |
36 | + | |
37 | +/* | |
38 | + * Plain MMX implementation: mm0 = 0x1d bytes, mm2 = P, mm4 = Q, mm5 = mask temp, mm6 = data | |
39 | + */ | |
40 | +static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
41 | +{ | |
42 | + u8 **dptr = (u8 **)ptrs; | |
43 | + u8 *p, *q; | |
44 | + int d, z, z0; | |
45 | + | |
46 | + z0 = disks - 3; /* Highest data disk */ | |
47 | + p = dptr[z0+1]; /* XOR parity */ | |
48 | + q = dptr[z0+2]; /* RS syndrome */ | |
49 | + | |
50 | + kernel_fpu_begin(); | |
51 | + | |
52 | + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); | |
53 | + asm volatile("pxor %mm5,%mm5"); /* Zero temp */ | |
54 | + | |
55 | + for ( d = 0 ; d < bytes ; d += 8 ) { /* eight bytes (one MMX register) per pass */ | |
56 | + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ | |
57 | + asm volatile("movq %mm2,%mm4"); /* Q[0] */ | |
58 | + for ( z = z0-1 ; z >= 0 ; z-- ) { | |
59 | + asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); | |
60 | + asm volatile("pcmpgtb %mm4,%mm5"); /* mm5 = 0xFF in bytes where Q has its high bit set */ | |
61 | + asm volatile("paddb %mm4,%mm4"); /* shift each byte of Q left by one */ | |
62 | + asm volatile("pand %mm0,%mm5"); /* 0x1d only in the bytes that overflowed */ | |
63 | + asm volatile("pxor %mm5,%mm4"); | |
64 | + asm volatile("pxor %mm5,%mm5"); /* re-zero the temp for the next compare */ | |
65 | + asm volatile("pxor %mm6,%mm2"); /* P ^= data */ | |
66 | + asm volatile("pxor %mm6,%mm4"); /* Q ^= data */ | |
67 | + } | |
68 | + asm volatile("movq %%mm2,%0" : "=m" (p[d])); | |
69 | + asm volatile("pxor %mm2,%mm2"); | |
70 | + asm volatile("movq %%mm4,%0" : "=m" (q[d])); | |
71 | + asm volatile("pxor %mm4,%mm4"); | |
72 | + } | |
73 | + | |
74 | + kernel_fpu_end(); | |
75 | +} | |
76 | +/* Routine-set descriptor; the fields look like gen_syndrome, valid, name, prefer - confirm the order against pq.h */ | |
77 | +const struct raid6_calls raid6_mmxx1 = { | |
78 | + raid6_mmx1_gen_syndrome, | |
79 | + raid6_have_mmx, | |
80 | + "mmxx1", | |
81 | + 0 | |
82 | +}; | |
83 | + | |
84 | +/* | |
85 | + * Unrolled-by-2 MMX implementation: mm2/mm3 = P, mm4/mm6 = Q, mm5/mm7 = mask temps that also receive the data loads | |
86 | + */ | |
87 | +static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
88 | +{ | |
89 | + u8 **dptr = (u8 **)ptrs; | |
90 | + u8 *p, *q; | |
91 | + int d, z, z0; | |
92 | + | |
93 | + z0 = disks - 3; /* Highest data disk */ | |
94 | + p = dptr[z0+1]; /* XOR parity */ | |
95 | + q = dptr[z0+2]; /* RS syndrome */ | |
96 | + | |
97 | + kernel_fpu_begin(); | |
98 | + | |
99 | + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); | |
100 | + asm volatile("pxor %mm5,%mm5"); /* Zero temp */ | |
101 | + asm volatile("pxor %mm7,%mm7"); /* Zero temp */ | |
102 | + | |
103 | + for ( d = 0 ; d < bytes ; d += 16 ) { /* two 8-byte lanes per pass */ | |
104 | + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ | |
105 | + asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); | |
106 | + asm volatile("movq %mm2,%mm4"); /* Q[0] */ | |
107 | + asm volatile("movq %mm3,%mm6"); /* Q[1] */ | |
108 | + for ( z = z0-1 ; z >= 0 ; z-- ) { | |
109 | + asm volatile("pcmpgtb %mm4,%mm5"); /* 0xFF in bytes where Q has its high bit set */ | |
110 | + asm volatile("pcmpgtb %mm6,%mm7"); | |
111 | + asm volatile("paddb %mm4,%mm4"); /* shift each byte of Q left by one */ | |
112 | + asm volatile("paddb %mm6,%mm6"); | |
113 | + asm volatile("pand %mm0,%mm5"); /* 0x1d only in the bytes that overflowed */ | |
114 | + asm volatile("pand %mm0,%mm7"); | |
115 | + asm volatile("pxor %mm5,%mm4"); | |
116 | + asm volatile("pxor %mm7,%mm6"); | |
117 | + asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); /* temps now hold this disk's data */ | |
118 | + asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); | |
119 | + asm volatile("pxor %mm5,%mm2"); /* P ^= data */ | |
120 | + asm volatile("pxor %mm7,%mm3"); | |
121 | + asm volatile("pxor %mm5,%mm4"); /* Q ^= data */ | |
122 | + asm volatile("pxor %mm7,%mm6"); | |
123 | + asm volatile("pxor %mm5,%mm5"); /* back to zero for the next compare */ | |
124 | + asm volatile("pxor %mm7,%mm7"); | |
125 | + } | |
126 | + asm volatile("movq %%mm2,%0" : "=m" (p[d])); | |
127 | + asm volatile("movq %%mm3,%0" : "=m" (p[d+8])); | |
128 | + asm volatile("movq %%mm4,%0" : "=m" (q[d])); | |
129 | + asm volatile("movq %%mm6,%0" : "=m" (q[d+8])); | |
130 | + } | |
131 | + | |
132 | + kernel_fpu_end(); | |
133 | +} | |
134 | +/* Routine-set descriptor; the fields look like gen_syndrome, valid, name, prefer - confirm the order against pq.h */ | |
135 | +const struct raid6_calls raid6_mmxx2 = { | |
136 | + raid6_mmx2_gen_syndrome, | |
137 | + raid6_have_mmx, | |
138 | + "mmxx2", | |
139 | + 0 | |
140 | +}; | |
141 | + | |
142 | +#endif |
lib/raid6/raid6recov.c
1 | +/* -*- linux-c -*- ------------------------------------------------------- * | |
2 | + * | |
3 | + * Copyright 2002 H. Peter Anvin - All Rights Reserved | |
4 | + * | |
5 | + * This program is free software; you can redistribute it and/or modify | |
6 | + * it under the terms of the GNU General Public License as published by | |
7 | + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | |
8 | + * Boston MA 02111-1307, USA; either version 2 of the License, or | |
9 | + * (at your option) any later version; incorporated herein by reference. | |
10 | + * | |
11 | + * ----------------------------------------------------------------------- */ | |
12 | + | |
13 | +/* | |
14 | + * raid6recov.c | |
15 | + * | |
16 | + * RAID-6 data recovery in dual failure mode. In single failure mode, | |
17 | + * use the RAID-5 algorithm (or, in the case of Q failure, just reconstruct | |
18 | + * the syndrome.) | |
19 | + */ | |
20 | + | |
21 | +#include <linux/raid/pq.h> | |
22 | + | |
23 | +/* Recover two failed data blocks in place. NOTE(review): indexes raid6_gfexi[failb-faila], so faila < failb appears to be required - confirm with callers. */ | |
24 | +void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, | |
25 | + void **ptrs) | |
26 | +{ | |
27 | + u8 *p, *q, *dp, *dq; | |
28 | + u8 px, qx, db; | |
29 | + const u8 *pbmul; /* P multiplier table for B data */ | |
30 | + const u8 *qmul; /* Q multiplier table (for both) */ | |
31 | + | |
32 | + p = (u8 *)ptrs[disks-2]; | |
33 | + q = (u8 *)ptrs[disks-1]; | |
34 | + | |
35 | + /* Compute syndrome with zero for the missing data pages | |
36 | + Use the dead data pages as temporary storage for | |
37 | + delta p and delta q */ | |
38 | + dp = (u8 *)ptrs[faila]; | |
39 | + ptrs[faila] = (void *)raid6_empty_zero_page; | |
40 | + ptrs[disks-2] = dp; /* P output is redirected into failed block A */ | |
41 | + dq = (u8 *)ptrs[failb]; | |
42 | + ptrs[failb] = (void *)raid6_empty_zero_page; | |
43 | + ptrs[disks-1] = dq; /* Q output is redirected into failed block B */ | |
44 | + | |
45 | + raid6_call.gen_syndrome(disks, bytes, ptrs); | |
46 | + | |
47 | + /* Restore pointer table */ | |
48 | + ptrs[faila] = dp; | |
49 | + ptrs[failb] = dq; | |
50 | + ptrs[disks-2] = p; | |
51 | + ptrs[disks-1] = q; | |
52 | + | |
53 | + /* Now, pick the proper data tables */ | |
54 | + pbmul = raid6_gfmul[raid6_gfexi[failb-faila]]; /* gfmul row for the raid6_gfexi entry at failb-faila */ | |
55 | + qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]]; /* gfmul row for the inverse of gfexp[faila] ^ gfexp[failb] */ | |
56 | + | |
57 | + /* Now do it... */ | |
58 | + while ( bytes-- ) { | |
59 | + px = *p ^ *dp; /* stored P against P recomputed with zeroes */ | |
60 | + qx = qmul[*q ^ *dq]; | |
61 | + *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */ | |
62 | + *dp++ = db ^ px; /* Reconstructed A */ | |
63 | + p++; q++; | |
64 | + } | |
65 | +} | |
66 | +EXPORT_SYMBOL_GPL(raid6_2data_recov); | |
67 | + | |
68 | +/* Recover failure of one data block plus the P block; both are rebuilt in place from Q and the surviving data */ | |
69 | +void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) | |
70 | +{ | |
71 | + u8 *p, *q, *dq; | |
72 | + const u8 *qmul; /* Q multiplier table */ | |
73 | + | |
74 | + p = (u8 *)ptrs[disks-2]; | |
75 | + q = (u8 *)ptrs[disks-1]; | |
76 | + | |
77 | + /* Compute syndrome with zero for the missing data page | |
78 | + Use the dead data page as temporary storage for delta q */ | |
79 | + dq = (u8 *)ptrs[faila]; | |
80 | + ptrs[faila] = (void *)raid6_empty_zero_page; | |
81 | + ptrs[disks-1] = dq; /* Q output is redirected into the failed data block; P is written to its own page */ | |
82 | + | |
83 | + raid6_call.gen_syndrome(disks, bytes, ptrs); | |
84 | + | |
85 | + /* Restore pointer table */ | |
86 | + ptrs[faila] = dq; | |
87 | + ptrs[disks-1] = q; | |
88 | + | |
89 | + /* Now, pick the proper data tables */ | |
90 | + qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]]; /* gfmul row for the inverse of gfexp[faila] */ | |
91 | + | |
92 | + /* Now do it... */ | |
93 | + while ( bytes-- ) { | |
94 | + *p++ ^= *dq = qmul[*q ^ *dq]; /* store the rebuilt data byte, then XOR it into the partial P */ | |
95 | + q++; dq++; | |
96 | + } | |
97 | +} | |
98 | +EXPORT_SYMBOL_GPL(raid6_datap_recov); | |
99 | + | |
100 | +#ifndef __KERNEL__ | |
101 | +/* Testing only */ | |
102 | + | |
103 | +/* Recover two failed blocks by dispatching on which ones failed; the data+Q case is left unimplemented here. */ | |
104 | +void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs) | |
105 | +{ | |
106 | + if ( faila > failb ) { /* order the pair so that faila <= failb */ | |
107 | + int tmp = faila; | |
108 | + faila = failb; | |
109 | + failb = tmp; | |
110 | + } | |
111 | + | |
112 | + if ( failb == disks-1 ) { /* the Q block is one of the failures */ | |
113 | + if ( faila == disks-2 ) { | |
114 | + /* P+Q failure. Just rebuild the syndrome. */ | |
115 | + raid6_call.gen_syndrome(disks, bytes, ptrs); | |
116 | + } else { | |
117 | + /* data+Q failure. Reconstruct data from P, | |
118 | + then rebuild syndrome. */ | |
119 | + /* NOT IMPLEMENTED - equivalent to RAID-5 */ | |
120 | + } | |
121 | + } else { | |
122 | + if ( failb == disks-2 ) { | |
123 | + /* data+P failure. */ | |
124 | + raid6_datap_recov(disks, bytes, faila, ptrs); | |
125 | + } else { | |
126 | + /* data+data failure. */ | |
127 | + raid6_2data_recov(disks, bytes, faila, failb, ptrs); | |
128 | + } | |
129 | + } | |
130 | +} | |
131 | + | |
132 | +#endif |
lib/raid6/raid6sse1.c
1 | +/* -*- linux-c -*- ------------------------------------------------------- * | |
2 | + * | |
3 | + * Copyright 2002 H. Peter Anvin - All Rights Reserved | |
4 | + * | |
5 | + * This program is free software; you can redistribute it and/or modify | |
6 | + * it under the terms of the GNU General Public License as published by | |
7 | + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | |
8 | + * Boston MA 02111-1307, USA; either version 2 of the License, or | |
9 | + * (at your option) any later version; incorporated herein by reference. | |
10 | + * | |
11 | + * ----------------------------------------------------------------------- */ | |
12 | + | |
13 | +/* | |
14 | + * raid6sse1.c | |
15 | + * | |
16 | + * SSE-1/MMXEXT implementation of RAID-6 syndrome functions | |
17 | + * | |
18 | + * This is really an MMX implementation, but it requires SSE-1 or | |
19 | + * AMD MMXEXT for prefetch support and a few other features. The | |
20 | + * support for nontemporal memory accesses is enough to make this | |
21 | + * worthwhile as a separate implementation. | |
22 | + */ | |
23 | + | |
24 | +#if defined(__i386__) && !defined(__arch_um__) | |
25 | + | |
26 | +#include <linux/raid/pq.h> | |
27 | +#include "raid6x86.h" | |
28 | + | |
29 | +/* Shared constant block, defined in raid6mmx.c; its x1d member is loaded into %mm0 by the syndrome loops in this file */ | |
30 | +extern const struct raid6_mmx_constants { | |
31 | + u64 x1d; | |
32 | +} raid6_mmx_constants; | |
33 | + | |
34 | +static int raid6_have_sse1_or_mmxext(void) | |
35 | +{ | |
36 | +	/* boot_cpu_has() is used here as an "all CPUs" check, not a boot-CPU one */ | |
37 | +	if (!boot_cpu_has(X86_FEATURE_MMX)) | |
38 | +		return 0; | |
39 | +	return boot_cpu_has(X86_FEATURE_XMM) || boot_cpu_has(X86_FEATURE_MMXEXT); | |
40 | +} | |
41 | + | |
42 | +/* | |
43 | + * Plain SSE1 implementation: computes P (XOR parity) and Q (RS syndrome) one 8-byte column per pass in MMX registers | |
44 | + */ | |
45 | +static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
46 | +{ | |
47 | + u8 **dptr = (u8 **)ptrs; | |
48 | + u8 *p, *q; | |
49 | + int d, z, z0; | |
50 | + | |
51 | + z0 = disks - 3; /* Highest data disk */ | |
52 | + p = dptr[z0+1]; /* XOR parity */ | |
53 | + q = dptr[z0+2]; /* RS syndrome */ | |
54 | + | |
55 | + kernel_fpu_begin(); /* brackets the MMX register use below; paired with kernel_fpu_end() */ | |
56 | + | |
57 | + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); /* mm0 = x1d constant for the whole run */ | |
58 | + asm volatile("pxor %mm5,%mm5"); /* Zero temp */ | |
59 | + | |
60 | + for ( d = 0 ; d < bytes ; d += 8 ) { /* one 8-byte column per iteration */ | |
61 | + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); | |
62 | + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ | |
63 | + asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); | |
64 | + asm volatile("movq %mm2,%mm4"); /* Q[0] */ | |
65 | + asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d])); /* mm6 = data of the next disk; NOTE(review): dptr[z0-1] is read unconditionally, so this presumably requires disks >= 4 - confirm with callers */ | |
66 | + for ( z = z0-2 ; z >= 0 ; z-- ) { /* mm6 always holds the data that the current step folds in */ | |
67 | + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | |
68 | + asm volatile("pcmpgtb %mm4,%mm5"); /* mm5 = 0xff in every byte where Q is negative as a signed byte (top bit set) */ | |
69 | + asm volatile("paddb %mm4,%mm4"); /* Q = Q + Q, i.e. each byte doubled */ | |
70 | + asm volatile("pand %mm0,%mm5"); /* keep the x1d constant only in the flagged bytes */ | |
71 | + asm volatile("pxor %mm5,%mm4"); /* fold that into the doubled Q */ | |
72 | + asm volatile("pxor %mm5,%mm5"); /* re-zero temp for the next compare */ | |
73 | + asm volatile("pxor %mm6,%mm2"); /* P ^= data */ | |
74 | + asm volatile("pxor %mm6,%mm4"); /* Q ^= data */ | |
75 | + asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); /* fetch disk z for the following step */ | |
76 | + } | |
77 | + asm volatile("pcmpgtb %mm4,%mm5"); /* final step: same sequence for the last loaded disk, without a further load */ | |
78 | + asm volatile("paddb %mm4,%mm4"); | |
79 | + asm volatile("pand %mm0,%mm5"); | |
80 | + asm volatile("pxor %mm5,%mm4"); | |
81 | + asm volatile("pxor %mm5,%mm5"); | |
82 | + asm volatile("pxor %mm6,%mm2"); | |
83 | + asm volatile("pxor %mm6,%mm4"); | |
84 | + | |
85 | + asm volatile("movntq %%mm2,%0" : "=m" (p[d])); /* non-temporal store of P */ | |
86 | + asm volatile("movntq %%mm4,%0" : "=m" (q[d])); /* non-temporal store of Q */ | |
87 | + } | |
88 | + | |
89 | + asm volatile("sfence" : : : "memory"); /* fence after the non-temporal stores */ | |
90 | + kernel_fpu_end(); | |
91 | +} | |
92 | + | |
93 | +const struct raid6_calls raid6_sse1x1 = { | |
94 | + raid6_sse11_gen_syndrome, /* syndrome generator */ | |
95 | + raid6_have_sse1_or_mmxext, /* CPU feature check */ | |
96 | + "sse1x1", /* algorithm name */ | |
97 | + 1 /* Has cache hints */ | |
98 | +}; | |
99 | + | |
100 | +/* | |
101 | + * Unrolled-by-2 SSE1 implementation: two 8-byte lanes per pass (P in mm2/mm3, Q in mm4/mm6, temps in mm5/mm7) | |
102 | + */ | |
103 | +static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
104 | +{ | |
105 | + u8 **dptr = (u8 **)ptrs; | |
106 | + u8 *p, *q; | |
107 | + int d, z, z0; | |
108 | + | |
109 | + z0 = disks - 3; /* Highest data disk */ | |
110 | + p = dptr[z0+1]; /* XOR parity */ | |
111 | + q = dptr[z0+2]; /* RS syndrome */ | |
112 | + | |
113 | + kernel_fpu_begin(); /* brackets the MMX register use below; paired with kernel_fpu_end() */ | |
114 | + | |
115 | + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); /* mm0 = x1d constant for the whole run */ | |
116 | + asm volatile("pxor %mm5,%mm5"); /* Zero temp */ | |
117 | + asm volatile("pxor %mm7,%mm7"); /* Zero temp */ | |
118 | + | |
119 | + /* We uniformly assume a single prefetch covers at least 16 bytes */ | |
120 | + for ( d = 0 ; d < bytes ; d += 16 ) { /* one 16-byte column (two lanes) per iteration */ | |
121 | + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); | |
122 | + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ | |
123 | + asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */ | |
124 | + asm volatile("movq %mm2,%mm4"); /* Q[0] */ | |
125 | + asm volatile("movq %mm3,%mm6"); /* Q[1] */ | |
126 | + for ( z = z0-1 ; z >= 0 ; z-- ) { /* fold in the remaining data disks, highest first */ | |
127 | + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | |
128 | + asm volatile("pcmpgtb %mm4,%mm5"); /* temp = 0xff in every byte where Q has its top bit set */ | |
129 | + asm volatile("pcmpgtb %mm6,%mm7"); | |
130 | + asm volatile("paddb %mm4,%mm4"); /* Q = Q + Q, i.e. each byte doubled */ | |
131 | + asm volatile("paddb %mm6,%mm6"); | |
132 | + asm volatile("pand %mm0,%mm5"); /* keep the x1d constant only in the flagged bytes */ | |
133 | + asm volatile("pand %mm0,%mm7"); | |
134 | + asm volatile("pxor %mm5,%mm4"); /* fold that into the doubled Q */ | |
135 | + asm volatile("pxor %mm7,%mm6"); | |
136 | + asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); /* temps are reused to hold disk z's data */ | |
137 | + asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); | |
138 | + asm volatile("pxor %mm5,%mm2"); /* P ^= data */ | |
139 | + asm volatile("pxor %mm7,%mm3"); | |
140 | + asm volatile("pxor %mm5,%mm4"); /* Q ^= data */ | |
141 | + asm volatile("pxor %mm7,%mm6"); | |
142 | + asm volatile("pxor %mm5,%mm5"); /* re-zero temps for the next compare */ | |
143 | + asm volatile("pxor %mm7,%mm7"); | |
144 | + } | |
145 | + asm volatile("movntq %%mm2,%0" : "=m" (p[d])); /* non-temporal stores of P, then Q */ | |
146 | + asm volatile("movntq %%mm3,%0" : "=m" (p[d+8])); | |
147 | + asm volatile("movntq %%mm4,%0" : "=m" (q[d])); | |
148 | + asm volatile("movntq %%mm6,%0" : "=m" (q[d+8])); | |
149 | + } | |
150 | + | |
151 | + asm volatile("sfence" : :: "memory"); /* fence after the non-temporal stores */ | |
152 | + kernel_fpu_end(); | |
153 | +} | |
154 | + | |
155 | +const struct raid6_calls raid6_sse1x2 = { | |
156 | + raid6_sse12_gen_syndrome, /* syndrome generator */ | |
157 | + raid6_have_sse1_or_mmxext, /* CPU feature check */ | |
158 | + "sse1x2", /* algorithm name */ | |
159 | + 1 /* Has cache hints */ | |
160 | +}; | |
161 | + | |
162 | +#endif |
lib/raid6/raid6sse2.c
1 | +/* -*- linux-c -*- ------------------------------------------------------- * | |
2 | + * | |
3 | + * Copyright 2002 H. Peter Anvin - All Rights Reserved | |
4 | + * | |
5 | + * This program is free software; you can redistribute it and/or modify | |
6 | + * it under the terms of the GNU General Public License as published by | |
7 | + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | |
8 | + * Boston MA 02111-1307, USA; either version 2 of the License, or | |
9 | + * (at your option) any later version; incorporated herein by reference. | |
10 | + * | |
11 | + * ----------------------------------------------------------------------- */ | |
12 | + | |
13 | +/* | |
14 | + * raid6sse2.c | |
15 | + * | |
16 | + * SSE-2 implementation of RAID-6 syndrome functions | |
17 | + * | |
18 | + */ | |
19 | + | |
20 | +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) | |
21 | + | |
22 | +#include <linux/raid/pq.h> | |
23 | +#include "raid6x86.h" | |
24 | + | |
25 | +static const struct raid6_sse_constants { | |
26 | + u64 x1d[2]; /* 16 bytes, each 0x1d; loaded into %xmm0 by the syndrome loops */ | |
27 | +} raid6_sse_constants __attribute__((aligned(16))) = { /* 16-byte aligned because it is read with movdqa */ | |
28 | + { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL }, | |
29 | +}; | |
30 | + | |
31 | +static int raid6_have_sse2(void) | |
32 | +{ | |
33 | +	/* boot_cpu_has() is used here as an "all CPUs" check, not a boot-CPU one */ | |
34 | +	if (!boot_cpu_has(X86_FEATURE_MMX) || !boot_cpu_has(X86_FEATURE_FXSR)) | |
35 | +		return 0; | |
36 | + | |
37 | +	return boot_cpu_has(X86_FEATURE_XMM) && boot_cpu_has(X86_FEATURE_XMM2); | |
38 | +} | |
39 | + | |
40 | +/* | |
41 | + * Plain SSE2 implementation: one 16-byte column per pass in XMM registers; uses movdqa/movntdq, so the buffers are presumably 16-byte aligned (confirm with callers) | |
42 | + */ | |
43 | +static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
44 | +{ | |
45 | + u8 **dptr = (u8 **)ptrs; | |
46 | + u8 *p, *q; | |
47 | + int d, z, z0; | |
48 | + | |
49 | + z0 = disks - 3; /* Highest data disk */ | |
50 | + p = dptr[z0+1]; /* XOR parity */ | |
51 | + q = dptr[z0+2]; /* RS syndrome */ | |
52 | + | |
53 | + kernel_fpu_begin(); /* brackets the XMM register use below; paired with kernel_fpu_end() */ | |
54 | + | |
55 | + asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); /* xmm0 = 0x1d in every byte */ | |
56 | + asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ | |
57 | + | |
58 | + for ( d = 0 ; d < bytes ; d += 16 ) { /* one 16-byte column per iteration */ | |
59 | + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); | |
60 | + asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */ | |
61 | + asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); | |
62 | + asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */ | |
63 | + asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d])); /* xmm6 = data of the next disk; NOTE(review): dptr[z0-1] is read unconditionally, so this presumably requires disks >= 4 - confirm with callers */ | |
64 | + for ( z = z0-2 ; z >= 0 ; z-- ) { /* xmm6 always holds the data that the current step folds in */ | |
65 | + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | |
66 | + asm volatile("pcmpgtb %xmm4,%xmm5"); /* xmm5 = 0xff in every byte where Q is negative as a signed byte (top bit set) */ | |
67 | + asm volatile("paddb %xmm4,%xmm4"); /* Q = Q + Q, i.e. each byte doubled */ | |
68 | + asm volatile("pand %xmm0,%xmm5"); /* keep 0x1d only in the flagged bytes */ | |
69 | + asm volatile("pxor %xmm5,%xmm4"); /* fold that into the doubled Q */ | |
70 | + asm volatile("pxor %xmm5,%xmm5"); /* re-zero temp for the next compare */ | |
71 | + asm volatile("pxor %xmm6,%xmm2"); /* P ^= data */ | |
72 | + asm volatile("pxor %xmm6,%xmm4"); /* Q ^= data */ | |
73 | + asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d])); /* fetch disk z for the following step */ | |
74 | + } | |
75 | + asm volatile("pcmpgtb %xmm4,%xmm5"); /* final step: same sequence for the last loaded disk, without a further load */ | |
76 | + asm volatile("paddb %xmm4,%xmm4"); | |
77 | + asm volatile("pand %xmm0,%xmm5"); | |
78 | + asm volatile("pxor %xmm5,%xmm4"); | |
79 | + asm volatile("pxor %xmm5,%xmm5"); | |
80 | + asm volatile("pxor %xmm6,%xmm2"); | |
81 | + asm volatile("pxor %xmm6,%xmm4"); | |
82 | + | |
83 | + asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); /* non-temporal store of P */ | |
84 | + asm volatile("pxor %xmm2,%xmm2"); /* cleared here; reloaded at the top of the next pass */ | |
85 | + asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); /* non-temporal store of Q */ | |
86 | + asm volatile("pxor %xmm4,%xmm4"); /* cleared here; overwritten from xmm2 at the top of the next pass */ | |
87 | + } | |
88 | + | |
89 | + asm volatile("sfence" : : : "memory"); /* fence after the non-temporal stores */ | |
90 | + kernel_fpu_end(); | |
91 | +} | |
92 | + | |
93 | +const struct raid6_calls raid6_sse2x1 = { | |
94 | + raid6_sse21_gen_syndrome, /* syndrome generator */ | |
95 | + raid6_have_sse2, /* CPU feature check */ | |
96 | + "sse2x1", /* algorithm name */ | |
97 | + 1 /* Has cache hints */ | |
98 | +}; | |
99 | + | |
100 | +/* | |
101 | + * Unrolled-by-2 SSE2 implementation: two 16-byte lanes per pass (P in xmm2/xmm3, Q in xmm4/xmm6, temps in xmm5/xmm7) | |
102 | + */ | |
103 | +static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
104 | +{ | |
105 | + u8 **dptr = (u8 **)ptrs; | |
106 | + u8 *p, *q; | |
107 | + int d, z, z0; | |
108 | + | |
109 | + z0 = disks - 3; /* Highest data disk */ | |
110 | + p = dptr[z0+1]; /* XOR parity */ | |
111 | + q = dptr[z0+2]; /* RS syndrome */ | |
112 | + | |
113 | + kernel_fpu_begin(); /* brackets the XMM register use below; paired with kernel_fpu_end() */ | |
114 | + | |
115 | + asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); /* xmm0 = 0x1d in every byte */ | |
116 | + asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ | |
117 | + asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */ | |
118 | + | |
119 | + /* We uniformly assume a single prefetch covers at least 32 bytes */ | |
120 | + for ( d = 0 ; d < bytes ; d += 32 ) { /* one 32-byte column (two lanes) per iteration */ | |
121 | + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); | |
122 | + asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */ | |
123 | + asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */ | |
124 | + asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */ | |
125 | + asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */ | |
126 | + for ( z = z0-1 ; z >= 0 ; z-- ) { /* fold in the remaining data disks, highest first */ | |
127 | + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | |
128 | + asm volatile("pcmpgtb %xmm4,%xmm5"); /* temp = 0xff in every byte where Q has its top bit set */ | |
129 | + asm volatile("pcmpgtb %xmm6,%xmm7"); | |
130 | + asm volatile("paddb %xmm4,%xmm4"); /* Q = Q + Q, i.e. each byte doubled */ | |
131 | + asm volatile("paddb %xmm6,%xmm6"); | |
132 | + asm volatile("pand %xmm0,%xmm5"); /* keep 0x1d only in the flagged bytes */ | |
133 | + asm volatile("pand %xmm0,%xmm7"); | |
134 | + asm volatile("pxor %xmm5,%xmm4"); /* fold that into the doubled Q */ | |
135 | + asm volatile("pxor %xmm7,%xmm6"); | |
136 | + asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d])); /* temps are reused to hold disk z's data */ | |
137 | + asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16])); | |
138 | + asm volatile("pxor %xmm5,%xmm2"); /* P ^= data */ | |
139 | + asm volatile("pxor %xmm7,%xmm3"); | |
140 | + asm volatile("pxor %xmm5,%xmm4"); /* Q ^= data */ | |
141 | + asm volatile("pxor %xmm7,%xmm6"); | |
142 | + asm volatile("pxor %xmm5,%xmm5"); /* re-zero temps for the next compare */ | |
143 | + asm volatile("pxor %xmm7,%xmm7"); | |
144 | + } | |
145 | + asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); /* non-temporal stores of P, then Q */ | |
146 | + asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); | |
147 | + asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); | |
148 | + asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); | |
149 | + } | |
150 | + | |
151 | + asm volatile("sfence" : : : "memory"); /* fence after the non-temporal stores */ | |
152 | + kernel_fpu_end(); | |
153 | +} | |
154 | + | |
155 | +const struct raid6_calls raid6_sse2x2 = { | |
156 | + raid6_sse22_gen_syndrome, /* syndrome generator */ | |
157 | + raid6_have_sse2, /* CPU feature check */ | |
158 | + "sse2x2", /* algorithm name */ | |
159 | + 1 /* Has cache hints */ | |
160 | +}; | |
161 | + | |
162 | +#endif | |
163 | + | |
164 | +#if defined(__x86_64__) && !defined(__arch_um__) | |
165 | + | |
166 | +/* | |
167 | + * Unrolled-by-4 SSE2 implementation: four 16-byte lanes per pass, using xmm10-xmm15 in addition to the low registers (built for x86-64 only) | |
168 | + */ | |
169 | +static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
170 | +{ | |
171 | + u8 **dptr = (u8 **)ptrs; | |
172 | + u8 *p, *q; | |
173 | + int d, z, z0; | |
174 | + | |
175 | + z0 = disks - 3; /* Highest data disk */ | |
176 | + p = dptr[z0+1]; /* XOR parity */ | |
177 | + q = dptr[z0+2]; /* RS syndrome */ | |
178 | + | |
179 | + kernel_fpu_begin(); /* brackets the XMM register use below; paired with kernel_fpu_end() */ | |
180 | + | |
181 | + asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0])); /* xmm0 = 0x1d in every byte */ | |
182 | + asm volatile("pxor %xmm2,%xmm2"); /* P[0] */ | |
183 | + asm volatile("pxor %xmm3,%xmm3"); /* P[1] */ | |
184 | + asm volatile("pxor %xmm4,%xmm4"); /* Q[0] */ | |
185 | + asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ | |
186 | + asm volatile("pxor %xmm6,%xmm6"); /* Q[1] */ | |
187 | + asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */ | |
188 | + asm volatile("pxor %xmm10,%xmm10"); /* P[2] */ | |
189 | + asm volatile("pxor %xmm11,%xmm11"); /* P[3] */ | |
190 | + asm volatile("pxor %xmm12,%xmm12"); /* Q[2] */ | |
191 | + asm volatile("pxor %xmm13,%xmm13"); /* Zero temp */ | |
192 | + asm volatile("pxor %xmm14,%xmm14"); /* Q[3] */ | |
193 | + asm volatile("pxor %xmm15,%xmm15"); /* Zero temp */ | |
194 | + | |
195 | + for ( d = 0 ; d < bytes ; d += 64 ) { /* one 64-byte column (four lanes) per iteration */ | |
196 | + for ( z = z0 ; z >= 0 ; z-- ) { /* accumulators start at zero, so every data disk goes through the same step */ | |
197 | + /* The second prefetch seems to improve performance... */ | |
198 | + asm volatile("prefetchnta %0" :: "m" (dptr[z][d])); | |
199 | + asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32])); | |
200 | + asm volatile("pcmpgtb %xmm4,%xmm5"); /* temp = 0xff in every byte where Q has its top bit set */ | |
201 | + asm volatile("pcmpgtb %xmm6,%xmm7"); | |
202 | + asm volatile("pcmpgtb %xmm12,%xmm13"); | |
203 | + asm volatile("pcmpgtb %xmm14,%xmm15"); | |
204 | + asm volatile("paddb %xmm4,%xmm4"); /* Q = Q + Q, i.e. each byte doubled */ | |
205 | + asm volatile("paddb %xmm6,%xmm6"); | |
206 | + asm volatile("paddb %xmm12,%xmm12"); | |
207 | + asm volatile("paddb %xmm14,%xmm14"); | |
208 | + asm volatile("pand %xmm0,%xmm5"); /* keep 0x1d only in the flagged bytes */ | |
209 | + asm volatile("pand %xmm0,%xmm7"); | |
210 | + asm volatile("pand %xmm0,%xmm13"); | |
211 | + asm volatile("pand %xmm0,%xmm15"); | |
212 | + asm volatile("pxor %xmm5,%xmm4"); /* fold that into the doubled Q */ | |
213 | + asm volatile("pxor %xmm7,%xmm6"); | |
214 | + asm volatile("pxor %xmm13,%xmm12"); | |
215 | + asm volatile("pxor %xmm15,%xmm14"); | |
216 | + asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); /* temps are reused to hold disk z's data */ | |
217 | + asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16])); | |
218 | + asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32])); | |
219 | + asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48])); | |
220 | + asm volatile("pxor %xmm5,%xmm2"); /* P ^= data */ | |
221 | + asm volatile("pxor %xmm7,%xmm3"); | |
222 | + asm volatile("pxor %xmm13,%xmm10"); | |
223 | + asm volatile("pxor %xmm15,%xmm11"); | |
224 | + asm volatile("pxor %xmm5,%xmm4"); /* Q ^= data */ | |
225 | + asm volatile("pxor %xmm7,%xmm6"); | |
226 | + asm volatile("pxor %xmm13,%xmm12"); | |
227 | + asm volatile("pxor %xmm15,%xmm14"); | |
228 | + asm volatile("pxor %xmm5,%xmm5"); /* re-zero temps for the next compare */ | |
229 | + asm volatile("pxor %xmm7,%xmm7"); | |
230 | + asm volatile("pxor %xmm13,%xmm13"); | |
231 | + asm volatile("pxor %xmm15,%xmm15"); | |
232 | + } | |
233 | + asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); /* non-temporal store of each P lane, then reset it for the next column */ | |
234 | + asm volatile("pxor %xmm2,%xmm2"); | |
235 | + asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); | |
236 | + asm volatile("pxor %xmm3,%xmm3"); | |
237 | + asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32])); | |
238 | + asm volatile("pxor %xmm10,%xmm10"); | |
239 | + asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48])); | |
240 | + asm volatile("pxor %xmm11,%xmm11"); | |
241 | + asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); /* same for each Q lane */ | |
242 | + asm volatile("pxor %xmm4,%xmm4"); | |
243 | + asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); | |
244 | + asm volatile("pxor %xmm6,%xmm6"); | |
245 | + asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32])); | |
246 | + asm volatile("pxor %xmm12,%xmm12"); | |
247 | + asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48])); | |
248 | + asm volatile("pxor %xmm14,%xmm14"); | |
249 | + } | |
250 | + | |
251 | + asm volatile("sfence" : : : "memory"); /* fence after the non-temporal stores */ | |
252 | + kernel_fpu_end(); | |
253 | +} | |
254 | + | |
255 | +const struct raid6_calls raid6_sse2x4 = { | |
256 | + raid6_sse24_gen_syndrome, /* syndrome generator */ | |
257 | + raid6_have_sse2, /* CPU feature check */ | |
258 | + "sse2x4", /* algorithm name */ | |
259 | + 1 /* Has cache hints */ | |
260 | +}; | |
261 | + | |
262 | +#endif |
lib/raid6/raid6test/Makefile
1 | +# | |
2 | +# This is a simple Makefile to test some of the RAID-6 code | |
3 | +# from userspace. | |
4 | +# | |
5 | + | |
6 | +CC = gcc | |
7 | +OPTFLAGS = -O2 # Adjust as desired | |
8 | +CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS) | |
9 | +LD = ld | |
10 | +PERL = perl | |
11 | +AR = ar | |
12 | +RANLIB = ranlib | |
13 | + | |
14 | +.c.o: | |
15 | + $(CC) $(CFLAGS) -c -o $@ $< | |
16 | + | |
17 | +%.c: ../%.c | |
18 | + cp -f $< $@ | |
19 | + | |
20 | +%.uc: ../%.uc | |
21 | + cp -f $< $@ | |
22 | + | |
23 | +all: raid6.a raid6test | |
24 | + | |
25 | +raid6.a: raid6int1.o raid6int2.o raid6int4.o raid6int8.o raid6int16.o \ | |
26 | + raid6int32.o \ | |
27 | + raid6mmx.o raid6sse1.o raid6sse2.o \ | |
28 | + raid6altivec1.o raid6altivec2.o raid6altivec4.o raid6altivec8.o \ | |
29 | + raid6recov.o raid6algos.o \ | |
30 | + raid6tables.o | |
31 | + rm -f $@ | |
32 | + $(AR) cq $@ $^ | |
33 | + $(RANLIB) $@ | |
34 | + | |
35 | +raid6test: test.c raid6.a | |
36 | + $(CC) $(CFLAGS) -o raid6test $^ | |
37 | + | |
38 | +raid6altivec1.c: raid6altivec.uc ../unroll.pl | |
39 | + $(PERL) ../unroll.pl 1 < raid6altivec.uc > $@ | |
40 | + | |
41 | +raid6altivec2.c: raid6altivec.uc ../unroll.pl | |
42 | + $(PERL) ../unroll.pl 2 < raid6altivec.uc > $@ | |
43 | + | |
44 | +raid6altivec4.c: raid6altivec.uc ../unroll.pl | |
45 | + $(PERL) ../unroll.pl 4 < raid6altivec.uc > $@ | |
46 | + | |
47 | +raid6altivec8.c: raid6altivec.uc ../unroll.pl | |
48 | + $(PERL) ../unroll.pl 8 < raid6altivec.uc > $@ | |
49 | + | |
50 | +raid6int1.c: raid6int.uc ../unroll.pl | |
51 | + $(PERL) ../unroll.pl 1 < raid6int.uc > $@ | |
52 | + | |
53 | +raid6int2.c: raid6int.uc ../unroll.pl | |
54 | + $(PERL) ../unroll.pl 2 < raid6int.uc > $@ | |
55 | + | |
56 | +raid6int4.c: raid6int.uc ../unroll.pl | |
57 | + $(PERL) ../unroll.pl 4 < raid6int.uc > $@ | |
58 | + | |
59 | +raid6int8.c: raid6int.uc ../unroll.pl | |
60 | + $(PERL) ../unroll.pl 8 < raid6int.uc > $@ | |
61 | + | |
62 | +raid6int16.c: raid6int.uc ../unroll.pl | |
63 | + $(PERL) ../unroll.pl 16 < raid6int.uc > $@ | |
64 | + | |
65 | +raid6int32.c: raid6int.uc ../unroll.pl | |
66 | + $(PERL) ../unroll.pl 32 < raid6int.uc > $@ | |
67 | + | |
68 | +raid6tables.c: mktables | |
69 | + ./mktables > raid6tables.c | |
70 | + | |
71 | +clean: # removes build products plus every file copied or generated from the parent directory | |
72 | +	rm -f *.o *.a mktables mktables.c *.uc raid6*.c raid6test | |
73 | + | |
74 | +spotless: clean | |
75 | + rm -f *~ |
lib/raid6/raid6test/test.c
1 | +/* -*- linux-c -*- ------------------------------------------------------- * | |
2 | + * | |
3 | + * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved | |
4 | + * | |
5 | + * This file is part of the Linux kernel, and is made available under | |
6 | + * the terms of the GNU General Public License version 2 or (at your | |
7 | + * option) any later version; incorporated herein by reference. | |
8 | + * | |
9 | + * ----------------------------------------------------------------------- */ | |
10 | + | |
11 | +/* | |
12 | + * raid6test.c | |
13 | + * | |
14 | + * Test RAID-6 recovery with various algorithms | |
15 | + */ | |
16 | + | |
17 | +#include <stdlib.h> | |
18 | +#include <stdio.h> | |
19 | +#include <string.h> | |
20 | +#include <linux/raid/pq.h> | |
21 | + | |
22 | +#define NDISKS 16 /* Including P and Q */ | |
23 | + | |
24 | +const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); /* zero-filled page; not referenced in this file, presumably needed by the library code - confirm */ | |
25 | +struct raid6_calls raid6_call; /* algorithm currently under test */ | |
26 | + | |
27 | +char *dataptrs[NDISKS]; /* per-disk buffer pointers handed to the RAID-6 routines */ | |
28 | +char data[NDISKS][PAGE_SIZE]; /* reference contents of every disk, P and Q included */ | |
29 | +char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; /* scratch pages that receive the recovered blocks */ | |
30 | + | |
31 | +static void makedata(void) | |
32 | +{ | |
33 | +	int disk, off; | |
34 | + | |
35 | +	/* Point every slot at its own page and fill that page with rand() bytes */ | |
36 | +	for (disk = 0; disk < NDISKS; disk++) { | |
37 | +		dataptrs[disk] = data[disk]; | |
38 | +		for (off = 0; off < PAGE_SIZE; off++) | |
39 | +			dataptrs[disk][off] = rand(); | |
40 | +	} | |
41 | +} | |
42 | + | |
43 | +static char disk_type(int d) | |
44 | +{ | |
45 | +	/* The last two slots are the P and Q syndromes; everything else is data */ | |
46 | +	if (d == NDISKS-2) | |
47 | +		return 'P'; | |
48 | + | |
49 | +	if (d == NDISKS-1) | |
50 | +		return 'Q'; | |
51 | + | |
52 | +	return 'D'; | |
53 | +} | |
54 | + | |
55 | +static int test_disks(int i, int j) | |
56 | +{ | |
57 | +	const char *verdict; | |
58 | +	int erra, errb; | |
59 | + | |
60 | +	/* Poison the scratch pages and substitute them for the two "failed" disks */ | |
61 | +	memset(recovi, 0xf0, PAGE_SIZE); | |
62 | +	memset(recovj, 0xba, PAGE_SIZE); | |
63 | +	dataptrs[i] = recovi; | |
64 | +	dataptrs[j] = recovj; | |
65 | + | |
66 | +	raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs); | |
67 | + | |
68 | +	erra = memcmp(data[i], recovi, PAGE_SIZE); | |
69 | +	errb = memcmp(data[j], recovj, PAGE_SIZE); | |
70 | + | |
71 | +	if (i < NDISKS-2 && j == NDISKS-1) { | |
72 | +		/* The data+Q case is not implemented (equivalent to a RAID-5 | |
73 | +		   failure: XOR, then recompute Q), so it never counts as an error */ | |
74 | +		erra = errb = 0; | |
75 | +	} else { | |
76 | +		if (erra) | |
77 | +			verdict = errb ? "ERRAB" : "ERRA"; | |
78 | +		else | |
79 | +			verdict = errb ? "ERRB" : "OK"; | |
80 | +		printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n", | |
81 | +		       raid6_call.name, i, disk_type(i), j, disk_type(j), verdict); | |
82 | +	} | |
83 | +	/* Put the reference pages back for the next pair */ | |
84 | +	dataptrs[i] = data[i]; | |
85 | +	dataptrs[j] = data[j]; | |
86 | + | |
87 | +	return erra || errb; | |
88 | +} | |
89 | + | |
90 | +int main(int argc, char *argv[]) | |
91 | +{ | |
92 | + const struct raid6_calls *const *algo; | |
93 | + int i, j; | |
94 | + int err = 0; /* number of failed (i, j) recovery checks over all algorithms */ | |
95 | + | |
96 | + makedata(); | |
97 | + | |
98 | + for (algo = raid6_algos; *algo; algo++) { | |
99 | + if (!(*algo)->valid || (*algo)->valid()) { /* a missing check hook means "always usable" */ | |
100 | + raid6_call = **algo; | |
101 | + | |
102 | + /* Nuke syndromes */ | |
103 | + memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); | |
104 | + | |
105 | + /* Generate assumed good syndrome */ | |
106 | + raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, | |
107 | + (void **)&dataptrs); | |
108 | + | |
109 | + for (i = 0; i < NDISKS-1; i++) | |
110 | + for (j = i+1; j < NDISKS; j++) | |
111 | + err += test_disks(i, j); | |
112 | + } | |
113 | + printf("\n"); | |
114 | + } | |
115 | + | |
116 | + printf("\n"); | |
117 | + /* Pick the best algorithm test */ | |
118 | + raid6_select_algo(); | |
119 | + | |
120 | + if (err) | |
121 | + printf("\n*** ERRORS FOUND ***\n"); | |
122 | + | |
123 | + return err != 0; /* the exit status keeps only 8 bits, so report pass/fail rather than the raw count */ | |
124 | +} |
lib/raid6/raid6x86.h
1 | +/* ----------------------------------------------------------------------- * | |
2 | + * | |
3 | + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved | |
4 | + * | |
5 | + * This program is free software; you can redistribute it and/or modify | |
6 | + * it under the terms of the GNU General Public License as published by | |
7 | + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | |
8 | + * Boston MA 02111-1307, USA; either version 2 of the License, or | |
9 | + * (at your option) any later version; incorporated herein by reference. | |
10 | + * | |
11 | + * ----------------------------------------------------------------------- */ | |
12 | + | |
13 | +/* | |
14 | + * raid6x86.h | |
15 | + * | |
16 | + * Definitions common to x86 and x86-64 RAID-6 code only | |
17 | + */ | |
18 | + | |
19 | +#ifndef LINUX_RAID_RAID6X86_H | |
20 | +#define LINUX_RAID_RAID6X86_H | |
21 | + | |
22 | +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) | |
23 | + | |
24 | +#ifdef __KERNEL__ /* Real code */ | |
25 | + | |
26 | +#include <asm/i387.h> | |
27 | + | |
28 | +#else /* Dummy code for user space testing */ | |
29 | + | |
30 | +static inline void kernel_fpu_begin(void) | |
31 | +{ /* intentionally empty in the user-space test build */ | |
32 | +} | |
33 | + | |
34 | +static inline void kernel_fpu_end(void) | |
35 | +{ /* intentionally empty in the user-space test build */ | |
36 | +} | |
37 | + | |
38 | +#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ | |
39 | +#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions | |
40 | + * (fast save and restore) */ | |
41 | +#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ | |
42 | +#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ | |
43 | +#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ | |
44 | + | |
45 | +/* User-space stand-in that queries CPUID directly; should work well enough on modern CPUs for testing */ | |
46 | +static inline int boot_cpu_has(int flag) | |
47 | +{ | |
48 | +	/* Flags in word 0 are read from leaf 1, all others from leaf 0x80000001 */ | |
49 | +	u32 leaf = (flag >> 5) ? 0x80000001 : 1; | |
50 | +	u32 features; | |
51 | + | |
52 | +	asm volatile("cpuid" : "+a" (leaf), "=d" (features) : : "ecx", "ebx"); | |
53 | + | |
54 | +	/* The low five bits of the flag select the bit within EDX */ | |
55 | +	return (features >> (flag & 31)) & 1; | |
56 | +} | |
57 | + | |
58 | +#endif /* ndef __KERNEL__ */ | |
59 | + | |
60 | +#endif | |
61 | +#endif |
lib/raid6/unroll.pl
1 | +#!/usr/bin/perl | |
2 | +# | |
3 | +# Take a piece of C code and for each line which contains the sequence $$ | |
4 | +# repeat n times with $$ replaced by 0...n-1; the sequence $# is replaced | |
5 | +# by the unrolling factor, and $* with a single $ | |
6 | +# | |
7 | + | |
8 | +($n) = @ARGV; | |
9 | +$n += 0; | |
10 | + | |
11 | +# Copy STDIN to STDOUT, replicating and substituting line by line | |
12 | +while ( defined($line = <STDIN>) ) { | |
13 | +	# Lines containing $$ are emitted $n times, all others once | |
14 | +	$rep = ( $line =~ /\$\$/ ) ? $n : 1; | |
15 | + | |
16 | +	for ( $i = 0 ; $i < $rep ; $i++ ) { | |
17 | +		# $$ -> copy index, $# -> unrolling factor, $* -> literal $ | |
18 | +		( $tmp = $line ) =~ s/\$\$/$i/g; | |
19 | +		$tmp =~ s/\$\#/$n/g; | |
20 | +		$tmp =~ s/\$\*/\$/g; | |
21 | + | |
22 | +		print $tmp; | |
23 | +	} | |
24 | +} |