Commit f5e70d0fe3ea990cfb3fc8d7f76a719adcb1e0b5

Authored by David Woodhouse
Committed by David Woodhouse
1 parent 17d857be64

md: Factor out RAID6 algorithms into lib/

We'll want to use these in btrfs too.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

Showing 29 changed files with 1616 additions and 1613 deletions. Side-by-side diff:

... ... @@ -121,7 +121,7 @@
121 121 config MD_RAID456
122 122 tristate "RAID-4/RAID-5/RAID-6 mode"
123 123 depends on BLK_DEV_MD
124   - select MD_RAID6_PQ
  124 + select RAID6_PQ
125 125 select ASYNC_MEMCPY
126 126 select ASYNC_XOR
127 127 select ASYNC_PQ
128 128  
... ... @@ -165,12 +165,9 @@
165 165  
166 166 If unsure, say N.
167 167  
168   -config MD_RAID6_PQ
169   - tristate
170   -
171 168 config ASYNC_RAID6_TEST
172 169 tristate "Self test for hardware accelerated raid6 recovery"
173   - depends on MD_RAID6_PQ
  170 + depends on RAID6_PQ
174 171 select ASYNC_RAID6_RECOV
175 172 ---help---
176 173 This is a one-shot self test that permutes through the
... ... @@ -12,13 +12,6 @@
12 12 += dm-log-userspace-base.o dm-log-userspace-transfer.o
13 13 md-mod-y += md.o bitmap.o
14 14 raid456-y += raid5.o
15   -raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \
16   - raid6int1.o raid6int2.o raid6int4.o \
17   - raid6int8.o raid6int16.o raid6int32.o \
18   - raid6altivec1.o raid6altivec2.o raid6altivec4.o \
19   - raid6altivec8.o \
20   - raid6mmx.o raid6sse1.o raid6sse2.o
21   -hostprogs-y += mktables
22 15  
23 16 # Note: link order is important. All raid personalities
24 17 # and must come before md.o, as they each initialise
... ... @@ -29,7 +22,6 @@
29 22 obj-$(CONFIG_MD_RAID0) += raid0.o
30 23 obj-$(CONFIG_MD_RAID1) += raid1.o
31 24 obj-$(CONFIG_MD_RAID10) += raid10.o
32   -obj-$(CONFIG_MD_RAID6_PQ) += raid6_pq.o
33 25 obj-$(CONFIG_MD_RAID456) += raid456.o
34 26 obj-$(CONFIG_MD_MULTIPATH) += multipath.o
35 27 obj-$(CONFIG_MD_FAULTY) += faulty.o
36 28  
... ... @@ -45,76 +37,7 @@
45 37 obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o
46 38 obj-$(CONFIG_DM_ZERO) += dm-zero.o
47 39  
48   -quiet_cmd_unroll = UNROLL $@
49   - cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
50   - < $< > $@ || ( rm -f $@ && exit 1 )
51   -
52   -ifeq ($(CONFIG_ALTIVEC),y)
53   -altivec_flags := -maltivec -mabi=altivec
54   -endif
55   -
56 40 ifeq ($(CONFIG_DM_UEVENT),y)
57 41 dm-mod-objs += dm-uevent.o
58 42 endif
59   -
60   -targets += raid6int1.c
61   -$(obj)/raid6int1.c: UNROLL := 1
62   -$(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE
63   - $(call if_changed,unroll)
64   -
65   -targets += raid6int2.c
66   -$(obj)/raid6int2.c: UNROLL := 2
67   -$(obj)/raid6int2.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE
68   - $(call if_changed,unroll)
69   -
70   -targets += raid6int4.c
71   -$(obj)/raid6int4.c: UNROLL := 4
72   -$(obj)/raid6int4.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE
73   - $(call if_changed,unroll)
74   -
75   -targets += raid6int8.c
76   -$(obj)/raid6int8.c: UNROLL := 8
77   -$(obj)/raid6int8.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE
78   - $(call if_changed,unroll)
79   -
80   -targets += raid6int16.c
81   -$(obj)/raid6int16.c: UNROLL := 16
82   -$(obj)/raid6int16.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE
83   - $(call if_changed,unroll)
84   -
85   -targets += raid6int32.c
86   -$(obj)/raid6int32.c: UNROLL := 32
87   -$(obj)/raid6int32.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE
88   - $(call if_changed,unroll)
89   -
90   -CFLAGS_raid6altivec1.o += $(altivec_flags)
91   -targets += raid6altivec1.c
92   -$(obj)/raid6altivec1.c: UNROLL := 1
93   -$(obj)/raid6altivec1.c: $(src)/raid6altivec.uc $(src)/unroll.pl FORCE
94   - $(call if_changed,unroll)
95   -
96   -CFLAGS_raid6altivec2.o += $(altivec_flags)
97   -targets += raid6altivec2.c
98   -$(obj)/raid6altivec2.c: UNROLL := 2
99   -$(obj)/raid6altivec2.c: $(src)/raid6altivec.uc $(src)/unroll.pl FORCE
100   - $(call if_changed,unroll)
101   -
102   -CFLAGS_raid6altivec4.o += $(altivec_flags)
103   -targets += raid6altivec4.c
104   -$(obj)/raid6altivec4.c: UNROLL := 4
105   -$(obj)/raid6altivec4.c: $(src)/raid6altivec.uc $(src)/unroll.pl FORCE
106   - $(call if_changed,unroll)
107   -
108   -CFLAGS_raid6altivec8.o += $(altivec_flags)
109   -targets += raid6altivec8.c
110   -$(obj)/raid6altivec8.c: UNROLL := 8
111   -$(obj)/raid6altivec8.c: $(src)/raid6altivec.uc $(src)/unroll.pl FORCE
112   - $(call if_changed,unroll)
113   -
114   -quiet_cmd_mktable = TABLE $@
115   - cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
116   -
117   -targets += raid6tables.c
118   -$(obj)/raid6tables.c: $(obj)/mktables FORCE
119   - $(call if_changed,mktable)
drivers/md/mktables.c
1   -/* -*- linux-c -*- ------------------------------------------------------- *
2   - *
3   - * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved
4   - *
5   - * This file is part of the Linux kernel, and is made available under
6   - * the terms of the GNU General Public License version 2 or (at your
7   - * option) any later version; incorporated herein by reference.
8   - *
9   - * ----------------------------------------------------------------------- */
10   -
11   -/*
12   - * mktables.c
13   - *
14   - * Make RAID-6 tables. This is a host user space program to be run at
15   - * compile time.
16   - */
17   -
18   -#include <stdio.h>
19   -#include <string.h>
20   -#include <inttypes.h>
21   -#include <stdlib.h>
22   -#include <time.h>
23   -
24   -static uint8_t gfmul(uint8_t a, uint8_t b)
25   -{
26   - uint8_t v = 0;
27   -
28   - while (b) {
29   - if (b & 1)
30   - v ^= a;
31   - a = (a << 1) ^ (a & 0x80 ? 0x1d : 0);
32   - b >>= 1;
33   - }
34   -
35   - return v;
36   -}
37   -
38   -static uint8_t gfpow(uint8_t a, int b)
39   -{
40   - uint8_t v = 1;
41   -
42   - b %= 255;
43   - if (b < 0)
44   - b += 255;
45   -
46   - while (b) {
47   - if (b & 1)
48   - v = gfmul(v, a);
49   - a = gfmul(a, a);
50   - b >>= 1;
51   - }
52   -
53   - return v;
54   -}
55   -
56   -int main(int argc, char *argv[])
57   -{
58   - int i, j, k;
59   - uint8_t v;
60   - uint8_t exptbl[256], invtbl[256];
61   -
62   - printf("#include <linux/raid/pq.h>\n");
63   -
64   - /* Compute multiplication table */
65   - printf("\nconst u8 __attribute__((aligned(256)))\n"
66   - "raid6_gfmul[256][256] =\n"
67   - "{\n");
68   - for (i = 0; i < 256; i++) {
69   - printf("\t{\n");
70   - for (j = 0; j < 256; j += 8) {
71   - printf("\t\t");
72   - for (k = 0; k < 8; k++)
73   - printf("0x%02x,%c", gfmul(i, j + k),
74   - (k == 7) ? '\n' : ' ');
75   - }
76   - printf("\t},\n");
77   - }
78   - printf("};\n");
79   - printf("#ifdef __KERNEL__\n");
80   - printf("EXPORT_SYMBOL(raid6_gfmul);\n");
81   - printf("#endif\n");
82   -
83   - /* Compute power-of-2 table (exponent) */
84   - v = 1;
85   - printf("\nconst u8 __attribute__((aligned(256)))\n"
86   - "raid6_gfexp[256] =\n" "{\n");
87   - for (i = 0; i < 256; i += 8) {
88   - printf("\t");
89   - for (j = 0; j < 8; j++) {
90   - exptbl[i + j] = v;
91   - printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
92   - v = gfmul(v, 2);
93   - if (v == 1)
94   - v = 0; /* For entry 255, not a real entry */
95   - }
96   - }
97   - printf("};\n");
98   - printf("#ifdef __KERNEL__\n");
99   - printf("EXPORT_SYMBOL(raid6_gfexp);\n");
100   - printf("#endif\n");
101   -
102   - /* Compute inverse table x^-1 == x^254 */
103   - printf("\nconst u8 __attribute__((aligned(256)))\n"
104   - "raid6_gfinv[256] =\n" "{\n");
105   - for (i = 0; i < 256; i += 8) {
106   - printf("\t");
107   - for (j = 0; j < 8; j++) {
108   - invtbl[i + j] = v = gfpow(i + j, 254);
109   - printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
110   - }
111   - }
112   - printf("};\n");
113   - printf("#ifdef __KERNEL__\n");
114   - printf("EXPORT_SYMBOL(raid6_gfinv);\n");
115   - printf("#endif\n");
116   -
117   - /* Compute inv(2^x + 1) (exponent-xor-inverse) table */
118   - printf("\nconst u8 __attribute__((aligned(256)))\n"
119   - "raid6_gfexi[256] =\n" "{\n");
120   - for (i = 0; i < 256; i += 8) {
121   - printf("\t");
122   - for (j = 0; j < 8; j++)
123   - printf("0x%02x,%c", invtbl[exptbl[i + j] ^ 1],
124   - (j == 7) ? '\n' : ' ');
125   - }
126   - printf("};\n");
127   - printf("#ifdef __KERNEL__\n");
128   - printf("EXPORT_SYMBOL(raid6_gfexi);\n");
129   - printf("#endif\n");
130   -
131   - return 0;
132   -}
drivers/md/raid6algos.c
1   -/* -*- linux-c -*- ------------------------------------------------------- *
2   - *
3   - * Copyright 2002 H. Peter Anvin - All Rights Reserved
4   - *
5   - * This program is free software; you can redistribute it and/or modify
6   - * it under the terms of the GNU General Public License as published by
7   - * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8   - * Boston MA 02111-1307, USA; either version 2 of the License, or
9   - * (at your option) any later version; incorporated herein by reference.
10   - *
11   - * ----------------------------------------------------------------------- */
12   -
13   -/*
14   - * raid6algos.c
15   - *
16   - * Algorithm list and algorithm selection for RAID-6
17   - */
18   -
19   -#include <linux/raid/pq.h>
20   -#ifndef __KERNEL__
21   -#include <sys/mman.h>
22   -#include <stdio.h>
23   -#else
24   -#if !RAID6_USE_EMPTY_ZERO_PAGE
25   -/* In .bss so it's zeroed */
26   -const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
27   -EXPORT_SYMBOL(raid6_empty_zero_page);
28   -#endif
29   -#endif
30   -
31   -struct raid6_calls raid6_call;
32   -EXPORT_SYMBOL_GPL(raid6_call);
33   -
34   -/* Various routine sets */
35   -extern const struct raid6_calls raid6_intx1;
36   -extern const struct raid6_calls raid6_intx2;
37   -extern const struct raid6_calls raid6_intx4;
38   -extern const struct raid6_calls raid6_intx8;
39   -extern const struct raid6_calls raid6_intx16;
40   -extern const struct raid6_calls raid6_intx32;
41   -extern const struct raid6_calls raid6_mmxx1;
42   -extern const struct raid6_calls raid6_mmxx2;
43   -extern const struct raid6_calls raid6_sse1x1;
44   -extern const struct raid6_calls raid6_sse1x2;
45   -extern const struct raid6_calls raid6_sse2x1;
46   -extern const struct raid6_calls raid6_sse2x2;
47   -extern const struct raid6_calls raid6_sse2x4;
48   -extern const struct raid6_calls raid6_altivec1;
49   -extern const struct raid6_calls raid6_altivec2;
50   -extern const struct raid6_calls raid6_altivec4;
51   -extern const struct raid6_calls raid6_altivec8;
52   -
53   -const struct raid6_calls * const raid6_algos[] = {
54   - &raid6_intx1,
55   - &raid6_intx2,
56   - &raid6_intx4,
57   - &raid6_intx8,
58   -#if defined(__ia64__)
59   - &raid6_intx16,
60   - &raid6_intx32,
61   -#endif
62   -#if defined(__i386__) && !defined(__arch_um__)
63   - &raid6_mmxx1,
64   - &raid6_mmxx2,
65   - &raid6_sse1x1,
66   - &raid6_sse1x2,
67   - &raid6_sse2x1,
68   - &raid6_sse2x2,
69   -#endif
70   -#if defined(__x86_64__) && !defined(__arch_um__)
71   - &raid6_sse2x1,
72   - &raid6_sse2x2,
73   - &raid6_sse2x4,
74   -#endif
75   -#ifdef CONFIG_ALTIVEC
76   - &raid6_altivec1,
77   - &raid6_altivec2,
78   - &raid6_altivec4,
79   - &raid6_altivec8,
80   -#endif
81   - NULL
82   -};
83   -
84   -#ifdef __KERNEL__
85   -#define RAID6_TIME_JIFFIES_LG2 4
86   -#else
87   -/* Need more time to be stable in userspace */
88   -#define RAID6_TIME_JIFFIES_LG2 9
89   -#define time_before(x, y) ((x) < (y))
90   -#endif
91   -
92   -/* Try to pick the best algorithm */
93   -/* This code uses the gfmul table as convenient data set to abuse */
94   -
95   -int __init raid6_select_algo(void)
96   -{
97   - const struct raid6_calls * const * algo;
98   - const struct raid6_calls * best;
99   - char *syndromes;
100   - void *dptrs[(65536/PAGE_SIZE)+2];
101   - int i, disks;
102   - unsigned long perf, bestperf;
103   - int bestprefer;
104   - unsigned long j0, j1;
105   -
106   - disks = (65536/PAGE_SIZE)+2;
107   - for ( i = 0 ; i < disks-2 ; i++ ) {
108   - dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
109   - }
110   -
111   - /* Normal code - use a 2-page allocation to avoid D$ conflict */
112   - syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
113   -
114   - if ( !syndromes ) {
115   - printk("raid6: Yikes! No memory available.\n");
116   - return -ENOMEM;
117   - }
118   -
119   - dptrs[disks-2] = syndromes;
120   - dptrs[disks-1] = syndromes + PAGE_SIZE;
121   -
122   - bestperf = 0; bestprefer = 0; best = NULL;
123   -
124   - for ( algo = raid6_algos ; *algo ; algo++ ) {
125   - if ( !(*algo)->valid || (*algo)->valid() ) {
126   - perf = 0;
127   -
128   - preempt_disable();
129   - j0 = jiffies;
130   - while ( (j1 = jiffies) == j0 )
131   - cpu_relax();
132   - while (time_before(jiffies,
133   - j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
134   - (*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs);
135   - perf++;
136   - }
137   - preempt_enable();
138   -
139   - if ( (*algo)->prefer > bestprefer ||
140   - ((*algo)->prefer == bestprefer &&
141   - perf > bestperf) ) {
142   - best = *algo;
143   - bestprefer = best->prefer;
144   - bestperf = perf;
145   - }
146   - printk("raid6: %-8s %5ld MB/s\n", (*algo)->name,
147   - (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
148   - }
149   - }
150   -
151   - if (best) {
152   - printk("raid6: using algorithm %s (%ld MB/s)\n",
153   - best->name,
154   - (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
155   - raid6_call = *best;
156   - } else
157   - printk("raid6: Yikes! No algorithm found!\n");
158   -
159   - free_pages((unsigned long)syndromes, 1);
160   -
161   - return best ? 0 : -EINVAL;
162   -}
163   -
164   -static void raid6_exit(void)
165   -{
166   - do { } while (0);
167   -}
168   -
169   -subsys_initcall(raid6_select_algo);
170   -module_exit(raid6_exit);
171   -MODULE_LICENSE("GPL");
drivers/md/raid6altivec.uc
1   -/* -*- linux-c -*- ------------------------------------------------------- *
2   - *
3   - * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
4   - *
5   - * This program is free software; you can redistribute it and/or modify
6   - * it under the terms of the GNU General Public License as published by
7   - * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8   - * Boston MA 02111-1307, USA; either version 2 of the License, or
9   - * (at your option) any later version; incorporated herein by reference.
10   - *
11   - * ----------------------------------------------------------------------- */
12   -
13   -/*
14   - * raid6altivec$#.c
15   - *
16   - * $#-way unrolled portable integer math RAID-6 instruction set
17   - *
18   - * This file is postprocessed using unroll.pl
19   - *
20   - * <benh> hpa: in process,
21   - * you can just "steal" the vec unit with enable_kernel_altivec() (but
22   - * bracked this with preempt_disable/enable or in a lock)
23   - */
24   -
25   -#include <linux/raid/pq.h>
26   -
27   -#ifdef CONFIG_ALTIVEC
28   -
29   -#include <altivec.h>
30   -#ifdef __KERNEL__
31   -# include <asm/system.h>
32   -# include <asm/cputable.h>
33   -#endif
34   -
35   -/*
36   - * This is the C data type to use. We use a vector of
37   - * signed char so vec_cmpgt() will generate the right
38   - * instruction.
39   - */
40   -
41   -typedef vector signed char unative_t;
42   -
43   -#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})
44   -#define NSIZE sizeof(unative_t)
45   -
46   -/*
47   - * The SHLBYTE() operation shifts each byte left by 1, *not*
48   - * rolling over into the next byte
49   - */
50   -static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
51   -{
52   - return vec_add(v,v);
53   -}
54   -
55   -/*
56   - * The MASK() operation returns 0xFF in any byte for which the high
57   - * bit is 1, 0x00 for any byte for which the high bit is 0.
58   - */
59   -static inline __attribute_const__ unative_t MASK(unative_t v)
60   -{
61   - unative_t zv = NBYTES(0);
62   -
63   - /* vec_cmpgt returns a vector bool char; thus the need for the cast */
64   - return (unative_t)vec_cmpgt(zv, v);
65   -}
66   -
67   -
68   -/* This is noinline to make damned sure that gcc doesn't move any of the
69   - Altivec code around the enable/disable code */
70   -static void noinline
71   -raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs)
72   -{
73   - u8 **dptr = (u8 **)ptrs;
74   - u8 *p, *q;
75   - int d, z, z0;
76   -
77   - unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
78   - unative_t x1d = NBYTES(0x1d);
79   -
80   - z0 = disks - 3; /* Highest data disk */
81   - p = dptr[z0+1]; /* XOR parity */
82   - q = dptr[z0+2]; /* RS syndrome */
83   -
84   - for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
85   - wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
86   - for ( z = z0-1 ; z >= 0 ; z-- ) {
87   - wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
88   - wp$$ = vec_xor(wp$$, wd$$);
89   - w2$$ = MASK(wq$$);
90   - w1$$ = SHLBYTE(wq$$);
91   - w2$$ = vec_and(w2$$, x1d);
92   - w1$$ = vec_xor(w1$$, w2$$);
93   - wq$$ = vec_xor(w1$$, wd$$);
94   - }
95   - *(unative_t *)&p[d+NSIZE*$$] = wp$$;
96   - *(unative_t *)&q[d+NSIZE*$$] = wq$$;
97   - }
98   -}
99   -
100   -static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
101   -{
102   - preempt_disable();
103   - enable_kernel_altivec();
104   -
105   - raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs);
106   -
107   - preempt_enable();
108   -}
109   -
110   -int raid6_have_altivec(void);
111   -#if $# == 1
112   -int raid6_have_altivec(void)
113   -{
114   - /* This assumes either all CPUs have Altivec or none does */
115   -# ifdef __KERNEL__
116   - return cpu_has_feature(CPU_FTR_ALTIVEC);
117   -# else
118   - return 1;
119   -# endif
120   -}
121   -#endif
122   -
123   -const struct raid6_calls raid6_altivec$# = {
124   - raid6_altivec$#_gen_syndrome,
125   - raid6_have_altivec,
126   - "altivecx$#",
127   - 0
128   -};
129   -
130   -#endif /* CONFIG_ALTIVEC */
drivers/md/raid6int.uc
1   -/* -*- linux-c -*- ------------------------------------------------------- *
2   - *
3   - * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
4   - *
5   - * This program is free software; you can redistribute it and/or modify
6   - * it under the terms of the GNU General Public License as published by
7   - * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8   - * Boston MA 02111-1307, USA; either version 2 of the License, or
9   - * (at your option) any later version; incorporated herein by reference.
10   - *
11   - * ----------------------------------------------------------------------- */
12   -
13   -/*
14   - * raid6int$#.c
15   - *
16   - * $#-way unrolled portable integer math RAID-6 instruction set
17   - *
18   - * This file is postprocessed using unroll.pl
19   - */
20   -
21   -#include <linux/raid/pq.h>
22   -
23   -/*
24   - * This is the C data type to use
25   - */
26   -
27   -/* Change this from BITS_PER_LONG if there is something better... */
28   -#if BITS_PER_LONG == 64
29   -# define NBYTES(x) ((x) * 0x0101010101010101UL)
30   -# define NSIZE 8
31   -# define NSHIFT 3
32   -# define NSTRING "64"
33   -typedef u64 unative_t;
34   -#else
35   -# define NBYTES(x) ((x) * 0x01010101U)
36   -# define NSIZE 4
37   -# define NSHIFT 2
38   -# define NSTRING "32"
39   -typedef u32 unative_t;
40   -#endif
41   -
42   -
43   -
44   -/*
45   - * IA-64 wants insane amounts of unrolling. On other architectures that
46   - * is just a waste of space.
47   - */
48   -#if ($# <= 8) || defined(__ia64__)
49   -
50   -
51   -/*
52   - * These sub-operations are separate inlines since they can sometimes be
53   - * specially optimized using architecture-specific hacks.
54   - */
55   -
56   -/*
57   - * The SHLBYTE() operation shifts each byte left by 1, *not*
58   - * rolling over into the next byte
59   - */
60   -static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
61   -{
62   - unative_t vv;
63   -
64   - vv = (v << 1) & NBYTES(0xfe);
65   - return vv;
66   -}
67   -
68   -/*
69   - * The MASK() operation returns 0xFF in any byte for which the high
70   - * bit is 1, 0x00 for any byte for which the high bit is 0.
71   - */
72   -static inline __attribute_const__ unative_t MASK(unative_t v)
73   -{
74   - unative_t vv;
75   -
76   - vv = v & NBYTES(0x80);
77   - vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */
78   - return vv;
79   -}
80   -
81   -
82   -static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
83   -{
84   - u8 **dptr = (u8 **)ptrs;
85   - u8 *p, *q;
86   - int d, z, z0;
87   -
88   - unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
89   -
90   - z0 = disks - 3; /* Highest data disk */
91   - p = dptr[z0+1]; /* XOR parity */
92   - q = dptr[z0+2]; /* RS syndrome */
93   -
94   - for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
95   - wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
96   - for ( z = z0-1 ; z >= 0 ; z-- ) {
97   - wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
98   - wp$$ ^= wd$$;
99   - w2$$ = MASK(wq$$);
100   - w1$$ = SHLBYTE(wq$$);
101   - w2$$ &= NBYTES(0x1d);
102   - w1$$ ^= w2$$;
103   - wq$$ = w1$$ ^ wd$$;
104   - }
105   - *(unative_t *)&p[d+NSIZE*$$] = wp$$;
106   - *(unative_t *)&q[d+NSIZE*$$] = wq$$;
107   - }
108   -}
109   -
110   -const struct raid6_calls raid6_intx$# = {
111   - raid6_int$#_gen_syndrome,
112   - NULL, /* always valid */
113   - "int" NSTRING "x$#",
114   - 0
115   -};
116   -
117   -#endif
drivers/md/raid6mmx.c
1   -/* -*- linux-c -*- ------------------------------------------------------- *
2   - *
3   - * Copyright 2002 H. Peter Anvin - All Rights Reserved
4   - *
5   - * This program is free software; you can redistribute it and/or modify
6   - * it under the terms of the GNU General Public License as published by
7   - * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8   - * Boston MA 02111-1307, USA; either version 2 of the License, or
9   - * (at your option) any later version; incorporated herein by reference.
10   - *
11   - * ----------------------------------------------------------------------- */
12   -
13   -/*
14   - * raid6mmx.c
15   - *
16   - * MMX implementation of RAID-6 syndrome functions
17   - */
18   -
19   -#if defined(__i386__) && !defined(__arch_um__)
20   -
21   -#include <linux/raid/pq.h>
22   -#include "raid6x86.h"
23   -
24   -/* Shared with raid6sse1.c */
25   -const struct raid6_mmx_constants {
26   - u64 x1d;
27   -} raid6_mmx_constants = {
28   - 0x1d1d1d1d1d1d1d1dULL,
29   -};
30   -
31   -static int raid6_have_mmx(void)
32   -{
33   - /* Not really "boot_cpu" but "all_cpus" */
34   - return boot_cpu_has(X86_FEATURE_MMX);
35   -}
36   -
37   -/*
38   - * Plain MMX implementation
39   - */
40   -static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
41   -{
42   - u8 **dptr = (u8 **)ptrs;
43   - u8 *p, *q;
44   - int d, z, z0;
45   -
46   - z0 = disks - 3; /* Highest data disk */
47   - p = dptr[z0+1]; /* XOR parity */
48   - q = dptr[z0+2]; /* RS syndrome */
49   -
50   - kernel_fpu_begin();
51   -
52   - asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
53   - asm volatile("pxor %mm5,%mm5"); /* Zero temp */
54   -
55   - for ( d = 0 ; d < bytes ; d += 8 ) {
56   - asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
57   - asm volatile("movq %mm2,%mm4"); /* Q[0] */
58   - for ( z = z0-1 ; z >= 0 ; z-- ) {
59   - asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
60   - asm volatile("pcmpgtb %mm4,%mm5");
61   - asm volatile("paddb %mm4,%mm4");
62   - asm volatile("pand %mm0,%mm5");
63   - asm volatile("pxor %mm5,%mm4");
64   - asm volatile("pxor %mm5,%mm5");
65   - asm volatile("pxor %mm6,%mm2");
66   - asm volatile("pxor %mm6,%mm4");
67   - }
68   - asm volatile("movq %%mm2,%0" : "=m" (p[d]));
69   - asm volatile("pxor %mm2,%mm2");
70   - asm volatile("movq %%mm4,%0" : "=m" (q[d]));
71   - asm volatile("pxor %mm4,%mm4");
72   - }
73   -
74   - kernel_fpu_end();
75   -}
76   -
77   -const struct raid6_calls raid6_mmxx1 = {
78   - raid6_mmx1_gen_syndrome,
79   - raid6_have_mmx,
80   - "mmxx1",
81   - 0
82   -};
83   -
84   -/*
85   - * Unrolled-by-2 MMX implementation
86   - */
87   -static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
88   -{
89   - u8 **dptr = (u8 **)ptrs;
90   - u8 *p, *q;
91   - int d, z, z0;
92   -
93   - z0 = disks - 3; /* Highest data disk */
94   - p = dptr[z0+1]; /* XOR parity */
95   - q = dptr[z0+2]; /* RS syndrome */
96   -
97   - kernel_fpu_begin();
98   -
99   - asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
100   - asm volatile("pxor %mm5,%mm5"); /* Zero temp */
101   - asm volatile("pxor %mm7,%mm7"); /* Zero temp */
102   -
103   - for ( d = 0 ; d < bytes ; d += 16 ) {
104   - asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
105   - asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8]));
106   - asm volatile("movq %mm2,%mm4"); /* Q[0] */
107   - asm volatile("movq %mm3,%mm6"); /* Q[1] */
108   - for ( z = z0-1 ; z >= 0 ; z-- ) {
109   - asm volatile("pcmpgtb %mm4,%mm5");
110   - asm volatile("pcmpgtb %mm6,%mm7");
111   - asm volatile("paddb %mm4,%mm4");
112   - asm volatile("paddb %mm6,%mm6");
113   - asm volatile("pand %mm0,%mm5");
114   - asm volatile("pand %mm0,%mm7");
115   - asm volatile("pxor %mm5,%mm4");
116   - asm volatile("pxor %mm7,%mm6");
117   - asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
118   - asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
119   - asm volatile("pxor %mm5,%mm2");
120   - asm volatile("pxor %mm7,%mm3");
121   - asm volatile("pxor %mm5,%mm4");
122   - asm volatile("pxor %mm7,%mm6");
123   - asm volatile("pxor %mm5,%mm5");
124   - asm volatile("pxor %mm7,%mm7");
125   - }
126   - asm volatile("movq %%mm2,%0" : "=m" (p[d]));
127   - asm volatile("movq %%mm3,%0" : "=m" (p[d+8]));
128   - asm volatile("movq %%mm4,%0" : "=m" (q[d]));
129   - asm volatile("movq %%mm6,%0" : "=m" (q[d+8]));
130   - }
131   -
132   - kernel_fpu_end();
133   -}
134   -
135   -const struct raid6_calls raid6_mmxx2 = {
136   - raid6_mmx2_gen_syndrome,
137   - raid6_have_mmx,
138   - "mmxx2",
139   - 0
140   -};
141   -
142   -#endif
drivers/md/raid6recov.c
1   -/* -*- linux-c -*- ------------------------------------------------------- *
2   - *
3   - * Copyright 2002 H. Peter Anvin - All Rights Reserved
4   - *
5   - * This program is free software; you can redistribute it and/or modify
6   - * it under the terms of the GNU General Public License as published by
7   - * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8   - * Boston MA 02111-1307, USA; either version 2 of the License, or
9   - * (at your option) any later version; incorporated herein by reference.
10   - *
11   - * ----------------------------------------------------------------------- */
12   -
13   -/*
14   - * raid6recov.c
15   - *
16   - * RAID-6 data recovery in dual failure mode. In single failure mode,
17   - * use the RAID-5 algorithm (or, in the case of Q failure, just reconstruct
18   - * the syndrome.)
19   - */
20   -
21   -#include <linux/raid/pq.h>
22   -
23   -/* Recover two failed data blocks. */
24   -void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
25   - void **ptrs)
26   -{
27   - u8 *p, *q, *dp, *dq;
28   - u8 px, qx, db;
29   - const u8 *pbmul; /* P multiplier table for B data */
30   - const u8 *qmul; /* Q multiplier table (for both) */
31   -
32   - p = (u8 *)ptrs[disks-2];
33   - q = (u8 *)ptrs[disks-1];
34   -
35   - /* Compute syndrome with zero for the missing data pages
36   - Use the dead data pages as temporary storage for
37   - delta p and delta q */
38   - dp = (u8 *)ptrs[faila];
39   - ptrs[faila] = (void *)raid6_empty_zero_page;
40   - ptrs[disks-2] = dp;
41   - dq = (u8 *)ptrs[failb];
42   - ptrs[failb] = (void *)raid6_empty_zero_page;
43   - ptrs[disks-1] = dq;
44   -
45   - raid6_call.gen_syndrome(disks, bytes, ptrs);
46   -
47   - /* Restore pointer table */
48   - ptrs[faila] = dp;
49   - ptrs[failb] = dq;
50   - ptrs[disks-2] = p;
51   - ptrs[disks-1] = q;
52   -
53   - /* Now, pick the proper data tables */
54   - pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
55   - qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
56   -
57   - /* Now do it... */
58   - while ( bytes-- ) {
59   - px = *p ^ *dp;
60   - qx = qmul[*q ^ *dq];
61   - *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
62   - *dp++ = db ^ px; /* Reconstructed A */
63   - p++; q++;
64   - }
65   -}
66   -EXPORT_SYMBOL_GPL(raid6_2data_recov);
67   -
68   -/* Recover failure of one data block plus the P block */
69   -void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
70   -{
71   - u8 *p, *q, *dq;
72   - const u8 *qmul; /* Q multiplier table */
73   -
74   - p = (u8 *)ptrs[disks-2];
75   - q = (u8 *)ptrs[disks-1];
76   -
77   - /* Compute syndrome with zero for the missing data page
78   - Use the dead data page as temporary storage for delta q */
79   - dq = (u8 *)ptrs[faila];
80   - ptrs[faila] = (void *)raid6_empty_zero_page;
81   - ptrs[disks-1] = dq;
82   -
83   - raid6_call.gen_syndrome(disks, bytes, ptrs);
84   -
85   - /* Restore pointer table */
86   - ptrs[faila] = dq;
87   - ptrs[disks-1] = q;
88   -
89   - /* Now, pick the proper data tables */
90   - qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
91   -
92   - /* Now do it... */
93   - while ( bytes-- ) {
94   - *p++ ^= *dq = qmul[*q ^ *dq];
95   - q++; dq++;
96   - }
97   -}
98   -EXPORT_SYMBOL_GPL(raid6_datap_recov);
99   -
100   -#ifndef __KERNEL__
101   -/* Testing only */
102   -
103   -/* Recover two failed blocks. */
104   -void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs)
105   -{
106   - if ( faila > failb ) {
107   - int tmp = faila;
108   - faila = failb;
109   - failb = tmp;
110   - }
111   -
112   - if ( failb == disks-1 ) {
113   - if ( faila == disks-2 ) {
114   - /* P+Q failure. Just rebuild the syndrome. */
115   - raid6_call.gen_syndrome(disks, bytes, ptrs);
116   - } else {
117   - /* data+Q failure. Reconstruct data from P,
118   - then rebuild syndrome. */
119   - /* NOT IMPLEMENTED - equivalent to RAID-5 */
120   - }
121   - } else {
122   - if ( failb == disks-2 ) {
123   - /* data+P failure. */
124   - raid6_datap_recov(disks, bytes, faila, ptrs);
125   - } else {
126   - /* data+data failure. */
127   - raid6_2data_recov(disks, bytes, faila, failb, ptrs);
128   - }
129   - }
130   -}
131   -
132   -#endif
drivers/md/raid6sse1.c
1   -/* -*- linux-c -*- ------------------------------------------------------- *
2   - *
3   - * Copyright 2002 H. Peter Anvin - All Rights Reserved
4   - *
5   - * This program is free software; you can redistribute it and/or modify
6   - * it under the terms of the GNU General Public License as published by
7   - * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8   - * Boston MA 02111-1307, USA; either version 2 of the License, or
9   - * (at your option) any later version; incorporated herein by reference.
10   - *
11   - * ----------------------------------------------------------------------- */
12   -
13   -/*
14   - * raid6sse1.c
15   - *
16   - * SSE-1/MMXEXT implementation of RAID-6 syndrome functions
17   - *
18   - * This is really an MMX implementation, but it requires SSE-1 or
19   - * AMD MMXEXT for prefetch support and a few other features. The
20   - * support for nontemporal memory accesses is enough to make this
21   - * worthwhile as a separate implementation.
22   - */
23   -
24   -#if defined(__i386__) && !defined(__arch_um__)
25   -
26   -#include <linux/raid/pq.h>
27   -#include "raid6x86.h"
28   -
29   -/* Defined in raid6mmx.c */
30   -extern const struct raid6_mmx_constants {
31   - u64 x1d;
32   -} raid6_mmx_constants;
33   -
34   -static int raid6_have_sse1_or_mmxext(void)
35   -{
36   - /* Not really boot_cpu but "all_cpus" */
37   - return boot_cpu_has(X86_FEATURE_MMX) &&
38   - (boot_cpu_has(X86_FEATURE_XMM) ||
39   - boot_cpu_has(X86_FEATURE_MMXEXT));
40   -}
41   -
42   -/*
43   - * Plain SSE1 implementation
44   - */
45   -static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
46   -{
47   - u8 **dptr = (u8 **)ptrs;
48   - u8 *p, *q;
49   - int d, z, z0;
50   -
51   - z0 = disks - 3; /* Highest data disk */
52   - p = dptr[z0+1]; /* XOR parity */
53   - q = dptr[z0+2]; /* RS syndrome */
54   -
55   - kernel_fpu_begin();
56   -
57   - asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
58   - asm volatile("pxor %mm5,%mm5"); /* Zero temp */
59   -
60   - for ( d = 0 ; d < bytes ; d += 8 ) {
61   - asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
62   - asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
63   - asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
64   - asm volatile("movq %mm2,%mm4"); /* Q[0] */
65   - asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d]));
66   - for ( z = z0-2 ; z >= 0 ; z-- ) {
67   - asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
68   - asm volatile("pcmpgtb %mm4,%mm5");
69   - asm volatile("paddb %mm4,%mm4");
70   - asm volatile("pand %mm0,%mm5");
71   - asm volatile("pxor %mm5,%mm4");
72   - asm volatile("pxor %mm5,%mm5");
73   - asm volatile("pxor %mm6,%mm2");
74   - asm volatile("pxor %mm6,%mm4");
75   - asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
76   - }
77   - asm volatile("pcmpgtb %mm4,%mm5");
78   - asm volatile("paddb %mm4,%mm4");
79   - asm volatile("pand %mm0,%mm5");
80   - asm volatile("pxor %mm5,%mm4");
81   - asm volatile("pxor %mm5,%mm5");
82   - asm volatile("pxor %mm6,%mm2");
83   - asm volatile("pxor %mm6,%mm4");
84   -
85   - asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
86   - asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
87   - }
88   -
89   - asm volatile("sfence" : : : "memory");
90   - kernel_fpu_end();
91   -}
92   -
93   -const struct raid6_calls raid6_sse1x1 = {
94   - raid6_sse11_gen_syndrome,
95   - raid6_have_sse1_or_mmxext,
96   - "sse1x1",
97   - 1 /* Has cache hints */
98   -};
99   -
100   -/*
101   - * Unrolled-by-2 SSE1 implementation
102   - */
103   -static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
104   -{
105   - u8 **dptr = (u8 **)ptrs;
106   - u8 *p, *q;
107   - int d, z, z0;
108   -
109   - z0 = disks - 3; /* Highest data disk */
110   - p = dptr[z0+1]; /* XOR parity */
111   - q = dptr[z0+2]; /* RS syndrome */
112   -
113   - kernel_fpu_begin();
114   -
115   - asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
116   - asm volatile("pxor %mm5,%mm5"); /* Zero temp */
117   - asm volatile("pxor %mm7,%mm7"); /* Zero temp */
118   -
119   - /* We uniformly assume a single prefetch covers at least 16 bytes */
120   - for ( d = 0 ; d < bytes ; d += 16 ) {
121   - asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
122   - asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
123   - asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */
124   - asm volatile("movq %mm2,%mm4"); /* Q[0] */
125   - asm volatile("movq %mm3,%mm6"); /* Q[1] */
126   - for ( z = z0-1 ; z >= 0 ; z-- ) {
127   - asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
128   - asm volatile("pcmpgtb %mm4,%mm5");
129   - asm volatile("pcmpgtb %mm6,%mm7");
130   - asm volatile("paddb %mm4,%mm4");
131   - asm volatile("paddb %mm6,%mm6");
132   - asm volatile("pand %mm0,%mm5");
133   - asm volatile("pand %mm0,%mm7");
134   - asm volatile("pxor %mm5,%mm4");
135   - asm volatile("pxor %mm7,%mm6");
136   - asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
137   - asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
138   - asm volatile("pxor %mm5,%mm2");
139   - asm volatile("pxor %mm7,%mm3");
140   - asm volatile("pxor %mm5,%mm4");
141   - asm volatile("pxor %mm7,%mm6");
142   - asm volatile("pxor %mm5,%mm5");
143   - asm volatile("pxor %mm7,%mm7");
144   - }
145   - asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
146   - asm volatile("movntq %%mm3,%0" : "=m" (p[d+8]));
147   - asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
148   - asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
149   - }
150   -
151   - asm volatile("sfence" : :: "memory");
152   - kernel_fpu_end();
153   -}
154   -
155   -const struct raid6_calls raid6_sse1x2 = {
156   - raid6_sse12_gen_syndrome,
157   - raid6_have_sse1_or_mmxext,
158   - "sse1x2",
159   - 1 /* Has cache hints */
160   -};
161   -
162   -#endif
drivers/md/raid6sse2.c
1   -/* -*- linux-c -*- ------------------------------------------------------- *
2   - *
3   - * Copyright 2002 H. Peter Anvin - All Rights Reserved
4   - *
5   - * This program is free software; you can redistribute it and/or modify
6   - * it under the terms of the GNU General Public License as published by
7   - * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8   - * Boston MA 02111-1307, USA; either version 2 of the License, or
9   - * (at your option) any later version; incorporated herein by reference.
10   - *
11   - * ----------------------------------------------------------------------- */
12   -
13   -/*
14   - * raid6sse2.c
15   - *
16   - * SSE-2 implementation of RAID-6 syndrome functions
17   - *
18   - */
19   -
20   -#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
21   -
22   -#include <linux/raid/pq.h>
23   -#include "raid6x86.h"
24   -
25   -static const struct raid6_sse_constants {
26   - u64 x1d[2];
27   -} raid6_sse_constants __attribute__((aligned(16))) = {
28   - { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
29   -};
30   -
31   -static int raid6_have_sse2(void)
32   -{
33   - /* Not really boot_cpu but "all_cpus" */
34   - return boot_cpu_has(X86_FEATURE_MMX) &&
35   - boot_cpu_has(X86_FEATURE_FXSR) &&
36   - boot_cpu_has(X86_FEATURE_XMM) &&
37   - boot_cpu_has(X86_FEATURE_XMM2);
38   -}
39   -
40   -/*
41   - * Plain SSE2 implementation
42   - */
43   -static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
44   -{
45   - u8 **dptr = (u8 **)ptrs;
46   - u8 *p, *q;
47   - int d, z, z0;
48   -
49   - z0 = disks - 3; /* Highest data disk */
50   - p = dptr[z0+1]; /* XOR parity */
51   - q = dptr[z0+2]; /* RS syndrome */
52   -
53   - kernel_fpu_begin();
54   -
55   - asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
56   - asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
57   -
58   - for ( d = 0 ; d < bytes ; d += 16 ) {
59   - asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
60   - asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
61   - asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
62   - asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
63   - asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
64   - for ( z = z0-2 ; z >= 0 ; z-- ) {
65   - asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
66   - asm volatile("pcmpgtb %xmm4,%xmm5");
67   - asm volatile("paddb %xmm4,%xmm4");
68   - asm volatile("pand %xmm0,%xmm5");
69   - asm volatile("pxor %xmm5,%xmm4");
70   - asm volatile("pxor %xmm5,%xmm5");
71   - asm volatile("pxor %xmm6,%xmm2");
72   - asm volatile("pxor %xmm6,%xmm4");
73   - asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
74   - }
75   - asm volatile("pcmpgtb %xmm4,%xmm5");
76   - asm volatile("paddb %xmm4,%xmm4");
77   - asm volatile("pand %xmm0,%xmm5");
78   - asm volatile("pxor %xmm5,%xmm4");
79   - asm volatile("pxor %xmm5,%xmm5");
80   - asm volatile("pxor %xmm6,%xmm2");
81   - asm volatile("pxor %xmm6,%xmm4");
82   -
83   - asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
84   - asm volatile("pxor %xmm2,%xmm2");
85   - asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
86   - asm volatile("pxor %xmm4,%xmm4");
87   - }
88   -
89   - asm volatile("sfence" : : : "memory");
90   - kernel_fpu_end();
91   -}
92   -
93   -const struct raid6_calls raid6_sse2x1 = {
94   - raid6_sse21_gen_syndrome,
95   - raid6_have_sse2,
96   - "sse2x1",
97   - 1 /* Has cache hints */
98   -};
99   -
100   -/*
101   - * Unrolled-by-2 SSE2 implementation
102   - */
103   -static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
104   -{
105   - u8 **dptr = (u8 **)ptrs;
106   - u8 *p, *q;
107   - int d, z, z0;
108   -
109   - z0 = disks - 3; /* Highest data disk */
110   - p = dptr[z0+1]; /* XOR parity */
111   - q = dptr[z0+2]; /* RS syndrome */
112   -
113   - kernel_fpu_begin();
114   -
115   - asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
116   - asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
117   - asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */
118   -
119   - /* We uniformly assume a single prefetch covers at least 32 bytes */
120   - for ( d = 0 ; d < bytes ; d += 32 ) {
121   - asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
122   - asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
123   - asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
124   - asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
125   - asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
126   - for ( z = z0-1 ; z >= 0 ; z-- ) {
127   - asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
128   - asm volatile("pcmpgtb %xmm4,%xmm5");
129   - asm volatile("pcmpgtb %xmm6,%xmm7");
130   - asm volatile("paddb %xmm4,%xmm4");
131   - asm volatile("paddb %xmm6,%xmm6");
132   - asm volatile("pand %xmm0,%xmm5");
133   - asm volatile("pand %xmm0,%xmm7");
134   - asm volatile("pxor %xmm5,%xmm4");
135   - asm volatile("pxor %xmm7,%xmm6");
136   - asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
137   - asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
138   - asm volatile("pxor %xmm5,%xmm2");
139   - asm volatile("pxor %xmm7,%xmm3");
140   - asm volatile("pxor %xmm5,%xmm4");
141   - asm volatile("pxor %xmm7,%xmm6");
142   - asm volatile("pxor %xmm5,%xmm5");
143   - asm volatile("pxor %xmm7,%xmm7");
144   - }
145   - asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
146   - asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
147   - asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
148   - asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
149   - }
150   -
151   - asm volatile("sfence" : : : "memory");
152   - kernel_fpu_end();
153   -}
154   -
155   -const struct raid6_calls raid6_sse2x2 = {
156   - raid6_sse22_gen_syndrome,
157   - raid6_have_sse2,
158   - "sse2x2",
159   - 1 /* Has cache hints */
160   -};
161   -
162   -#endif
163   -
164   -#if defined(__x86_64__) && !defined(__arch_um__)
165   -
166   -/*
167   - * Unrolled-by-4 SSE2 implementation
168   - */
169   -static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
170   -{
171   - u8 **dptr = (u8 **)ptrs;
172   - u8 *p, *q;
173   - int d, z, z0;
174   -
175   - z0 = disks - 3; /* Highest data disk */
176   - p = dptr[z0+1]; /* XOR parity */
177   - q = dptr[z0+2]; /* RS syndrome */
178   -
179   - kernel_fpu_begin();
180   -
181   - asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
182   - asm volatile("pxor %xmm2,%xmm2"); /* P[0] */
183   - asm volatile("pxor %xmm3,%xmm3"); /* P[1] */
184   - asm volatile("pxor %xmm4,%xmm4"); /* Q[0] */
185   - asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
186   - asm volatile("pxor %xmm6,%xmm6"); /* Q[1] */
187   - asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */
188   - asm volatile("pxor %xmm10,%xmm10"); /* P[2] */
189   - asm volatile("pxor %xmm11,%xmm11"); /* P[3] */
190   - asm volatile("pxor %xmm12,%xmm12"); /* Q[2] */
191   - asm volatile("pxor %xmm13,%xmm13"); /* Zero temp */
192   - asm volatile("pxor %xmm14,%xmm14"); /* Q[3] */
193   - asm volatile("pxor %xmm15,%xmm15"); /* Zero temp */
194   -
195   - for ( d = 0 ; d < bytes ; d += 64 ) {
196   - for ( z = z0 ; z >= 0 ; z-- ) {
197   - /* The second prefetch seems to improve performance... */
198   - asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
199   - asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
200   - asm volatile("pcmpgtb %xmm4,%xmm5");
201   - asm volatile("pcmpgtb %xmm6,%xmm7");
202   - asm volatile("pcmpgtb %xmm12,%xmm13");
203   - asm volatile("pcmpgtb %xmm14,%xmm15");
204   - asm volatile("paddb %xmm4,%xmm4");
205   - asm volatile("paddb %xmm6,%xmm6");
206   - asm volatile("paddb %xmm12,%xmm12");
207   - asm volatile("paddb %xmm14,%xmm14");
208   - asm volatile("pand %xmm0,%xmm5");
209   - asm volatile("pand %xmm0,%xmm7");
210   - asm volatile("pand %xmm0,%xmm13");
211   - asm volatile("pand %xmm0,%xmm15");
212   - asm volatile("pxor %xmm5,%xmm4");
213   - asm volatile("pxor %xmm7,%xmm6");
214   - asm volatile("pxor %xmm13,%xmm12");
215   - asm volatile("pxor %xmm15,%xmm14");
216   - asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
217   - asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
218   - asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
219   - asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
220   - asm volatile("pxor %xmm5,%xmm2");
221   - asm volatile("pxor %xmm7,%xmm3");
222   - asm volatile("pxor %xmm13,%xmm10");
223   - asm volatile("pxor %xmm15,%xmm11");
224   - asm volatile("pxor %xmm5,%xmm4");
225   - asm volatile("pxor %xmm7,%xmm6");
226   - asm volatile("pxor %xmm13,%xmm12");
227   - asm volatile("pxor %xmm15,%xmm14");
228   - asm volatile("pxor %xmm5,%xmm5");
229   - asm volatile("pxor %xmm7,%xmm7");
230   - asm volatile("pxor %xmm13,%xmm13");
231   - asm volatile("pxor %xmm15,%xmm15");
232   - }
233   - asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
234   - asm volatile("pxor %xmm2,%xmm2");
235   - asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
236   - asm volatile("pxor %xmm3,%xmm3");
237   - asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
238   - asm volatile("pxor %xmm10,%xmm10");
239   - asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
240   - asm volatile("pxor %xmm11,%xmm11");
241   - asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
242   - asm volatile("pxor %xmm4,%xmm4");
243   - asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
244   - asm volatile("pxor %xmm6,%xmm6");
245   - asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
246   - asm volatile("pxor %xmm12,%xmm12");
247   - asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
248   - asm volatile("pxor %xmm14,%xmm14");
249   - }
250   -
251   - asm volatile("sfence" : : : "memory");
252   - kernel_fpu_end();
253   -}
254   -
255   -const struct raid6_calls raid6_sse2x4 = {
256   - raid6_sse24_gen_syndrome,
257   - raid6_have_sse2,
258   - "sse2x4",
259   - 1 /* Has cache hints */
260   -};
261   -
262   -#endif
drivers/md/raid6test/Makefile
1   -#
2   -# This is a simple Makefile to test some of the RAID-6 code
3   -# from userspace.
4   -#
5   -
6   -CC = gcc
7   -OPTFLAGS = -O2 # Adjust as desired
8   -CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS)
9   -LD = ld
10   -PERL = perl
11   -AR = ar
12   -RANLIB = ranlib
13   -
14   -.c.o:
15   - $(CC) $(CFLAGS) -c -o $@ $<
16   -
17   -%.c: ../%.c
18   - cp -f $< $@
19   -
20   -%.uc: ../%.uc
21   - cp -f $< $@
22   -
23   -all: raid6.a raid6test
24   -
25   -raid6.a: raid6int1.o raid6int2.o raid6int4.o raid6int8.o raid6int16.o \
26   - raid6int32.o \
27   - raid6mmx.o raid6sse1.o raid6sse2.o \
28   - raid6altivec1.o raid6altivec2.o raid6altivec4.o raid6altivec8.o \
29   - raid6recov.o raid6algos.o \
30   - raid6tables.o
31   - rm -f $@
32   - $(AR) cq $@ $^
33   - $(RANLIB) $@
34   -
35   -raid6test: test.c raid6.a
36   - $(CC) $(CFLAGS) -o raid6test $^
37   -
38   -raid6altivec1.c: raid6altivec.uc ../unroll.pl
39   - $(PERL) ../unroll.pl 1 < raid6altivec.uc > $@
40   -
41   -raid6altivec2.c: raid6altivec.uc ../unroll.pl
42   - $(PERL) ../unroll.pl 2 < raid6altivec.uc > $@
43   -
44   -raid6altivec4.c: raid6altivec.uc ../unroll.pl
45   - $(PERL) ../unroll.pl 4 < raid6altivec.uc > $@
46   -
47   -raid6altivec8.c: raid6altivec.uc ../unroll.pl
48   - $(PERL) ../unroll.pl 8 < raid6altivec.uc > $@
49   -
50   -raid6int1.c: raid6int.uc ../unroll.pl
51   - $(PERL) ../unroll.pl 1 < raid6int.uc > $@
52   -
53   -raid6int2.c: raid6int.uc ../unroll.pl
54   - $(PERL) ../unroll.pl 2 < raid6int.uc > $@
55   -
56   -raid6int4.c: raid6int.uc ../unroll.pl
57   - $(PERL) ../unroll.pl 4 < raid6int.uc > $@
58   -
59   -raid6int8.c: raid6int.uc ../unroll.pl
60   - $(PERL) ../unroll.pl 8 < raid6int.uc > $@
61   -
62   -raid6int16.c: raid6int.uc ../unroll.pl
63   - $(PERL) ../unroll.pl 16 < raid6int.uc > $@
64   -
65   -raid6int32.c: raid6int.uc ../unroll.pl
66   - $(PERL) ../unroll.pl 32 < raid6int.uc > $@
67   -
68   -raid6tables.c: mktables
69   - ./mktables > raid6tables.c
70   -
71   -clean:
72   - rm -f *.o *.a mktables mktables.c raid6int.uc raid6*.c raid6test
73   -
74   -spotless: clean
75   - rm -f *~
drivers/md/raid6test/test.c
1   -/* -*- linux-c -*- ------------------------------------------------------- *
2   - *
3   - * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved
4   - *
5   - * This file is part of the Linux kernel, and is made available under
6   - * the terms of the GNU General Public License version 2 or (at your
7   - * option) any later version; incorporated herein by reference.
8   - *
9   - * ----------------------------------------------------------------------- */
10   -
11   -/*
12   - * raid6test.c
13   - *
14   - * Test RAID-6 recovery with various algorithms
15   - */
16   -
17   -#include <stdlib.h>
18   -#include <stdio.h>
19   -#include <string.h>
20   -#include <linux/raid/pq.h>
21   -
22   -#define NDISKS 16 /* Including P and Q */
23   -
24   -const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
25   -struct raid6_calls raid6_call;
26   -
27   -char *dataptrs[NDISKS];
28   -char data[NDISKS][PAGE_SIZE];
29   -char recovi[PAGE_SIZE], recovj[PAGE_SIZE];
30   -
31   -static void makedata(void)
32   -{
33   - int i, j;
34   -
35   - for (i = 0; i < NDISKS; i++) {
36   - for (j = 0; j < PAGE_SIZE; j++)
37   - data[i][j] = rand();
38   -
39   - dataptrs[i] = data[i];
40   - }
41   -}
42   -
43   -static char disk_type(int d)
44   -{
45   - switch (d) {
46   - case NDISKS-2:
47   - return 'P';
48   - case NDISKS-1:
49   - return 'Q';
50   - default:
51   - return 'D';
52   - }
53   -}
54   -
55   -static int test_disks(int i, int j)
56   -{
57   - int erra, errb;
58   -
59   - memset(recovi, 0xf0, PAGE_SIZE);
60   - memset(recovj, 0xba, PAGE_SIZE);
61   -
62   - dataptrs[i] = recovi;
63   - dataptrs[j] = recovj;
64   -
65   - raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs);
66   -
67   - erra = memcmp(data[i], recovi, PAGE_SIZE);
68   - errb = memcmp(data[j], recovj, PAGE_SIZE);
69   -
70   - if (i < NDISKS-2 && j == NDISKS-1) {
71   - /* We don't implement the DQ failure scenario, since it's
72   - equivalent to a RAID-5 failure (XOR, then recompute Q) */
73   - erra = errb = 0;
74   - } else {
75   - printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n",
76   - raid6_call.name,
77   - i, disk_type(i),
78   - j, disk_type(j),
79   - (!erra && !errb) ? "OK" :
80   - !erra ? "ERRB" :
81   - !errb ? "ERRA" : "ERRAB");
82   - }
83   -
84   - dataptrs[i] = data[i];
85   - dataptrs[j] = data[j];
86   -
87   - return erra || errb;
88   -}
89   -
90   -int main(int argc, char *argv[])
91   -{
92   - const struct raid6_calls *const *algo;
93   - int i, j;
94   - int err = 0;
95   -
96   - makedata();
97   -
98   - for (algo = raid6_algos; *algo; algo++) {
99   - if (!(*algo)->valid || (*algo)->valid()) {
100   - raid6_call = **algo;
101   -
102   - /* Nuke syndromes */
103   - memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
104   -
105   - /* Generate assumed good syndrome */
106   - raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
107   - (void **)&dataptrs);
108   -
109   - for (i = 0; i < NDISKS-1; i++)
110   - for (j = i+1; j < NDISKS; j++)
111   - err += test_disks(i, j);
112   - }
113   - printf("\n");
114   - }
115   -
116   - printf("\n");
117   - /* Pick the best algorithm test */
118   - raid6_select_algo();
119   -
120   - if (err)
121   - printf("\n*** ERRORS FOUND ***\n");
122   -
123   - return err;
124   -}
drivers/md/raid6x86.h
1   -/* ----------------------------------------------------------------------- *
2   - *
3   - * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
4   - *
5   - * This program is free software; you can redistribute it and/or modify
6   - * it under the terms of the GNU General Public License as published by
7   - * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8   - * Boston MA 02111-1307, USA; either version 2 of the License, or
9   - * (at your option) any later version; incorporated herein by reference.
10   - *
11   - * ----------------------------------------------------------------------- */
12   -
13   -/*
14   - * raid6x86.h
15   - *
16   - * Definitions common to x86 and x86-64 RAID-6 code only
17   - */
18   -
19   -#ifndef LINUX_RAID_RAID6X86_H
20   -#define LINUX_RAID_RAID6X86_H
21   -
22   -#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
23   -
24   -#ifdef __KERNEL__ /* Real code */
25   -
26   -#include <asm/i387.h>
27   -
28   -#else /* Dummy code for user space testing */
29   -
30   -static inline void kernel_fpu_begin(void)
31   -{
32   -}
33   -
34   -static inline void kernel_fpu_end(void)
35   -{
36   -}
37   -
38   -#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
39   -#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions
40   - * (fast save and restore) */
41   -#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */
42   -#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */
43   -#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
44   -
45   -/* Should work well enough on modern CPUs for testing */
46   -static inline int boot_cpu_has(int flag)
47   -{
48   - u32 eax = (flag >> 5) ? 0x80000001 : 1;
49   - u32 edx;
50   -
51   - asm volatile("cpuid"
52   - : "+a" (eax), "=d" (edx)
53   - : : "ecx", "ebx");
54   -
55   - return (edx >> (flag & 31)) & 1;
56   -}
57   -
58   -#endif /* ndef __KERNEL__ */
59   -
60   -#endif
61   -#endif
drivers/md/unroll.pl
1   -#!/usr/bin/perl
2   -#
3   -# Take a piece of C code and for each line which contains the sequence $$
4   -# repeat n times with $ replaced by 0...n-1; the sequence $# is replaced
5   -# by the unrolling factor, and $* with a single $
6   -#
7   -
8   -($n) = @ARGV;
9   -$n += 0;
10   -
11   -while ( defined($line = <STDIN>) ) {
12   - if ( $line =~ /\$\$/ ) {
13   - $rep = $n;
14   - } else {
15   - $rep = 1;
16   - }
17   - for ( $i = 0 ; $i < $rep ; $i++ ) {
18   - $tmp = $line;
19   - $tmp =~ s/\$\$/$i/g;
20   - $tmp =~ s/\$\#/$n/g;
21   - $tmp =~ s/\$\*/\$/g;
22   - print $tmp;
23   - }
24   -}
... ... @@ -7,6 +7,9 @@
7 7  
8 8 menu "Library routines"
9 9  
  10 +config RAID6_PQ
  11 + tristate
  12 +
10 13 config BITREVERSE
11 14 tristate
12 15  
... ... @@ -65,6 +65,7 @@
65 65 obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
66 66 obj-$(CONFIG_LZO_COMPRESS) += lzo/
67 67 obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
  68 +obj-$(CONFIG_RAID6_PQ) += raid6/
68 69  
69 70 lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o
70 71 lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o
  1 +obj-$(CONFIG_RAID6_PQ) += raid6_pq.o
  2 +
  3 +raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \
  4 + raid6int1.o raid6int2.o raid6int4.o \
  5 + raid6int8.o raid6int16.o raid6int32.o \
  6 + raid6altivec1.o raid6altivec2.o raid6altivec4.o \
  7 + raid6altivec8.o \
  8 + raid6mmx.o raid6sse1.o raid6sse2.o
  9 +hostprogs-y += mktables
  10 +
  11 +quiet_cmd_unroll = UNROLL $@
  12 + cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
  13 + < $< > $@ || ( rm -f $@ && exit 1 )
  14 +
  15 +ifeq ($(CONFIG_ALTIVEC),y)
  16 +altivec_flags := -maltivec -mabi=altivec
  17 +endif
  18 +
  19 +targets += raid6int1.c
  20 +$(obj)/raid6int1.c: UNROLL := 1
  21 +$(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE
  22 + $(call if_changed,unroll)
  23 +
  24 +targets += raid6int2.c
  25 +$(obj)/raid6int2.c: UNROLL := 2
  26 +$(obj)/raid6int2.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE
  27 + $(call if_changed,unroll)
  28 +
  29 +targets += raid6int4.c
  30 +$(obj)/raid6int4.c: UNROLL := 4
  31 +$(obj)/raid6int4.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE
  32 + $(call if_changed,unroll)
  33 +
  34 +targets += raid6int8.c
  35 +$(obj)/raid6int8.c: UNROLL := 8
  36 +$(obj)/raid6int8.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE
  37 + $(call if_changed,unroll)
  38 +
  39 +targets += raid6int16.c
  40 +$(obj)/raid6int16.c: UNROLL := 16
  41 +$(obj)/raid6int16.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE
  42 + $(call if_changed,unroll)
  43 +
  44 +targets += raid6int32.c
  45 +$(obj)/raid6int32.c: UNROLL := 32
  46 +$(obj)/raid6int32.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE
  47 + $(call if_changed,unroll)
  48 +
  49 +CFLAGS_raid6altivec1.o += $(altivec_flags)
  50 +targets += raid6altivec1.c
  51 +$(obj)/raid6altivec1.c: UNROLL := 1
  52 +$(obj)/raid6altivec1.c: $(src)/raid6altivec.uc $(src)/unroll.pl FORCE
  53 + $(call if_changed,unroll)
  54 +
  55 +CFLAGS_raid6altivec2.o += $(altivec_flags)
  56 +targets += raid6altivec2.c
  57 +$(obj)/raid6altivec2.c: UNROLL := 2
  58 +$(obj)/raid6altivec2.c: $(src)/raid6altivec.uc $(src)/unroll.pl FORCE
  59 + $(call if_changed,unroll)
  60 +
  61 +CFLAGS_raid6altivec4.o += $(altivec_flags)
  62 +targets += raid6altivec4.c
  63 +$(obj)/raid6altivec4.c: UNROLL := 4
  64 +$(obj)/raid6altivec4.c: $(src)/raid6altivec.uc $(src)/unroll.pl FORCE
  65 + $(call if_changed,unroll)
  66 +
  67 +CFLAGS_raid6altivec8.o += $(altivec_flags)
  68 +targets += raid6altivec8.c
  69 +$(obj)/raid6altivec8.c: UNROLL := 8
  70 +$(obj)/raid6altivec8.c: $(src)/raid6altivec.uc $(src)/unroll.pl FORCE
  71 + $(call if_changed,unroll)
  72 +
  73 +quiet_cmd_mktable = TABLE $@
  74 + cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
  75 +
  76 +targets += raid6tables.c
  77 +$(obj)/raid6tables.c: $(obj)/mktables FORCE
  78 + $(call if_changed,mktable)
lib/raid6/mktables.c
  1 +/* -*- linux-c -*- ------------------------------------------------------- *
  2 + *
  3 + * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved
  4 + *
  5 + * This file is part of the Linux kernel, and is made available under
  6 + * the terms of the GNU General Public License version 2 or (at your
  7 + * option) any later version; incorporated herein by reference.
  8 + *
  9 + * ----------------------------------------------------------------------- */
  10 +
  11 +/*
  12 + * mktables.c
  13 + *
  14 + * Make RAID-6 tables. This is a host user space program to be run at
  15 + * compile time.
  16 + */
  17 +
  18 +#include <stdio.h>
  19 +#include <string.h>
  20 +#include <inttypes.h>
  21 +#include <stdlib.h>
  22 +#include <time.h>
  23 +
  24 +static uint8_t gfmul(uint8_t a, uint8_t b)
  25 +{
  26 + uint8_t v = 0;
  27 +
  28 + while (b) {
  29 + if (b & 1)
  30 + v ^= a;
  31 + a = (a << 1) ^ (a & 0x80 ? 0x1d : 0);
  32 + b >>= 1;
  33 + }
  34 +
  35 + return v;
  36 +}
  37 +
  38 +static uint8_t gfpow(uint8_t a, int b)
  39 +{
  40 + uint8_t v = 1;
  41 +
  42 + b %= 255;
  43 + if (b < 0)
  44 + b += 255;
  45 +
  46 + while (b) {
  47 + if (b & 1)
  48 + v = gfmul(v, a);
  49 + a = gfmul(a, a);
  50 + b >>= 1;
  51 + }
  52 +
  53 + return v;
  54 +}
  55 +
  56 +int main(int argc, char *argv[])
  57 +{
  58 + int i, j, k;
  59 + uint8_t v;
  60 + uint8_t exptbl[256], invtbl[256];
  61 +
  62 + printf("#include <linux/raid/pq.h>\n");
  63 +
  64 + /* Compute multiplication table */
  65 + printf("\nconst u8 __attribute__((aligned(256)))\n"
  66 + "raid6_gfmul[256][256] =\n"
  67 + "{\n");
  68 + for (i = 0; i < 256; i++) {
  69 + printf("\t{\n");
  70 + for (j = 0; j < 256; j += 8) {
  71 + printf("\t\t");
  72 + for (k = 0; k < 8; k++)
  73 + printf("0x%02x,%c", gfmul(i, j + k),
  74 + (k == 7) ? '\n' : ' ');
  75 + }
  76 + printf("\t},\n");
  77 + }
  78 + printf("};\n");
  79 + printf("#ifdef __KERNEL__\n");
  80 + printf("EXPORT_SYMBOL(raid6_gfmul);\n");
  81 + printf("#endif\n");
  82 +
  83 + /* Compute power-of-2 table (exponent) */
  84 + v = 1;
  85 + printf("\nconst u8 __attribute__((aligned(256)))\n"
  86 + "raid6_gfexp[256] =\n" "{\n");
  87 + for (i = 0; i < 256; i += 8) {
  88 + printf("\t");
  89 + for (j = 0; j < 8; j++) {
  90 + exptbl[i + j] = v;
  91 + printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
  92 + v = gfmul(v, 2);
  93 + if (v == 1)
  94 + v = 0; /* For entry 255, not a real entry */
  95 + }
  96 + }
  97 + printf("};\n");
  98 + printf("#ifdef __KERNEL__\n");
  99 + printf("EXPORT_SYMBOL(raid6_gfexp);\n");
  100 + printf("#endif\n");
  101 +
  102 + /* Compute inverse table x^-1 == x^254 */
  103 + printf("\nconst u8 __attribute__((aligned(256)))\n"
  104 + "raid6_gfinv[256] =\n" "{\n");
  105 + for (i = 0; i < 256; i += 8) {
  106 + printf("\t");
  107 + for (j = 0; j < 8; j++) {
  108 + invtbl[i + j] = v = gfpow(i + j, 254);
  109 + printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
  110 + }
  111 + }
  112 + printf("};\n");
  113 + printf("#ifdef __KERNEL__\n");
  114 + printf("EXPORT_SYMBOL(raid6_gfinv);\n");
  115 + printf("#endif\n");
  116 +
  117 + /* Compute inv(2^x + 1) (exponent-xor-inverse) table */
  118 + printf("\nconst u8 __attribute__((aligned(256)))\n"
  119 + "raid6_gfexi[256] =\n" "{\n");
  120 + for (i = 0; i < 256; i += 8) {
  121 + printf("\t");
  122 + for (j = 0; j < 8; j++)
  123 + printf("0x%02x,%c", invtbl[exptbl[i + j] ^ 1],
  124 + (j == 7) ? '\n' : ' ');
  125 + }
  126 + printf("};\n");
  127 + printf("#ifdef __KERNEL__\n");
  128 + printf("EXPORT_SYMBOL(raid6_gfexi);\n");
  129 + printf("#endif\n");
  130 +
  131 + return 0;
  132 +}
lib/raid6/raid6algos.c
  1 +/* -*- linux-c -*- ------------------------------------------------------- *
  2 + *
  3 + * Copyright 2002 H. Peter Anvin - All Rights Reserved
  4 + *
  5 + * This program is free software; you can redistribute it and/or modify
  6 + * it under the terms of the GNU General Public License as published by
  7 + * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
  8 + * Boston MA 02111-1307, USA; either version 2 of the License, or
  9 + * (at your option) any later version; incorporated herein by reference.
  10 + *
  11 + * ----------------------------------------------------------------------- */
  12 +
  13 +/*
  14 + * raid6algos.c
  15 + *
  16 + * Algorithm list and algorithm selection for RAID-6
  17 + */
  18 +
  19 +#include <linux/raid/pq.h>
  20 +#ifndef __KERNEL__
  21 +#include <sys/mman.h>
  22 +#include <stdio.h>
  23 +#else
  24 +#if !RAID6_USE_EMPTY_ZERO_PAGE
  25 +/* In .bss so it's zeroed */
  26 +const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
  27 +EXPORT_SYMBOL(raid6_empty_zero_page);
  28 +#endif
  29 +#endif
  30 +
  31 +struct raid6_calls raid6_call;
  32 +EXPORT_SYMBOL_GPL(raid6_call);
  33 +
  34 +/* Various routine sets */
  35 +extern const struct raid6_calls raid6_intx1;
  36 +extern const struct raid6_calls raid6_intx2;
  37 +extern const struct raid6_calls raid6_intx4;
  38 +extern const struct raid6_calls raid6_intx8;
  39 +extern const struct raid6_calls raid6_intx16;
  40 +extern const struct raid6_calls raid6_intx32;
  41 +extern const struct raid6_calls raid6_mmxx1;
  42 +extern const struct raid6_calls raid6_mmxx2;
  43 +extern const struct raid6_calls raid6_sse1x1;
  44 +extern const struct raid6_calls raid6_sse1x2;
  45 +extern const struct raid6_calls raid6_sse2x1;
  46 +extern const struct raid6_calls raid6_sse2x2;
  47 +extern const struct raid6_calls raid6_sse2x4;
  48 +extern const struct raid6_calls raid6_altivec1;
  49 +extern const struct raid6_calls raid6_altivec2;
  50 +extern const struct raid6_calls raid6_altivec4;
  51 +extern const struct raid6_calls raid6_altivec8;
  52 +
  53 +const struct raid6_calls * const raid6_algos[] = {
  54 + &raid6_intx1,
  55 + &raid6_intx2,
  56 + &raid6_intx4,
  57 + &raid6_intx8,
  58 +#if defined(__ia64__)
  59 + &raid6_intx16,
  60 + &raid6_intx32,
  61 +#endif
  62 +#if defined(__i386__) && !defined(__arch_um__)
  63 + &raid6_mmxx1,
  64 + &raid6_mmxx2,
  65 + &raid6_sse1x1,
  66 + &raid6_sse1x2,
  67 + &raid6_sse2x1,
  68 + &raid6_sse2x2,
  69 +#endif
  70 +#if defined(__x86_64__) && !defined(__arch_um__)
  71 + &raid6_sse2x1,
  72 + &raid6_sse2x2,
  73 + &raid6_sse2x4,
  74 +#endif
  75 +#ifdef CONFIG_ALTIVEC
  76 + &raid6_altivec1,
  77 + &raid6_altivec2,
  78 + &raid6_altivec4,
  79 + &raid6_altivec8,
  80 +#endif
  81 + NULL
  82 +};
  83 +
  84 +#ifdef __KERNEL__
  85 +#define RAID6_TIME_JIFFIES_LG2 4
  86 +#else
  87 +/* Need more time to be stable in userspace */
  88 +#define RAID6_TIME_JIFFIES_LG2 9
  89 +#define time_before(x, y) ((x) < (y))
  90 +#endif
  91 +
  92 +/* Try to pick the best algorithm */
  93 +/* This code uses the gfmul table as convenient data set to abuse */
  94 +
  95 +int __init raid6_select_algo(void)
  96 +{
  97 + const struct raid6_calls * const * algo;
  98 + const struct raid6_calls * best;
  99 + char *syndromes;
  100 + void *dptrs[(65536/PAGE_SIZE)+2];
  101 + int i, disks;
  102 + unsigned long perf, bestperf;
  103 + int bestprefer;
  104 + unsigned long j0, j1;
  105 +
  106 + disks = (65536/PAGE_SIZE)+2;
  107 + for ( i = 0 ; i < disks-2 ; i++ ) {
  108 + dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
  109 + }
  110 +
  111 + /* Normal code - use a 2-page allocation to avoid D$ conflict */
  112 + syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
  113 +
  114 + if ( !syndromes ) {
  115 + printk("raid6: Yikes! No memory available.\n");
  116 + return -ENOMEM;
  117 + }
  118 +
  119 + dptrs[disks-2] = syndromes;
  120 + dptrs[disks-1] = syndromes + PAGE_SIZE;
  121 +
  122 + bestperf = 0; bestprefer = 0; best = NULL;
  123 +
  124 + for ( algo = raid6_algos ; *algo ; algo++ ) {
  125 + if ( !(*algo)->valid || (*algo)->valid() ) {
  126 + perf = 0;
  127 +
  128 + preempt_disable();
  129 + j0 = jiffies;
  130 + while ( (j1 = jiffies) == j0 )
  131 + cpu_relax();
  132 + while (time_before(jiffies,
  133 + j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
  134 + (*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs);
  135 + perf++;
  136 + }
  137 + preempt_enable();
  138 +
  139 + if ( (*algo)->prefer > bestprefer ||
  140 + ((*algo)->prefer == bestprefer &&
  141 + perf > bestperf) ) {
  142 + best = *algo;
  143 + bestprefer = best->prefer;
  144 + bestperf = perf;
  145 + }
  146 + printk("raid6: %-8s %5ld MB/s\n", (*algo)->name,
  147 + (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
  148 + }
  149 + }
  150 +
  151 + if (best) {
  152 + printk("raid6: using algorithm %s (%ld MB/s)\n",
  153 + best->name,
  154 + (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
  155 + raid6_call = *best;
  156 + } else
  157 + printk("raid6: Yikes! No algorithm found!\n");
  158 +
  159 + free_pages((unsigned long)syndromes, 1);
  160 +
  161 + return best ? 0 : -EINVAL;
  162 +}
  163 +
  164 +static void raid6_exit(void)
  165 +{
  166 + do { } while (0);
  167 +}
  168 +
  169 +subsys_initcall(raid6_select_algo);
  170 +module_exit(raid6_exit);
  171 +MODULE_LICENSE("GPL");
lib/raid6/raid6altivec.uc
  1 +/* -*- linux-c -*- ------------------------------------------------------- *
  2 + *
  3 + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
  4 + *
  5 + * This program is free software; you can redistribute it and/or modify
  6 + * it under the terms of the GNU General Public License as published by
  7 + * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
  8 + * Boston MA 02111-1307, USA; either version 2 of the License, or
  9 + * (at your option) any later version; incorporated herein by reference.
  10 + *
  11 + * ----------------------------------------------------------------------- */
  12 +
  13 +/*
  14 + * raid6altivec$#.c
  15 + *
  16 + * $#-way unrolled portable integer math RAID-6 instruction set
  17 + *
  18 + * This file is postprocessed using unroll.pl
  19 + *
  20 + * <benh> hpa: in process,
  21 + * you can just "steal" the vec unit with enable_kernel_altivec() (but
  22 + * bracked this with preempt_disable/enable or in a lock)
  23 + */
  24 +
  25 +#include <linux/raid/pq.h>
  26 +
  27 +#ifdef CONFIG_ALTIVEC
  28 +
  29 +#include <altivec.h>
  30 +#ifdef __KERNEL__
  31 +# include <asm/system.h>
  32 +# include <asm/cputable.h>
  33 +#endif
  34 +
  35 +/*
  36 + * This is the C data type to use. We use a vector of
  37 + * signed char so vec_cmpgt() will generate the right
  38 + * instruction.
  39 + */
  40 +
  41 +typedef vector signed char unative_t;
  42 +
  43 +#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})
  44 +#define NSIZE sizeof(unative_t)
  45 +
  46 +/*
  47 + * The SHLBYTE() operation shifts each byte left by 1, *not*
  48 + * rolling over into the next byte
  49 + */
  50 +static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
  51 +{
  52 + return vec_add(v,v);
  53 +}
  54 +
  55 +/*
  56 + * The MASK() operation returns 0xFF in any byte for which the high
  57 + * bit is 1, 0x00 for any byte for which the high bit is 0.
  58 + */
  59 +static inline __attribute_const__ unative_t MASK(unative_t v)
  60 +{
  61 + unative_t zv = NBYTES(0);
  62 +
  63 + /* vec_cmpgt returns a vector bool char; thus the need for the cast */
  64 + return (unative_t)vec_cmpgt(zv, v);
  65 +}
  66 +
  67 +
  68 +/* This is noinline to make damned sure that gcc doesn't move any of the
  69 + Altivec code around the enable/disable code */
  70 +static void noinline
  71 +raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs)
  72 +{
  73 + u8 **dptr = (u8 **)ptrs;
  74 + u8 *p, *q;
  75 + int d, z, z0;
  76 +
  77 + unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
  78 + unative_t x1d = NBYTES(0x1d);
  79 +
  80 + z0 = disks - 3; /* Highest data disk */
  81 + p = dptr[z0+1]; /* XOR parity */
  82 + q = dptr[z0+2]; /* RS syndrome */
  83 +
  84 + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
  85 + wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
  86 + for ( z = z0-1 ; z >= 0 ; z-- ) {
  87 + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
  88 + wp$$ = vec_xor(wp$$, wd$$);
  89 + w2$$ = MASK(wq$$);
  90 + w1$$ = SHLBYTE(wq$$);
  91 + w2$$ = vec_and(w2$$, x1d);
  92 + w1$$ = vec_xor(w1$$, w2$$);
  93 + wq$$ = vec_xor(w1$$, wd$$);
  94 + }
  95 + *(unative_t *)&p[d+NSIZE*$$] = wp$$;
  96 + *(unative_t *)&q[d+NSIZE*$$] = wq$$;
  97 + }
  98 +}
  99 +
  100 +static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
  101 +{
  102 + preempt_disable();
  103 + enable_kernel_altivec();
  104 +
  105 + raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs);
  106 +
  107 + preempt_enable();
  108 +}
  109 +
  110 +int raid6_have_altivec(void);
  111 +#if $# == 1
  112 +int raid6_have_altivec(void)
  113 +{
  114 + /* This assumes either all CPUs have Altivec or none does */
  115 +# ifdef __KERNEL__
  116 + return cpu_has_feature(CPU_FTR_ALTIVEC);
  117 +# else
  118 + return 1;
  119 +# endif
  120 +}
  121 +#endif
  122 +
  123 +const struct raid6_calls raid6_altivec$# = {
  124 + raid6_altivec$#_gen_syndrome,
  125 + raid6_have_altivec,
  126 + "altivecx$#",
  127 + 0
  128 +};
  129 +
  130 +#endif /* CONFIG_ALTIVEC */
lib/raid6/raid6int.uc
  1 +/* -*- linux-c -*- ------------------------------------------------------- *
  2 + *
  3 + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
  4 + *
  5 + * This program is free software; you can redistribute it and/or modify
  6 + * it under the terms of the GNU General Public License as published by
  7 + * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
  8 + * Boston MA 02111-1307, USA; either version 2 of the License, or
  9 + * (at your option) any later version; incorporated herein by reference.
  10 + *
  11 + * ----------------------------------------------------------------------- */
  12 +
  13 +/*
  14 + * raid6int$#.c
  15 + *
  16 + * $#-way unrolled portable integer math RAID-6 instruction set
  17 + *
  18 + * This file is postprocessed using unroll.pl
  19 + */
  20 +
  21 +#include <linux/raid/pq.h>
  22 +
  23 +/*
  24 + * This is the C data type to use
  25 + */
  26 +
  27 +/* Change this from BITS_PER_LONG if there is something better... */
  28 +#if BITS_PER_LONG == 64
  29 +# define NBYTES(x) ((x) * 0x0101010101010101UL)
  30 +# define NSIZE 8
  31 +# define NSHIFT 3
  32 +# define NSTRING "64"
  33 +typedef u64 unative_t;
  34 +#else
  35 +# define NBYTES(x) ((x) * 0x01010101U)
  36 +# define NSIZE 4
  37 +# define NSHIFT 2
  38 +# define NSTRING "32"
  39 +typedef u32 unative_t;
  40 +#endif
  41 +
  42 +
  43 +
  44 +/*
  45 + * IA-64 wants insane amounts of unrolling. On other architectures that
  46 + * is just a waste of space.
  47 + */
  48 +#if ($# <= 8) || defined(__ia64__)
  49 +
  50 +
  51 +/*
  52 + * These sub-operations are separate inlines since they can sometimes be
  53 + * specially optimized using architecture-specific hacks.
  54 + */
  55 +
  56 +/*
  57 + * The SHLBYTE() operation shifts each byte left by 1, *not*
  58 + * rolling over into the next byte
  59 + */
  60 +static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
  61 +{
  62 + unative_t vv;
  63 +
  64 + vv = (v << 1) & NBYTES(0xfe);
  65 + return vv;
  66 +}
  67 +
  68 +/*
  69 + * The MASK() operation returns 0xFF in any byte for which the high
  70 + * bit is 1, 0x00 for any byte for which the high bit is 0.
  71 + */
  72 +static inline __attribute_const__ unative_t MASK(unative_t v)
  73 +{
  74 + unative_t vv;
  75 +
  76 + vv = v & NBYTES(0x80);
  77 + vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */
  78 + return vv;
  79 +}
  80 +
  81 +
  82 +static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
  83 +{
  84 + u8 **dptr = (u8 **)ptrs;
  85 + u8 *p, *q;
  86 + int d, z, z0;
  87 +
  88 + unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
  89 +
  90 + z0 = disks - 3; /* Highest data disk */
  91 + p = dptr[z0+1]; /* XOR parity */
  92 + q = dptr[z0+2]; /* RS syndrome */
  93 +
  94 + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
  95 + wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
  96 + for ( z = z0-1 ; z >= 0 ; z-- ) {
  97 + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
  98 + wp$$ ^= wd$$;
  99 + w2$$ = MASK(wq$$);
  100 + w1$$ = SHLBYTE(wq$$);
  101 + w2$$ &= NBYTES(0x1d);
  102 + w1$$ ^= w2$$;
  103 + wq$$ = w1$$ ^ wd$$;
  104 + }
  105 + *(unative_t *)&p[d+NSIZE*$$] = wp$$;
  106 + *(unative_t *)&q[d+NSIZE*$$] = wq$$;
  107 + }
  108 +}
  109 +
  110 +const struct raid6_calls raid6_intx$# = {
  111 + raid6_int$#_gen_syndrome,
  112 + NULL, /* always valid */
  113 + "int" NSTRING "x$#",
  114 + 0
  115 +};
  116 +
  117 +#endif
lib/raid6/raid6mmx.c
  1 +/* -*- linux-c -*- ------------------------------------------------------- *
  2 + *
  3 + * Copyright 2002 H. Peter Anvin - All Rights Reserved
  4 + *
  5 + * This program is free software; you can redistribute it and/or modify
  6 + * it under the terms of the GNU General Public License as published by
  7 + * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
  8 + * Boston MA 02111-1307, USA; either version 2 of the License, or
  9 + * (at your option) any later version; incorporated herein by reference.
  10 + *
  11 + * ----------------------------------------------------------------------- */
  12 +
  13 +/*
  14 + * raid6mmx.c
  15 + *
  16 + * MMX implementation of RAID-6 syndrome functions
  17 + */
  18 +
  19 +#if defined(__i386__) && !defined(__arch_um__)
  20 +
  21 +#include <linux/raid/pq.h>
  22 +#include "raid6x86.h"
  23 +
  24 +/* Shared with raid6sse1.c */
  25 +const struct raid6_mmx_constants {
  26 + u64 x1d;
  27 +} raid6_mmx_constants = {
  28 + 0x1d1d1d1d1d1d1d1dULL,
  29 +};
  30 +
  31 +static int raid6_have_mmx(void)
  32 +{
  33 + /* Not really "boot_cpu" but "all_cpus" */
  34 + return boot_cpu_has(X86_FEATURE_MMX);
  35 +}
  36 +
  37 +/*
  38 + * Plain MMX implementation
  39 + */
  40 +static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
  41 +{
  42 + u8 **dptr = (u8 **)ptrs;
  43 + u8 *p, *q;
  44 + int d, z, z0;
  45 +
  46 + z0 = disks - 3; /* Highest data disk */
  47 + p = dptr[z0+1]; /* XOR parity */
  48 + q = dptr[z0+2]; /* RS syndrome */
  49 +
  50 + kernel_fpu_begin();
  51 +
  52 + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
  53 + asm volatile("pxor %mm5,%mm5"); /* Zero temp */
  54 +
  55 + for ( d = 0 ; d < bytes ; d += 8 ) {
  56 + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
  57 + asm volatile("movq %mm2,%mm4"); /* Q[0] */
  58 + for ( z = z0-1 ; z >= 0 ; z-- ) {
  59 + asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
  60 + asm volatile("pcmpgtb %mm4,%mm5");
  61 + asm volatile("paddb %mm4,%mm4");
  62 + asm volatile("pand %mm0,%mm5");
  63 + asm volatile("pxor %mm5,%mm4");
  64 + asm volatile("pxor %mm5,%mm5");
  65 + asm volatile("pxor %mm6,%mm2");
  66 + asm volatile("pxor %mm6,%mm4");
  67 + }
  68 + asm volatile("movq %%mm2,%0" : "=m" (p[d]));
  69 + asm volatile("pxor %mm2,%mm2");
  70 + asm volatile("movq %%mm4,%0" : "=m" (q[d]));
  71 + asm volatile("pxor %mm4,%mm4");
  72 + }
  73 +
  74 + kernel_fpu_end();
  75 +}
  76 +
  77 +const struct raid6_calls raid6_mmxx1 = {
  78 + raid6_mmx1_gen_syndrome,
  79 + raid6_have_mmx,
  80 + "mmxx1",
  81 + 0
  82 +};
  83 +
  84 +/*
  85 + * Unrolled-by-2 MMX implementation
  86 + */
  87 +static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
  88 +{
  89 + u8 **dptr = (u8 **)ptrs;
  90 + u8 *p, *q;
  91 + int d, z, z0;
  92 +
  93 + z0 = disks - 3; /* Highest data disk */
  94 + p = dptr[z0+1]; /* XOR parity */
  95 + q = dptr[z0+2]; /* RS syndrome */
  96 +
  97 + kernel_fpu_begin();
  98 +
  99 + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
  100 + asm volatile("pxor %mm5,%mm5"); /* Zero temp */
  101 + asm volatile("pxor %mm7,%mm7"); /* Zero temp */
  102 +
  103 + for ( d = 0 ; d < bytes ; d += 16 ) {
  104 + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
  105 + asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8]));
  106 + asm volatile("movq %mm2,%mm4"); /* Q[0] */
  107 + asm volatile("movq %mm3,%mm6"); /* Q[1] */
  108 + for ( z = z0-1 ; z >= 0 ; z-- ) {
  109 + asm volatile("pcmpgtb %mm4,%mm5");
  110 + asm volatile("pcmpgtb %mm6,%mm7");
  111 + asm volatile("paddb %mm4,%mm4");
  112 + asm volatile("paddb %mm6,%mm6");
  113 + asm volatile("pand %mm0,%mm5");
  114 + asm volatile("pand %mm0,%mm7");
  115 + asm volatile("pxor %mm5,%mm4");
  116 + asm volatile("pxor %mm7,%mm6");
  117 + asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
  118 + asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
  119 + asm volatile("pxor %mm5,%mm2");
  120 + asm volatile("pxor %mm7,%mm3");
  121 + asm volatile("pxor %mm5,%mm4");
  122 + asm volatile("pxor %mm7,%mm6");
  123 + asm volatile("pxor %mm5,%mm5");
  124 + asm volatile("pxor %mm7,%mm7");
  125 + }
  126 + asm volatile("movq %%mm2,%0" : "=m" (p[d]));
  127 + asm volatile("movq %%mm3,%0" : "=m" (p[d+8]));
  128 + asm volatile("movq %%mm4,%0" : "=m" (q[d]));
  129 + asm volatile("movq %%mm6,%0" : "=m" (q[d+8]));
  130 + }
  131 +
  132 + kernel_fpu_end();
  133 +}
  134 +
  135 +const struct raid6_calls raid6_mmxx2 = {
  136 + raid6_mmx2_gen_syndrome,
  137 + raid6_have_mmx,
  138 + "mmxx2",
  139 + 0
  140 +};
  141 +
  142 +#endif
lib/raid6/raid6recov.c
  1 +/* -*- linux-c -*- ------------------------------------------------------- *
  2 + *
  3 + * Copyright 2002 H. Peter Anvin - All Rights Reserved
  4 + *
  5 + * This program is free software; you can redistribute it and/or modify
  6 + * it under the terms of the GNU General Public License as published by
  7 + * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
  8 + * Boston MA 02111-1307, USA; either version 2 of the License, or
  9 + * (at your option) any later version; incorporated herein by reference.
  10 + *
  11 + * ----------------------------------------------------------------------- */
  12 +
  13 +/*
  14 + * raid6recov.c
  15 + *
  16 + * RAID-6 data recovery in dual failure mode. In single failure mode,
  17 + * use the RAID-5 algorithm (or, in the case of Q failure, just reconstruct
  18 + * the syndrome.)
  19 + */
  20 +
  21 +#include <linux/raid/pq.h>
  22 +
  23 +/* Recover two failed data blocks. */
  24 +void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
  25 + void **ptrs)
  26 +{
  27 + u8 *p, *q, *dp, *dq;
  28 + u8 px, qx, db;
  29 + const u8 *pbmul; /* P multiplier table for B data */
  30 + const u8 *qmul; /* Q multiplier table (for both) */
  31 +
  32 + p = (u8 *)ptrs[disks-2];
  33 + q = (u8 *)ptrs[disks-1];
  34 +
  35 + /* Compute syndrome with zero for the missing data pages
  36 + Use the dead data pages as temporary storage for
  37 + delta p and delta q */
  38 + dp = (u8 *)ptrs[faila];
  39 + ptrs[faila] = (void *)raid6_empty_zero_page;
  40 + ptrs[disks-2] = dp;
  41 + dq = (u8 *)ptrs[failb];
  42 + ptrs[failb] = (void *)raid6_empty_zero_page;
  43 + ptrs[disks-1] = dq;
  44 +
  45 + raid6_call.gen_syndrome(disks, bytes, ptrs);
  46 +
  47 + /* Restore pointer table */
  48 + ptrs[faila] = dp;
  49 + ptrs[failb] = dq;
  50 + ptrs[disks-2] = p;
  51 + ptrs[disks-1] = q;
  52 +
  53 + /* Now, pick the proper data tables */
  54 + pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
  55 + qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
  56 +
  57 + /* Now do it... */
  58 + while ( bytes-- ) {
  59 + px = *p ^ *dp;
  60 + qx = qmul[*q ^ *dq];
  61 + *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
  62 + *dp++ = db ^ px; /* Reconstructed A */
  63 + p++; q++;
  64 + }
  65 +}
  66 +EXPORT_SYMBOL_GPL(raid6_2data_recov);
  67 +
  68 +/* Recover failure of one data block plus the P block */
  69 +void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
  70 +{
  71 + u8 *p, *q, *dq;
  72 + const u8 *qmul; /* Q multiplier table */
  73 +
  74 + p = (u8 *)ptrs[disks-2];
  75 + q = (u8 *)ptrs[disks-1];
  76 +
  77 + /* Compute syndrome with zero for the missing data page
  78 + Use the dead data page as temporary storage for delta q */
  79 + dq = (u8 *)ptrs[faila];
  80 + ptrs[faila] = (void *)raid6_empty_zero_page;
  81 + ptrs[disks-1] = dq;
  82 +
  83 + raid6_call.gen_syndrome(disks, bytes, ptrs);
  84 +
  85 + /* Restore pointer table */
  86 + ptrs[faila] = dq;
  87 + ptrs[disks-1] = q;
  88 +
  89 + /* Now, pick the proper data tables */
  90 + qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
  91 +
  92 + /* Now do it... */
  93 + while ( bytes-- ) {
  94 + *p++ ^= *dq = qmul[*q ^ *dq];
  95 + q++; dq++;
  96 + }
  97 +}
  98 +EXPORT_SYMBOL_GPL(raid6_datap_recov);
  99 +
  100 +#ifndef __KERNEL__
  101 +/* Testing only */
  102 +
  103 +/* Recover two failed blocks. */
  104 +void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs)
  105 +{
  106 + if ( faila > failb ) {
  107 + int tmp = faila;
  108 + faila = failb;
  109 + failb = tmp;
  110 + }
  111 +
  112 + if ( failb == disks-1 ) {
  113 + if ( faila == disks-2 ) {
  114 + /* P+Q failure. Just rebuild the syndrome. */
  115 + raid6_call.gen_syndrome(disks, bytes, ptrs);
  116 + } else {
  117 + /* data+Q failure. Reconstruct data from P,
  118 + then rebuild syndrome. */
  119 + /* NOT IMPLEMENTED - equivalent to RAID-5 */
  120 + }
  121 + } else {
  122 + if ( failb == disks-2 ) {
  123 + /* data+P failure. */
  124 + raid6_datap_recov(disks, bytes, faila, ptrs);
  125 + } else {
  126 + /* data+data failure. */
  127 + raid6_2data_recov(disks, bytes, faila, failb, ptrs);
  128 + }
  129 + }
  130 +}
  131 +
  132 +#endif
lib/raid6/raid6sse1.c
  1 +/* -*- linux-c -*- ------------------------------------------------------- *
  2 + *
  3 + * Copyright 2002 H. Peter Anvin - All Rights Reserved
  4 + *
  5 + * This program is free software; you can redistribute it and/or modify
  6 + * it under the terms of the GNU General Public License as published by
  7 + * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
  8 + * Boston MA 02111-1307, USA; either version 2 of the License, or
  9 + * (at your option) any later version; incorporated herein by reference.
  10 + *
  11 + * ----------------------------------------------------------------------- */
  12 +
  13 +/*
  14 + * raid6sse1.c
  15 + *
  16 + * SSE-1/MMXEXT implementation of RAID-6 syndrome functions
  17 + *
  18 + * This is really an MMX implementation, but it requires SSE-1 or
  19 + * AMD MMXEXT for prefetch support and a few other features. The
  20 + * support for nontemporal memory accesses is enough to make this
  21 + * worthwhile as a separate implementation.
  22 + */
  23 +
  24 +#if defined(__i386__) && !defined(__arch_um__)
  25 +
  26 +#include <linux/raid/pq.h>
  27 +#include "raid6x86.h"
  28 +
  29 +/* Defined in raid6mmx.c */
  30 +extern const struct raid6_mmx_constants {
  31 + u64 x1d;
  32 +} raid6_mmx_constants;
  33 +
  34 +static int raid6_have_sse1_or_mmxext(void)
  35 +{
  36 + /* Not really boot_cpu but "all_cpus" */
  37 + return boot_cpu_has(X86_FEATURE_MMX) &&
  38 + (boot_cpu_has(X86_FEATURE_XMM) ||
  39 + boot_cpu_has(X86_FEATURE_MMXEXT));
  40 +}
  41 +
  42 +/*
  43 + * Plain SSE1 implementation
  44 + */
  45 +static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
  46 +{
  47 + u8 **dptr = (u8 **)ptrs;
  48 + u8 *p, *q;
  49 + int d, z, z0;
  50 +
  51 + z0 = disks - 3; /* Highest data disk */
  52 + p = dptr[z0+1]; /* XOR parity */
  53 + q = dptr[z0+2]; /* RS syndrome */
  54 +
  55 + kernel_fpu_begin();
  56 +
  57 + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
  58 + asm volatile("pxor %mm5,%mm5"); /* Zero temp */
  59 +
  60 + for ( d = 0 ; d < bytes ; d += 8 ) {
  61 + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
  62 + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
  63 + asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
  64 + asm volatile("movq %mm2,%mm4"); /* Q[0] */
  65 + asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d]));
  66 + for ( z = z0-2 ; z >= 0 ; z-- ) {
  67 + asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
  68 + asm volatile("pcmpgtb %mm4,%mm5");
  69 + asm volatile("paddb %mm4,%mm4");
  70 + asm volatile("pand %mm0,%mm5");
  71 + asm volatile("pxor %mm5,%mm4");
  72 + asm volatile("pxor %mm5,%mm5");
  73 + asm volatile("pxor %mm6,%mm2");
  74 + asm volatile("pxor %mm6,%mm4");
  75 + asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
  76 + }
  77 + asm volatile("pcmpgtb %mm4,%mm5");
  78 + asm volatile("paddb %mm4,%mm4");
  79 + asm volatile("pand %mm0,%mm5");
  80 + asm volatile("pxor %mm5,%mm4");
  81 + asm volatile("pxor %mm5,%mm5");
  82 + asm volatile("pxor %mm6,%mm2");
  83 + asm volatile("pxor %mm6,%mm4");
  84 +
  85 + asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
  86 + asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
  87 + }
  88 +
  89 + asm volatile("sfence" : : : "memory");
  90 + kernel_fpu_end();
  91 +}
  92 +
  93 +const struct raid6_calls raid6_sse1x1 = {
  94 + raid6_sse11_gen_syndrome,
  95 + raid6_have_sse1_or_mmxext,
  96 + "sse1x1",
  97 + 1 /* Has cache hints */
  98 +};
  99 +
  100 +/*
  101 + * Unrolled-by-2 SSE1 implementation
  102 + */
  103 +static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
  104 +{
  105 + u8 **dptr = (u8 **)ptrs;
  106 + u8 *p, *q;
  107 + int d, z, z0;
  108 +
  109 + z0 = disks - 3; /* Highest data disk */
  110 + p = dptr[z0+1]; /* XOR parity */
  111 + q = dptr[z0+2]; /* RS syndrome */
  112 +
  113 + kernel_fpu_begin();
  114 +
  115 + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
  116 + asm volatile("pxor %mm5,%mm5"); /* Zero temp */
  117 + asm volatile("pxor %mm7,%mm7"); /* Zero temp */
  118 +
  119 + /* We uniformly assume a single prefetch covers at least 16 bytes */
  120 + for ( d = 0 ; d < bytes ; d += 16 ) {
  121 + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
  122 + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
  123 + asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */
  124 + asm volatile("movq %mm2,%mm4"); /* Q[0] */
  125 + asm volatile("movq %mm3,%mm6"); /* Q[1] */
  126 + for ( z = z0-1 ; z >= 0 ; z-- ) {
  127 + asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
  128 + asm volatile("pcmpgtb %mm4,%mm5");
  129 + asm volatile("pcmpgtb %mm6,%mm7");
  130 + asm volatile("paddb %mm4,%mm4");
  131 + asm volatile("paddb %mm6,%mm6");
  132 + asm volatile("pand %mm0,%mm5");
  133 + asm volatile("pand %mm0,%mm7");
  134 + asm volatile("pxor %mm5,%mm4");
  135 + asm volatile("pxor %mm7,%mm6");
  136 + asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
  137 + asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
  138 + asm volatile("pxor %mm5,%mm2");
  139 + asm volatile("pxor %mm7,%mm3");
  140 + asm volatile("pxor %mm5,%mm4");
  141 + asm volatile("pxor %mm7,%mm6");
  142 + asm volatile("pxor %mm5,%mm5");
  143 + asm volatile("pxor %mm7,%mm7");
  144 + }
  145 + asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
  146 + asm volatile("movntq %%mm3,%0" : "=m" (p[d+8]));
  147 + asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
  148 + asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
  149 + }
  150 +
  151 + asm volatile("sfence" : :: "memory");
  152 + kernel_fpu_end();
  153 +}
  154 +
  155 +const struct raid6_calls raid6_sse1x2 = {
  156 + raid6_sse12_gen_syndrome,
  157 + raid6_have_sse1_or_mmxext,
  158 + "sse1x2",
  159 + 1 /* Has cache hints */
  160 +};
  161 +
  162 +#endif
lib/raid6/raid6sse2.c
  1 +/* -*- linux-c -*- ------------------------------------------------------- *
  2 + *
  3 + * Copyright 2002 H. Peter Anvin - All Rights Reserved
  4 + *
  5 + * This program is free software; you can redistribute it and/or modify
  6 + * it under the terms of the GNU General Public License as published by
  7 + * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
  8 + * Boston MA 02111-1307, USA; either version 2 of the License, or
  9 + * (at your option) any later version; incorporated herein by reference.
  10 + *
  11 + * ----------------------------------------------------------------------- */
  12 +
  13 +/*
  14 + * raid6sse2.c
  15 + *
  16 + * SSE-2 implementation of RAID-6 syndrome functions
  17 + *
  18 + */
  19 +
  20 +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
  21 +
  22 +#include <linux/raid/pq.h>
  23 +#include "raid6x86.h"
  24 +
  25 +static const struct raid6_sse_constants {
  26 + u64 x1d[2];
  27 +} raid6_sse_constants __attribute__((aligned(16))) = {
  28 + { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
  29 +};
  30 +
  31 +static int raid6_have_sse2(void)
  32 +{
  33 + /* Not really boot_cpu but "all_cpus" */
  34 + return boot_cpu_has(X86_FEATURE_MMX) &&
  35 + boot_cpu_has(X86_FEATURE_FXSR) &&
  36 + boot_cpu_has(X86_FEATURE_XMM) &&
  37 + boot_cpu_has(X86_FEATURE_XMM2);
  38 +}
  39 +
  40 +/*
  41 + * Plain SSE2 implementation
  42 + */
  43 +static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
  44 +{
  45 + u8 **dptr = (u8 **)ptrs;
  46 + u8 *p, *q;
  47 + int d, z, z0;
  48 +
  49 + z0 = disks - 3; /* Highest data disk */
  50 + p = dptr[z0+1]; /* XOR parity */
  51 + q = dptr[z0+2]; /* RS syndrome */
  52 +
  53 + kernel_fpu_begin();
  54 +
  55 + asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
  56 + asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
  57 +
  58 + for ( d = 0 ; d < bytes ; d += 16 ) {
  59 + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
  60 + asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
  61 + asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
  62 + asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
  63 + asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
  64 + for ( z = z0-2 ; z >= 0 ; z-- ) {
  65 + asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
  66 + asm volatile("pcmpgtb %xmm4,%xmm5");
  67 + asm volatile("paddb %xmm4,%xmm4");
  68 + asm volatile("pand %xmm0,%xmm5");
  69 + asm volatile("pxor %xmm5,%xmm4");
  70 + asm volatile("pxor %xmm5,%xmm5");
  71 + asm volatile("pxor %xmm6,%xmm2");
  72 + asm volatile("pxor %xmm6,%xmm4");
  73 + asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
  74 + }
  75 + asm volatile("pcmpgtb %xmm4,%xmm5");
  76 + asm volatile("paddb %xmm4,%xmm4");
  77 + asm volatile("pand %xmm0,%xmm5");
  78 + asm volatile("pxor %xmm5,%xmm4");
  79 + asm volatile("pxor %xmm5,%xmm5");
  80 + asm volatile("pxor %xmm6,%xmm2");
  81 + asm volatile("pxor %xmm6,%xmm4");
  82 +
  83 + asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
  84 + asm volatile("pxor %xmm2,%xmm2");
  85 + asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
  86 + asm volatile("pxor %xmm4,%xmm4");
  87 + }
  88 +
  89 + asm volatile("sfence" : : : "memory");
  90 + kernel_fpu_end();
  91 +}
  92 +
  93 +const struct raid6_calls raid6_sse2x1 = {
  94 + raid6_sse21_gen_syndrome,
  95 + raid6_have_sse2,
  96 + "sse2x1",
  97 + 1 /* Has cache hints */
  98 +};
  99 +
  100 +/*
  101 + * Unrolled-by-2 SSE2 implementation
  102 + */
  103 +static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
  104 +{
  105 + u8 **dptr = (u8 **)ptrs;
  106 + u8 *p, *q;
  107 + int d, z, z0;
  108 +
  109 + z0 = disks - 3; /* Highest data disk */
  110 + p = dptr[z0+1]; /* XOR parity */
  111 + q = dptr[z0+2]; /* RS syndrome */
  112 +
  113 + kernel_fpu_begin();
  114 +
  115 + asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
  116 + asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
  117 + asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */
  118 +
  119 + /* We uniformly assume a single prefetch covers at least 32 bytes */
  120 + for ( d = 0 ; d < bytes ; d += 32 ) {
  121 + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
  122 + asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
  123 + asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
  124 + asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
  125 + asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
  126 + for ( z = z0-1 ; z >= 0 ; z-- ) {
  127 + asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
  128 + asm volatile("pcmpgtb %xmm4,%xmm5");
  129 + asm volatile("pcmpgtb %xmm6,%xmm7");
  130 + asm volatile("paddb %xmm4,%xmm4");
  131 + asm volatile("paddb %xmm6,%xmm6");
  132 + asm volatile("pand %xmm0,%xmm5");
  133 + asm volatile("pand %xmm0,%xmm7");
  134 + asm volatile("pxor %xmm5,%xmm4");
  135 + asm volatile("pxor %xmm7,%xmm6");
  136 + asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
  137 + asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
  138 + asm volatile("pxor %xmm5,%xmm2");
  139 + asm volatile("pxor %xmm7,%xmm3");
  140 + asm volatile("pxor %xmm5,%xmm4");
  141 + asm volatile("pxor %xmm7,%xmm6");
  142 + asm volatile("pxor %xmm5,%xmm5");
  143 + asm volatile("pxor %xmm7,%xmm7");
  144 + }
  145 + asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
  146 + asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
  147 + asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
  148 + asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
  149 + }
  150 +
  151 + asm volatile("sfence" : : : "memory");
  152 + kernel_fpu_end();
  153 +}
  154 +
  155 +const struct raid6_calls raid6_sse2x2 = {
  156 + raid6_sse22_gen_syndrome,
  157 + raid6_have_sse2,
  158 + "sse2x2",
  159 + 1 /* Has cache hints */
  160 +};
  161 +
  162 +#endif
  163 +
  164 +#if defined(__x86_64__) && !defined(__arch_um__)
  165 +
  166 +/*
  167 + * Unrolled-by-4 SSE2 implementation
  168 + */
/*
 * Compute the RAID-6 P (XOR parity) and Q (Reed-Solomon) syndromes for
 * @disks devices of @bytes each, using SSE2 unrolled four 16-byte lanes
 * wide (64 bytes per outer iteration).  x86-64 only: it needs xmm8-15.
 * ptrs[0..disks-3] are the data disks; ptrs[disks-2] receives P and
 * ptrs[disks-1] receives Q.  @bytes is assumed to be a multiple of 64.
 */
static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	/* Take ownership of the FPU/SSE register state for this section. */
	kernel_fpu_begin();

	/* xmm0 = 16 copies of 0x1d, the reduction constant of the GF(2^8)
	 * generator polynomial 0x11d, used by the multiply-by-2 below. */
	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm2,%xmm2");	/* P[0] */
	asm volatile("pxor %xmm3,%xmm3");	/* P[1] */
	asm volatile("pxor %xmm4,%xmm4");	/* Q[0] */
	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */
	asm volatile("pxor %xmm6,%xmm6");	/* Q[1] */
	asm volatile("pxor %xmm7,%xmm7");	/* Zero temp */
	asm volatile("pxor %xmm10,%xmm10");	/* P[2] */
	asm volatile("pxor %xmm11,%xmm11");	/* P[3] */
	asm volatile("pxor %xmm12,%xmm12");	/* Q[2] */
	asm volatile("pxor %xmm13,%xmm13");	/* Zero temp */
	asm volatile("pxor %xmm14,%xmm14");	/* Q[3] */
	asm volatile("pxor %xmm15,%xmm15");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 64 ) {
		for ( z = z0 ; z >= 0 ; z-- ) {
			/* The second prefetch seems to improve performance... */
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
			/* Q <<= 1 in GF(2^8): pcmpgtb against the zeroed temp
			 * yields 0xff for each byte whose top bit is set,
			 * paddb doubles every byte, and the 0x1d mask is
			 * xor'ed in to reduce modulo the field polynomial.
			 * (On the first pass all registers are still zero,
			 * so this is a harmless no-op.) */
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			/* Load the next data disk's 64 bytes and fold them
			 * into both P (plain XOR) and Q (after the doubling
			 * above, giving Horner evaluation of the syndrome). */
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm13,%xmm10");
			asm volatile("pxor %xmm15,%xmm11");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			/* Re-zero the temps for the next pcmpgtb round. */
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
		}
		/* Non-temporal stores: write P and Q straight to memory
		 * without polluting the cache, and clear the accumulators
		 * for the next 64-byte stripe. */
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("pxor %xmm2,%xmm2");
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("pxor %xmm3,%xmm3");
		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
		asm volatile("pxor %xmm10,%xmm10");
		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
		asm volatile("pxor %xmm11,%xmm11");
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("pxor %xmm4,%xmm4");
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("pxor %xmm6,%xmm6");
		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
		asm volatile("pxor %xmm12,%xmm12");
		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
		asm volatile("pxor %xmm14,%xmm14");
	}

	/* Order the non-temporal stores before releasing the FPU. */
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
  254 +
  255 +const struct raid6_calls raid6_sse2x4 = {
  256 + raid6_sse24_gen_syndrome,
  257 + raid6_have_sse2,
  258 + "sse2x4",
  259 + 1 /* Has cache hints */
  260 +};
  261 +
  262 +#endif
lib/raid6/raid6test/Makefile
#
# This is a simple Makefile to test some of the RAID-6 code
# from userspace.
#

CC	 = gcc
OPTFLAGS = -O2		# Adjust as desired
CFLAGS	 = -I.. -I ../../../include -g $(OPTFLAGS)
LD	 = ld
PERL	 = perl
AR	 = ar
RANLIB	 = ranlib

.c.o:
	$(CC) $(CFLAGS) -c -o $@ $<

# Pull sources (and .uc unrolling templates) in from the parent directory.
%.c: ../%.c
	cp -f $< $@

%.uc: ../%.uc
	cp -f $< $@

all:	raid6.a raid6test

raid6.a: raid6int1.o raid6int2.o raid6int4.o raid6int8.o raid6int16.o \
	 raid6int32.o \
	 raid6mmx.o raid6sse1.o raid6sse2.o \
	 raid6altivec1.o raid6altivec2.o raid6altivec4.o raid6altivec8.o \
	 raid6recov.o raid6algos.o \
	 raid6tables.o
	 rm -f $@
	 $(AR) cq $@ $^
	 $(RANLIB) $@

raid6test: test.c raid6.a
	$(CC) $(CFLAGS) -o raid6test $^

# Generate the unrolled AltiVec and integer variants with one pattern
# rule each instead of ten near-identical explicit rules; the pattern
# stem ($*) is the unrolling factor passed to unroll.pl.
raid6altivec%.c: raid6altivec.uc ../unroll.pl
	$(PERL) ../unroll.pl $* < raid6altivec.uc > $@

raid6int%.c: raid6int.uc ../unroll.pl
	$(PERL) ../unroll.pl $* < raid6int.uc > $@

# mktables is built from ../mktables.c via the copy rule above and
# make's built-in %: %.c rule; running it emits the GF(256) tables.
raid6tables.c: mktables
	./mktables > raid6tables.c

clean:
	rm -f *.o *.a mktables mktables.c raid6int.uc raid6*.c raid6test

spotless: clean
	rm -f *~
lib/raid6/raid6test/test.c
  1 +/* -*- linux-c -*- ------------------------------------------------------- *
  2 + *
  3 + * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved
  4 + *
  5 + * This file is part of the Linux kernel, and is made available under
  6 + * the terms of the GNU General Public License version 2 or (at your
  7 + * option) any later version; incorporated herein by reference.
  8 + *
  9 + * ----------------------------------------------------------------------- */
  10 +
  11 +/*
  12 + * raid6test.c
  13 + *
  14 + * Test RAID-6 recovery with various algorithms
  15 + */
  16 +
  17 +#include <stdlib.h>
  18 +#include <stdio.h>
  19 +#include <string.h>
  20 +#include <linux/raid/pq.h>
  21 +
  22 +#define NDISKS 16 /* Including P and Q */
  23 +
  24 +const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
  25 +struct raid6_calls raid6_call;
  26 +
  27 +char *dataptrs[NDISKS];
  28 +char data[NDISKS][PAGE_SIZE];
  29 +char recovi[PAGE_SIZE], recovj[PAGE_SIZE];
  30 +
  31 +static void makedata(void)
  32 +{
  33 + int i, j;
  34 +
  35 + for (i = 0; i < NDISKS; i++) {
  36 + for (j = 0; j < PAGE_SIZE; j++)
  37 + data[i][j] = rand();
  38 +
  39 + dataptrs[i] = data[i];
  40 + }
  41 +}
  42 +
  43 +static char disk_type(int d)
  44 +{
  45 + switch (d) {
  46 + case NDISKS-2:
  47 + return 'P';
  48 + case NDISKS-1:
  49 + return 'Q';
  50 + default:
  51 + return 'D';
  52 + }
  53 +}
  54 +
/*
 * Simulate the simultaneous loss of disks @i and @j (i < j), run the
 * generic dual-failure recovery, and compare the reconstructed pages
 * against the original data.  Returns nonzero on any mismatch.
 */
static int test_disks(int i, int j)
{
	int erra, errb;

	/* Poison the recovery buffers so stale contents can't pass. */
	memset(recovi, 0xf0, PAGE_SIZE);
	memset(recovj, 0xba, PAGE_SIZE);

	/* Redirect the "failed" slots at the scratch buffers. */
	dataptrs[i] = recovi;
	dataptrs[j] = recovj;

	raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs);

	erra = memcmp(data[i], recovi, PAGE_SIZE);
	errb = memcmp(data[j], recovj, PAGE_SIZE);

	if (i < NDISKS-2 && j == NDISKS-1) {
		/* We don't implement the DQ failure scenario, since it's
		   equivalent to a RAID-5 failure (XOR, then recompute Q) */
		erra = errb = 0;
	} else {
		printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n",
		raid6_call.name,
		i, disk_type(i),
		j, disk_type(j),
		(!erra && !errb) ? "OK" :
		!erra ? "ERRB" :
		!errb ? "ERRA" : "ERRAB");
	}

	/* Restore the real data pointers for the next test pair. */
	dataptrs[i] = data[i];
	dataptrs[j] = data[j];

	return erra || errb;
}
  89 +
/*
 * For every compiled-in syndrome implementation that reports itself
 * usable on this CPU: regenerate P/Q with it, then exercise recovery
 * for every distinct pair of failed disks.  Exit status is the number
 * of failed checks (0 on full success).
 */
int main(int argc, char *argv[])
{
	const struct raid6_calls *const *algo;
	int i, j;
	int err = 0;

	makedata();

	for (algo = raid6_algos; *algo; algo++) {
		if (!(*algo)->valid || (*algo)->valid()) {
			raid6_call = **algo;

			/* Nuke syndromes */
			/* NB: data[] rows are contiguous, so 2*PAGE_SIZE
			   starting at row NDISKS-2 covers both P and Q. */
			memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);

			/* Generate assumed good syndrome */
			raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
						(void **)&dataptrs);

			/* Every unordered pair (i, j) with i < j. */
			for (i = 0; i < NDISKS-1; i++)
				for (j = i+1; j < NDISKS; j++)
					err += test_disks(i, j);
		}
		printf("\n");
	}

	printf("\n");
	/* Pick the best algorithm test */
	raid6_select_algo();

	if (err)
		printf("\n*** ERRORS FOUND ***\n");

	return err;
}
lib/raid6/raid6x86.h
  1 +/* ----------------------------------------------------------------------- *
  2 + *
  3 + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
  4 + *
  5 + * This program is free software; you can redistribute it and/or modify
  6 + * it under the terms of the GNU General Public License as published by
  7 + * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
  8 + * Boston MA 02111-1307, USA; either version 2 of the License, or
  9 + * (at your option) any later version; incorporated herein by reference.
  10 + *
  11 + * ----------------------------------------------------------------------- */
  12 +
  13 +/*
  14 + * raid6x86.h
  15 + *
  16 + * Definitions common to x86 and x86-64 RAID-6 code only
  17 + */
  18 +
#ifndef LINUX_RAID_RAID6X86_H
#define LINUX_RAID_RAID6X86_H

/* Only meaningful on real x86/x86-64, never on user-mode Linux. */
#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)

#ifdef __KERNEL__ /* Real code */

/* The kernel provides kernel_fpu_begin()/kernel_fpu_end() here. */
#include <asm/i387.h>

#else /* Dummy code for user space testing */

/* In user space the OS saves/restores FPU state across context
 * switches, so the bracketing calls can be no-ops. */
static inline void kernel_fpu_begin(void)
{
}

static inline void kernel_fpu_end(void)
{
}

/* CPUID feature bits encoded as (word*32 + bit) to mirror the kernel's
 * cpufeature encoding: word 0 is leaf 1 EDX, word 1 is the AMD
 * extended leaf 0x80000001 EDX. */
#define X86_FEATURE_MMX		(0*32+23) /* Multimedia Extensions */
#define X86_FEATURE_FXSR	(0*32+24) /* FXSAVE and FXRSTOR instructions
					   * (fast save and restore) */
#define X86_FEATURE_XMM		(0*32+25) /* Streaming SIMD Extensions */
#define X86_FEATURE_XMM2	(0*32+26) /* Streaming SIMD Extensions-2 */
#define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */

/* Should work well enough on modern CPUs for testing */
static inline int boot_cpu_has(int flag)
{
	/* Word 1 flags live in the extended leaf, everything else in leaf 1. */
	u32 eax = (flag >> 5) ? 0x80000001 : 1;
	u32 edx;

	/* cpuid also writes ebx/ecx; they are unused, so just clobbered. */
	asm volatile("cpuid"
		     : "+a" (eax), "=d" (edx)
		     : : "ecx", "ebx");

	/* Extract the requested bit from EDX. */
	return (edx >> (flag & 31)) & 1;
}

#endif /* ndef __KERNEL__ */

#endif
#endif
#!/usr/bin/perl
#
# Take a piece of C code and for each line which contains the sequence $$
# repeat n times with $ replaced by 0...n-1; the sequence $# is replaced
# by the unrolling factor, and $* with a single $
#

my ($n) = @ARGV;
$n += 0;		# force numeric

while (my $line = <STDIN>) {
	# Lines containing $$ are emitted $n times; all others once.
	my $rep = ($line =~ /\$\$/) ? $n : 1;
	for my $i (0 .. $rep - 1) {
		my $out = $line;
		$out =~ s/\$\$/$i/g;	# $$ -> current unroll index
		$out =~ s/\$\#/$n/g;	# $# -> unrolling factor
		$out =~ s/\$\*/\$/g;	# $* -> literal $
		print $out;
	}
}