Blame view
lib/raid6/sse1.c
4.86 KB
1da177e4c Linux-2.6.12-rc2 |
1 2 3 4 5 6 7 |
/* -*- linux-c -*- ------------------------------------------------------- * * * Copyright 2002 H. Peter Anvin - All Rights Reserved * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, Inc., 53 Temple Place Ste 330, |
93ed05e2a md: fix typo in F... |
8 |
* Boston MA 02111-1307, USA; either version 2 of the License, or |
1da177e4c Linux-2.6.12-rc2 |
9 10 11 12 13 |
* (at your option) any later version; incorporated herein by reference. * * ----------------------------------------------------------------------- */ /* |
a8e026c78 Further tidyup of... |
14 |
* raid6/sse1.c |
1da177e4c Linux-2.6.12-rc2 |
15 16 17 18 19 20 21 22 |
* * SSE-1/MMXEXT implementation of RAID-6 syndrome functions * * This is really an MMX implementation, but it requires SSE-1 or * AMD MMXEXT for prefetch support and a few other features. The * support for nontemporal memory accesses is enough to make this * worthwhile as a separate implementation. */ |
ca5cd877a x86 merge fallout... |
23 |
#if defined(__i386__) && !defined(__arch_um__)

#include <linux/raid/pq.h>
#include "x86.h"

/* Defined in raid6/mmx.c */
extern const struct raid6_mmx_constants {
	u64 x1d;
} raid6_mmx_constants;

/*
 * Validity check shared by both implementations in this file.
 *
 * Returns nonzero when the CPU reports MMX together with either SSE-1
 * (X86_FEATURE_XMM) or AMD MMXEXT, and zero otherwise.
 */
static int raid6_have_sse1_or_mmxext(void)
{
	/* Not really boot_cpu but "all_cpus" */
	return boot_cpu_has(X86_FEATURE_MMX) &&
		(boot_cpu_has(X86_FEATURE_XMM) ||
		 boot_cpu_has(X86_FEATURE_MMXEXT));
}

/*
 * Plain SSE1 implementation
 *
 * Computes the P (XOR parity) and Q (RS syndrome) buffers, 8 bytes per
 * iteration of the outer loop.
 *
 * @disks: number of entries in @ptrs; entries 0..disks-3 are data
 *         buffers, entry disks-2 is P and entry disks-1 is Q.
 * @bytes: number of bytes to process in each buffer.  The loop advances
 *         in steps of 8, so this is presumably a multiple of 8 (callers
 *         are not visible from here - confirm).
 * @ptrs:  array of buffer pointers, laid out as described for @disks.
 *
 * NOTE: dptr[z0-1] is dereferenced unconditionally, so this routine needs
 * at least two data buffers (disks >= 4).
 *
 * Register usage:
 *   mm0 - the x1d constant from raid6_mmx_constants
 *   mm2 - running P
 *   mm4 - running Q
 *   mm5 - temporary, kept at zero between uses
 *   mm6 - data of the next buffer, loaded one step ahead of its use
 */
static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	size_t d;	/* byte offset; same type as 'bytes' so the compare is unsigned */
	int z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	/* All MMX register use happens between these two calls */
	kernel_fpu_begin();

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5");	/* Zero temp */

	for (d = 0; d < bytes; d += 8) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d]));	/* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("movq %mm2,%mm4");	/* Q[0] */
		/* Preload the next lower buffer; it is consumed one step later */
		asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d]));
		for (z = z0-2; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			/*
			 * mm5 (zero) becomes 0xff in every byte lane where
			 * the Q byte has its top bit set; Q is then doubled
			 * bytewise and the masked x1d constant is XORed in.
			 */
			asm volatile("pcmpgtb %mm4,%mm5");
			asm volatile("paddb %mm4,%mm4");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm5,%mm5");	/* Re-zero temp */
			/* Fold the preloaded data into P and Q */
			asm volatile("pxor %mm6,%mm2");
			asm volatile("pxor %mm6,%mm4");
			asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
		}
		/* Final step for the data still pending in mm6 */
		asm volatile("pcmpgtb %mm4,%mm5");
		asm volatile("paddb %mm4,%mm4");
		asm volatile("pand %mm0,%mm5");
		asm volatile("pxor %mm5,%mm4");
		asm volatile("pxor %mm5,%mm5");
		asm volatile("pxor %mm6,%mm2");
		asm volatile("pxor %mm6,%mm4");

		/* Non-temporal stores of the finished P and Q words */
		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
	}

	/* Fence the non-temporal stores before leaving the FPU section */
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_sse1x1 = {
	raid6_sse11_gen_syndrome,
	raid6_have_sse1_or_mmxext,
	"sse1x1",
	1			/* Has cache hints */
};

/*
 * Unrolled-by-2 SSE1 implementation
 *
 * Same contract as raid6_sse11_gen_syndrome(), but handles 16 bytes per
 * iteration of the outer loop as two independent 8-byte lanes.
 *
 * @disks: number of entries in @ptrs; entries 0..disks-3 are data
 *         buffers, entry disks-2 is P and entry disks-1 is Q.
 * @bytes: number of bytes to process in each buffer.  The loop advances
 *         in steps of 16, so this is presumably a multiple of 16
 *         (callers are not visible from here - confirm).
 * @ptrs:  array of buffer pointers, laid out as described for @disks.
 *
 * Register usage:
 *   mm0      - the x1d constant from raid6_mmx_constants
 *   mm2, mm3 - running P for the low / high 8 bytes
 *   mm4, mm6 - running Q for the low / high 8 bytes
 *   mm5, mm7 - temporaries, kept at zero between uses
 */
static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	size_t d;	/* byte offset; same type as 'bytes' so the compare is unsigned */
	int z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	/* All MMX register use happens between these two calls */
	kernel_fpu_begin();

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5");	/* Zero temp */
	asm volatile("pxor %mm7,%mm7");	/* Zero temp */

	/* We uniformly assume a single prefetch covers at least 16 bytes */
	for (d = 0; d < bytes; d += 16) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d]));	/* P[0] */
		asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8]));	/* P[1] */
		asm volatile("movq %mm2,%mm4");	/* Q[0] */
		asm volatile("movq %mm3,%mm6");	/* Q[1] */
		for (z = z0-1; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			/*
			 * Per lane: build a 0xff mask for Q bytes with the
			 * top bit set, double Q bytewise, then XOR in the
			 * masked x1d constant.
			 */
			asm volatile("pcmpgtb %mm4,%mm5");
			asm volatile("pcmpgtb %mm6,%mm7");
			asm volatile("paddb %mm4,%mm4");
			asm volatile("paddb %mm6,%mm6");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pand %mm0,%mm7");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			/* The temporaries double as the data load registers */
			asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
			asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
			asm volatile("pxor %mm5,%mm2");
			asm volatile("pxor %mm7,%mm3");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			/* Re-zero the temporaries for the next compare */
			asm volatile("pxor %mm5,%mm5");
			asm volatile("pxor %mm7,%mm7");
		}
		/* Non-temporal stores of the finished P and Q words */
		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
		asm volatile("movntq %%mm3,%0" : "=m" (p[d+8]));
		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
		asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
	}

	/* Fence the non-temporal stores before leaving the FPU section */
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_sse1x2 = {
	raid6_sse12_gen_syndrome,
	raid6_have_sse1_or_mmxext,
	"sse1x2",
	1			/* Has cache hints */
};

#endif