Commit 2239aff6ab2b95af1f628eee7a809f21c41605b3

Authored by Nicolas Pitre
Committed by Lennert Buytenhek
1 parent 4c4925c1f4

[ARM] cache align destination pointer when copying memory for some processors

The ARM memory copy functions had a (disabled) provision for aligning
the source pointer before loading registers with data.  It turns out
that aligning the _destination_ pointer is much more useful, as the
read side is already sufficiently helped by the use of preload.

So this changes the definition of the CALGN() macro to target the
destination pointer instead, and turns it on for Feroceon processors,
where the gain is very noticeable.

Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
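
For illustration, a minimal C sketch of the destination-alignment idea
(not the kernel assembly; the 32-byte line size matches the patch, and
the function name is made up): copy a short head so the destination
reaches a cache line boundary, then do the bulk of the copy with the
destination line-aligned, which is roughly what the CALGN() path in the
diff below enables for the store-multiple loops.

    #include <stdint.h>
    #include <string.h>

    /* Illustrative sketch only: align the *destination* to a 32-byte
     * cache line before the bulk copy.
     */
    static void *copy_dst_aligned(void *dst, const void *src, size_t n)
    {
            uint8_t *d = dst;
            const uint8_t *s = src;
            size_t head = (32 - ((uintptr_t)d & 31)) & 31;  /* bytes to next line */

            if (head && n > head) {         /* only worthwhile for long copies */
                    memcpy(d, s, head);     /* short unaligned head */
                    d += head;
                    s += head;
                    n -= head;
            }
            memcpy(d, s, n);                /* bulk copy; dst now line-aligned */
            return dst;
    }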

Showing 3 changed files with 19 additions and 20 deletions

arch/arm/lib/copy_template.S
... ... @@ -13,14 +13,6 @@
13 13 */
14 14  
15 15 /*
16   - * This can be used to enable code to cacheline align the source pointer.
17   - * Experiments on tested architectures (StrongARM and XScale) didn't show
18   - * this a worthwhile thing to do. That might be different in the future.
19   - */
20   -//#define CALGN(code...) code
21   -#define CALGN(code...)
22   -
23   -/*
24 16 * Theory of operation
25 17 * -------------------
26 18 *
... ... @@ -82,7 +74,7 @@
82 74 stmfd sp!, {r5 - r8}
83 75 blt 5f
84 76  
85   - CALGN( ands ip, r1, #31 )
  77 + CALGN( ands ip, r0, #31 )
86 78 CALGN( rsb r3, ip, #32 )
87 79 CALGN( sbcnes r4, r3, r2 ) @ C is always set here
88 80 CALGN( bcs 2f )
... ... @@ -168,7 +160,7 @@
168 160 subs r2, r2, #28
169 161 blt 14f
170 162  
171   - CALGN( ands ip, r1, #31 )
  163 + CALGN( ands ip, r0, #31 )
172 164 CALGN( rsb ip, ip, #32 )
173 165 CALGN( sbcnes r4, ip, r2 ) @ C is always set here
174 166 CALGN( subcc r2, r2, ip )
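
One reading of the new prologue, as a hypothetical C helper (the helper
name and the exact skip condition are my interpretation of the
sbcnes/bcs pair): ip is the destination's offset within its 32-byte
cache line, r3 is how many bytes are needed to reach the next line
boundary, and the step is skipped when the destination is already
aligned or too few bytes remain.

    /* Hypothetical helper (illustrative only) mirroring the CALGN lines
     * above: how many bytes to copy before the destination (r0) reaches
     * a 32-byte boundary, or 0 when the alignment step is skipped.
     */
    static unsigned long calgn_head(unsigned long dst, unsigned long len)
    {
            unsigned long ip = dst & 31;    /* ands ip, r0, #31 */
            unsigned long head = 32 - ip;   /* rsb  r3, ip, #32 */

            if (ip == 0 || head >= len)     /* sbcnes + bcs 2f: not worth it */
                    return 0;
            return head;    /* e.g. dst = 0x1014: ip = 20, head = 12 bytes */
    }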
arch/arm/lib/memmove.S
... ... @@ -13,14 +13,6 @@
13 13 #include <linux/linkage.h>
14 14 #include <asm/assembler.h>
15 15  
16   -/*
17   - * This can be used to enable code to cacheline align the source pointer.
18   - * Experiments on tested architectures (StrongARM and XScale) didn't show
19   - * this a worthwhile thing to do. That might be different in the future.
20   - */
21   -//#define CALGN(code...) code
22   -#define CALGN(code...)
23   -
24 16 .text
25 17  
26 18 /*
... ... @@ -55,7 +47,7 @@
55 47 stmfd sp!, {r5 - r8}
56 48 blt 5f
57 49  
58   - CALGN( ands ip, r1, #31 )
  50 + CALGN( ands ip, r0, #31 )
59 51 CALGN( sbcnes r4, ip, r2 ) @ C is always set here
60 52 CALGN( bcs 2f )
61 53 CALGN( adr r4, 6f )
... ... @@ -139,7 +131,7 @@
139 131 subs r2, r2, #28
140 132 blt 14f
141 133  
142   - CALGN( ands ip, r1, #31 )
  134 + CALGN( ands ip, r0, #31 )
143 135 CALGN( sbcnes r4, ip, r2 ) @ C is always set here
144 136 CALGN( subcc r2, r2, ip )
145 137 CALGN( bcc 15f )
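
The memmove version copies downwards from the end of the buffers, so
there is no rsb here: the destination pointer has already been advanced
past the end of the region, and its offset within a 32-byte line is
itself the head length. A hypothetical helper under that reading
(illustrative only):

    /* Backwards-copy variant: r0 points just past the end of the
     * destination, so its low five bits are directly the head length.
     */
    static unsigned long calgn_head_backwards(unsigned long dst_end,
                                              unsigned long len)
    {
            unsigned long head = dst_end & 31;      /* ands ip, r0, #31 */

            if (head == 0 || head >= len)           /* sbcnes + bcs 2f */
                    return 0;
            return head;    /* bytes to store downwards before a line boundary */
    }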
include/asm-arm/assembler.h
... ... @@ -56,6 +56,21 @@
56 56 #endif
57 57  
58 58 /*
  59 + * This can be used to enable code to cacheline align the destination
  60 + * pointer when bulk writing to memory. Experiments on StrongARM and
  61 + * XScale didn't show this a worthwhile thing to do when the cache is not
  62 + * set to write-allocate (this would need further testing on XScale when WA
  63 + * is used).
  64 + *
  65 + * On Feroceon there is much to gain however, regardless of cache mode.
  66 + */
  67 +#ifdef CONFIG_CPU_FEROCEON
  68 +#define CALGN(code...) code
  69 +#else
  70 +#define CALGN(code...)
  71 +#endif
  72 +
  73 +/*
59 74 * Enable and disable interrupts
60 75 */
61 76 #if __LINUX_ARM_ARCH__ >= 6
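
The macro gates the alignment instructions at preprocessing time, so
builds without CONFIG_CPU_FEROCEON assemble exactly the code they had
before this patch. A stand-alone toy of the mechanism (not kernel code;
CONFIG_CPU_FEROCEON is defined locally just for the demonstration):

    #include <stdio.h>

    #define CONFIG_CPU_FEROCEON     /* assumption: pretend this is a Feroceon build */

    #ifdef CONFIG_CPU_FEROCEON
    #define CALGN(code...) code     /* keep the wrapped statements */
    #else
    #define CALGN(code...)          /* drop them at preprocessing time */
    #endif

    int main(void)
    {
            int head = 0;

            CALGN(head = 32;)       /* kept only when CONFIG_CPU_FEROCEON is set */
            printf("head = %d\n", head);
            return 0;
    }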