Commit 5d7244e7c984cecead412bde6395ce18618a4a37

Authored by Jan Beulich
Committed by Ingo Molnar
1 parent 4269329090

x86-64: Fix memset() to support sizes of 4Gb and above

While currently there doesn't appear to be any reachable in-tree
case where such large memory blocks may be passed to memset()
(alloc_bootmem() being the primary non-reachable one, as it gets
called with suitably large sizes in FLATMEM configurations), we
have recently hit the problem a second time in our Xen kernels.

Rather than working around it a second time, prevent others from
falling into the same trap by fixing this long standing
limitation.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/4F05D992020000780006AA09@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 1 changed file with 15 additions and 18 deletions
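The limitation being removed is that all of the memset() variants in this file carried the byte count through 32-bit registers (for example the old "movl %edx,%ecx"), so the upper half of a 64-bit length was silently dropped for sizes of 4 GiB and above. A minimal userspace sketch of that truncation (standalone illustration, not taken from the kernel):

/* Illustration only: a 32-bit register move keeps just the low 32 bits
 * of a 64-bit length, exactly like the cast below. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t len = (1ULL << 32) + 100;	/* 4 GiB + 100 bytes, as passed in %rdx */
	uint32_t truncated = (uint32_t)len;	/* what "movl %edx,%ecx" preserved */

	printf("requested %llu bytes, old code would have stored %u\n",
	       (unsigned long long)len, truncated);
	return 0;
}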

arch/x86/lib/memset_64.S
@@ -19,16 +19,15 @@
 	.section .altinstr_replacement, "ax", @progbits
 .Lmemset_c:
 	movq %rdi,%r9
-	movl %edx,%r8d
-	andl $7,%r8d
-	movl %edx,%ecx
-	shrl $3,%ecx
+	movq %rdx,%rcx
+	andl $7,%edx
+	shrq $3,%rcx
 	/* expand byte value */
 	movzbl %sil,%esi
 	movabs $0x0101010101010101,%rax
-	mulq %rsi /* with rax, clobbers rdx */
+	imulq %rsi,%rax
 	rep stosq
-	movl %r8d,%ecx
+	movl %edx,%ecx
 	rep stosb
 	movq %r9,%rax
 	ret
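In this first hunk (the REP STOSQ alternative, .Lmemset_c) the length now stays in 64-bit registers: %rcx receives the full count and is shifted right by 3 to form the quadword count, while the 0..7 byte remainder is kept in %edx instead of %r8d. The switch from mulq to the two-operand imulq is needed because mulq writes the high half of its product to %rdx, which now holds the remainder; imulq %rsi,%rax leaves %rdx untouched. The multiply itself is the usual byte-replication trick, shown below as a standalone C illustration (not part of the commit):

/* Replicate a fill byte across all eight bytes of a 64-bit word, the
 * pattern that "rep stosq" then stores eight bytes at a time. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t fill = 0xAB;
	uint64_t pattern = (uint64_t)fill * 0x0101010101010101ULL;

	printf("%#018llx\n", (unsigned long long)pattern);	/* 0xabababababababab */
	return 0;
}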
@@ -50,7 +49,7 @@
 .Lmemset_c_e:
 	movq %rdi,%r9
 	movb %sil,%al
-	movl %edx,%ecx
+	movq %rdx,%rcx
 	rep stosb
 	movq %r9,%rax
 	ret
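The second hunk is the ERMS variant (.Lmemset_c_e), where a single rep stosb does all the work; the only change needed is loading the full 64-bit length into %rcx with movq instead of movl. As a rough C-level model (illustration only, not kernel code), rep stosb behaves like:

#include <stdint.h>

/* Store the byte in %al to (%rdi), %rcx times, advancing %rdi.
 * With a 32-bit count register at most 4 GiB - 1 bytes can be written. */
void rep_stosb_model(unsigned char *rdi, unsigned char al, uint64_t rcx)
{
	while (rcx--)
		*rdi++ = al;
}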
@@ -61,12 +60,11 @@
 ENTRY(__memset)
 	CFI_STARTPROC
 	movq %rdi,%r10
-	movq %rdx,%r11
 
 	/* expand byte value */
 	movzbl %sil,%ecx
 	movabs $0x0101010101010101,%rax
-	mul %rcx /* with rax, clobbers rdx */
+	imulq %rcx,%rax
 
 	/* align dst */
 	movl %edi,%r9d
@@ -75,13 +73,13 @@
 	CFI_REMEMBER_STATE
 .Lafter_bad_alignment:
 
-	movl %r11d,%ecx
-	shrl $6,%ecx
+	movq %rdx,%rcx
+	shrq $6,%rcx
 	jz .Lhandle_tail
 
 	.p2align 4
 .Lloop_64:
-	decl %ecx
+	decq %rcx
 	movq %rax,(%rdi)
 	movq %rax,8(%rdi)
 	movq %rax,16(%rdi)
@@ -97,7 +95,7 @@
 	   to predict jump tables. */
 	.p2align 4
 .Lhandle_tail:
-	movl %r11d,%ecx
+	movl %edx,%ecx
 	andl $63&(~7),%ecx
 	jz .Lhandle_7
 	shrl $3,%ecx
@@ -109,12 +107,11 @@
 	jnz .Lloop_8
 
 .Lhandle_7:
-	movl %r11d,%ecx
-	andl $7,%ecx
+	andl $7,%edx
 	jz .Lende
 	.p2align 4
 .Lloop_1:
-	decl %ecx
+	decl %edx
 	movb %al,(%rdi)
 	leaq 1(%rdi),%rdi
 	jnz .Lloop_1
@@ -125,13 +122,13 @@
 
 	CFI_RESTORE_STATE
 .Lbad_alignment:
-	cmpq $7,%r11
+	cmpq $7,%rdx
 	jbe .Lhandle_7
 	movq %rax,(%rdi) /* unaligned store */
 	movq $8,%r8
 	subq %r9,%r8
 	addq %r8,%rdi
-	subq %r8,%r11
+	subq %r8,%rdx
 	jmp .Lafter_bad_alignment
 .Lfinal:
 	CFI_ENDPROC
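Taken together, the remaining hunks make the fallback __memset path keep the original length in %rdx throughout (instead of a 32-bit copy in %r11d), count 64-byte blocks in the full-width %rcx, and again use imulq so the multiply does not clobber the length register. A rough C-level model of the rewritten flow, written only to make the AT&T-syntax assembly easier to follow (the helper name and structure are illustrative, not from the kernel):

#include <stdint.h>
#include <string.h>

void *memset_model(void *dst, int c, uint64_t len)
{
	unsigned char *p = dst;
	uint64_t pattern = (uint64_t)(unsigned char)c * 0x0101010101010101ULL;

	/* .Lbad_alignment: one unaligned 8-byte store, then round up to 8 */
	if (((uintptr_t)p & 7) && len > 7) {
		uint64_t head = 8 - ((uintptr_t)p & 7);
		memcpy(p, &pattern, 8);
		p += head;
		len -= head;
	}

	/* .Lloop_64: 64 bytes per iteration, counter held in 64 bits */
	for (uint64_t blocks = len >> 6; blocks; blocks--, p += 64)
		for (int i = 0; i < 8; i++)
			memcpy(p + 8 * i, &pattern, 8);

	/* .Lhandle_tail / .Lloop_8: remaining whole 8-byte words */
	for (uint64_t words = (len & 63) >> 3; words; words--, p += 8)
		memcpy(p, &pattern, 8);

	/* .Lhandle_7 / .Lloop_1: final 0..7 bytes */
	for (uint64_t tail = len & 7; tail; tail--)
		*p++ = (unsigned char)c;

	return dst;
}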