Commit 5d7244e7c984cecead412bde6395ce18618a4a37
Committed by: Ingo Molnar
Parent: 4269329090
x86-64: Fix memset() to support sizes of 4Gb and above
While currently there doesn't appear to be any reachable in-tree case where such large memory blocks may be passed to memset() (alloc_bootmem() being the primary non-reachable one, as it gets called with suitably large sizes in FLATMEM configurations), we have recently hit the problem a second time in our Xen kernels. Rather than working around it a second time, prevent others from falling into the same trap by fixing this long standing limitation.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/4F05D992020000780006AA09@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
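For illustration only (not part of the commit): before this patch the byte count was handled through 32-bit register operations (movl %edx,%ecx and friends), so a length of 4 GiB or more was silently reduced modulo 2^32. A minimal user-space C sketch of that failure mode, with a purely hypothetical length value:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t len = (1ULL << 32) + 1;     /* request: 4 GiB + 1 bytes */
        uint32_t truncated = (uint32_t)len;  /* what a 32-bit register move keeps */

        printf("requested %llu bytes, 32-bit view sees %u byte(s)\n",
               (unsigned long long)len, truncated);
        assert(truncated == 1);              /* only 1 byte would have been filled */
        return 0;
    }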
Showing 1 changed file with 15 additions and 18 deletions
arch/x86/lib/memset_64.S
@@ -19,16 +19,15 @@
 	.section .altinstr_replacement, "ax", @progbits
 .Lmemset_c:
 	movq %rdi,%r9
-	movl %edx,%r8d
-	andl $7,%r8d
-	movl %edx,%ecx
-	shrl $3,%ecx
+	movq %rdx,%rcx
+	andl $7,%edx
+	shrq $3,%rcx
 	/* expand byte value */
 	movzbl %sil,%esi
 	movabs $0x0101010101010101,%rax
-	mulq %rsi		/* with rax, clobbers rdx */
+	imulq %rsi,%rax
 	rep stosq
-	movl %r8d,%ecx
+	movl %edx,%ecx
 	rep stosb
 	movq %r9,%rax
 	ret
@@ -50,7 +49,7 @@
 .Lmemset_c_e:
 	movq %rdi,%r9
 	movb %sil,%al
-	movl %edx,%ecx
+	movq %rdx,%rcx
 	rep stosb
 	movq %r9,%rax
 	ret
@@ -61,12 +60,11 @@
 ENTRY(__memset)
 	CFI_STARTPROC
 	movq %rdi,%r10
-	movq %rdx,%r11
 
 	/* expand byte value */
 	movzbl %sil,%ecx
 	movabs $0x0101010101010101,%rax
-	mul %rcx		/* with rax, clobbers rdx */
+	imulq %rcx,%rax
 
 	/* align dst */
 	movl %edi,%r9d
@@ -75,13 +73,13 @@
 	CFI_REMEMBER_STATE
 .Lafter_bad_alignment:
 
-	movl %r11d,%ecx
-	shrl $6,%ecx
+	movq %rdx,%rcx
+	shrq $6,%rcx
 	jz .Lhandle_tail
 
 	.p2align 4
 .Lloop_64:
-	decl %ecx
+	decq %rcx
 	movq %rax,(%rdi)
 	movq %rax,8(%rdi)
 	movq %rax,16(%rdi)
@@ -97,7 +95,7 @@
 	   to predict jump tables. */
 	.p2align 4
 .Lhandle_tail:
-	movl %r11d,%ecx
+	movl %edx,%ecx
 	andl $63&(~7),%ecx
 	jz .Lhandle_7
 	shrl $3,%ecx
@@ -109,12 +107,11 @@
 	jnz .Lloop_8
 
 .Lhandle_7:
-	movl %r11d,%ecx
-	andl $7,%ecx
+	andl $7,%edx
 	jz .Lende
 	.p2align 4
 .Lloop_1:
-	decl %ecx
+	decl %edx
 	movb %al,(%rdi)
 	leaq 1(%rdi),%rdi
 	jnz .Lloop_1
@@ -125,13 +122,13 @@
 
 	CFI_RESTORE_STATE
 .Lbad_alignment:
-	cmpq $7,%r11
+	cmpq $7,%rdx
 	jbe .Lhandle_7
 	movq %rax,(%rdi)	/* unaligned store */
 	movq $8,%r8
 	subq %r9,%r8
 	addq %r8,%rdi
-	subq %r8,%r11
+	subq %r8,%rdx
 	jmp .Lafter_bad_alignment
 .Lfinal:
 	CFI_ENDPROC
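A side note on the mulq -> imulq change visible above: the fill byte is broadcast into all eight bytes of a qword by multiplying it with 0x0101010101010101. One-operand mulq writes its 128-bit product to %rdx:%rax and therefore clobbers %rdx, which after this patch carries the full 64-bit count, while two-operand imulq leaves its result in the destination register only; this is also why the spare copy of the count in %r11 could be dropped. As a hedged illustration (plain C, not the kernel code), the same broadcast looks like this:

    #include <stdint.h>
    #include <stdio.h>

    /* Broadcast a fill byte into a 64-bit word, mirroring
     * "movabs $0x0101010101010101,%rax; imulq %rcx,%rax". */
    static uint64_t expand_byte(uint8_t c)
    {
        return (uint64_t)c * 0x0101010101010101ULL;
    }

    int main(void)
    {
        printf("0x%016llx\n", (unsigned long long)expand_byte(0xab));
        /* prints 0xabababababababab */
        return 0;
    }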