Commit 438a76167959061e371025f727fabec2ad9e70a7

Authored by Russell King
Committed by Russell King
1 parent b3402cf50e

[PATCH] ARM: Fix VFP to use do_div()

VFP needlessly used __divdi3 for full 64-bit division.  Convert it to
use our 64-bit by 32-bit division, do_div(), instead.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

Showing 3 changed files with 27 additions and 4 deletions
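For context: do_div(), declared in <asm/div64.h>, divides a 64-bit dividend by a 32-bit divisor, leaving the quotient in the dividend variable and returning the 32-bit remainder.  Using it avoids the libgcc 64-bit division helpers (__divdi3 and friends) that a plain C '/' on a 64-bit value pulls in on 32-bit ARM.  A minimal sketch of the conversion pattern this patch applies (the two helper functions below are illustrative only, not part of the patch):

	#include <linux/types.h>
	#include <asm/div64.h>

	/* Before: plain 64-bit division, compiled to a libgcc helper call. */
	static u64 div_before(u64 n, u32 base)
	{
		return n / base;
	}

	/*
	 * After: do_div() updates 'n' in place to the quotient and
	 * returns the remainder, using only 64-bit by 32-bit division.
	 */
	static u64 div_after(u64 n, u32 base)
	{
		u32 rem;

		rem = do_div(n, base);	/* n now holds the quotient */
		(void)rem;		/* remainder unused in this sketch */
		return n;
	}

This is exactly the shape of the change in vfp_estimate_div128to64() below: "(nh / mh) << 32" becomes "z = nh; do_div(z, mh); z <<= 32;".

arch/arm/vfp/vfp.h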

1 /* 1 /*
2 * linux/arch/arm/vfp/vfp.h 2 * linux/arch/arm/vfp/vfp.h
3 * 3 *
4 * Copyright (C) 2004 ARM Limited. 4 * Copyright (C) 2004 ARM Limited.
5 * Written by Deep Blue Solutions Limited. 5 * Written by Deep Blue Solutions Limited.
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 */ 10 */
11 11
12 static inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift) 12 static inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift)
13 { 13 {
14 if (shift) { 14 if (shift) {
15 if (shift < 32) 15 if (shift < 32)
16 val = val >> shift | ((val << (32 - shift)) != 0); 16 val = val >> shift | ((val << (32 - shift)) != 0);
17 else 17 else
18 val = val != 0; 18 val = val != 0;
19 } 19 }
20 return val; 20 return val;
21 } 21 }
22 22
23 static inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift) 23 static inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift)
24 { 24 {
25 if (shift) { 25 if (shift) {
26 if (shift < 64) 26 if (shift < 64)
27 val = val >> shift | ((val << (64 - shift)) != 0); 27 val = val >> shift | ((val << (64 - shift)) != 0);
28 else 28 else
29 val = val != 0; 29 val = val != 0;
30 } 30 }
31 return val; 31 return val;
32 } 32 }
33 33
34 static inline u32 vfp_hi64to32jamming(u64 val) 34 static inline u32 vfp_hi64to32jamming(u64 val)
35 { 35 {
36 u32 v; 36 u32 v;
37 37
38 asm( 38 asm(
39 "cmp %Q1, #1 @ vfp_hi64to32jamming\n\t" 39 "cmp %Q1, #1 @ vfp_hi64to32jamming\n\t"
40 "movcc %0, %R1\n\t" 40 "movcc %0, %R1\n\t"
41 "orrcs %0, %R1, #1" 41 "orrcs %0, %R1, #1"
42 : "=r" (v) : "r" (val) : "cc"); 42 : "=r" (v) : "r" (val) : "cc");
43 43
44 return v; 44 return v;
45 } 45 }
46 46
47 static inline void add128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml) 47 static inline void add128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml)
48 { 48 {
49 asm( "adds %Q0, %Q2, %Q4\n\t" 49 asm( "adds %Q0, %Q2, %Q4\n\t"
50 "adcs %R0, %R2, %R4\n\t" 50 "adcs %R0, %R2, %R4\n\t"
51 "adcs %Q1, %Q3, %Q5\n\t" 51 "adcs %Q1, %Q3, %Q5\n\t"
52 "adc %R1, %R3, %R5" 52 "adc %R1, %R3, %R5"
53 : "=r" (nl), "=r" (nh) 53 : "=r" (nl), "=r" (nh)
54 : "0" (nl), "1" (nh), "r" (ml), "r" (mh) 54 : "0" (nl), "1" (nh), "r" (ml), "r" (mh)
55 : "cc"); 55 : "cc");
56 *resh = nh; 56 *resh = nh;
57 *resl = nl; 57 *resl = nl;
58 } 58 }
59 59
60 static inline void sub128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml) 60 static inline void sub128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml)
61 { 61 {
62 asm( "subs %Q0, %Q2, %Q4\n\t" 62 asm( "subs %Q0, %Q2, %Q4\n\t"
63 "sbcs %R0, %R2, %R4\n\t" 63 "sbcs %R0, %R2, %R4\n\t"
64 "sbcs %Q1, %Q3, %Q5\n\t" 64 "sbcs %Q1, %Q3, %Q5\n\t"
65 "sbc %R1, %R3, %R5\n\t" 65 "sbc %R1, %R3, %R5\n\t"
66 : "=r" (nl), "=r" (nh) 66 : "=r" (nl), "=r" (nh)
67 : "0" (nl), "1" (nh), "r" (ml), "r" (mh) 67 : "0" (nl), "1" (nh), "r" (ml), "r" (mh)
68 : "cc"); 68 : "cc");
69 *resh = nh; 69 *resh = nh;
70 *resl = nl; 70 *resl = nl;
71 } 71 }
72 72
73 static inline void mul64to128(u64 *resh, u64 *resl, u64 n, u64 m) 73 static inline void mul64to128(u64 *resh, u64 *resl, u64 n, u64 m)
74 { 74 {
75 u32 nh, nl, mh, ml; 75 u32 nh, nl, mh, ml;
76 u64 rh, rma, rmb, rl; 76 u64 rh, rma, rmb, rl;
77 77
78 nl = n; 78 nl = n;
79 ml = m; 79 ml = m;
80 rl = (u64)nl * ml; 80 rl = (u64)nl * ml;
81 81
82 nh = n >> 32; 82 nh = n >> 32;
83 rma = (u64)nh * ml; 83 rma = (u64)nh * ml;
84 84
85 mh = m >> 32; 85 mh = m >> 32;
86 rmb = (u64)nl * mh; 86 rmb = (u64)nl * mh;
87 rma += rmb; 87 rma += rmb;
88 88
89 rh = (u64)nh * mh; 89 rh = (u64)nh * mh;
90 rh += ((u64)(rma < rmb) << 32) + (rma >> 32); 90 rh += ((u64)(rma < rmb) << 32) + (rma >> 32);
91 91
92 rma <<= 32; 92 rma <<= 32;
93 rl += rma; 93 rl += rma;
94 rh += (rl < rma); 94 rh += (rl < rma);
95 95
96 *resl = rl; 96 *resl = rl;
97 *resh = rh; 97 *resh = rh;
98 } 98 }
99 99
100 static inline void shift64left(u64 *resh, u64 *resl, u64 n) 100 static inline void shift64left(u64 *resh, u64 *resl, u64 n)
101 { 101 {
102 *resh = n >> 63; 102 *resh = n >> 63;
103 *resl = n << 1; 103 *resl = n << 1;
104 } 104 }
105 105
106 static inline u64 vfp_hi64multiply64(u64 n, u64 m) 106 static inline u64 vfp_hi64multiply64(u64 n, u64 m)
107 { 107 {
108 u64 rh, rl; 108 u64 rh, rl;
109 mul64to128(&rh, &rl, n, m); 109 mul64to128(&rh, &rl, n, m);
110 return rh | (rl != 0); 110 return rh | (rl != 0);
111 } 111 }
112 112
113 static inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m) 113 static inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m)
114 { 114 {
115 u64 mh, ml, remh, reml, termh, terml, z; 115 u64 mh, ml, remh, reml, termh, terml, z;
116 116
117 if (nh >= m) 117 if (nh >= m)
118 return ~0ULL; 118 return ~0ULL;
119 mh = m >> 32; 119 mh = m >> 32;
120 z = (mh << 32 <= nh) ? 0xffffffff00000000ULL : (nh / mh) << 32; 120 if (mh << 32 <= nh) {
121 z = 0xffffffff00000000ULL;
122 } else {
123 z = nh;
124 do_div(z, mh);
125 z <<= 32;
126 }
121 mul64to128(&termh, &terml, m, z); 127 mul64to128(&termh, &terml, m, z);
122 sub128(&remh, &reml, nh, nl, termh, terml); 128 sub128(&remh, &reml, nh, nl, termh, terml);
123 ml = m << 32; 129 ml = m << 32;
124 while ((s64)remh < 0) { 130 while ((s64)remh < 0) {
125 z -= 0x100000000ULL; 131 z -= 0x100000000ULL;
126 add128(&remh, &reml, remh, reml, mh, ml); 132 add128(&remh, &reml, remh, reml, mh, ml);
127 } 133 }
128 remh = (remh << 32) | (reml >> 32); 134 remh = (remh << 32) | (reml >> 32);
129 z |= (mh << 32 <= remh) ? 0xffffffff : remh / mh; 135 if (mh << 32 <= remh) {
136 z |= 0xffffffff;
137 } else {
138 do_div(remh, mh);
139 z |= remh;
140 }
130 return z; 141 return z;
131 } 142 }
132 143
133 /* 144 /*
134 * Operations on unpacked elements 145 * Operations on unpacked elements
135 */ 146 */
136 #define vfp_sign_negate(sign) (sign ^ 0x8000) 147 #define vfp_sign_negate(sign) (sign ^ 0x8000)
137 148
138 /* 149 /*
139 * Single-precision 150 * Single-precision
140 */ 151 */
141 struct vfp_single { 152 struct vfp_single {
142 s16 exponent; 153 s16 exponent;
143 u16 sign; 154 u16 sign;
144 u32 significand; 155 u32 significand;
145 }; 156 };
146 157
147 extern s32 vfp_get_float(unsigned int reg); 158 extern s32 vfp_get_float(unsigned int reg);
148 extern void vfp_put_float(unsigned int reg, s32 val); 159 extern void vfp_put_float(unsigned int reg, s32 val);
149 160
150 /* 161 /*
151 * VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa 162 * VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa
152 * VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent 163 * VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent
153 * VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand 164 * VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand
154 * which are not propagated to the float upon packing. 165 * which are not propagated to the float upon packing.
155 */ 166 */
156 #define VFP_SINGLE_MANTISSA_BITS (23) 167 #define VFP_SINGLE_MANTISSA_BITS (23)
157 #define VFP_SINGLE_EXPONENT_BITS (8) 168 #define VFP_SINGLE_EXPONENT_BITS (8)
158 #define VFP_SINGLE_LOW_BITS (32 - VFP_SINGLE_MANTISSA_BITS - 2) 169 #define VFP_SINGLE_LOW_BITS (32 - VFP_SINGLE_MANTISSA_BITS - 2)
159 #define VFP_SINGLE_LOW_BITS_MASK ((1 << VFP_SINGLE_LOW_BITS) - 1) 170 #define VFP_SINGLE_LOW_BITS_MASK ((1 << VFP_SINGLE_LOW_BITS) - 1)
160 171
161 /* 172 /*
162 * The bit in an unpacked float which indicates that it is a quiet NaN 173 * The bit in an unpacked float which indicates that it is a quiet NaN
163 */ 174 */
164 #define VFP_SINGLE_SIGNIFICAND_QNAN (1 << (VFP_SINGLE_MANTISSA_BITS - 1 + VFP_SINGLE_LOW_BITS)) 175 #define VFP_SINGLE_SIGNIFICAND_QNAN (1 << (VFP_SINGLE_MANTISSA_BITS - 1 + VFP_SINGLE_LOW_BITS))
165 176
166 /* 177 /*
167 * Operations on packed single-precision numbers 178 * Operations on packed single-precision numbers
168 */ 179 */
169 #define vfp_single_packed_sign(v) ((v) & 0x80000000) 180 #define vfp_single_packed_sign(v) ((v) & 0x80000000)
170 #define vfp_single_packed_negate(v) ((v) ^ 0x80000000) 181 #define vfp_single_packed_negate(v) ((v) ^ 0x80000000)
171 #define vfp_single_packed_abs(v) ((v) & ~0x80000000) 182 #define vfp_single_packed_abs(v) ((v) & ~0x80000000)
172 #define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1)) 183 #define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1))
173 #define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1)) 184 #define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1))
174 185
175 /* 186 /*
176 * Unpack a single-precision float. Note that this returns the magnitude 187 * Unpack a single-precision float. Note that this returns the magnitude
177 * of the single-precision float mantissa with the 1. if necessary, 188 * of the single-precision float mantissa with the 1. if necessary,
178 * aligned to bit 30. 189 * aligned to bit 30.
179 */ 190 */
180 static inline void vfp_single_unpack(struct vfp_single *s, s32 val) 191 static inline void vfp_single_unpack(struct vfp_single *s, s32 val)
181 { 192 {
182 u32 significand; 193 u32 significand;
183 194
184 s->sign = vfp_single_packed_sign(val) >> 16, 195 s->sign = vfp_single_packed_sign(val) >> 16,
185 s->exponent = vfp_single_packed_exponent(val); 196 s->exponent = vfp_single_packed_exponent(val);
186 197
187 significand = (u32) val; 198 significand = (u32) val;
188 significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2; 199 significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2;
189 if (s->exponent && s->exponent != 255) 200 if (s->exponent && s->exponent != 255)
190 significand |= 0x40000000; 201 significand |= 0x40000000;
191 s->significand = significand; 202 s->significand = significand;
192 } 203 }
193 204
194 /* 205 /*
195 * Re-pack a single-precision float. This assumes that the float is 206 * Re-pack a single-precision float. This assumes that the float is
196 * already normalised such that the MSB is bit 30, _not_ bit 31. 207 * already normalised such that the MSB is bit 30, _not_ bit 31.
197 */ 208 */
198 static inline s32 vfp_single_pack(struct vfp_single *s) 209 static inline s32 vfp_single_pack(struct vfp_single *s)
199 { 210 {
200 u32 val; 211 u32 val;
201 val = (s->sign << 16) + 212 val = (s->sign << 16) +
202 (s->exponent << VFP_SINGLE_MANTISSA_BITS) + 213 (s->exponent << VFP_SINGLE_MANTISSA_BITS) +
203 (s->significand >> VFP_SINGLE_LOW_BITS); 214 (s->significand >> VFP_SINGLE_LOW_BITS);
204 return (s32)val; 215 return (s32)val;
205 } 216 }
206 217
207 #define VFP_NUMBER (1<<0) 218 #define VFP_NUMBER (1<<0)
208 #define VFP_ZERO (1<<1) 219 #define VFP_ZERO (1<<1)
209 #define VFP_DENORMAL (1<<2) 220 #define VFP_DENORMAL (1<<2)
210 #define VFP_INFINITY (1<<3) 221 #define VFP_INFINITY (1<<3)
211 #define VFP_NAN (1<<4) 222 #define VFP_NAN (1<<4)
212 #define VFP_NAN_SIGNAL (1<<5) 223 #define VFP_NAN_SIGNAL (1<<5)
213 224
214 #define VFP_QNAN (VFP_NAN) 225 #define VFP_QNAN (VFP_NAN)
215 #define VFP_SNAN (VFP_NAN|VFP_NAN_SIGNAL) 226 #define VFP_SNAN (VFP_NAN|VFP_NAN_SIGNAL)
216 227
217 static inline int vfp_single_type(struct vfp_single *s) 228 static inline int vfp_single_type(struct vfp_single *s)
218 { 229 {
219 int type = VFP_NUMBER; 230 int type = VFP_NUMBER;
220 if (s->exponent == 255) { 231 if (s->exponent == 255) {
221 if (s->significand == 0) 232 if (s->significand == 0)
222 type = VFP_INFINITY; 233 type = VFP_INFINITY;
223 else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN) 234 else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN)
224 type = VFP_QNAN; 235 type = VFP_QNAN;
225 else 236 else
226 type = VFP_SNAN; 237 type = VFP_SNAN;
227 } else if (s->exponent == 0) { 238 } else if (s->exponent == 0) {
228 if (s->significand == 0) 239 if (s->significand == 0)
229 type |= VFP_ZERO; 240 type |= VFP_ZERO;
230 else 241 else
231 type |= VFP_DENORMAL; 242 type |= VFP_DENORMAL;
232 } 243 }
233 return type; 244 return type;
234 } 245 }
235 246
236 #ifndef DEBUG 247 #ifndef DEBUG
237 #define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except) 248 #define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
238 u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions); 249 u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions);
239 #else 250 #else
240 u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func); 251 u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func);
241 #endif 252 #endif
242 253
243 /* 254 /*
244 * Double-precision 255 * Double-precision
245 */ 256 */
246 struct vfp_double { 257 struct vfp_double {
247 s16 exponent; 258 s16 exponent;
248 u16 sign; 259 u16 sign;
249 u64 significand; 260 u64 significand;
250 }; 261 };
251 262
252 /* 263 /*
253 * VFP_REG_ZERO is a special register number for vfp_get_double 264 * VFP_REG_ZERO is a special register number for vfp_get_double
254 * which returns (double)0.0. This is useful for the compare with 265 * which returns (double)0.0. This is useful for the compare with
255 * zero instructions. 266 * zero instructions.
256 */ 267 */
257 #define VFP_REG_ZERO 16 268 #define VFP_REG_ZERO 16
258 extern u64 vfp_get_double(unsigned int reg); 269 extern u64 vfp_get_double(unsigned int reg);
259 extern void vfp_put_double(unsigned int reg, u64 val); 270 extern void vfp_put_double(unsigned int reg, u64 val);
260 271
261 #define VFP_DOUBLE_MANTISSA_BITS (52) 272 #define VFP_DOUBLE_MANTISSA_BITS (52)
262 #define VFP_DOUBLE_EXPONENT_BITS (11) 273 #define VFP_DOUBLE_EXPONENT_BITS (11)
263 #define VFP_DOUBLE_LOW_BITS (64 - VFP_DOUBLE_MANTISSA_BITS - 2) 274 #define VFP_DOUBLE_LOW_BITS (64 - VFP_DOUBLE_MANTISSA_BITS - 2)
264 #define VFP_DOUBLE_LOW_BITS_MASK ((1 << VFP_DOUBLE_LOW_BITS) - 1) 275 #define VFP_DOUBLE_LOW_BITS_MASK ((1 << VFP_DOUBLE_LOW_BITS) - 1)
265 276
266 /* 277 /*
267 * The bit in an unpacked double which indicates that it is a quiet NaN 278 * The bit in an unpacked double which indicates that it is a quiet NaN
268 */ 279 */
269 #define VFP_DOUBLE_SIGNIFICAND_QNAN (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS)) 280 #define VFP_DOUBLE_SIGNIFICAND_QNAN (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS))
270 281
271 /* 282 /*
272 * Operations on packed single-precision numbers 283 * Operations on packed single-precision numbers
273 */ 284 */
274 #define vfp_double_packed_sign(v) ((v) & (1ULL << 63)) 285 #define vfp_double_packed_sign(v) ((v) & (1ULL << 63))
275 #define vfp_double_packed_negate(v) ((v) ^ (1ULL << 63)) 286 #define vfp_double_packed_negate(v) ((v) ^ (1ULL << 63))
276 #define vfp_double_packed_abs(v) ((v) & ~(1ULL << 63)) 287 #define vfp_double_packed_abs(v) ((v) & ~(1ULL << 63))
277 #define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1)) 288 #define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1))
278 #define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1)) 289 #define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1))
279 290
280 /* 291 /*
281 * Unpack a double-precision float. Note that this returns the magnitude 292 * Unpack a double-precision float. Note that this returns the magnitude
282 * of the double-precision float mantissa with the 1. if necessary, 293 * of the double-precision float mantissa with the 1. if necessary,
283 * aligned to bit 62. 294 * aligned to bit 62.
284 */ 295 */
285 static inline void vfp_double_unpack(struct vfp_double *s, s64 val) 296 static inline void vfp_double_unpack(struct vfp_double *s, s64 val)
286 { 297 {
287 u64 significand; 298 u64 significand;
288 299
289 s->sign = vfp_double_packed_sign(val) >> 48; 300 s->sign = vfp_double_packed_sign(val) >> 48;
290 s->exponent = vfp_double_packed_exponent(val); 301 s->exponent = vfp_double_packed_exponent(val);
291 302
292 significand = (u64) val; 303 significand = (u64) val;
293 significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2; 304 significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2;
294 if (s->exponent && s->exponent != 2047) 305 if (s->exponent && s->exponent != 2047)
295 significand |= (1ULL << 62); 306 significand |= (1ULL << 62);
296 s->significand = significand; 307 s->significand = significand;
297 } 308 }
298 309
299 /* 310 /*
300 * Re-pack a double-precision float. This assumes that the float is 311 * Re-pack a double-precision float. This assumes that the float is
301 * already normalised such that the MSB is bit 30, _not_ bit 31. 312 * already normalised such that the MSB is bit 30, _not_ bit 31.
302 */ 313 */
303 static inline s64 vfp_double_pack(struct vfp_double *s) 314 static inline s64 vfp_double_pack(struct vfp_double *s)
304 { 315 {
305 u64 val; 316 u64 val;
306 val = ((u64)s->sign << 48) + 317 val = ((u64)s->sign << 48) +
307 ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) + 318 ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) +
308 (s->significand >> VFP_DOUBLE_LOW_BITS); 319 (s->significand >> VFP_DOUBLE_LOW_BITS);
309 return (s64)val; 320 return (s64)val;
310 } 321 }
311 322
312 static inline int vfp_double_type(struct vfp_double *s) 323 static inline int vfp_double_type(struct vfp_double *s)
313 { 324 {
314 int type = VFP_NUMBER; 325 int type = VFP_NUMBER;
315 if (s->exponent == 2047) { 326 if (s->exponent == 2047) {
316 if (s->significand == 0) 327 if (s->significand == 0)
317 type = VFP_INFINITY; 328 type = VFP_INFINITY;
318 else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN) 329 else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN)
319 type = VFP_QNAN; 330 type = VFP_QNAN;
320 else 331 else
321 type = VFP_SNAN; 332 type = VFP_SNAN;
322 } else if (s->exponent == 0) { 333 } else if (s->exponent == 0) {
323 if (s->significand == 0) 334 if (s->significand == 0)
324 type |= VFP_ZERO; 335 type |= VFP_ZERO;
325 else 336 else
326 type |= VFP_DENORMAL; 337 type |= VFP_DENORMAL;
327 } 338 }
328 return type; 339 return type;
329 } 340 }
330 341
331 u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func); 342 u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func);
332 343
333 /* 344 /*
334 * System registers 345 * System registers
335 */ 346 */
336 extern u32 vfp_get_sys(unsigned int reg); 347 extern u32 vfp_get_sys(unsigned int reg);
337 extern void vfp_put_sys(unsigned int reg, u32 val); 348 extern void vfp_put_sys(unsigned int reg, u32 val);
338 349
339 u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand); 350 u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand);
340 351
341 /* 352 /*
342 * A special flag to tell the normalisation code not to normalise. 353 * A special flag to tell the normalisation code not to normalise.
343 */ 354 */
344 #define VFP_NAN_FLAG 0x100 355 #define VFP_NAN_FLAG 0x100
345 356
arch/arm/vfp/vfpdouble.c
1 /* 1 /*
2 * linux/arch/arm/vfp/vfpdouble.c 2 * linux/arch/arm/vfp/vfpdouble.c
3 * 3 *
4 * This code is derived in part from John R. Housers softfloat library, which 4 * This code is derived in part from John R. Housers softfloat library, which
5 * carries the following notice: 5 * carries the following notice:
6 * 6 *
7 * =========================================================================== 7 * ===========================================================================
8 * This C source file is part of the SoftFloat IEC/IEEE Floating-point 8 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
9 * Arithmetic Package, Release 2. 9 * Arithmetic Package, Release 2.
10 * 10 *
11 * Written by John R. Hauser. This work was made possible in part by the 11 * Written by John R. Hauser. This work was made possible in part by the
12 * International Computer Science Institute, located at Suite 600, 1947 Center 12 * International Computer Science Institute, located at Suite 600, 1947 Center
13 * Street, Berkeley, California 94704. Funding was partially provided by the 13 * Street, Berkeley, California 94704. Funding was partially provided by the
14 * National Science Foundation under grant MIP-9311980. The original version 14 * National Science Foundation under grant MIP-9311980. The original version
15 * of this code was written as part of a project to build a fixed-point vector 15 * of this code was written as part of a project to build a fixed-point vector
16 * processor in collaboration with the University of California at Berkeley, 16 * processor in collaboration with the University of California at Berkeley,
17 * overseen by Profs. Nelson Morgan and John Wawrzynek. More information 17 * overseen by Profs. Nelson Morgan and John Wawrzynek. More information
18 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 18 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
19 * arithmetic/softfloat.html'. 19 * arithmetic/softfloat.html'.
20 * 20 *
21 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 21 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
22 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 22 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
23 * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 23 * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
24 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 24 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
25 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 25 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
26 * 26 *
27 * Derivative works are acceptable, even for commercial purposes, so long as 27 * Derivative works are acceptable, even for commercial purposes, so long as
28 * (1) they include prominent notice that the work is derivative, and (2) they 28 * (1) they include prominent notice that the work is derivative, and (2) they
29 * include prominent notice akin to these three paragraphs for those parts of 29 * include prominent notice akin to these three paragraphs for those parts of
30 * this code that are retained. 30 * this code that are retained.
31 * =========================================================================== 31 * ===========================================================================
32 */ 32 */
33 #include <linux/kernel.h> 33 #include <linux/kernel.h>
34 #include <linux/bitops.h> 34 #include <linux/bitops.h>
35
36 #include <asm/div64.h>
35 #include <asm/ptrace.h> 37 #include <asm/ptrace.h>
36 #include <asm/vfp.h> 38 #include <asm/vfp.h>
37 39
38 #include "vfpinstr.h" 40 #include "vfpinstr.h"
39 #include "vfp.h" 41 #include "vfp.h"
40 42
41 static struct vfp_double vfp_double_default_qnan = { 43 static struct vfp_double vfp_double_default_qnan = {
42 .exponent = 2047, 44 .exponent = 2047,
43 .sign = 0, 45 .sign = 0,
44 .significand = VFP_DOUBLE_SIGNIFICAND_QNAN, 46 .significand = VFP_DOUBLE_SIGNIFICAND_QNAN,
45 }; 47 };
46 48
47 static void vfp_double_dump(const char *str, struct vfp_double *d) 49 static void vfp_double_dump(const char *str, struct vfp_double *d)
48 { 50 {
49 pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n", 51 pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n",
50 str, d->sign != 0, d->exponent, d->significand); 52 str, d->sign != 0, d->exponent, d->significand);
51 } 53 }
52 54
53 static void vfp_double_normalise_denormal(struct vfp_double *vd) 55 static void vfp_double_normalise_denormal(struct vfp_double *vd)
54 { 56 {
55 int bits = 31 - fls(vd->significand >> 32); 57 int bits = 31 - fls(vd->significand >> 32);
56 if (bits == 31) 58 if (bits == 31)
57 bits = 62 - fls(vd->significand); 59 bits = 62 - fls(vd->significand);
58 60
59 vfp_double_dump("normalise_denormal: in", vd); 61 vfp_double_dump("normalise_denormal: in", vd);
60 62
61 if (bits) { 63 if (bits) {
62 vd->exponent -= bits - 1; 64 vd->exponent -= bits - 1;
63 vd->significand <<= bits; 65 vd->significand <<= bits;
64 } 66 }
65 67
66 vfp_double_dump("normalise_denormal: out", vd); 68 vfp_double_dump("normalise_denormal: out", vd);
67 } 69 }
68 70
69 u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func) 71 u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func)
70 { 72 {
71 u64 significand, incr; 73 u64 significand, incr;
72 int exponent, shift, underflow; 74 int exponent, shift, underflow;
73 u32 rmode; 75 u32 rmode;
74 76
75 vfp_double_dump("pack: in", vd); 77 vfp_double_dump("pack: in", vd);
76 78
77 /* 79 /*
78 * Infinities and NaNs are a special case. 80 * Infinities and NaNs are a special case.
79 */ 81 */
80 if (vd->exponent == 2047 && (vd->significand == 0 || exceptions)) 82 if (vd->exponent == 2047 && (vd->significand == 0 || exceptions))
81 goto pack; 83 goto pack;
82 84
83 /* 85 /*
84 * Special-case zero. 86 * Special-case zero.
85 */ 87 */
86 if (vd->significand == 0) { 88 if (vd->significand == 0) {
87 vd->exponent = 0; 89 vd->exponent = 0;
88 goto pack; 90 goto pack;
89 } 91 }
90 92
91 exponent = vd->exponent; 93 exponent = vd->exponent;
92 significand = vd->significand; 94 significand = vd->significand;
93 95
94 shift = 32 - fls(significand >> 32); 96 shift = 32 - fls(significand >> 32);
95 if (shift == 32) 97 if (shift == 32)
96 shift = 64 - fls(significand); 98 shift = 64 - fls(significand);
97 if (shift) { 99 if (shift) {
98 exponent -= shift; 100 exponent -= shift;
99 significand <<= shift; 101 significand <<= shift;
100 } 102 }
101 103
102 #ifdef DEBUG 104 #ifdef DEBUG
103 vd->exponent = exponent; 105 vd->exponent = exponent;
104 vd->significand = significand; 106 vd->significand = significand;
105 vfp_double_dump("pack: normalised", vd); 107 vfp_double_dump("pack: normalised", vd);
106 #endif 108 #endif
107 109
108 /* 110 /*
109 * Tiny number? 111 * Tiny number?
110 */ 112 */
111 underflow = exponent < 0; 113 underflow = exponent < 0;
112 if (underflow) { 114 if (underflow) {
113 significand = vfp_shiftright64jamming(significand, -exponent); 115 significand = vfp_shiftright64jamming(significand, -exponent);
114 exponent = 0; 116 exponent = 0;
115 #ifdef DEBUG 117 #ifdef DEBUG
116 vd->exponent = exponent; 118 vd->exponent = exponent;
117 vd->significand = significand; 119 vd->significand = significand;
118 vfp_double_dump("pack: tiny number", vd); 120 vfp_double_dump("pack: tiny number", vd);
119 #endif 121 #endif
120 if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1))) 122 if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1)))
121 underflow = 0; 123 underflow = 0;
122 } 124 }
123 125
124 /* 126 /*
125 * Select rounding increment. 127 * Select rounding increment.
126 */ 128 */
127 incr = 0; 129 incr = 0;
128 rmode = fpscr & FPSCR_RMODE_MASK; 130 rmode = fpscr & FPSCR_RMODE_MASK;
129 131
130 if (rmode == FPSCR_ROUND_NEAREST) { 132 if (rmode == FPSCR_ROUND_NEAREST) {
131 incr = 1ULL << VFP_DOUBLE_LOW_BITS; 133 incr = 1ULL << VFP_DOUBLE_LOW_BITS;
132 if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0) 134 if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0)
133 incr -= 1; 135 incr -= 1;
134 } else if (rmode == FPSCR_ROUND_TOZERO) { 136 } else if (rmode == FPSCR_ROUND_TOZERO) {
135 incr = 0; 137 incr = 0;
136 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0)) 138 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0))
137 incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1; 139 incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1;
138 140
139 pr_debug("VFP: rounding increment = 0x%08llx\n", incr); 141 pr_debug("VFP: rounding increment = 0x%08llx\n", incr);
140 142
141 /* 143 /*
142 * Is our rounding going to overflow? 144 * Is our rounding going to overflow?
143 */ 145 */
144 if ((significand + incr) < significand) { 146 if ((significand + incr) < significand) {
145 exponent += 1; 147 exponent += 1;
146 significand = (significand >> 1) | (significand & 1); 148 significand = (significand >> 1) | (significand & 1);
147 incr >>= 1; 149 incr >>= 1;
148 #ifdef DEBUG 150 #ifdef DEBUG
149 vd->exponent = exponent; 151 vd->exponent = exponent;
150 vd->significand = significand; 152 vd->significand = significand;
151 vfp_double_dump("pack: overflow", vd); 153 vfp_double_dump("pack: overflow", vd);
152 #endif 154 #endif
153 } 155 }
154 156
155 /* 157 /*
156 * If any of the low bits (which will be shifted out of the 158 * If any of the low bits (which will be shifted out of the
157 * number) are non-zero, the result is inexact. 159 * number) are non-zero, the result is inexact.
158 */ 160 */
159 if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1)) 161 if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1))
160 exceptions |= FPSCR_IXC; 162 exceptions |= FPSCR_IXC;
161 163
162 /* 164 /*
163 * Do our rounding. 165 * Do our rounding.
164 */ 166 */
165 significand += incr; 167 significand += incr;
166 168
167 /* 169 /*
168 * Infinity? 170 * Infinity?
169 */ 171 */
170 if (exponent >= 2046) { 172 if (exponent >= 2046) {
171 exceptions |= FPSCR_OFC | FPSCR_IXC; 173 exceptions |= FPSCR_OFC | FPSCR_IXC;
172 if (incr == 0) { 174 if (incr == 0) {
173 vd->exponent = 2045; 175 vd->exponent = 2045;
174 vd->significand = 0x7fffffffffffffffULL; 176 vd->significand = 0x7fffffffffffffffULL;
175 } else { 177 } else {
176 vd->exponent = 2047; /* infinity */ 178 vd->exponent = 2047; /* infinity */
177 vd->significand = 0; 179 vd->significand = 0;
178 } 180 }
179 } else { 181 } else {
180 if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0) 182 if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0)
181 exponent = 0; 183 exponent = 0;
182 if (exponent || significand > 0x8000000000000000ULL) 184 if (exponent || significand > 0x8000000000000000ULL)
183 underflow = 0; 185 underflow = 0;
184 if (underflow) 186 if (underflow)
185 exceptions |= FPSCR_UFC; 187 exceptions |= FPSCR_UFC;
186 vd->exponent = exponent; 188 vd->exponent = exponent;
187 vd->significand = significand >> 1; 189 vd->significand = significand >> 1;
188 } 190 }
189 191
190 pack: 192 pack:
191 vfp_double_dump("pack: final", vd); 193 vfp_double_dump("pack: final", vd);
192 { 194 {
193 s64 d = vfp_double_pack(vd); 195 s64 d = vfp_double_pack(vd);
194 pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func, 196 pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func,
195 dd, d, exceptions); 197 dd, d, exceptions);
196 vfp_put_double(dd, d); 198 vfp_put_double(dd, d);
197 } 199 }
198 return exceptions & ~VFP_NAN_FLAG; 200 return exceptions & ~VFP_NAN_FLAG;
199 } 201 }
200 202
201 /* 203 /*
202 * Propagate the NaN, setting exceptions if it is signalling. 204 * Propagate the NaN, setting exceptions if it is signalling.
203 * 'n' is always a NaN. 'm' may be a number, NaN or infinity. 205 * 'n' is always a NaN. 'm' may be a number, NaN or infinity.
204 */ 206 */
205 static u32 207 static u32
206 vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn, 208 vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn,
207 struct vfp_double *vdm, u32 fpscr) 209 struct vfp_double *vdm, u32 fpscr)
208 { 210 {
209 struct vfp_double *nan; 211 struct vfp_double *nan;
210 int tn, tm = 0; 212 int tn, tm = 0;
211 213
212 tn = vfp_double_type(vdn); 214 tn = vfp_double_type(vdn);
213 215
214 if (vdm) 216 if (vdm)
215 tm = vfp_double_type(vdm); 217 tm = vfp_double_type(vdm);
216 218
217 if (fpscr & FPSCR_DEFAULT_NAN) 219 if (fpscr & FPSCR_DEFAULT_NAN)
218 /* 220 /*
219 * Default NaN mode - always returns a quiet NaN 221 * Default NaN mode - always returns a quiet NaN
220 */ 222 */
221 nan = &vfp_double_default_qnan; 223 nan = &vfp_double_default_qnan;
222 else { 224 else {
223 /* 225 /*
224 * Contemporary mode - select the first signalling 226 * Contemporary mode - select the first signalling
225 * NAN, or if neither are signalling, the first 227 * NAN, or if neither are signalling, the first
226 * quiet NAN. 228 * quiet NAN.
227 */ 229 */
228 if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN)) 230 if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
229 nan = vdn; 231 nan = vdn;
230 else 232 else
231 nan = vdm; 233 nan = vdm;
232 /* 234 /*
233 * Make the NaN quiet. 235 * Make the NaN quiet.
234 */ 236 */
235 nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; 237 nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
236 } 238 }
237 239
238 *vdd = *nan; 240 *vdd = *nan;
239 241
240 /* 242 /*
241 * If one was a signalling NAN, raise invalid operation. 243 * If one was a signalling NAN, raise invalid operation.
242 */ 244 */
243 return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG; 245 return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
244 } 246 }
245 247
246 /* 248 /*
247 * Extended operations 249 * Extended operations
248 */ 250 */
249 static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr) 251 static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr)
250 { 252 {
251 vfp_put_double(dd, vfp_double_packed_abs(vfp_get_double(dm))); 253 vfp_put_double(dd, vfp_double_packed_abs(vfp_get_double(dm)));
252 return 0; 254 return 0;
253 } 255 }
254 256
255 static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr) 257 static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr)
256 { 258 {
257 vfp_put_double(dd, vfp_get_double(dm)); 259 vfp_put_double(dd, vfp_get_double(dm));
258 return 0; 260 return 0;
259 } 261 }
260 262
261 static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr) 263 static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr)
262 { 264 {
263 vfp_put_double(dd, vfp_double_packed_negate(vfp_get_double(dm))); 265 vfp_put_double(dd, vfp_double_packed_negate(vfp_get_double(dm)));
264 return 0; 266 return 0;
265 } 267 }
266 268
267 static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr) 269 static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr)
268 { 270 {
269 struct vfp_double vdm, vdd; 271 struct vfp_double vdm, vdd;
270 int ret, tm; 272 int ret, tm;
271 273
272 vfp_double_unpack(&vdm, vfp_get_double(dm)); 274 vfp_double_unpack(&vdm, vfp_get_double(dm));
273 tm = vfp_double_type(&vdm); 275 tm = vfp_double_type(&vdm);
274 if (tm & (VFP_NAN|VFP_INFINITY)) { 276 if (tm & (VFP_NAN|VFP_INFINITY)) {
275 struct vfp_double *vdp = &vdd; 277 struct vfp_double *vdp = &vdd;
276 278
277 if (tm & VFP_NAN) 279 if (tm & VFP_NAN)
278 ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr); 280 ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr);
279 else if (vdm.sign == 0) { 281 else if (vdm.sign == 0) {
280 sqrt_copy: 282 sqrt_copy:
281 vdp = &vdm; 283 vdp = &vdm;
282 ret = 0; 284 ret = 0;
283 } else { 285 } else {
284 sqrt_invalid: 286 sqrt_invalid:
285 vdp = &vfp_double_default_qnan; 287 vdp = &vfp_double_default_qnan;
286 ret = FPSCR_IOC; 288 ret = FPSCR_IOC;
287 } 289 }
288 vfp_put_double(dd, vfp_double_pack(vdp)); 290 vfp_put_double(dd, vfp_double_pack(vdp));
289 return ret; 291 return ret;
290 } 292 }
291 293
292 /* 294 /*
293 * sqrt(+/- 0) == +/- 0 295 * sqrt(+/- 0) == +/- 0
294 */ 296 */
295 if (tm & VFP_ZERO) 297 if (tm & VFP_ZERO)
296 goto sqrt_copy; 298 goto sqrt_copy;
297 299
298 /* 300 /*
299 * Normalise a denormalised number 301 * Normalise a denormalised number
300 */ 302 */
301 if (tm & VFP_DENORMAL) 303 if (tm & VFP_DENORMAL)
302 vfp_double_normalise_denormal(&vdm); 304 vfp_double_normalise_denormal(&vdm);
303 305
304 /* 306 /*
305 * sqrt(<0) = invalid 307 * sqrt(<0) = invalid
306 */ 308 */
307 if (vdm.sign) 309 if (vdm.sign)
308 goto sqrt_invalid; 310 goto sqrt_invalid;
309 311
310 vfp_double_dump("sqrt", &vdm); 312 vfp_double_dump("sqrt", &vdm);
311 313
312 /* 314 /*
313 * Estimate the square root. 315 * Estimate the square root.
314 */ 316 */
315 vdd.sign = 0; 317 vdd.sign = 0;
316 vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023; 318 vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023;
317 vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31; 319 vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31;
318 320
319 vfp_double_dump("sqrt estimate1", &vdd); 321 vfp_double_dump("sqrt estimate1", &vdd);
320 322
321 vdm.significand >>= 1 + (vdm.exponent & 1); 323 vdm.significand >>= 1 + (vdm.exponent & 1);
322 vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand); 324 vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand);
323 325
324 vfp_double_dump("sqrt estimate2", &vdd); 326 vfp_double_dump("sqrt estimate2", &vdd);
325 327
326 /* 328 /*
327 * And now adjust. 329 * And now adjust.
328 */ 330 */
329 if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) { 331 if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) {
330 if (vdd.significand < 2) { 332 if (vdd.significand < 2) {
331 vdd.significand = ~0ULL; 333 vdd.significand = ~0ULL;
332 } else { 334 } else {
333 u64 termh, terml, remh, reml; 335 u64 termh, terml, remh, reml;
334 vdm.significand <<= 2; 336 vdm.significand <<= 2;
335 mul64to128(&termh, &terml, vdd.significand, vdd.significand); 337 mul64to128(&termh, &terml, vdd.significand, vdd.significand);
336 sub128(&remh, &reml, vdm.significand, 0, termh, terml); 338 sub128(&remh, &reml, vdm.significand, 0, termh, terml);
337 while ((s64)remh < 0) { 339 while ((s64)remh < 0) {
338 vdd.significand -= 1; 340 vdd.significand -= 1;
339 shift64left(&termh, &terml, vdd.significand); 341 shift64left(&termh, &terml, vdd.significand);
340 terml |= 1; 342 terml |= 1;
341 add128(&remh, &reml, remh, reml, termh, terml); 343 add128(&remh, &reml, remh, reml, termh, terml);
342 } 344 }
343 vdd.significand |= (remh | reml) != 0; 345 vdd.significand |= (remh | reml) != 0;
344 } 346 }
345 } 347 }
346 vdd.significand = vfp_shiftright64jamming(vdd.significand, 1); 348 vdd.significand = vfp_shiftright64jamming(vdd.significand, 1);
347 349
348 return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt"); 350 return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt");
349 } 351 }
350 352
351 /* 353 /*
352 * Equal := ZC 354 * Equal := ZC
353 * Less than := N 355 * Less than := N
354 * Greater than := C 356 * Greater than := C
355 * Unordered := CV 357 * Unordered := CV
356 */ 358 */
357 static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr) 359 static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr)
358 { 360 {
359 s64 d, m; 361 s64 d, m;
360 u32 ret = 0; 362 u32 ret = 0;
361 363
362 m = vfp_get_double(dm); 364 m = vfp_get_double(dm);
363 if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) { 365 if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) {
364 ret |= FPSCR_C | FPSCR_V; 366 ret |= FPSCR_C | FPSCR_V;
365 if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) 367 if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
366 /* 368 /*
367 * Signalling NaN, or signalling on quiet NaN 369 * Signalling NaN, or signalling on quiet NaN
368 */ 370 */
369 ret |= FPSCR_IOC; 371 ret |= FPSCR_IOC;
370 } 372 }
371 373
372 d = vfp_get_double(dd); 374 d = vfp_get_double(dd);
373 if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) { 375 if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) {
374 ret |= FPSCR_C | FPSCR_V; 376 ret |= FPSCR_C | FPSCR_V;
375 if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) 377 if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
376 /* 378 /*
377 * Signalling NaN, or signalling on quiet NaN 379 * Signalling NaN, or signalling on quiet NaN
378 */ 380 */
379 ret |= FPSCR_IOC; 381 ret |= FPSCR_IOC;
380 } 382 }
381 383
382 if (ret == 0) { 384 if (ret == 0) {
383 if (d == m || vfp_double_packed_abs(d | m) == 0) { 385 if (d == m || vfp_double_packed_abs(d | m) == 0) {
384 /* 386 /*
385 * equal 387 * equal
386 */ 388 */
387 ret |= FPSCR_Z | FPSCR_C; 389 ret |= FPSCR_Z | FPSCR_C;
388 } else if (vfp_double_packed_sign(d ^ m)) { 390 } else if (vfp_double_packed_sign(d ^ m)) {
389 /* 391 /*
390 * different signs 392 * different signs
391 */ 393 */
392 if (vfp_double_packed_sign(d)) 394 if (vfp_double_packed_sign(d))
393 /* 395 /*
394 * d is negative, so d < m 396 * d is negative, so d < m
395 */ 397 */
396 ret |= FPSCR_N; 398 ret |= FPSCR_N;
397 else 399 else
398 /* 400 /*
399 * d is positive, so d > m 401 * d is positive, so d > m
400 */ 402 */
401 ret |= FPSCR_C; 403 ret |= FPSCR_C;
402 } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) { 404 } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) {
403 /* 405 /*
404 * d < m 406 * d < m
405 */ 407 */
406 ret |= FPSCR_N; 408 ret |= FPSCR_N;
407 } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) { 409 } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) {
408 /* 410 /*
409 * d > m 411 * d > m
410 */ 412 */
411 ret |= FPSCR_C; 413 ret |= FPSCR_C;
412 } 414 }
413 } 415 }
414 416
415 return ret; 417 return ret;
416 } 418 }
417 419
418 static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr) 420 static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr)
419 { 421 {
420 return vfp_compare(dd, 0, dm, fpscr); 422 return vfp_compare(dd, 0, dm, fpscr);
421 } 423 }
422 424
423 static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr) 425 static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr)
424 { 426 {
425 return vfp_compare(dd, 1, dm, fpscr); 427 return vfp_compare(dd, 1, dm, fpscr);
426 } 428 }
427 429
428 static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr) 430 static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr)
429 { 431 {
430 return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr); 432 return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr);
431 } 433 }
432 434
433 static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr) 435 static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr)
434 { 436 {
435 return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr); 437 return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr);
436 } 438 }
437 439
438 static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr) 440 static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr)
439 { 441 {
440 struct vfp_double vdm; 442 struct vfp_double vdm;
441 struct vfp_single vsd; 443 struct vfp_single vsd;
442 int tm; 444 int tm;
443 u32 exceptions = 0; 445 u32 exceptions = 0;
444 446
445 vfp_double_unpack(&vdm, vfp_get_double(dm)); 447 vfp_double_unpack(&vdm, vfp_get_double(dm));
446 448
447 tm = vfp_double_type(&vdm); 449 tm = vfp_double_type(&vdm);
448 450
449 /* 451 /*
450 * If we have a signalling NaN, signal invalid operation. 452 * If we have a signalling NaN, signal invalid operation.
451 */ 453 */
452 if (tm == VFP_SNAN) 454 if (tm == VFP_SNAN)
453 exceptions = FPSCR_IOC; 455 exceptions = FPSCR_IOC;
454 456
455 if (tm & VFP_DENORMAL) 457 if (tm & VFP_DENORMAL)
456 vfp_double_normalise_denormal(&vdm); 458 vfp_double_normalise_denormal(&vdm);
457 459
458 vsd.sign = vdm.sign; 460 vsd.sign = vdm.sign;
459 vsd.significand = vfp_hi64to32jamming(vdm.significand); 461 vsd.significand = vfp_hi64to32jamming(vdm.significand);
460 462
461 /* 463 /*
462 * If we have an infinity or a NaN, the exponent must be 255 464 * If we have an infinity or a NaN, the exponent must be 255
463 */ 465 */
464 if (tm & (VFP_INFINITY|VFP_NAN)) { 466 if (tm & (VFP_INFINITY|VFP_NAN)) {
465 vsd.exponent = 255; 467 vsd.exponent = 255;
466 if (tm & VFP_NAN) 468 if (tm & VFP_NAN)
467 vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN; 469 vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
468 goto pack_nan; 470 goto pack_nan;
469 } else if (tm & VFP_ZERO) 471 } else if (tm & VFP_ZERO)
470 vsd.exponent = 0; 472 vsd.exponent = 0;
471 else 473 else
472 vsd.exponent = vdm.exponent - (1023 - 127); 474 vsd.exponent = vdm.exponent - (1023 - 127);
473 475
474 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts"); 476 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts");
475 477
476 pack_nan: 478 pack_nan:
477 vfp_put_float(sd, vfp_single_pack(&vsd)); 479 vfp_put_float(sd, vfp_single_pack(&vsd));
478 return exceptions; 480 return exceptions;
479 } 481 }
480 482
481 static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr) 483 static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr)
482 { 484 {
483 struct vfp_double vdm; 485 struct vfp_double vdm;
484 u32 m = vfp_get_float(dm); 486 u32 m = vfp_get_float(dm);
485 487
486 vdm.sign = 0; 488 vdm.sign = 0;
487 vdm.exponent = 1023 + 63 - 1; 489 vdm.exponent = 1023 + 63 - 1;
488 vdm.significand = (u64)m; 490 vdm.significand = (u64)m;
489 491
490 return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito"); 492 return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito");
491 } 493 }
492 494
493 static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr) 495 static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr)
494 { 496 {
495 struct vfp_double vdm; 497 struct vfp_double vdm;
496 u32 m = vfp_get_float(dm); 498 u32 m = vfp_get_float(dm);
497 499
498 vdm.sign = (m & 0x80000000) >> 16; 500 vdm.sign = (m & 0x80000000) >> 16;
499 vdm.exponent = 1023 + 63 - 1; 501 vdm.exponent = 1023 + 63 - 1;
500 vdm.significand = vdm.sign ? -m : m; 502 vdm.significand = vdm.sign ? -m : m;
501 503
502 return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito"); 504 return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito");
503 } 505 }
504 506
505 static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr) 507 static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr)
506 { 508 {
507 struct vfp_double vdm; 509 struct vfp_double vdm;
508 u32 d, exceptions = 0; 510 u32 d, exceptions = 0;
509 int rmode = fpscr & FPSCR_RMODE_MASK; 511 int rmode = fpscr & FPSCR_RMODE_MASK;
510 int tm; 512 int tm;
511 513
512 vfp_double_unpack(&vdm, vfp_get_double(dm)); 514 vfp_double_unpack(&vdm, vfp_get_double(dm));
513 515
514 /* 516 /*
515 * Do we have a denormalised number? 517 * Do we have a denormalised number?
516 */ 518 */
517 tm = vfp_double_type(&vdm); 519 tm = vfp_double_type(&vdm);
518 if (tm & VFP_DENORMAL) 520 if (tm & VFP_DENORMAL)
519 exceptions |= FPSCR_IDC; 521 exceptions |= FPSCR_IDC;
520 522
521 if (tm & VFP_NAN) 523 if (tm & VFP_NAN)
522 vdm.sign = 0; 524 vdm.sign = 0;
523 525
524 if (vdm.exponent >= 1023 + 32) { 526 if (vdm.exponent >= 1023 + 32) {
525 d = vdm.sign ? 0 : 0xffffffff; 527 d = vdm.sign ? 0 : 0xffffffff;
526 exceptions = FPSCR_IOC; 528 exceptions = FPSCR_IOC;
527 } else if (vdm.exponent >= 1023 - 1) { 529 } else if (vdm.exponent >= 1023 - 1) {
528 int shift = 1023 + 63 - vdm.exponent; 530 int shift = 1023 + 63 - vdm.exponent;
529 u64 rem, incr = 0; 531 u64 rem, incr = 0;
530 532
531 /* 533 /*
532 * 2^0 <= m < 2^32-2^8 534 * 2^0 <= m < 2^32-2^8
533 */ 535 */
534 d = (vdm.significand << 1) >> shift; 536 d = (vdm.significand << 1) >> shift;
535 rem = vdm.significand << (65 - shift); 537 rem = vdm.significand << (65 - shift);
536 538
537 if (rmode == FPSCR_ROUND_NEAREST) { 539 if (rmode == FPSCR_ROUND_NEAREST) {
538 incr = 0x8000000000000000ULL; 540 incr = 0x8000000000000000ULL;
539 if ((d & 1) == 0) 541 if ((d & 1) == 0)
540 incr -= 1; 542 incr -= 1;
541 } else if (rmode == FPSCR_ROUND_TOZERO) { 543 } else if (rmode == FPSCR_ROUND_TOZERO) {
542 incr = 0; 544 incr = 0;
543 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) { 545 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
544 incr = ~0ULL; 546 incr = ~0ULL;
545 } 547 }
546 548
547 if ((rem + incr) < rem) { 549 if ((rem + incr) < rem) {
548 if (d < 0xffffffff) 550 if (d < 0xffffffff)
549 d += 1; 551 d += 1;
550 else 552 else
551 exceptions |= FPSCR_IOC; 553 exceptions |= FPSCR_IOC;
552 } 554 }
553 555
554 if (d && vdm.sign) { 556 if (d && vdm.sign) {
555 d = 0; 557 d = 0;
556 exceptions |= FPSCR_IOC; 558 exceptions |= FPSCR_IOC;
557 } else if (rem) 559 } else if (rem)
558 exceptions |= FPSCR_IXC; 560 exceptions |= FPSCR_IXC;
559 } else { 561 } else {
560 d = 0; 562 d = 0;
561 if (vdm.exponent | vdm.significand) { 563 if (vdm.exponent | vdm.significand) {
562 exceptions |= FPSCR_IXC; 564 exceptions |= FPSCR_IXC;
563 if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0) 565 if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
564 d = 1; 566 d = 1;
565 else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) { 567 else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) {
566 d = 0; 568 d = 0;
567 exceptions |= FPSCR_IOC; 569 exceptions |= FPSCR_IOC;
568 } 570 }
569 } 571 }
570 } 572 }
571 573
572 pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); 574 pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
573 575
574 vfp_put_float(sd, d); 576 vfp_put_float(sd, d);
575 577
576 return exceptions; 578 return exceptions;
577 } 579 }
578 580
579 static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr) 581 static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr)
580 { 582 {
581 return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO); 583 return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO);
582 } 584 }
583 585
584 static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr) 586 static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr)
585 { 587 {
586 struct vfp_double vdm; 588 struct vfp_double vdm;
587 u32 d, exceptions = 0; 589 u32 d, exceptions = 0;
588 int rmode = fpscr & FPSCR_RMODE_MASK; 590 int rmode = fpscr & FPSCR_RMODE_MASK;
589 591
590 vfp_double_unpack(&vdm, vfp_get_double(dm)); 592 vfp_double_unpack(&vdm, vfp_get_double(dm));
591 vfp_double_dump("VDM", &vdm); 593 vfp_double_dump("VDM", &vdm);
592 594
593 /* 595 /*
594 * Do we have denormalised number? 596 * Do we have denormalised number?
595 */ 597 */
596 if (vfp_double_type(&vdm) & VFP_DENORMAL) 598 if (vfp_double_type(&vdm) & VFP_DENORMAL)
597 exceptions |= FPSCR_IDC; 599 exceptions |= FPSCR_IDC;
598 600
599 if (vdm.exponent >= 1023 + 32) { 601 if (vdm.exponent >= 1023 + 32) {
600 d = 0x7fffffff; 602 d = 0x7fffffff;
601 if (vdm.sign) 603 if (vdm.sign)
602 d = ~d; 604 d = ~d;
603 exceptions |= FPSCR_IOC; 605 exceptions |= FPSCR_IOC;
604 } else if (vdm.exponent >= 1023 - 1) { 606 } else if (vdm.exponent >= 1023 - 1) {
605 int shift = 1023 + 63 - vdm.exponent; /* 58 */ 607 int shift = 1023 + 63 - vdm.exponent; /* 58 */
606 u64 rem, incr = 0; 608 u64 rem, incr = 0;
607 609
608 d = (vdm.significand << 1) >> shift; 610 d = (vdm.significand << 1) >> shift;
609 rem = vdm.significand << (65 - shift); 611 rem = vdm.significand << (65 - shift);
610 612
611 if (rmode == FPSCR_ROUND_NEAREST) { 613 if (rmode == FPSCR_ROUND_NEAREST) {
612 incr = 0x8000000000000000ULL; 614 incr = 0x8000000000000000ULL;
613 if ((d & 1) == 0) 615 if ((d & 1) == 0)
614 incr -= 1; 616 incr -= 1;
615 } else if (rmode == FPSCR_ROUND_TOZERO) { 617 } else if (rmode == FPSCR_ROUND_TOZERO) {
616 incr = 0; 618 incr = 0;
617 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) { 619 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
618 incr = ~0ULL; 620 incr = ~0ULL;
619 } 621 }
620 622
621 if ((rem + incr) < rem && d < 0xffffffff) 623 if ((rem + incr) < rem && d < 0xffffffff)
622 d += 1; 624 d += 1;
623 if (d > 0x7fffffff + (vdm.sign != 0)) { 625 if (d > 0x7fffffff + (vdm.sign != 0)) {
624 d = 0x7fffffff + (vdm.sign != 0); 626 d = 0x7fffffff + (vdm.sign != 0);
625 exceptions |= FPSCR_IOC; 627 exceptions |= FPSCR_IOC;
626 } else if (rem) 628 } else if (rem)
627 exceptions |= FPSCR_IXC; 629 exceptions |= FPSCR_IXC;
628 630
629 if (vdm.sign) 631 if (vdm.sign)
630 d = -d; 632 d = -d;
631 } else { 633 } else {
632 d = 0; 634 d = 0;
633 if (vdm.exponent | vdm.significand) { 635 if (vdm.exponent | vdm.significand) {
634 exceptions |= FPSCR_IXC; 636 exceptions |= FPSCR_IXC;
635 if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0) 637 if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
636 d = 1; 638 d = 1;
637 else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) 639 else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign)
638 d = -1; 640 d = -1;
639 } 641 }
640 } 642 }
641 643
642 pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); 644 pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
643 645
644 vfp_put_float(sd, (s32)d); 646 vfp_put_float(sd, (s32)d);
645 647
646 return exceptions; 648 return exceptions;
647 } 649 }
648 650
649 static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr) 651 static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr)
650 { 652 {
651 return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO); 653 return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO);
652 } 654 }
653 655
654 656
655 static u32 (* const fop_extfns[32])(int dd, int unused, int dm, u32 fpscr) = { 657 static u32 (* const fop_extfns[32])(int dd, int unused, int dm, u32 fpscr) = {
656 [FEXT_TO_IDX(FEXT_FCPY)] = vfp_double_fcpy, 658 [FEXT_TO_IDX(FEXT_FCPY)] = vfp_double_fcpy,
657 [FEXT_TO_IDX(FEXT_FABS)] = vfp_double_fabs, 659 [FEXT_TO_IDX(FEXT_FABS)] = vfp_double_fabs,
658 [FEXT_TO_IDX(FEXT_FNEG)] = vfp_double_fneg, 660 [FEXT_TO_IDX(FEXT_FNEG)] = vfp_double_fneg,
659 [FEXT_TO_IDX(FEXT_FSQRT)] = vfp_double_fsqrt, 661 [FEXT_TO_IDX(FEXT_FSQRT)] = vfp_double_fsqrt,
660 [FEXT_TO_IDX(FEXT_FCMP)] = vfp_double_fcmp, 662 [FEXT_TO_IDX(FEXT_FCMP)] = vfp_double_fcmp,
661 [FEXT_TO_IDX(FEXT_FCMPE)] = vfp_double_fcmpe, 663 [FEXT_TO_IDX(FEXT_FCMPE)] = vfp_double_fcmpe,
662 [FEXT_TO_IDX(FEXT_FCMPZ)] = vfp_double_fcmpz, 664 [FEXT_TO_IDX(FEXT_FCMPZ)] = vfp_double_fcmpz,
663 [FEXT_TO_IDX(FEXT_FCMPEZ)] = vfp_double_fcmpez, 665 [FEXT_TO_IDX(FEXT_FCMPEZ)] = vfp_double_fcmpez,
664 [FEXT_TO_IDX(FEXT_FCVT)] = vfp_double_fcvts, 666 [FEXT_TO_IDX(FEXT_FCVT)] = vfp_double_fcvts,
665 [FEXT_TO_IDX(FEXT_FUITO)] = vfp_double_fuito, 667 [FEXT_TO_IDX(FEXT_FUITO)] = vfp_double_fuito,
666 [FEXT_TO_IDX(FEXT_FSITO)] = vfp_double_fsito, 668 [FEXT_TO_IDX(FEXT_FSITO)] = vfp_double_fsito,
667 [FEXT_TO_IDX(FEXT_FTOUI)] = vfp_double_ftoui, 669 [FEXT_TO_IDX(FEXT_FTOUI)] = vfp_double_ftoui,
668 [FEXT_TO_IDX(FEXT_FTOUIZ)] = vfp_double_ftouiz, 670 [FEXT_TO_IDX(FEXT_FTOUIZ)] = vfp_double_ftouiz,
669 [FEXT_TO_IDX(FEXT_FTOSI)] = vfp_double_ftosi, 671 [FEXT_TO_IDX(FEXT_FTOSI)] = vfp_double_ftosi,
670 [FEXT_TO_IDX(FEXT_FTOSIZ)] = vfp_double_ftosiz, 672 [FEXT_TO_IDX(FEXT_FTOSIZ)] = vfp_double_ftosiz,
671 }; 673 };
672 674
673 675
674 676
675 677
676 static u32 678 static u32
677 vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn, 679 vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn,
678 struct vfp_double *vdm, u32 fpscr) 680 struct vfp_double *vdm, u32 fpscr)
679 { 681 {
680 struct vfp_double *vdp; 682 struct vfp_double *vdp;
681 u32 exceptions = 0; 683 u32 exceptions = 0;
682 int tn, tm; 684 int tn, tm;
683 685
684 tn = vfp_double_type(vdn); 686 tn = vfp_double_type(vdn);
685 tm = vfp_double_type(vdm); 687 tm = vfp_double_type(vdm);
686 688
687 if (tn & tm & VFP_INFINITY) { 689 if (tn & tm & VFP_INFINITY) {
688 /* 690 /*
689 * Two infinities. Are they different signs? 691 * Two infinities. Are they different signs?
690 */ 692 */
691 if (vdn->sign ^ vdm->sign) { 693 if (vdn->sign ^ vdm->sign) {
692 /* 694 /*
693 * different signs -> invalid 695 * different signs -> invalid
694 */ 696 */
695 exceptions = FPSCR_IOC; 697 exceptions = FPSCR_IOC;
696 vdp = &vfp_double_default_qnan; 698 vdp = &vfp_double_default_qnan;
697 } else { 699 } else {
698 /* 700 /*
699 * same signs -> valid 701 * same signs -> valid
700 */ 702 */
701 vdp = vdn; 703 vdp = vdn;
702 } 704 }
703 } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) { 705 } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
704 /* 706 /*
705 * One infinity and one number -> infinity 707 * One infinity and one number -> infinity
706 */ 708 */
707 vdp = vdn; 709 vdp = vdn;
708 } else { 710 } else {
709 /* 711 /*
710 * 'n' is a NaN of some type 712 * 'n' is a NaN of some type
711 */ 713 */
712 return vfp_propagate_nan(vdd, vdn, vdm, fpscr); 714 return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
713 } 715 }
714 *vdd = *vdp; 716 *vdd = *vdp;
715 return exceptions; 717 return exceptions;
716 } 718 }
717 719
718 static u32 720 static u32
719 vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn, 721 vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,
720 struct vfp_double *vdm, u32 fpscr) 722 struct vfp_double *vdm, u32 fpscr)
721 { 723 {
722 u32 exp_diff; 724 u32 exp_diff;
723 u64 m_sig; 725 u64 m_sig;
724 726
725 if (vdn->significand & (1ULL << 63) || 727 if (vdn->significand & (1ULL << 63) ||
726 vdm->significand & (1ULL << 63)) { 728 vdm->significand & (1ULL << 63)) {
727 pr_info("VFP: bad FP values in %s\n", __func__); 729 pr_info("VFP: bad FP values in %s\n", __func__);
728 vfp_double_dump("VDN", vdn); 730 vfp_double_dump("VDN", vdn);
729 vfp_double_dump("VDM", vdm); 731 vfp_double_dump("VDM", vdm);
730 } 732 }
731 733
732 /* 734 /*
733 * Ensure that 'n' is the largest magnitude number. Note that 735 * Ensure that 'n' is the largest magnitude number. Note that
734 * if 'n' and 'm' have equal exponents, we do not swap them. 736 * if 'n' and 'm' have equal exponents, we do not swap them.
735 * This ensures that NaN propagation works correctly. 737 * This ensures that NaN propagation works correctly.
736 */ 738 */
737 if (vdn->exponent < vdm->exponent) { 739 if (vdn->exponent < vdm->exponent) {
738 struct vfp_double *t = vdn; 740 struct vfp_double *t = vdn;
739 vdn = vdm; 741 vdn = vdm;
740 vdm = t; 742 vdm = t;
741 } 743 }
742 744
743 /* 745 /*
744 * Is 'n' an infinity or a NaN? Note that 'm' may be a number, 746 * Is 'n' an infinity or a NaN? Note that 'm' may be a number,
745 * infinity or a NaN here. 747 * infinity or a NaN here.
746 */ 748 */
747 if (vdn->exponent == 2047) 749 if (vdn->exponent == 2047)
748 return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr); 750 return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr);
749 751
750 /* 752 /*
751 * We have two proper numbers, where 'vdn' is the larger magnitude. 753 * We have two proper numbers, where 'vdn' is the larger magnitude.
752 * 754 *
753 * Copy 'n' to 'd' before doing the arithmetic. 755 * Copy 'n' to 'd' before doing the arithmetic.
754 */ 756 */
755 *vdd = *vdn; 757 *vdd = *vdn;
756 758
757 /* 759 /*
758 * Align 'm' with the result. 760 * Align 'm' with the result.
759 */ 761 */
760 exp_diff = vdn->exponent - vdm->exponent; 762 exp_diff = vdn->exponent - vdm->exponent;
761 m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff); 763 m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff);
762 764
763 /* 765 /*
764 * If the signs are different, we are really subtracting. 766 * If the signs are different, we are really subtracting.
765 */ 767 */
766 if (vdn->sign ^ vdm->sign) { 768 if (vdn->sign ^ vdm->sign) {
767 m_sig = vdn->significand - m_sig; 769 m_sig = vdn->significand - m_sig;
768 if ((s64)m_sig < 0) { 770 if ((s64)m_sig < 0) {
769 vdd->sign = vfp_sign_negate(vdd->sign); 771 vdd->sign = vfp_sign_negate(vdd->sign);
770 m_sig = -m_sig; 772 m_sig = -m_sig;
771 } 773 }
772 } else { 774 } else {
773 m_sig += vdn->significand; 775 m_sig += vdn->significand;
774 } 776 }
775 vdd->significand = m_sig; 777 vdd->significand = m_sig;
776 778
777 return 0; 779 return 0;
778 } 780 }
779 781
780 static u32 782 static u32
781 vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn, 783 vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn,
782 struct vfp_double *vdm, u32 fpscr) 784 struct vfp_double *vdm, u32 fpscr)
783 { 785 {
784 vfp_double_dump("VDN", vdn); 786 vfp_double_dump("VDN", vdn);
785 vfp_double_dump("VDM", vdm); 787 vfp_double_dump("VDM", vdm);
786 788
787 /* 789 /*
788 * Ensure that 'n' is the largest magnitude number. Note that 790 * Ensure that 'n' is the largest magnitude number. Note that
789 * if 'n' and 'm' have equal exponents, we do not swap them. 791 * if 'n' and 'm' have equal exponents, we do not swap them.
790 * This ensures that NaN propagation works correctly. 792 * This ensures that NaN propagation works correctly.
791 */ 793 */
792 if (vdn->exponent < vdm->exponent) { 794 if (vdn->exponent < vdm->exponent) {
793 struct vfp_double *t = vdn; 795 struct vfp_double *t = vdn;
794 vdn = vdm; 796 vdn = vdm;
795 vdm = t; 797 vdm = t;
796 pr_debug("VFP: swapping M <-> N\n"); 798 pr_debug("VFP: swapping M <-> N\n");
797 } 799 }
798 800
799 vdd->sign = vdn->sign ^ vdm->sign; 801 vdd->sign = vdn->sign ^ vdm->sign;
800 802
801 /* 803 /*
802 * If 'n' is an infinity or NaN, handle it. 'm' may be anything. 804 * If 'n' is an infinity or NaN, handle it. 'm' may be anything.
803 */ 805 */
804 if (vdn->exponent == 2047) { 806 if (vdn->exponent == 2047) {
805 if (vdn->significand || (vdm->exponent == 2047 && vdm->significand)) 807 if (vdn->significand || (vdm->exponent == 2047 && vdm->significand))
806 return vfp_propagate_nan(vdd, vdn, vdm, fpscr); 808 return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
807 if ((vdm->exponent | vdm->significand) == 0) { 809 if ((vdm->exponent | vdm->significand) == 0) {
808 *vdd = vfp_double_default_qnan; 810 *vdd = vfp_double_default_qnan;
809 return FPSCR_IOC; 811 return FPSCR_IOC;
810 } 812 }
811 vdd->exponent = vdn->exponent; 813 vdd->exponent = vdn->exponent;
812 vdd->significand = 0; 814 vdd->significand = 0;
813 return 0; 815 return 0;
814 } 816 }
815 817
816 /* 818 /*
817 * If 'm' is zero, the result is always zero. In this case, 819 * If 'm' is zero, the result is always zero. In this case,
818 * 'n' may be zero or a number, but it doesn't matter which. 820 * 'n' may be zero or a number, but it doesn't matter which.
819 */ 821 */
820 if ((vdm->exponent | vdm->significand) == 0) { 822 if ((vdm->exponent | vdm->significand) == 0) {
821 vdd->exponent = 0; 823 vdd->exponent = 0;
822 vdd->significand = 0; 824 vdd->significand = 0;
823 return 0; 825 return 0;
824 } 826 }
825 827
826 /* 828 /*
827 * We add 2 to the destination exponent for the same reason 829 * We add 2 to the destination exponent for the same reason
828 * as the addition case - though this time we have +1 from 830 * as the addition case - though this time we have +1 from
829 * each input operand. 831 * each input operand.
830 */ 832 */
831 vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2; 833 vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2;
832 vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand); 834 vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand);
833 835
834 vfp_double_dump("VDD", vdd); 836 vfp_double_dump("VDD", vdd);
835 return 0; 837 return 0;
836 } 838 }
837 839
838 #define NEG_MULTIPLY (1 << 0) 840 #define NEG_MULTIPLY (1 << 0)
839 #define NEG_SUBTRACT (1 << 1) 841 #define NEG_SUBTRACT (1 << 1)
840 842
841 static u32 843 static u32
842 vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func) 844 vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func)
843 { 845 {
844 struct vfp_double vdd, vdp, vdn, vdm; 846 struct vfp_double vdd, vdp, vdn, vdm;
845 u32 exceptions; 847 u32 exceptions;
846 848
847 vfp_double_unpack(&vdn, vfp_get_double(dn)); 849 vfp_double_unpack(&vdn, vfp_get_double(dn));
848 if (vdn.exponent == 0 && vdn.significand) 850 if (vdn.exponent == 0 && vdn.significand)
849 vfp_double_normalise_denormal(&vdn); 851 vfp_double_normalise_denormal(&vdn);
850 852
851 vfp_double_unpack(&vdm, vfp_get_double(dm)); 853 vfp_double_unpack(&vdm, vfp_get_double(dm));
852 if (vdm.exponent == 0 && vdm.significand) 854 if (vdm.exponent == 0 && vdm.significand)
853 vfp_double_normalise_denormal(&vdm); 855 vfp_double_normalise_denormal(&vdm);
854 856
855 exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr); 857 exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr);
856 if (negate & NEG_MULTIPLY) 858 if (negate & NEG_MULTIPLY)
857 vdp.sign = vfp_sign_negate(vdp.sign); 859 vdp.sign = vfp_sign_negate(vdp.sign);
858 860
859 vfp_double_unpack(&vdn, vfp_get_double(dd)); 861 vfp_double_unpack(&vdn, vfp_get_double(dd));
860 if (negate & NEG_SUBTRACT) 862 if (negate & NEG_SUBTRACT)
861 vdn.sign = vfp_sign_negate(vdn.sign); 863 vdn.sign = vfp_sign_negate(vdn.sign);
862 864
863 exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr); 865 exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr);
864 866
865 return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func); 867 return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func);
866 } 868 }
867 869
868 /* 870 /*
869 * Standard operations 871 * Standard operations
870 */ 872 */
871 873
872 /* 874 /*
873 * sd = sd + (sn * sm) 875 * sd = sd + (sn * sm)
874 */ 876 */
875 static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr) 877 static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr)
876 { 878 {
877 return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac"); 879 return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac");
878 } 880 }
879 881
880 /* 882 /*
881 * sd = sd - (sn * sm) 883 * sd = sd - (sn * sm)
882 */ 884 */
883 static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr) 885 static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr)
884 { 886 {
885 return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac"); 887 return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac");
886 } 888 }
887 889
888 /* 890 /*
889 * sd = -sd + (sn * sm) 891 * sd = -sd + (sn * sm)
890 */ 892 */
891 static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr) 893 static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr)
892 { 894 {
893 return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc"); 895 return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc");
894 } 896 }
895 897
896 /* 898 /*
897 * sd = -sd - (sn * sm) 899 * sd = -sd - (sn * sm)
898 */ 900 */
899 static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr) 901 static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr)
900 { 902 {
901 return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc"); 903 return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
902 } 904 }
903 905
904 /* 906 /*
905 * sd = sn * sm 907 * sd = sn * sm
906 */ 908 */
907 static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr) 909 static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr)
908 { 910 {
909 struct vfp_double vdd, vdn, vdm; 911 struct vfp_double vdd, vdn, vdm;
910 u32 exceptions; 912 u32 exceptions;
911 913
912 vfp_double_unpack(&vdn, vfp_get_double(dn)); 914 vfp_double_unpack(&vdn, vfp_get_double(dn));
913 if (vdn.exponent == 0 && vdn.significand) 915 if (vdn.exponent == 0 && vdn.significand)
914 vfp_double_normalise_denormal(&vdn); 916 vfp_double_normalise_denormal(&vdn);
915 917
916 vfp_double_unpack(&vdm, vfp_get_double(dm)); 918 vfp_double_unpack(&vdm, vfp_get_double(dm));
917 if (vdm.exponent == 0 && vdm.significand) 919 if (vdm.exponent == 0 && vdm.significand)
918 vfp_double_normalise_denormal(&vdm); 920 vfp_double_normalise_denormal(&vdm);
919 921
920 exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr); 922 exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
921 return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul"); 923 return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul");
922 } 924 }
923 925
924 /* 926 /*
925 * sd = -(sn * sm) 927 * sd = -(sn * sm)
926 */ 928 */
927 static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr) 929 static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr)
928 { 930 {
929 struct vfp_double vdd, vdn, vdm; 931 struct vfp_double vdd, vdn, vdm;
930 u32 exceptions; 932 u32 exceptions;
931 933
932 vfp_double_unpack(&vdn, vfp_get_double(dn)); 934 vfp_double_unpack(&vdn, vfp_get_double(dn));
933 if (vdn.exponent == 0 && vdn.significand) 935 if (vdn.exponent == 0 && vdn.significand)
934 vfp_double_normalise_denormal(&vdn); 936 vfp_double_normalise_denormal(&vdn);
935 937
936 vfp_double_unpack(&vdm, vfp_get_double(dm)); 938 vfp_double_unpack(&vdm, vfp_get_double(dm));
937 if (vdm.exponent == 0 && vdm.significand) 939 if (vdm.exponent == 0 && vdm.significand)
938 vfp_double_normalise_denormal(&vdm); 940 vfp_double_normalise_denormal(&vdm);
939 941
940 exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr); 942 exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
941 vdd.sign = vfp_sign_negate(vdd.sign); 943 vdd.sign = vfp_sign_negate(vdd.sign);
942 944
943 return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul"); 945 return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul");
944 } 946 }
945 947
946 /* 948 /*
947 * sd = sn + sm 949 * sd = sn + sm
948 */ 950 */
949 static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr) 951 static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr)
950 { 952 {
951 struct vfp_double vdd, vdn, vdm; 953 struct vfp_double vdd, vdn, vdm;
952 u32 exceptions; 954 u32 exceptions;
953 955
954 vfp_double_unpack(&vdn, vfp_get_double(dn)); 956 vfp_double_unpack(&vdn, vfp_get_double(dn));
955 if (vdn.exponent == 0 && vdn.significand) 957 if (vdn.exponent == 0 && vdn.significand)
956 vfp_double_normalise_denormal(&vdn); 958 vfp_double_normalise_denormal(&vdn);
957 959
958 vfp_double_unpack(&vdm, vfp_get_double(dm)); 960 vfp_double_unpack(&vdm, vfp_get_double(dm));
959 if (vdm.exponent == 0 && vdm.significand) 961 if (vdm.exponent == 0 && vdm.significand)
960 vfp_double_normalise_denormal(&vdm); 962 vfp_double_normalise_denormal(&vdm);
961 963
962 exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr); 964 exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);
963 965
964 return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd"); 966 return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd");
965 } 967 }
966 968
967 /* 969 /*
968 * sd = sn - sm 970 * sd = sn - sm
969 */ 971 */
970 static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr) 972 static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr)
971 { 973 {
972 struct vfp_double vdd, vdn, vdm; 974 struct vfp_double vdd, vdn, vdm;
973 u32 exceptions; 975 u32 exceptions;
974 976
975 vfp_double_unpack(&vdn, vfp_get_double(dn)); 977 vfp_double_unpack(&vdn, vfp_get_double(dn));
976 if (vdn.exponent == 0 && vdn.significand) 978 if (vdn.exponent == 0 && vdn.significand)
977 vfp_double_normalise_denormal(&vdn); 979 vfp_double_normalise_denormal(&vdn);
978 980
979 vfp_double_unpack(&vdm, vfp_get_double(dm)); 981 vfp_double_unpack(&vdm, vfp_get_double(dm));
980 if (vdm.exponent == 0 && vdm.significand) 982 if (vdm.exponent == 0 && vdm.significand)
981 vfp_double_normalise_denormal(&vdm); 983 vfp_double_normalise_denormal(&vdm);
982 984
983 /* 985 /*
984 * Subtraction is like addition, but with a negated operand. 986 * Subtraction is like addition, but with a negated operand.
985 */ 987 */
986 vdm.sign = vfp_sign_negate(vdm.sign); 988 vdm.sign = vfp_sign_negate(vdm.sign);
987 989
988 exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr); 990 exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);
989 991
990 return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub"); 992 return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub");
991 } 993 }
992 994
993 /* 995 /*
994 * sd = sn / sm 996 * sd = sn / sm
995 */ 997 */
996 static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr) 998 static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr)
997 { 999 {
998 struct vfp_double vdd, vdn, vdm; 1000 struct vfp_double vdd, vdn, vdm;
999 u32 exceptions = 0; 1001 u32 exceptions = 0;
1000 int tm, tn; 1002 int tm, tn;
1001 1003
1002 vfp_double_unpack(&vdn, vfp_get_double(dn)); 1004 vfp_double_unpack(&vdn, vfp_get_double(dn));
1003 vfp_double_unpack(&vdm, vfp_get_double(dm)); 1005 vfp_double_unpack(&vdm, vfp_get_double(dm));
1004 1006
1005 vdd.sign = vdn.sign ^ vdm.sign; 1007 vdd.sign = vdn.sign ^ vdm.sign;
1006 1008
1007 tn = vfp_double_type(&vdn); 1009 tn = vfp_double_type(&vdn);
1008 tm = vfp_double_type(&vdm); 1010 tm = vfp_double_type(&vdm);
1009 1011
1010 /* 1012 /*
1011 * Is n a NAN? 1013 * Is n a NAN?
1012 */ 1014 */
1013 if (tn & VFP_NAN) 1015 if (tn & VFP_NAN)
1014 goto vdn_nan; 1016 goto vdn_nan;
1015 1017
1016 /* 1018 /*
1017 * Is m a NAN? 1019 * Is m a NAN?
1018 */ 1020 */
1019 if (tm & VFP_NAN) 1021 if (tm & VFP_NAN)
1020 goto vdm_nan; 1022 goto vdm_nan;
1021 1023
1022 /* 1024 /*
1023 * If n and m are infinity, the result is invalid 1025 * If n and m are infinity, the result is invalid
1024 * If n and m are zero, the result is invalid 1026 * If n and m are zero, the result is invalid
1025 */ 1027 */
1026 if (tm & tn & (VFP_INFINITY|VFP_ZERO)) 1028 if (tm & tn & (VFP_INFINITY|VFP_ZERO))
1027 goto invalid; 1029 goto invalid;
1028 1030
1029 /* 1031 /*
1030 * If n is infinity, the result is infinity 1032 * If n is infinity, the result is infinity
1031 */ 1033 */
1032 if (tn & VFP_INFINITY) 1034 if (tn & VFP_INFINITY)
1033 goto infinity; 1035 goto infinity;
1034 1036
1035 /* 1037 /*
1036 * If m is zero, raise div0 exceptions 1038 * If m is zero, raise div0 exceptions
1037 */ 1039 */
1038 if (tm & VFP_ZERO) 1040 if (tm & VFP_ZERO)
1039 goto divzero; 1041 goto divzero;
1040 1042
1041 /* 1043 /*
1042 * If m is infinity, or n is zero, the result is zero 1044 * If m is infinity, or n is zero, the result is zero
1043 */ 1045 */
1044 if (tm & VFP_INFINITY || tn & VFP_ZERO) 1046 if (tm & VFP_INFINITY || tn & VFP_ZERO)
1045 goto zero; 1047 goto zero;
1046 1048
1047 if (tn & VFP_DENORMAL) 1049 if (tn & VFP_DENORMAL)
1048 vfp_double_normalise_denormal(&vdn); 1050 vfp_double_normalise_denormal(&vdn);
1049 if (tm & VFP_DENORMAL) 1051 if (tm & VFP_DENORMAL)
1050 vfp_double_normalise_denormal(&vdm); 1052 vfp_double_normalise_denormal(&vdm);
1051 1053
1052 /* 1054 /*
1053 * Ok, we have two numbers, we can perform division. 1055 * Ok, we have two numbers, we can perform division.
1054 */ 1056 */
1055 vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1; 1057 vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1;
1056 vdm.significand <<= 1; 1058 vdm.significand <<= 1;
1057 if (vdm.significand <= (2 * vdn.significand)) { 1059 if (vdm.significand <= (2 * vdn.significand)) {
1058 vdn.significand >>= 1; 1060 vdn.significand >>= 1;
1059 vdd.exponent++; 1061 vdd.exponent++;
1060 } 1062 }
1061 vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand); 1063 vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand);
1062 if ((vdd.significand & 0x1ff) <= 2) { 1064 if ((vdd.significand & 0x1ff) <= 2) {
1063 u64 termh, terml, remh, reml; 1065 u64 termh, terml, remh, reml;
1064 mul64to128(&termh, &terml, vdm.significand, vdd.significand); 1066 mul64to128(&termh, &terml, vdm.significand, vdd.significand);
1065 sub128(&remh, &reml, vdn.significand, 0, termh, terml); 1067 sub128(&remh, &reml, vdn.significand, 0, termh, terml);
1066 while ((s64)remh < 0) { 1068 while ((s64)remh < 0) {
1067 vdd.significand -= 1; 1069 vdd.significand -= 1;
1068 add128(&remh, &reml, remh, reml, 0, vdm.significand); 1070 add128(&remh, &reml, remh, reml, 0, vdm.significand);
1069 } 1071 }
1070 vdd.significand |= (reml != 0); 1072 vdd.significand |= (reml != 0);
1071 } 1073 }
1072 return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv"); 1074 return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv");
1073 1075
1074 vdn_nan: 1076 vdn_nan:
1075 exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr); 1077 exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr);
1076 pack: 1078 pack:
1077 vfp_put_double(dd, vfp_double_pack(&vdd)); 1079 vfp_put_double(dd, vfp_double_pack(&vdd));
1078 return exceptions; 1080 return exceptions;
1079 1081
1080 vdm_nan: 1082 vdm_nan:
1081 exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr); 1083 exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr);
1082 goto pack; 1084 goto pack;
1083 1085
1084 zero: 1086 zero:
1085 vdd.exponent = 0; 1087 vdd.exponent = 0;
1086 vdd.significand = 0; 1088 vdd.significand = 0;
1087 goto pack; 1089 goto pack;
1088 1090
1089 divzero: 1091 divzero:
1090 exceptions = FPSCR_DZC; 1092 exceptions = FPSCR_DZC;
1091 infinity: 1093 infinity:
1092 vdd.exponent = 2047; 1094 vdd.exponent = 2047;
1093 vdd.significand = 0; 1095 vdd.significand = 0;
1094 goto pack; 1096 goto pack;
1095 1097
1096 invalid: 1098 invalid:
1097 vfp_put_double(dd, vfp_double_pack(&vfp_double_default_qnan)); 1099 vfp_put_double(dd, vfp_double_pack(&vfp_double_default_qnan));
1098 return FPSCR_IOC; 1100 return FPSCR_IOC;
1099 } 1101 }
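The correction step in vfp_double_fdiv() above follows the usual estimate-and-correct pattern: vfp_estimate_div128to64() may overshoot the true quotient by a couple of units, so when the low bits of the estimate sit near a rounding boundary the code multiplies back, walks the estimate down until the 128-bit remainder is non-negative, and finally jams a sticky bit when the division was inexact. Below is a hedged sketch of the same idea, using the compiler's unsigned __int128 in place of the kernel's mul64to128()/sub128() helpers; the function name and the 128-bit type are illustrative assumptions, not part of this patch.

/*
 * Sketch only: correct a possibly-too-large quotient estimate for
 * (n_sig << 64) / m_sig and record inexactness in bit 0.  Assumes a
 * 64-bit compiler providing unsigned __int128 and the kernel u64 type.
 */
static u64 correct_quotient_estimate(u64 n_sig, u64 m_sig, u64 estimate)
{
        unsigned __int128 dividend = (unsigned __int128)n_sig << 64;
        unsigned __int128 term = (unsigned __int128)m_sig * estimate;

        /* The estimate never undershoots, so only step downwards,
         * mirroring the decrement loop in the function above. */
        while (term > dividend) {
                estimate -= 1;
                term -= m_sig;
        }

        /* Jam a sticky bit so later rounding still sees an inexact result. */
        if (term != dividend)
                estimate |= 1;

        return estimate;
}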
1100 1102
1101 static u32 (* const fop_fns[16])(int dd, int dn, int dm, u32 fpscr) = { 1103 static u32 (* const fop_fns[16])(int dd, int dn, int dm, u32 fpscr) = {
1102 [FOP_TO_IDX(FOP_FMAC)] = vfp_double_fmac, 1104 [FOP_TO_IDX(FOP_FMAC)] = vfp_double_fmac,
1103 [FOP_TO_IDX(FOP_FNMAC)] = vfp_double_fnmac, 1105 [FOP_TO_IDX(FOP_FNMAC)] = vfp_double_fnmac,
1104 [FOP_TO_IDX(FOP_FMSC)] = vfp_double_fmsc, 1106 [FOP_TO_IDX(FOP_FMSC)] = vfp_double_fmsc,
1105 [FOP_TO_IDX(FOP_FNMSC)] = vfp_double_fnmsc, 1107 [FOP_TO_IDX(FOP_FNMSC)] = vfp_double_fnmsc,
1106 [FOP_TO_IDX(FOP_FMUL)] = vfp_double_fmul, 1108 [FOP_TO_IDX(FOP_FMUL)] = vfp_double_fmul,
1107 [FOP_TO_IDX(FOP_FNMUL)] = vfp_double_fnmul, 1109 [FOP_TO_IDX(FOP_FNMUL)] = vfp_double_fnmul,
1108 [FOP_TO_IDX(FOP_FADD)] = vfp_double_fadd, 1110 [FOP_TO_IDX(FOP_FADD)] = vfp_double_fadd,
1109 [FOP_TO_IDX(FOP_FSUB)] = vfp_double_fsub, 1111 [FOP_TO_IDX(FOP_FSUB)] = vfp_double_fsub,
1110 [FOP_TO_IDX(FOP_FDIV)] = vfp_double_fdiv, 1112 [FOP_TO_IDX(FOP_FDIV)] = vfp_double_fdiv,
1111 }; 1113 };
1112 1114
1113 #define FREG_BANK(x) ((x) & 0x0c) 1115 #define FREG_BANK(x) ((x) & 0x0c)
1114 #define FREG_IDX(x) ((x) & 3) 1116 #define FREG_IDX(x) ((x) & 3)
1115 1117
1116 u32 vfp_double_cpdo(u32 inst, u32 fpscr) 1118 u32 vfp_double_cpdo(u32 inst, u32 fpscr)
1117 { 1119 {
1118 u32 op = inst & FOP_MASK; 1120 u32 op = inst & FOP_MASK;
1119 u32 exceptions = 0; 1121 u32 exceptions = 0;
1120 unsigned int dd = vfp_get_sd(inst); 1122 unsigned int dd = vfp_get_sd(inst);
1121 unsigned int dn = vfp_get_sn(inst); 1123 unsigned int dn = vfp_get_sn(inst);
1122 unsigned int dm = vfp_get_sm(inst); 1124 unsigned int dm = vfp_get_sm(inst);
1123 unsigned int vecitr, veclen, vecstride; 1125 unsigned int vecitr, veclen, vecstride;
1124 u32 (*fop)(int, int, s32, u32); 1126 u32 (*fop)(int, int, s32, u32);
1125 1127
1126 veclen = fpscr & FPSCR_LENGTH_MASK; 1128 veclen = fpscr & FPSCR_LENGTH_MASK;
1127 vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK)) * 2; 1129 vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK)) * 2;
1128 1130
1129 /* 1131 /*
1130 * If destination bank is zero, vector length is always '1'. 1132 * If destination bank is zero, vector length is always '1'.
1131 * ARM DDI0100F C5.1.3, C5.3.2. 1133 * ARM DDI0100F C5.1.3, C5.3.2.
1132 */ 1134 */
1133 if (FREG_BANK(dd) == 0) 1135 if (FREG_BANK(dd) == 0)
1134 veclen = 0; 1136 veclen = 0;
1135 1137
1136 pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride, 1138 pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
1137 (veclen >> FPSCR_LENGTH_BIT) + 1); 1139 (veclen >> FPSCR_LENGTH_BIT) + 1);
1138 1140
1139 fop = (op == FOP_EXT) ? fop_extfns[dn] : fop_fns[FOP_TO_IDX(op)]; 1141 fop = (op == FOP_EXT) ? fop_extfns[dn] : fop_fns[FOP_TO_IDX(op)];
1140 if (!fop) 1142 if (!fop)
1141 goto invalid; 1143 goto invalid;
1142 1144
1143 for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) { 1145 for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
1144 u32 except; 1146 u32 except;
1145 1147
1146 if (op == FOP_EXT) 1148 if (op == FOP_EXT)
1147 pr_debug("VFP: itr%d (d%u.%u) = op[%u] (d%u.%u)\n", 1149 pr_debug("VFP: itr%d (d%u.%u) = op[%u] (d%u.%u)\n",
1148 vecitr >> FPSCR_LENGTH_BIT, 1150 vecitr >> FPSCR_LENGTH_BIT,
1149 dd >> 1, dd & 1, dn, 1151 dd >> 1, dd & 1, dn,
1150 dm >> 1, dm & 1); 1152 dm >> 1, dm & 1);
1151 else 1153 else
1152 pr_debug("VFP: itr%d (d%u.%u) = (d%u.%u) op[%u] (d%u.%u)\n", 1154 pr_debug("VFP: itr%d (d%u.%u) = (d%u.%u) op[%u] (d%u.%u)\n",
1153 vecitr >> FPSCR_LENGTH_BIT, 1155 vecitr >> FPSCR_LENGTH_BIT,
1154 dd >> 1, dd & 1, 1156 dd >> 1, dd & 1,
1155 dn >> 1, dn & 1, 1157 dn >> 1, dn & 1,
1156 FOP_TO_IDX(op), 1158 FOP_TO_IDX(op),
1157 dm >> 1, dm & 1); 1159 dm >> 1, dm & 1);
1158 1160
1159 except = fop(dd, dn, dm, fpscr); 1161 except = fop(dd, dn, dm, fpscr);
1160 pr_debug("VFP: itr%d: exceptions=%08x\n", 1162 pr_debug("VFP: itr%d: exceptions=%08x\n",
1161 vecitr >> FPSCR_LENGTH_BIT, except); 1163 vecitr >> FPSCR_LENGTH_BIT, except);
1162 1164
1163 exceptions |= except; 1165 exceptions |= except;
1164 1166
1165 /* 1167 /*
1166 * This ensures that comparisons only operate on scalars; 1168 * This ensures that comparisons only operate on scalars;
1167 * comparisons always return with one FPSCR status bit set. 1169 * comparisons always return with one FPSCR status bit set.
1168 */ 1170 */
1169 if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) 1171 if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
1170 break; 1172 break;
1171 1173
1172 /* 1174 /*
1173 * CHECK: It appears to be undefined whether we stop when 1175 * CHECK: It appears to be undefined whether we stop when
1174 * we encounter an exception. We continue. 1176 * we encounter an exception. We continue.
1175 */ 1177 */
1176 1178
1177 dd = FREG_BANK(dd) + ((FREG_IDX(dd) + vecstride) & 6); 1179 dd = FREG_BANK(dd) + ((FREG_IDX(dd) + vecstride) & 6);
1178 dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 6); 1180 dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 6);
1179 if (FREG_BANK(dm) != 0) 1181 if (FREG_BANK(dm) != 0)
1180 dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 6); 1182 dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 6);
1181 } 1183 }
1182 return exceptions; 1184 return exceptions;
1183 1185
1184 invalid: 1186 invalid:
1185 return ~0; 1187 return ~0;
1186 } 1188 }
1187 1189
arch/arm/vfp/vfpsingle.c
1 /* 1 /*
2 * linux/arch/arm/vfp/vfpsingle.c 2 * linux/arch/arm/vfp/vfpsingle.c
3 * 3 *
4 * This code is derived in part from John R. Hauser's softfloat library, which 4 * This code is derived in part from John R. Hauser's softfloat library, which
5 * carries the following notice: 5 * carries the following notice:
6 * 6 *
7 * =========================================================================== 7 * ===========================================================================
8 * This C source file is part of the SoftFloat IEC/IEEE Floating-point 8 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
9 * Arithmetic Package, Release 2. 9 * Arithmetic Package, Release 2.
10 * 10 *
11 * Written by John R. Hauser. This work was made possible in part by the 11 * Written by John R. Hauser. This work was made possible in part by the
12 * International Computer Science Institute, located at Suite 600, 1947 Center 12 * International Computer Science Institute, located at Suite 600, 1947 Center
13 * Street, Berkeley, California 94704. Funding was partially provided by the 13 * Street, Berkeley, California 94704. Funding was partially provided by the
14 * National Science Foundation under grant MIP-9311980. The original version 14 * National Science Foundation under grant MIP-9311980. The original version
15 * of this code was written as part of a project to build a fixed-point vector 15 * of this code was written as part of a project to build a fixed-point vector
16 * processor in collaboration with the University of California at Berkeley, 16 * processor in collaboration with the University of California at Berkeley,
17 * overseen by Profs. Nelson Morgan and John Wawrzynek. More information 17 * overseen by Profs. Nelson Morgan and John Wawrzynek. More information
18 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 18 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
19 * arithmetic/softfloat.html'. 19 * arithmetic/softfloat.html'.
20 * 20 *
21 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 21 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
22 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 22 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
23 * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 23 * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
24 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 24 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
25 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 25 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
26 * 26 *
27 * Derivative works are acceptable, even for commercial purposes, so long as 27 * Derivative works are acceptable, even for commercial purposes, so long as
28 * (1) they include prominent notice that the work is derivative, and (2) they 28 * (1) they include prominent notice that the work is derivative, and (2) they
29 * include prominent notice akin to these three paragraphs for those parts of 29 * include prominent notice akin to these three paragraphs for those parts of
30 * this code that are retained. 30 * this code that are retained.
31 * =========================================================================== 31 * ===========================================================================
32 */ 32 */
33 #include <linux/kernel.h> 33 #include <linux/kernel.h>
34 #include <linux/bitops.h> 34 #include <linux/bitops.h>
35
36 #include <asm/div64.h>
35 #include <asm/ptrace.h> 37 #include <asm/ptrace.h>
36 #include <asm/vfp.h> 38 #include <asm/vfp.h>
37 39
38 #include "vfpinstr.h" 40 #include "vfpinstr.h"
39 #include "vfp.h" 41 #include "vfp.h"
40 42
41 static struct vfp_single vfp_single_default_qnan = { 43 static struct vfp_single vfp_single_default_qnan = {
42 .exponent = 255, 44 .exponent = 255,
43 .sign = 0, 45 .sign = 0,
44 .significand = VFP_SINGLE_SIGNIFICAND_QNAN, 46 .significand = VFP_SINGLE_SIGNIFICAND_QNAN,
45 }; 47 };
46 48
47 static void vfp_single_dump(const char *str, struct vfp_single *s) 49 static void vfp_single_dump(const char *str, struct vfp_single *s)
48 { 50 {
49 pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n", 51 pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
50 str, s->sign != 0, s->exponent, s->significand); 52 str, s->sign != 0, s->exponent, s->significand);
51 } 53 }
52 54
53 static void vfp_single_normalise_denormal(struct vfp_single *vs) 55 static void vfp_single_normalise_denormal(struct vfp_single *vs)
54 { 56 {
55 int bits = 31 - fls(vs->significand); 57 int bits = 31 - fls(vs->significand);
56 58
57 vfp_single_dump("normalise_denormal: in", vs); 59 vfp_single_dump("normalise_denormal: in", vs);
58 60
59 if (bits) { 61 if (bits) {
60 vs->exponent -= bits - 1; 62 vs->exponent -= bits - 1;
61 vs->significand <<= bits; 63 vs->significand <<= bits;
62 } 64 }
63 65
64 vfp_single_dump("normalise_denormal: out", vs); 66 vfp_single_dump("normalise_denormal: out", vs);
65 } 67 }
66 68
67 #ifndef DEBUG 69 #ifndef DEBUG
68 #define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except) 70 #define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
69 u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions) 71 u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions)
70 #else 72 #else
71 u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func) 73 u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func)
72 #endif 74 #endif
73 { 75 {
74 u32 significand, incr, rmode; 76 u32 significand, incr, rmode;
75 int exponent, shift, underflow; 77 int exponent, shift, underflow;
76 78
77 vfp_single_dump("pack: in", vs); 79 vfp_single_dump("pack: in", vs);
78 80
79 /* 81 /*
80 * Infinities and NaNs are a special case. 82 * Infinities and NaNs are a special case.
81 */ 83 */
82 if (vs->exponent == 255 && (vs->significand == 0 || exceptions)) 84 if (vs->exponent == 255 && (vs->significand == 0 || exceptions))
83 goto pack; 85 goto pack;
84 86
85 /* 87 /*
86 * Special-case zero. 88 * Special-case zero.
87 */ 89 */
88 if (vs->significand == 0) { 90 if (vs->significand == 0) {
89 vs->exponent = 0; 91 vs->exponent = 0;
90 goto pack; 92 goto pack;
91 } 93 }
92 94
93 exponent = vs->exponent; 95 exponent = vs->exponent;
94 significand = vs->significand; 96 significand = vs->significand;
95 97
96 /* 98 /*
97 * Normalise first. Note that we shift the significand up to 99 * Normalise first. Note that we shift the significand up to
98 * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least 100 * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
99 * significant bit. 101 * significant bit.
100 */ 102 */
101 shift = 32 - fls(significand); 103 shift = 32 - fls(significand);
102 if (shift < 32 && shift) { 104 if (shift < 32 && shift) {
103 exponent -= shift; 105 exponent -= shift;
104 significand <<= shift; 106 significand <<= shift;
105 } 107 }
106 108
107 #ifdef DEBUG 109 #ifdef DEBUG
108 vs->exponent = exponent; 110 vs->exponent = exponent;
109 vs->significand = significand; 111 vs->significand = significand;
110 vfp_single_dump("pack: normalised", vs); 112 vfp_single_dump("pack: normalised", vs);
111 #endif 113 #endif
112 114
113 /* 115 /*
114 * Tiny number? 116 * Tiny number?
115 */ 117 */
116 underflow = exponent < 0; 118 underflow = exponent < 0;
117 if (underflow) { 119 if (underflow) {
118 significand = vfp_shiftright32jamming(significand, -exponent); 120 significand = vfp_shiftright32jamming(significand, -exponent);
119 exponent = 0; 121 exponent = 0;
120 #ifdef DEBUG 122 #ifdef DEBUG
121 vs->exponent = exponent; 123 vs->exponent = exponent;
122 vs->significand = significand; 124 vs->significand = significand;
123 vfp_single_dump("pack: tiny number", vs); 125 vfp_single_dump("pack: tiny number", vs);
124 #endif 126 #endif
125 if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))) 127 if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
126 underflow = 0; 128 underflow = 0;
127 } 129 }
128 130
129 /* 131 /*
130 * Select rounding increment. 132 * Select rounding increment.
131 */ 133 */
132 incr = 0; 134 incr = 0;
133 rmode = fpscr & FPSCR_RMODE_MASK; 135 rmode = fpscr & FPSCR_RMODE_MASK;
134 136
135 if (rmode == FPSCR_ROUND_NEAREST) { 137 if (rmode == FPSCR_ROUND_NEAREST) {
136 incr = 1 << VFP_SINGLE_LOW_BITS; 138 incr = 1 << VFP_SINGLE_LOW_BITS;
137 if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0) 139 if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
138 incr -= 1; 140 incr -= 1;
139 } else if (rmode == FPSCR_ROUND_TOZERO) { 141 } else if (rmode == FPSCR_ROUND_TOZERO) {
140 incr = 0; 142 incr = 0;
141 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0)) 143 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
142 incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1; 144 incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;
143 145
144 pr_debug("VFP: rounding increment = 0x%08x\n", incr); 146 pr_debug("VFP: rounding increment = 0x%08x\n", incr);
145 147
146 /* 148 /*
147 * Is our rounding going to overflow? 149 * Is our rounding going to overflow?
148 */ 150 */
149 if ((significand + incr) < significand) { 151 if ((significand + incr) < significand) {
150 exponent += 1; 152 exponent += 1;
151 significand = (significand >> 1) | (significand & 1); 153 significand = (significand >> 1) | (significand & 1);
152 incr >>= 1; 154 incr >>= 1;
153 #ifdef DEBUG 155 #ifdef DEBUG
154 vs->exponent = exponent; 156 vs->exponent = exponent;
155 vs->significand = significand; 157 vs->significand = significand;
156 vfp_single_dump("pack: overflow", vs); 158 vfp_single_dump("pack: overflow", vs);
157 #endif 159 #endif
158 } 160 }
159 161
160 /* 162 /*
161 * If any of the low bits (which will be shifted out of the 163 * If any of the low bits (which will be shifted out of the
162 * number) are non-zero, the result is inexact. 164 * number) are non-zero, the result is inexact.
163 */ 165 */
164 if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)) 166 if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
165 exceptions |= FPSCR_IXC; 167 exceptions |= FPSCR_IXC;
166 168
167 /* 169 /*
168 * Do our rounding. 170 * Do our rounding.
169 */ 171 */
170 significand += incr; 172 significand += incr;
171 173
172 /* 174 /*
173 * Infinity? 175 * Infinity?
174 */ 176 */
175 if (exponent >= 254) { 177 if (exponent >= 254) {
176 exceptions |= FPSCR_OFC | FPSCR_IXC; 178 exceptions |= FPSCR_OFC | FPSCR_IXC;
177 if (incr == 0) { 179 if (incr == 0) {
178 vs->exponent = 253; 180 vs->exponent = 253;
179 vs->significand = 0x7fffffff; 181 vs->significand = 0x7fffffff;
180 } else { 182 } else {
181 vs->exponent = 255; /* infinity */ 183 vs->exponent = 255; /* infinity */
182 vs->significand = 0; 184 vs->significand = 0;
183 } 185 }
184 } else { 186 } else {
185 if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0) 187 if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
186 exponent = 0; 188 exponent = 0;
187 if (exponent || significand > 0x80000000) 189 if (exponent || significand > 0x80000000)
188 underflow = 0; 190 underflow = 0;
189 if (underflow) 191 if (underflow)
190 exceptions |= FPSCR_UFC; 192 exceptions |= FPSCR_UFC;
191 vs->exponent = exponent; 193 vs->exponent = exponent;
192 vs->significand = significand >> 1; 194 vs->significand = significand >> 1;
193 } 195 }
194 196
195 pack: 197 pack:
196 vfp_single_dump("pack: final", vs); 198 vfp_single_dump("pack: final", vs);
197 { 199 {
198 s32 d = vfp_single_pack(vs); 200 s32 d = vfp_single_pack(vs);
199 pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func, 201 pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
200 sd, d, exceptions); 202 sd, d, exceptions);
201 vfp_put_float(sd, d); 203 vfp_put_float(sd, d);
202 } 204 }
203 205
204 return exceptions & ~VFP_NAN_FLAG; 206 return exceptions & ~VFP_NAN_FLAG;
205 } 207 }
206 208
207 /* 209 /*
208 * Propagate the NaN, setting exceptions if it is signalling. 210 * Propagate the NaN, setting exceptions if it is signalling.
209 * 'n' is always a NaN. 'm' may be a number, NaN or infinity. 211 * 'n' is always a NaN. 'm' may be a number, NaN or infinity.
210 */ 212 */
211 static u32 213 static u32
212 vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn, 214 vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
213 struct vfp_single *vsm, u32 fpscr) 215 struct vfp_single *vsm, u32 fpscr)
214 { 216 {
215 struct vfp_single *nan; 217 struct vfp_single *nan;
216 int tn, tm = 0; 218 int tn, tm = 0;
217 219
218 tn = vfp_single_type(vsn); 220 tn = vfp_single_type(vsn);
219 221
220 if (vsm) 222 if (vsm)
221 tm = vfp_single_type(vsm); 223 tm = vfp_single_type(vsm);
222 224
223 if (fpscr & FPSCR_DEFAULT_NAN) 225 if (fpscr & FPSCR_DEFAULT_NAN)
224 /* 226 /*
225 * Default NaN mode - always returns a quiet NaN 227 * Default NaN mode - always returns a quiet NaN
226 */ 228 */
227 nan = &vfp_single_default_qnan; 229 nan = &vfp_single_default_qnan;
228 else { 230 else {
229 /* 231 /*
230 * Contemporary mode - select the first signalling 232 * Contemporary mode - select the first signalling
231 * NAN, or if neither are signalling, the first 233 * NAN, or if neither are signalling, the first
232 * quiet NAN. 234 * quiet NAN.
233 */ 235 */
234 if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN)) 236 if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
235 nan = vsn; 237 nan = vsn;
236 else 238 else
237 nan = vsm; 239 nan = vsm;
238 /* 240 /*
239 * Make the NaN quiet. 241 * Make the NaN quiet.
240 */ 242 */
241 nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN; 243 nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
242 } 244 }
243 245
244 *vsd = *nan; 246 *vsd = *nan;
245 247
246 /* 248 /*
247 * If one was a signalling NAN, raise invalid operation. 249 * If one was a signalling NAN, raise invalid operation.
248 */ 250 */
249 return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG; 251 return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
250 } 252 }
251 253
252 254
253 /* 255 /*
254 * Extended operations 256 * Extended operations
255 */ 257 */
256 static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr) 258 static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr)
257 { 259 {
258 vfp_put_float(sd, vfp_single_packed_abs(m)); 260 vfp_put_float(sd, vfp_single_packed_abs(m));
259 return 0; 261 return 0;
260 } 262 }
261 263
262 static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr) 264 static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr)
263 { 265 {
264 vfp_put_float(sd, m); 266 vfp_put_float(sd, m);
265 return 0; 267 return 0;
266 } 268 }
267 269
268 static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr) 270 static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr)
269 { 271 {
270 vfp_put_float(sd, vfp_single_packed_negate(m)); 272 vfp_put_float(sd, vfp_single_packed_negate(m));
271 return 0; 273 return 0;
272 } 274 }
273 275
274 static const u16 sqrt_oddadjust[] = { 276 static const u16 sqrt_oddadjust[] = {
275 0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0, 277 0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
276 0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67 278 0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
277 }; 279 };
278 280
279 static const u16 sqrt_evenadjust[] = { 281 static const u16 sqrt_evenadjust[] = {
280 0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e, 282 0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
281 0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002 283 0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
282 }; 284 };
283 285
284 u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand) 286 u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
285 { 287 {
286 int index; 288 int index;
287 u32 z, a; 289 u32 z, a;
288 290
289 if ((significand & 0xc0000000) != 0x40000000) { 291 if ((significand & 0xc0000000) != 0x40000000) {
290 printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n"); 292 printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n");
291 } 293 }
292 294
293 a = significand << 1; 295 a = significand << 1;
294 index = (a >> 27) & 15; 296 index = (a >> 27) & 15;
295 if (exponent & 1) { 297 if (exponent & 1) {
296 z = 0x4000 + (a >> 17) - sqrt_oddadjust[index]; 298 z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
297 z = ((a / z) << 14) + (z << 15); 299 z = ((a / z) << 14) + (z << 15);
298 a >>= 1; 300 a >>= 1;
299 } else { 301 } else {
300 z = 0x8000 + (a >> 17) - sqrt_evenadjust[index]; 302 z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
301 z = a / z + z; 303 z = a / z + z;
302 z = (z >= 0x20000) ? 0xffff8000 : (z << 15); 304 z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
303 if (z <= a) 305 if (z <= a)
304 return (s32)a >> 1; 306 return (s32)a >> 1;
305 } 307 }
306 return (u32)(((u64)a << 31) / z) + (z >> 1); 308 {
309 u64 v = (u64)a << 31;
310 do_div(v, z);
311 return v + (z >> 1);
312 }
307 } 313 }
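The hunk above replaces the plain 64-bit '/' with do_div(): the macro takes a u64 lvalue and a u32 divisor, overwrites the lvalue with the quotient, and returns the 32-bit remainder, so no 64-by-64 library division is needed. A minimal standalone sketch of the do_div() usage pattern follows; the wrapper function is illustrative only, not something added by this patch.

#include <asm/div64.h>

/* Sketch: divide a 64-bit value by a 32-bit divisor and keep the quotient. */
static u32 div64_by_32(u64 dividend, u32 divisor)
{
        /* do_div() overwrites 'dividend' with the quotient and
         * returns the remainder; only the quotient is needed here. */
        u32 rem = do_div(dividend, divisor);

        (void)rem;
        return (u32)dividend;
}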
308 314
309 static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr) 315 static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr)
310 { 316 {
311 struct vfp_single vsm, vsd; 317 struct vfp_single vsm, vsd;
312 int ret, tm; 318 int ret, tm;
313 319
314 vfp_single_unpack(&vsm, m); 320 vfp_single_unpack(&vsm, m);
315 tm = vfp_single_type(&vsm); 321 tm = vfp_single_type(&vsm);
316 if (tm & (VFP_NAN|VFP_INFINITY)) { 322 if (tm & (VFP_NAN|VFP_INFINITY)) {
317 struct vfp_single *vsp = &vsd; 323 struct vfp_single *vsp = &vsd;
318 324
319 if (tm & VFP_NAN) 325 if (tm & VFP_NAN)
320 ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr); 326 ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
321 else if (vsm.sign == 0) { 327 else if (vsm.sign == 0) {
322 sqrt_copy: 328 sqrt_copy:
323 vsp = &vsm; 329 vsp = &vsm;
324 ret = 0; 330 ret = 0;
325 } else { 331 } else {
326 sqrt_invalid: 332 sqrt_invalid:
327 vsp = &vfp_single_default_qnan; 333 vsp = &vfp_single_default_qnan;
328 ret = FPSCR_IOC; 334 ret = FPSCR_IOC;
329 } 335 }
330 vfp_put_float(sd, vfp_single_pack(vsp)); 336 vfp_put_float(sd, vfp_single_pack(vsp));
331 return ret; 337 return ret;
332 } 338 }
333 339
334 /* 340 /*
335 * sqrt(+/- 0) == +/- 0 341 * sqrt(+/- 0) == +/- 0
336 */ 342 */
337 if (tm & VFP_ZERO) 343 if (tm & VFP_ZERO)
338 goto sqrt_copy; 344 goto sqrt_copy;
339 345
340 /* 346 /*
341 * Normalise a denormalised number 347 * Normalise a denormalised number
342 */ 348 */
343 if (tm & VFP_DENORMAL) 349 if (tm & VFP_DENORMAL)
344 vfp_single_normalise_denormal(&vsm); 350 vfp_single_normalise_denormal(&vsm);
345 351
346 /* 352 /*
347 * sqrt(<0) = invalid 353 * sqrt(<0) = invalid
348 */ 354 */
349 if (vsm.sign) 355 if (vsm.sign)
350 goto sqrt_invalid; 356 goto sqrt_invalid;
351 357
352 vfp_single_dump("sqrt", &vsm); 358 vfp_single_dump("sqrt", &vsm);
353 359
354 /* 360 /*
355 * Estimate the square root. 361 * Estimate the square root.
356 */ 362 */
357 vsd.sign = 0; 363 vsd.sign = 0;
358 vsd.exponent = ((vsm.exponent - 127) >> 1) + 127; 364 vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
359 vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2; 365 vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;
360 366
361 vfp_single_dump("sqrt estimate", &vsd); 367 vfp_single_dump("sqrt estimate", &vsd);
362 368
363 /* 369 /*
364 * And now adjust. 370 * And now adjust.
365 */ 371 */
366 if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) { 372 if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
367 if (vsd.significand < 2) { 373 if (vsd.significand < 2) {
368 vsd.significand = 0xffffffff; 374 vsd.significand = 0xffffffff;
369 } else { 375 } else {
370 u64 term; 376 u64 term;
371 s64 rem; 377 s64 rem;
372 vsm.significand <<= !(vsm.exponent & 1); 378 vsm.significand <<= !(vsm.exponent & 1);
373 term = (u64)vsd.significand * vsd.significand; 379 term = (u64)vsd.significand * vsd.significand;
374 rem = ((u64)vsm.significand << 32) - term; 380 rem = ((u64)vsm.significand << 32) - term;
375 381
376 pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem); 382 pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);
377 383
378 while (rem < 0) { 384 while (rem < 0) {
379 vsd.significand -= 1; 385 vsd.significand -= 1;
380 rem += ((u64)vsd.significand << 1) | 1; 386 rem += ((u64)vsd.significand << 1) | 1;
381 } 387 }
382 vsd.significand |= rem != 0; 388 vsd.significand |= rem != 0;
383 } 389 }
384 } 390 }
385 vsd.significand = vfp_shiftright32jamming(vsd.significand, 1); 391 vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);
386 392
387 return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt"); 393 return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt");
388 } 394 }
389 395
390 /* 396 /*
391 * Equal := ZC 397 * Equal := ZC
392 * Less than := N 398 * Less than := N
393 * Greater than := C 399 * Greater than := C
394 * Unordered := CV 400 * Unordered := CV
395 */ 401 */
396 static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr) 402 static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr)
397 { 403 {
398 s32 d; 404 s32 d;
399 u32 ret = 0; 405 u32 ret = 0;
400 406
401 d = vfp_get_float(sd); 407 d = vfp_get_float(sd);
402 if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) { 408 if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
403 ret |= FPSCR_C | FPSCR_V; 409 ret |= FPSCR_C | FPSCR_V;
404 if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) 410 if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
405 /* 411 /*
406 * Signalling NaN, or signalling on quiet NaN 412 * Signalling NaN, or signalling on quiet NaN
407 */ 413 */
408 ret |= FPSCR_IOC; 414 ret |= FPSCR_IOC;
409 } 415 }
410 416
411 if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) { 417 if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
412 ret |= FPSCR_C | FPSCR_V; 418 ret |= FPSCR_C | FPSCR_V;
413 if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) 419 if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
414 /* 420 /*
415 * Signalling NaN, or signalling on quiet NaN 421 * Signalling NaN, or signalling on quiet NaN
416 */ 422 */
417 ret |= FPSCR_IOC; 423 ret |= FPSCR_IOC;
418 } 424 }
419 425
420 if (ret == 0) { 426 if (ret == 0) {
421 if (d == m || vfp_single_packed_abs(d | m) == 0) { 427 if (d == m || vfp_single_packed_abs(d | m) == 0) {
422 /* 428 /*
423 * equal 429 * equal
424 */ 430 */
425 ret |= FPSCR_Z | FPSCR_C; 431 ret |= FPSCR_Z | FPSCR_C;
426 } else if (vfp_single_packed_sign(d ^ m)) { 432 } else if (vfp_single_packed_sign(d ^ m)) {
427 /* 433 /*
428 * different signs 434 * different signs
429 */ 435 */
430 if (vfp_single_packed_sign(d)) 436 if (vfp_single_packed_sign(d))
431 /* 437 /*
432 * d is negative, so d < m 438 * d is negative, so d < m
433 */ 439 */
434 ret |= FPSCR_N; 440 ret |= FPSCR_N;
435 else 441 else
436 /* 442 /*
437 * d is positive, so d > m 443 * d is positive, so d > m
438 */ 444 */
439 ret |= FPSCR_C; 445 ret |= FPSCR_C;
440 } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) { 446 } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
441 /* 447 /*
442 * d < m 448 * d < m
443 */ 449 */
444 ret |= FPSCR_N; 450 ret |= FPSCR_N;
445 } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) { 451 } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
446 /* 452 /*
447 * d > m 453 * d > m
448 */ 454 */
449 ret |= FPSCR_C; 455 ret |= FPSCR_C;
450 } 456 }
451 } 457 }
452 return ret; 458 return ret;
453 } 459 }
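The flag encoding documented above vfp_compare() (equal = Z,C; less than = N; greater than = C; unordered = C,V) is what callers see in the returned status bits. A hedged example of consuming that encoding is sketched below; the enum and helper are hypothetical and not part of the VFP code.

enum vfp_cmp { VFP_CMP_EQ, VFP_CMP_LT, VFP_CMP_GT, VFP_CMP_UNORDERED };

/* Sketch: map the FPSCR comparison flags back to an ordering result. */
static enum vfp_cmp decode_compare_flags(u32 flags)
{
        if (flags & FPSCR_V)            /* only the unordered case sets V */
                return VFP_CMP_UNORDERED;
        if (flags & FPSCR_Z)            /* Z,C: operands compare equal */
                return VFP_CMP_EQ;
        if (flags & FPSCR_N)            /* N: d < m */
                return VFP_CMP_LT;
        return VFP_CMP_GT;              /* C alone: d > m */
}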
454 460
455 static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr) 461 static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr)
456 { 462 {
457 return vfp_compare(sd, 0, m, fpscr); 463 return vfp_compare(sd, 0, m, fpscr);
458 } 464 }
459 465
460 static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr) 466 static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr)
461 { 467 {
462 return vfp_compare(sd, 1, m, fpscr); 468 return vfp_compare(sd, 1, m, fpscr);
463 } 469 }
464 470
465 static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr) 471 static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr)
466 { 472 {
467 return vfp_compare(sd, 0, 0, fpscr); 473 return vfp_compare(sd, 0, 0, fpscr);
468 } 474 }
469 475
470 static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr) 476 static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr)
471 { 477 {
472 return vfp_compare(sd, 1, 0, fpscr); 478 return vfp_compare(sd, 1, 0, fpscr);
473 } 479 }
474 480
475 static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr) 481 static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr)
476 { 482 {
477 struct vfp_single vsm; 483 struct vfp_single vsm;
478 struct vfp_double vdd; 484 struct vfp_double vdd;
479 int tm; 485 int tm;
480 u32 exceptions = 0; 486 u32 exceptions = 0;
481 487
482 vfp_single_unpack(&vsm, m); 488 vfp_single_unpack(&vsm, m);
483 489
484 tm = vfp_single_type(&vsm); 490 tm = vfp_single_type(&vsm);
485 491
486 /* 492 /*
487 * If we have a signalling NaN, signal invalid operation. 493 * If we have a signalling NaN, signal invalid operation.
488 */ 494 */
489 if (tm == VFP_SNAN) 495 if (tm == VFP_SNAN)
490 exceptions = FPSCR_IOC; 496 exceptions = FPSCR_IOC;
491 497
492 if (tm & VFP_DENORMAL) 498 if (tm & VFP_DENORMAL)
493 vfp_single_normalise_denormal(&vsm); 499 vfp_single_normalise_denormal(&vsm);
494 500
495 vdd.sign = vsm.sign; 501 vdd.sign = vsm.sign;
496 vdd.significand = (u64)vsm.significand << 32; 502 vdd.significand = (u64)vsm.significand << 32;
497 503
498 /* 504 /*
499 * If we have an infinity or NaN, the exponent must be 2047. 505 * If we have an infinity or NaN, the exponent must be 2047.
500 */ 506 */
501 if (tm & (VFP_INFINITY|VFP_NAN)) { 507 if (tm & (VFP_INFINITY|VFP_NAN)) {
502 vdd.exponent = 2047; 508 vdd.exponent = 2047;
503 if (tm & VFP_NAN) 509 if (tm & VFP_NAN)
504 vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; 510 vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
505 goto pack_nan; 511 goto pack_nan;
506 } else if (tm & VFP_ZERO) 512 } else if (tm & VFP_ZERO)
507 vdd.exponent = 0; 513 vdd.exponent = 0;
508 else 514 else
509 vdd.exponent = vsm.exponent + (1023 - 127); 515 vdd.exponent = vsm.exponent + (1023 - 127);
510 516
511 /* 517 /*
512 * Technically, if bit 0 of dd is set, this is an invalid 518 * Technically, if bit 0 of dd is set, this is an invalid
513 * instruction. However, we ignore this for efficiency. 519 * instruction. However, we ignore this for efficiency.
514 */ 520 */
515 return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd"); 521 return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd");
516 522
517 pack_nan: 523 pack_nan:
518 vfp_put_double(dd, vfp_double_pack(&vdd)); 524 vfp_put_double(dd, vfp_double_pack(&vdd));
519 return exceptions; 525 return exceptions;
520 } 526 }
521 527
522 static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr) 528 static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr)
523 { 529 {
524 struct vfp_single vs; 530 struct vfp_single vs;
525 531
526 vs.sign = 0; 532 vs.sign = 0;
527 vs.exponent = 127 + 31 - 1; 533 vs.exponent = 127 + 31 - 1;
528 vs.significand = (u32)m; 534 vs.significand = (u32)m;
529 535
530 return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito"); 536 return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito");
531 } 537 }
532 538
533 static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr) 539 static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr)
534 { 540 {
535 struct vfp_single vs; 541 struct vfp_single vs;
536 542
537 vs.sign = (m & 0x80000000) >> 16; 543 vs.sign = (m & 0x80000000) >> 16;
538 vs.exponent = 127 + 31 - 1; 544 vs.exponent = 127 + 31 - 1;
539 vs.significand = vs.sign ? -m : m; 545 vs.significand = vs.sign ? -m : m;
540 546
541 return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito"); 547 return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito");
542 } 548 }
543 549
544 static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr) 550 static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr)
545 { 551 {
546 struct vfp_single vsm; 552 struct vfp_single vsm;
547 u32 d, exceptions = 0; 553 u32 d, exceptions = 0;
548 int rmode = fpscr & FPSCR_RMODE_MASK; 554 int rmode = fpscr & FPSCR_RMODE_MASK;
549 int tm; 555 int tm;
550 556
551 vfp_single_unpack(&vsm, m); 557 vfp_single_unpack(&vsm, m);
552 vfp_single_dump("VSM", &vsm); 558 vfp_single_dump("VSM", &vsm);
553 559
554 /* 560 /*
555 * Do we have a denormalised number? 561 * Do we have a denormalised number?
556 */ 562 */
557 tm = vfp_single_type(&vsm); 563 tm = vfp_single_type(&vsm);
558 if (tm & VFP_DENORMAL) 564 if (tm & VFP_DENORMAL)
559 exceptions |= FPSCR_IDC; 565 exceptions |= FPSCR_IDC;
560 566
561 if (tm & VFP_NAN) 567 if (tm & VFP_NAN)
562 vsm.sign = 0; 568 vsm.sign = 0;
563 569
564 if (vsm.exponent >= 127 + 32) { 570 if (vsm.exponent >= 127 + 32) {
565 d = vsm.sign ? 0 : 0xffffffff; 571 d = vsm.sign ? 0 : 0xffffffff;
566 exceptions = FPSCR_IOC; 572 exceptions = FPSCR_IOC;
567 } else if (vsm.exponent >= 127 - 1) { 573 } else if (vsm.exponent >= 127 - 1) {
568 int shift = 127 + 31 - vsm.exponent; 574 int shift = 127 + 31 - vsm.exponent;
569 u32 rem, incr = 0; 575 u32 rem, incr = 0;
570 576
571 /* 577 /*
572 * 2^0 <= m < 2^32-2^8 578 * 2^0 <= m < 2^32-2^8
573 */ 579 */
574 d = (vsm.significand << 1) >> shift; 580 d = (vsm.significand << 1) >> shift;
575 rem = vsm.significand << (33 - shift); 581 rem = vsm.significand << (33 - shift);
576 582
577 if (rmode == FPSCR_ROUND_NEAREST) { 583 if (rmode == FPSCR_ROUND_NEAREST) {
578 incr = 0x80000000; 584 incr = 0x80000000;
579 if ((d & 1) == 0) 585 if ((d & 1) == 0)
580 incr -= 1; 586 incr -= 1;
581 } else if (rmode == FPSCR_ROUND_TOZERO) { 587 } else if (rmode == FPSCR_ROUND_TOZERO) {
582 incr = 0; 588 incr = 0;
583 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) { 589 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
584 incr = ~0; 590 incr = ~0;
585 } 591 }
586 592
587 if ((rem + incr) < rem) { 593 if ((rem + incr) < rem) {
588 if (d < 0xffffffff) 594 if (d < 0xffffffff)
589 d += 1; 595 d += 1;
590 else 596 else
591 exceptions |= FPSCR_IOC; 597 exceptions |= FPSCR_IOC;
592 } 598 }
593 599
594 if (d && vsm.sign) { 600 if (d && vsm.sign) {
595 d = 0; 601 d = 0;
596 exceptions |= FPSCR_IOC; 602 exceptions |= FPSCR_IOC;
597 } else if (rem) 603 } else if (rem)
598 exceptions |= FPSCR_IXC; 604 exceptions |= FPSCR_IXC;
599 } else { 605 } else {
600 d = 0; 606 d = 0;
601 if (vsm.exponent | vsm.significand) { 607 if (vsm.exponent | vsm.significand) {
602 exceptions |= FPSCR_IXC; 608 exceptions |= FPSCR_IXC;
603 if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0) 609 if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
604 d = 1; 610 d = 1;
605 else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) { 611 else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
606 d = 0; 612 d = 0;
607 exceptions |= FPSCR_IOC; 613 exceptions |= FPSCR_IOC;
608 } 614 }
609 } 615 }
610 } 616 }
611 617
612 pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); 618 pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
613 619
614 vfp_put_float(sd, d); 620 vfp_put_float(sd, d);
615 621
616 return exceptions; 622 return exceptions;
617 } 623 }
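The rounding above is compact but subtle: 'rem' holds the discarded fraction scaled so that an exact half equals 0x80000000, and a carry into the result is signalled by unsigned overflow of rem + incr. An editorial trace, not part of the diff:

	/*
	 * Round to nearest, rem == 0x80000000 (an exact tie):
	 *   d even -> incr = 0x7fffffff, rem + incr = 0xffffffff, no wrap,
	 *             so d is kept (tie resolved towards the even value);
	 *   d odd  -> incr = 0x80000000, rem + incr wraps to 0, so d += 1,
	 *             again landing on an even result.
	 *
	 * When the selected directed rounding rounds this magnitude up:
	 *   incr = ~0, so any non-zero rem wraps and bumps d, while an
	 *   exact result (rem == 0) never does.
	 */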
618 624
619 static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr) 625 static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr)
620 { 626 {
621 return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO); 627 return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO);
622 } 628 }
623 629
624 static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr) 630 static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr)
625 { 631 {
626 struct vfp_single vsm; 632 struct vfp_single vsm;
627 u32 d, exceptions = 0; 633 u32 d, exceptions = 0;
628 int rmode = fpscr & FPSCR_RMODE_MASK; 634 int rmode = fpscr & FPSCR_RMODE_MASK;
629 635
630 vfp_single_unpack(&vsm, m); 636 vfp_single_unpack(&vsm, m);
631 vfp_single_dump("VSM", &vsm); 637 vfp_single_dump("VSM", &vsm);
632 638
633 /* 639 /*
634 * Do we have a denormalised number? 640 * Do we have a denormalised number?
635 */ 641 */
636 if (vfp_single_type(&vsm) & VFP_DENORMAL) 642 if (vfp_single_type(&vsm) & VFP_DENORMAL)
637 exceptions |= FPSCR_IDC; 643 exceptions |= FPSCR_IDC;
638 644
639 if (vsm.exponent >= 127 + 32) { 645 if (vsm.exponent >= 127 + 32) {
640 /* 646 /*
641 * m >= 2^31-2^7: invalid 647 * m >= 2^31-2^7: invalid
642 */ 648 */
643 d = 0x7fffffff; 649 d = 0x7fffffff;
644 if (vsm.sign) 650 if (vsm.sign)
645 d = ~d; 651 d = ~d;
646 exceptions |= FPSCR_IOC; 652 exceptions |= FPSCR_IOC;
647 } else if (vsm.exponent >= 127 - 1) { 653 } else if (vsm.exponent >= 127 - 1) {
648 int shift = 127 + 31 - vsm.exponent; 654 int shift = 127 + 31 - vsm.exponent;
649 u32 rem, incr = 0; 655 u32 rem, incr = 0;
650 656
651 /* 2^0 <= m <= 2^31-2^7 */ 657 /* 2^0 <= m <= 2^31-2^7 */
652 d = (vsm.significand << 1) >> shift; 658 d = (vsm.significand << 1) >> shift;
653 rem = vsm.significand << (33 - shift); 659 rem = vsm.significand << (33 - shift);
654 660
655 if (rmode == FPSCR_ROUND_NEAREST) { 661 if (rmode == FPSCR_ROUND_NEAREST) {
656 incr = 0x80000000; 662 incr = 0x80000000;
657 if ((d & 1) == 0) 663 if ((d & 1) == 0)
658 incr -= 1; 664 incr -= 1;
659 } else if (rmode == FPSCR_ROUND_TOZERO) { 665 } else if (rmode == FPSCR_ROUND_TOZERO) {
660 incr = 0; 666 incr = 0;
661 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) { 667 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
662 incr = ~0; 668 incr = ~0;
663 } 669 }
664 670
665 if ((rem + incr) < rem && d < 0xffffffff) 671 if ((rem + incr) < rem && d < 0xffffffff)
666 d += 1; 672 d += 1;
667 if (d > 0x7fffffff + (vsm.sign != 0)) { 673 if (d > 0x7fffffff + (vsm.sign != 0)) {
668 d = 0x7fffffff + (vsm.sign != 0); 674 d = 0x7fffffff + (vsm.sign != 0);
669 exceptions |= FPSCR_IOC; 675 exceptions |= FPSCR_IOC;
670 } else if (rem) 676 } else if (rem)
671 exceptions |= FPSCR_IXC; 677 exceptions |= FPSCR_IXC;
672 678
673 if (vsm.sign) 679 if (vsm.sign)
674 d = -d; 680 d = -d;
675 } else { 681 } else {
676 d = 0; 682 d = 0;
677 if (vsm.exponent | vsm.significand) { 683 if (vsm.exponent | vsm.significand) {
678 exceptions |= FPSCR_IXC; 684 exceptions |= FPSCR_IXC;
679 if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0) 685 if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
680 d = 1; 686 d = 1;
681 else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) 687 else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
682 d = -1; 688 d = -1;
683 } 689 }
684 } 690 }
685 691
686 pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); 692 pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
687 693
688 vfp_put_float(sd, (s32)d); 694 vfp_put_float(sd, (s32)d);
689 695
690 return exceptions; 696 return exceptions;
691 } 697 }
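The saturation test above leans on the asymmetry of the signed 32-bit range; a short worked check (editorial, not from the patch):

	/*
	 * vsm.sign == 0: limit is 0x7fffffff            ( 2^31 - 1, INT_MAX)
	 * vsm.sign != 0: limit is 0x7fffffff + 1 = 0x80000000;
	 *                d = -d then yields 0x80000000  (-2^31,     INT_MIN)
	 */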
692 698
693 static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr) 699 static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr)
694 { 700 {
695 return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO); 701 return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO);
696 } 702 }
697 703
698 static u32 (* const fop_extfns[32])(int sd, int unused, s32 m, u32 fpscr) = { 704 static u32 (* const fop_extfns[32])(int sd, int unused, s32 m, u32 fpscr) = {
699 [FEXT_TO_IDX(FEXT_FCPY)] = vfp_single_fcpy, 705 [FEXT_TO_IDX(FEXT_FCPY)] = vfp_single_fcpy,
700 [FEXT_TO_IDX(FEXT_FABS)] = vfp_single_fabs, 706 [FEXT_TO_IDX(FEXT_FABS)] = vfp_single_fabs,
701 [FEXT_TO_IDX(FEXT_FNEG)] = vfp_single_fneg, 707 [FEXT_TO_IDX(FEXT_FNEG)] = vfp_single_fneg,
702 [FEXT_TO_IDX(FEXT_FSQRT)] = vfp_single_fsqrt, 708 [FEXT_TO_IDX(FEXT_FSQRT)] = vfp_single_fsqrt,
703 [FEXT_TO_IDX(FEXT_FCMP)] = vfp_single_fcmp, 709 [FEXT_TO_IDX(FEXT_FCMP)] = vfp_single_fcmp,
704 [FEXT_TO_IDX(FEXT_FCMPE)] = vfp_single_fcmpe, 710 [FEXT_TO_IDX(FEXT_FCMPE)] = vfp_single_fcmpe,
705 [FEXT_TO_IDX(FEXT_FCMPZ)] = vfp_single_fcmpz, 711 [FEXT_TO_IDX(FEXT_FCMPZ)] = vfp_single_fcmpz,
706 [FEXT_TO_IDX(FEXT_FCMPEZ)] = vfp_single_fcmpez, 712 [FEXT_TO_IDX(FEXT_FCMPEZ)] = vfp_single_fcmpez,
707 [FEXT_TO_IDX(FEXT_FCVT)] = vfp_single_fcvtd, 713 [FEXT_TO_IDX(FEXT_FCVT)] = vfp_single_fcvtd,
708 [FEXT_TO_IDX(FEXT_FUITO)] = vfp_single_fuito, 714 [FEXT_TO_IDX(FEXT_FUITO)] = vfp_single_fuito,
709 [FEXT_TO_IDX(FEXT_FSITO)] = vfp_single_fsito, 715 [FEXT_TO_IDX(FEXT_FSITO)] = vfp_single_fsito,
710 [FEXT_TO_IDX(FEXT_FTOUI)] = vfp_single_ftoui, 716 [FEXT_TO_IDX(FEXT_FTOUI)] = vfp_single_ftoui,
711 [FEXT_TO_IDX(FEXT_FTOUIZ)] = vfp_single_ftouiz, 717 [FEXT_TO_IDX(FEXT_FTOUIZ)] = vfp_single_ftouiz,
712 [FEXT_TO_IDX(FEXT_FTOSI)] = vfp_single_ftosi, 718 [FEXT_TO_IDX(FEXT_FTOSI)] = vfp_single_ftosi,
713 [FEXT_TO_IDX(FEXT_FTOSIZ)] = vfp_single_ftosiz, 719 [FEXT_TO_IDX(FEXT_FTOSIZ)] = vfp_single_ftosiz,
714 }; 720 };
715 721
716 722
717 723
718 724
719 725
720 static u32 726 static u32
721 vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn, 727 vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
722 struct vfp_single *vsm, u32 fpscr) 728 struct vfp_single *vsm, u32 fpscr)
723 { 729 {
724 struct vfp_single *vsp; 730 struct vfp_single *vsp;
725 u32 exceptions = 0; 731 u32 exceptions = 0;
726 int tn, tm; 732 int tn, tm;
727 733
728 tn = vfp_single_type(vsn); 734 tn = vfp_single_type(vsn);
729 tm = vfp_single_type(vsm); 735 tm = vfp_single_type(vsm);
730 736
731 if (tn & tm & VFP_INFINITY) { 737 if (tn & tm & VFP_INFINITY) {
732 /* 738 /*
733 * Two infinities. Are they different signs? 739 * Two infinities. Are they different signs?
734 */ 740 */
735 if (vsn->sign ^ vsm->sign) { 741 if (vsn->sign ^ vsm->sign) {
736 /* 742 /*
737 * different signs -> invalid 743 * different signs -> invalid
738 */ 744 */
739 exceptions = FPSCR_IOC; 745 exceptions = FPSCR_IOC;
740 vsp = &vfp_single_default_qnan; 746 vsp = &vfp_single_default_qnan;
741 } else { 747 } else {
742 /* 748 /*
743 * same signs -> valid 749 * same signs -> valid
744 */ 750 */
745 vsp = vsn; 751 vsp = vsn;
746 } 752 }
747 } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) { 753 } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
748 /* 754 /*
749 * One infinity and one number -> infinity 755 * One infinity and one number -> infinity
750 */ 756 */
751 vsp = vsn; 757 vsp = vsn;
752 } else { 758 } else {
753 /* 759 /*
754 * 'n' is a NaN of some type 760 * 'n' is a NaN of some type
755 */ 761 */
756 return vfp_propagate_nan(vsd, vsn, vsm, fpscr); 762 return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
757 } 763 }
758 *vsd = *vsp; 764 *vsd = *vsp;
759 return exceptions; 765 return exceptions;
760 } 766 }
761 767
762 static u32 768 static u32
763 vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn, 769 vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
764 struct vfp_single *vsm, u32 fpscr) 770 struct vfp_single *vsm, u32 fpscr)
765 { 771 {
766 u32 exp_diff, m_sig; 772 u32 exp_diff, m_sig;
767 773
768 if (vsn->significand & 0x80000000 || 774 if (vsn->significand & 0x80000000 ||
769 vsm->significand & 0x80000000) { 775 vsm->significand & 0x80000000) {
770 pr_info("VFP: bad FP values in %s\n", __func__); 776 pr_info("VFP: bad FP values in %s\n", __func__);
771 vfp_single_dump("VSN", vsn); 777 vfp_single_dump("VSN", vsn);
772 vfp_single_dump("VSM", vsm); 778 vfp_single_dump("VSM", vsm);
773 } 779 }
774 780
775 /* 781 /*
776 * Ensure that 'n' is the largest magnitude number. Note that 782 * Ensure that 'n' is the largest magnitude number. Note that
777 * if 'n' and 'm' have equal exponents, we do not swap them. 783 * if 'n' and 'm' have equal exponents, we do not swap them.
778 * This ensures that NaN propagation works correctly. 784 * This ensures that NaN propagation works correctly.
779 */ 785 */
780 if (vsn->exponent < vsm->exponent) { 786 if (vsn->exponent < vsm->exponent) {
781 struct vfp_single *t = vsn; 787 struct vfp_single *t = vsn;
782 vsn = vsm; 788 vsn = vsm;
783 vsm = t; 789 vsm = t;
784 } 790 }
785 791
786 /* 792 /*
787 * Is 'n' an infinity or a NaN? Note that 'm' may be a number, 793 * Is 'n' an infinity or a NaN? Note that 'm' may be a number,
788 * infinity or a NaN here. 794 * infinity or a NaN here.
789 */ 795 */
790 if (vsn->exponent == 255) 796 if (vsn->exponent == 255)
791 return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr); 797 return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);
792 798
793 /* 799 /*
794 * We have two proper numbers, where 'vsn' is the larger magnitude. 800 * We have two proper numbers, where 'vsn' is the larger magnitude.
795 * 801 *
796 * Copy 'n' to 'd' before doing the arithmetic. 802 * Copy 'n' to 'd' before doing the arithmetic.
797 */ 803 */
798 *vsd = *vsn; 804 *vsd = *vsn;
799 805
800 /* 806 /*
801 * Align both numbers. 807 * Align both numbers.
802 */ 808 */
803 exp_diff = vsn->exponent - vsm->exponent; 809 exp_diff = vsn->exponent - vsm->exponent;
804 m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff); 810 m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);
805 811
806 /* 812 /*
807 * If the signs are different, we are really subtracting. 813 * If the signs are different, we are really subtracting.
808 */ 814 */
809 if (vsn->sign ^ vsm->sign) { 815 if (vsn->sign ^ vsm->sign) {
810 m_sig = vsn->significand - m_sig; 816 m_sig = vsn->significand - m_sig;
811 if ((s32)m_sig < 0) { 817 if ((s32)m_sig < 0) {
812 vsd->sign = vfp_sign_negate(vsd->sign); 818 vsd->sign = vfp_sign_negate(vsd->sign);
813 m_sig = -m_sig; 819 m_sig = -m_sig;
814 } else if (m_sig == 0) { 820 } else if (m_sig == 0) {
815 vsd->sign = (fpscr & FPSCR_RMODE_MASK) == 821 vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
816 FPSCR_ROUND_MINUSINF ? 0x8000 : 0; 822 FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
817 } 823 }
818 } else { 824 } else {
819 m_sig = vsn->significand + m_sig; 825 m_sig = vsn->significand + m_sig;
820 } 826 }
821 vsd->significand = m_sig; 827 vsd->significand = m_sig;
822 828
823 return 0; 829 return 0;
824 } 830 }
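A numeric trace of the addition path above may help; this is an editorial example in the unpacked format the file works in, assuming the unit bit sits at bit 30 (consistent with the 0x80000000 sanity check at the top of the function). Adding 1.0 and 0.125:

	/*
	 * vsn = { exponent 127, significand 0x40000000 }   (1.0)
	 * vsm = { exponent 124, significand 0x40000000 }   (0.125)
	 *
	 * exp_diff = 3
	 * m_sig    = vfp_shiftright32jamming(0x40000000, 3) = 0x08000000
	 * same signs, so m_sig = 0x40000000 + 0x08000000    = 0x48000000
	 *
	 * vsd = { 127, 0x48000000 }, which the later normalise/pack step
	 * turns into 0x3f900000, i.e. 1.125f.
	 */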
825 831
826 static u32 832 static u32
827 vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr) 833 vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
828 { 834 {
829 vfp_single_dump("VSN", vsn); 835 vfp_single_dump("VSN", vsn);
830 vfp_single_dump("VSM", vsm); 836 vfp_single_dump("VSM", vsm);
831 837
832 /* 838 /*
833 * Ensure that 'n' is the largest magnitude number. Note that 839 * Ensure that 'n' is the largest magnitude number. Note that
834 * if 'n' and 'm' have equal exponents, we do not swap them. 840 * if 'n' and 'm' have equal exponents, we do not swap them.
835 * This ensures that NaN propagation works correctly. 841 * This ensures that NaN propagation works correctly.
836 */ 842 */
837 if (vsn->exponent < vsm->exponent) { 843 if (vsn->exponent < vsm->exponent) {
838 struct vfp_single *t = vsn; 844 struct vfp_single *t = vsn;
839 vsn = vsm; 845 vsn = vsm;
840 vsm = t; 846 vsm = t;
841 pr_debug("VFP: swapping M <-> N\n"); 847 pr_debug("VFP: swapping M <-> N\n");
842 } 848 }
843 849
844 vsd->sign = vsn->sign ^ vsm->sign; 850 vsd->sign = vsn->sign ^ vsm->sign;
845 851
846 /* 852 /*
847 * If 'n' is an infinity or NaN, handle it. 'm' may be anything. 853 * If 'n' is an infinity or NaN, handle it. 'm' may be anything.
848 */ 854 */
849 if (vsn->exponent == 255) { 855 if (vsn->exponent == 255) {
850 if (vsn->significand || (vsm->exponent == 255 && vsm->significand)) 856 if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
851 return vfp_propagate_nan(vsd, vsn, vsm, fpscr); 857 return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
852 if ((vsm->exponent | vsm->significand) == 0) { 858 if ((vsm->exponent | vsm->significand) == 0) {
853 *vsd = vfp_single_default_qnan; 859 *vsd = vfp_single_default_qnan;
854 return FPSCR_IOC; 860 return FPSCR_IOC;
855 } 861 }
856 vsd->exponent = vsn->exponent; 862 vsd->exponent = vsn->exponent;
857 vsd->significand = 0; 863 vsd->significand = 0;
858 return 0; 864 return 0;
859 } 865 }
860 866
861 /* 867 /*
862 * If 'm' is zero, the result is always zero. In this case, 868 * If 'm' is zero, the result is always zero. In this case,
863 * 'n' may be zero or a number, but it doesn't matter which. 869 * 'n' may be zero or a number, but it doesn't matter which.
864 */ 870 */
865 if ((vsm->exponent | vsm->significand) == 0) { 871 if ((vsm->exponent | vsm->significand) == 0) {
866 vsd->exponent = 0; 872 vsd->exponent = 0;
867 vsd->significand = 0; 873 vsd->significand = 0;
868 return 0; 874 return 0;
869 } 875 }
870 876
871 /* 877 /*
872 * We add 2 to the destination exponent for the same reason as 878 * We add 2 to the destination exponent for the same reason as
873 * the addition case - though this time we have +1 from each 879 * the addition case - though this time we have +1 from each
874 * input operand. 880 * input operand.
875 */ 881 */
876 vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2; 882 vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
877 vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand); 883 vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);
878 884
879 vfp_single_dump("VSD", vsd); 885 vfp_single_dump("VSD", vsd);
880 return 0; 886 return 0;
881 } 887 }
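The "+ 2" on the exponent balances the scaling of the significands: each unpacked significand carries its value at bit 30 (a factor of 2^30), so the 64-bit product is scaled by 2^60, and keeping only its top 32 bits divides that by 2^32, leaving the unit two bit positions low. A worked case (editorial sketch):

	/*
	 * 1.0 * 1.0:
	 *   significands  0x40000000 * 0x40000000 = 2^60
	 *   high 32 bits (vfp_hi64to32jamming)    = 0x10000000
	 *   exponent      127 + 127 - 127 + 2     = 129
	 *
	 * Normalising shifts the significand left by two and subtracts two
	 * from the exponent, giving { 127, 0x40000000 } = 1.0f again.
	 */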
882 888
883 #define NEG_MULTIPLY (1 << 0) 889 #define NEG_MULTIPLY (1 << 0)
884 #define NEG_SUBTRACT (1 << 1) 890 #define NEG_SUBTRACT (1 << 1)
885 891
886 static u32 892 static u32
887 vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func) 893 vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
888 { 894 {
889 struct vfp_single vsd, vsp, vsn, vsm; 895 struct vfp_single vsd, vsp, vsn, vsm;
890 u32 exceptions; 896 u32 exceptions;
891 s32 v; 897 s32 v;
892 898
893 v = vfp_get_float(sn); 899 v = vfp_get_float(sn);
894 pr_debug("VFP: s%u = %08x\n", sn, v); 900 pr_debug("VFP: s%u = %08x\n", sn, v);
895 vfp_single_unpack(&vsn, v); 901 vfp_single_unpack(&vsn, v);
896 if (vsn.exponent == 0 && vsn.significand) 902 if (vsn.exponent == 0 && vsn.significand)
897 vfp_single_normalise_denormal(&vsn); 903 vfp_single_normalise_denormal(&vsn);
898 904
899 vfp_single_unpack(&vsm, m); 905 vfp_single_unpack(&vsm, m);
900 if (vsm.exponent == 0 && vsm.significand) 906 if (vsm.exponent == 0 && vsm.significand)
901 vfp_single_normalise_denormal(&vsm); 907 vfp_single_normalise_denormal(&vsm);
902 908
903 exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr); 909 exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
904 if (negate & NEG_MULTIPLY) 910 if (negate & NEG_MULTIPLY)
905 vsp.sign = vfp_sign_negate(vsp.sign); 911 vsp.sign = vfp_sign_negate(vsp.sign);
906 912
907 v = vfp_get_float(sd); 913 v = vfp_get_float(sd);
908 pr_debug("VFP: s%u = %08x\n", sd, v); 914 pr_debug("VFP: s%u = %08x\n", sd, v);
909 vfp_single_unpack(&vsn, v); 915 vfp_single_unpack(&vsn, v);
910 if (negate & NEG_SUBTRACT) 916 if (negate & NEG_SUBTRACT)
911 vsn.sign = vfp_sign_negate(vsn.sign); 917 vsn.sign = vfp_sign_negate(vsn.sign);
912 918
913 exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr); 919 exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr);
914 920
915 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func); 921 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func);
916 } 922 }
917 923
918 /* 924 /*
919 * Standard operations 925 * Standard operations
920 */ 926 */
921 927
922 /* 928 /*
923 * sd = sd + (sn * sm) 929 * sd = sd + (sn * sm)
924 */ 930 */
925 static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr) 931 static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr)
926 { 932 {
927 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac"); 933 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac");
928 } 934 }
929 935
930 /* 936 /*
931 * sd = sd - (sn * sm) 937 * sd = sd - (sn * sm)
932 */ 938 */
933 static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr) 939 static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr)
934 { 940 {
935 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac"); 941 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
936 } 942 }
937 943
938 /* 944 /*
939 * sd = -sd + (sn * sm) 945 * sd = -sd + (sn * sm)
940 */ 946 */
941 static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr) 947 static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr)
942 { 948 {
943 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc"); 949 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
944 } 950 }
945 951
946 /* 952 /*
947 * sd = -sd - (sn * sm) 953 * sd = -sd - (sn * sm)
948 */ 954 */
949 static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr) 955 static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr)
950 { 956 {
951 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc"); 957 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
952 } 958 }
953 959
954 /* 960 /*
955 * sd = sn * sm 961 * sd = sn * sm
956 */ 962 */
957 static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr) 963 static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr)
958 { 964 {
959 struct vfp_single vsd, vsn, vsm; 965 struct vfp_single vsd, vsn, vsm;
960 u32 exceptions; 966 u32 exceptions;
961 s32 n = vfp_get_float(sn); 967 s32 n = vfp_get_float(sn);
962 968
963 pr_debug("VFP: s%u = %08x\n", sn, n); 969 pr_debug("VFP: s%u = %08x\n", sn, n);
964 970
965 vfp_single_unpack(&vsn, n); 971 vfp_single_unpack(&vsn, n);
966 if (vsn.exponent == 0 && vsn.significand) 972 if (vsn.exponent == 0 && vsn.significand)
967 vfp_single_normalise_denormal(&vsn); 973 vfp_single_normalise_denormal(&vsn);
968 974
969 vfp_single_unpack(&vsm, m); 975 vfp_single_unpack(&vsm, m);
970 if (vsm.exponent == 0 && vsm.significand) 976 if (vsm.exponent == 0 && vsm.significand)
971 vfp_single_normalise_denormal(&vsm); 977 vfp_single_normalise_denormal(&vsm);
972 978
973 exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr); 979 exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
974 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul"); 980 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul");
975 } 981 }
976 982
977 /* 983 /*
978 * sd = -(sn * sm) 984 * sd = -(sn * sm)
979 */ 985 */
980 static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr) 986 static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr)
981 { 987 {
982 struct vfp_single vsd, vsn, vsm; 988 struct vfp_single vsd, vsn, vsm;
983 u32 exceptions; 989 u32 exceptions;
984 s32 n = vfp_get_float(sn); 990 s32 n = vfp_get_float(sn);
985 991
986 pr_debug("VFP: s%u = %08x\n", sn, n); 992 pr_debug("VFP: s%u = %08x\n", sn, n);
987 993
988 vfp_single_unpack(&vsn, n); 994 vfp_single_unpack(&vsn, n);
989 if (vsn.exponent == 0 && vsn.significand) 995 if (vsn.exponent == 0 && vsn.significand)
990 vfp_single_normalise_denormal(&vsn); 996 vfp_single_normalise_denormal(&vsn);
991 997
992 vfp_single_unpack(&vsm, m); 998 vfp_single_unpack(&vsm, m);
993 if (vsm.exponent == 0 && vsm.significand) 999 if (vsm.exponent == 0 && vsm.significand)
994 vfp_single_normalise_denormal(&vsm); 1000 vfp_single_normalise_denormal(&vsm);
995 1001
996 exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr); 1002 exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
997 vsd.sign = vfp_sign_negate(vsd.sign); 1003 vsd.sign = vfp_sign_negate(vsd.sign);
998 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul"); 1004 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul");
999 } 1005 }
1000 1006
1001 /* 1007 /*
1002 * sd = sn + sm 1008 * sd = sn + sm
1003 */ 1009 */
1004 static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr) 1010 static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr)
1005 { 1011 {
1006 struct vfp_single vsd, vsn, vsm; 1012 struct vfp_single vsd, vsn, vsm;
1007 u32 exceptions; 1013 u32 exceptions;
1008 s32 n = vfp_get_float(sn); 1014 s32 n = vfp_get_float(sn);
1009 1015
1010 pr_debug("VFP: s%u = %08x\n", sn, n); 1016 pr_debug("VFP: s%u = %08x\n", sn, n);
1011 1017
1012 /* 1018 /*
1013 * Unpack and normalise denormals. 1019 * Unpack and normalise denormals.
1014 */ 1020 */
1015 vfp_single_unpack(&vsn, n); 1021 vfp_single_unpack(&vsn, n);
1016 if (vsn.exponent == 0 && vsn.significand) 1022 if (vsn.exponent == 0 && vsn.significand)
1017 vfp_single_normalise_denormal(&vsn); 1023 vfp_single_normalise_denormal(&vsn);
1018 1024
1019 vfp_single_unpack(&vsm, m); 1025 vfp_single_unpack(&vsm, m);
1020 if (vsm.exponent == 0 && vsm.significand) 1026 if (vsm.exponent == 0 && vsm.significand)
1021 vfp_single_normalise_denormal(&vsm); 1027 vfp_single_normalise_denormal(&vsm);
1022 1028
1023 exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr); 1029 exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);
1024 1030
1025 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd"); 1031 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd");
1026 } 1032 }
1027 1033
1028 /* 1034 /*
1029 * sd = sn - sm 1035 * sd = sn - sm
1030 */ 1036 */
1031 static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr) 1037 static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr)
1032 { 1038 {
1033 /* 1039 /*
1034 * Subtraction is addition with one sign inverted. 1040 * Subtraction is addition with one sign inverted.
1035 */ 1041 */
1036 return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr); 1042 return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr);
1037 } 1043 }
1038 1044
1039 /* 1045 /*
1040 * sd = sn / sm 1046 * sd = sn / sm
1041 */ 1047 */
1042 static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr) 1048 static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr)
1043 { 1049 {
1044 struct vfp_single vsd, vsn, vsm; 1050 struct vfp_single vsd, vsn, vsm;
1045 u32 exceptions = 0; 1051 u32 exceptions = 0;
1046 s32 n = vfp_get_float(sn); 1052 s32 n = vfp_get_float(sn);
1047 int tm, tn; 1053 int tm, tn;
1048 1054
1049 pr_debug("VFP: s%u = %08x\n", sn, n); 1055 pr_debug("VFP: s%u = %08x\n", sn, n);
1050 1056
1051 vfp_single_unpack(&vsn, n); 1057 vfp_single_unpack(&vsn, n);
1052 vfp_single_unpack(&vsm, m); 1058 vfp_single_unpack(&vsm, m);
1053 1059
1054 vsd.sign = vsn.sign ^ vsm.sign; 1060 vsd.sign = vsn.sign ^ vsm.sign;
1055 1061
1056 tn = vfp_single_type(&vsn); 1062 tn = vfp_single_type(&vsn);
1057 tm = vfp_single_type(&vsm); 1063 tm = vfp_single_type(&vsm);
1058 1064
1059 /* 1065 /*
1060 * Is n a NAN? 1066 * Is n a NAN?
1061 */ 1067 */
1062 if (tn & VFP_NAN) 1068 if (tn & VFP_NAN)
1063 goto vsn_nan; 1069 goto vsn_nan;
1064 1070
1065 /* 1071 /*
1066 * Is m a NAN? 1072 * Is m a NAN?
1067 */ 1073 */
1068 if (tm & VFP_NAN) 1074 if (tm & VFP_NAN)
1069 goto vsm_nan; 1075 goto vsm_nan;
1070 1076
1071 /* 1077 /*
1072 * If n and m are infinity, the result is invalid 1078 * If n and m are infinity, the result is invalid
1073 * If n and m are zero, the result is invalid 1079 * If n and m are zero, the result is invalid
1074 */ 1080 */
1075 if (tm & tn & (VFP_INFINITY|VFP_ZERO)) 1081 if (tm & tn & (VFP_INFINITY|VFP_ZERO))
1076 goto invalid; 1082 goto invalid;
1077 1083
1078 /* 1084 /*
1079 * If n is infinity, the result is infinity 1085 * If n is infinity, the result is infinity
1080 */ 1086 */
1081 if (tn & VFP_INFINITY) 1087 if (tn & VFP_INFINITY)
1082 goto infinity; 1088 goto infinity;
1083 1089
1084 /* 1090 /*
1085 * If m is zero, raise div0 exception 1091 * If m is zero, raise div0 exception
1086 */ 1092 */
1087 if (tm & VFP_ZERO) 1093 if (tm & VFP_ZERO)
1088 goto divzero; 1094 goto divzero;
1089 1095
1090 /* 1096 /*
1091 * If m is infinity, or n is zero, the result is zero 1097 * If m is infinity, or n is zero, the result is zero
1092 */ 1098 */
1093 if (tm & VFP_INFINITY || tn & VFP_ZERO) 1099 if (tm & VFP_INFINITY || tn & VFP_ZERO)
1094 goto zero; 1100 goto zero;
1095 1101
1096 if (tn & VFP_DENORMAL) 1102 if (tn & VFP_DENORMAL)
1097 vfp_single_normalise_denormal(&vsn); 1103 vfp_single_normalise_denormal(&vsn);
1098 if (tm & VFP_DENORMAL) 1104 if (tm & VFP_DENORMAL)
1099 vfp_single_normalise_denormal(&vsm); 1105 vfp_single_normalise_denormal(&vsm);
1100 1106
1101 /* 1107 /*
1102 * Ok, we have two numbers, we can perform division. 1108 * Ok, we have two numbers, we can perform division.
1103 */ 1109 */
1104 vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1; 1110 vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
1105 vsm.significand <<= 1; 1111 vsm.significand <<= 1;
1106 if (vsm.significand <= (2 * vsn.significand)) { 1112 if (vsm.significand <= (2 * vsn.significand)) {
1107 vsn.significand >>= 1; 1113 vsn.significand >>= 1;
1108 vsd.exponent++; 1114 vsd.exponent++;
1109 } 1115 }
1110 vsd.significand = ((u64)vsn.significand << 32) / vsm.significand; 1116 {
1117 u64 significand = (u64)vsn.significand << 32;
1118 do_div(significand, vsm.significand);
1119 vsd.significand = significand;
1120 }
1111 if ((vsd.significand & 0x3f) == 0) 1121 if ((vsd.significand & 0x3f) == 0)
1112 vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32); 1122 vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);
1113 1123
1114 return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv"); 1124 return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv");
1115 1125
1116 vsn_nan: 1126 vsn_nan:
1117 exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr); 1127 exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
1118 pack: 1128 pack:
1119 vfp_put_float(sd, vfp_single_pack(&vsd)); 1129 vfp_put_float(sd, vfp_single_pack(&vsd));
1120 return exceptions; 1130 return exceptions;
1121 1131
1122 vsm_nan: 1132 vsm_nan:
1123 exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr); 1133 exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
1124 goto pack; 1134 goto pack;
1125 1135
1126 zero: 1136 zero:
1127 vsd.exponent = 0; 1137 vsd.exponent = 0;
1128 vsd.significand = 0; 1138 vsd.significand = 0;
1129 goto pack; 1139 goto pack;
1130 1140
1131 divzero: 1141 divzero:
1132 exceptions = FPSCR_DZC; 1142 exceptions = FPSCR_DZC;
1133 infinity: 1143 infinity:
1134 vsd.exponent = 255; 1144 vsd.exponent = 255;
1135 vsd.significand = 0; 1145 vsd.significand = 0;
1136 goto pack; 1146 goto pack;
1137 1147
1138 invalid: 1148 invalid:
1139 vfp_put_float(sd, vfp_single_pack(&vfp_single_default_qnan)); 1149 vfp_put_float(sd, vfp_single_pack(&vfp_single_default_qnan));
1140 return FPSCR_IOC; 1150 return FPSCR_IOC;
1141 } 1151 }
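The replacement hunk in vfp_single_fdiv() above is where this patch touches the single-precision code: the old expression divided a u64 by a u64, which on 32-bit ARM becomes an out-of-line 64-by-64 division helper, even though the divisor always fits in 32 bits at this point. do_div() performs the 64-bit-by-32-bit division in place. A minimal usage sketch, editorial rather than from the patch, assuming <asm/div64.h> is available to this file and using arbitrary values:

	u64 n = 10000000000ULL;
	u32 rem;

	rem = do_div(n, 3);	/* n is now the quotient, 3333333333,    */
				/* and do_div() returned the remainder, 1 */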
1142 1152
1143 static u32 (* const fop_fns[16])(int sd, int sn, s32 m, u32 fpscr) = { 1153 static u32 (* const fop_fns[16])(int sd, int sn, s32 m, u32 fpscr) = {
1144 [FOP_TO_IDX(FOP_FMAC)] = vfp_single_fmac, 1154 [FOP_TO_IDX(FOP_FMAC)] = vfp_single_fmac,
1145 [FOP_TO_IDX(FOP_FNMAC)] = vfp_single_fnmac, 1155 [FOP_TO_IDX(FOP_FNMAC)] = vfp_single_fnmac,
1146 [FOP_TO_IDX(FOP_FMSC)] = vfp_single_fmsc, 1156 [FOP_TO_IDX(FOP_FMSC)] = vfp_single_fmsc,
1147 [FOP_TO_IDX(FOP_FNMSC)] = vfp_single_fnmsc, 1157 [FOP_TO_IDX(FOP_FNMSC)] = vfp_single_fnmsc,
1148 [FOP_TO_IDX(FOP_FMUL)] = vfp_single_fmul, 1158 [FOP_TO_IDX(FOP_FMUL)] = vfp_single_fmul,
1149 [FOP_TO_IDX(FOP_FNMUL)] = vfp_single_fnmul, 1159 [FOP_TO_IDX(FOP_FNMUL)] = vfp_single_fnmul,
1150 [FOP_TO_IDX(FOP_FADD)] = vfp_single_fadd, 1160 [FOP_TO_IDX(FOP_FADD)] = vfp_single_fadd,
1151 [FOP_TO_IDX(FOP_FSUB)] = vfp_single_fsub, 1161 [FOP_TO_IDX(FOP_FSUB)] = vfp_single_fsub,
1152 [FOP_TO_IDX(FOP_FDIV)] = vfp_single_fdiv, 1162 [FOP_TO_IDX(FOP_FDIV)] = vfp_single_fdiv,
1153 }; 1163 };
1154 1164
1155 #define FREG_BANK(x) ((x) & 0x18) 1165 #define FREG_BANK(x) ((x) & 0x18)
1156 #define FREG_IDX(x) ((x) & 7) 1166 #define FREG_IDX(x) ((x) & 7)
1157 1167
1158 u32 vfp_single_cpdo(u32 inst, u32 fpscr) 1168 u32 vfp_single_cpdo(u32 inst, u32 fpscr)
1159 { 1169 {
1160 u32 op = inst & FOP_MASK; 1170 u32 op = inst & FOP_MASK;
1161 u32 exceptions = 0; 1171 u32 exceptions = 0;
1162 unsigned int sd = vfp_get_sd(inst); 1172 unsigned int sd = vfp_get_sd(inst);
1163 unsigned int sn = vfp_get_sn(inst); 1173 unsigned int sn = vfp_get_sn(inst);
1164 unsigned int sm = vfp_get_sm(inst); 1174 unsigned int sm = vfp_get_sm(inst);
1165 unsigned int vecitr, veclen, vecstride; 1175 unsigned int vecitr, veclen, vecstride;
1166 u32 (*fop)(int, int, s32, u32); 1176 u32 (*fop)(int, int, s32, u32);
1167 1177
1168 veclen = fpscr & FPSCR_LENGTH_MASK; 1178 veclen = fpscr & FPSCR_LENGTH_MASK;
1169 vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK); 1179 vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);
1170 1180
1171 /* 1181 /*
1172 * If destination bank is zero, vector length is always '1'. 1182 * If destination bank is zero, vector length is always '1'.
1173 * ARM DDI0100F C5.1.3, C5.3.2. 1183 * ARM DDI0100F C5.1.3, C5.3.2.
1174 */ 1184 */
1175 if (FREG_BANK(sd) == 0) 1185 if (FREG_BANK(sd) == 0)
1176 veclen = 0; 1186 veclen = 0;
1177 1187
1178 pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride, 1188 pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
1179 (veclen >> FPSCR_LENGTH_BIT) + 1); 1189 (veclen >> FPSCR_LENGTH_BIT) + 1);
1180 1190
1181 fop = (op == FOP_EXT) ? fop_extfns[sn] : fop_fns[FOP_TO_IDX(op)]; 1191 fop = (op == FOP_EXT) ? fop_extfns[sn] : fop_fns[FOP_TO_IDX(op)];
1182 if (!fop) 1192 if (!fop)
1183 goto invalid; 1193 goto invalid;
1184 1194
1185 for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) { 1195 for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
1186 s32 m = vfp_get_float(sm); 1196 s32 m = vfp_get_float(sm);
1187 u32 except; 1197 u32 except;
1188 1198
1189 if (op == FOP_EXT) 1199 if (op == FOP_EXT)
1190 pr_debug("VFP: itr%d (s%u) = op[%u] (s%u=%08x)\n", 1200 pr_debug("VFP: itr%d (s%u) = op[%u] (s%u=%08x)\n",
1191 vecitr >> FPSCR_LENGTH_BIT, sd, sn, sm, m); 1201 vecitr >> FPSCR_LENGTH_BIT, sd, sn, sm, m);
1192 else 1202 else
1193 pr_debug("VFP: itr%d (s%u) = (s%u) op[%u] (s%u=%08x)\n", 1203 pr_debug("VFP: itr%d (s%u) = (s%u) op[%u] (s%u=%08x)\n",
1194 vecitr >> FPSCR_LENGTH_BIT, sd, sn, 1204 vecitr >> FPSCR_LENGTH_BIT, sd, sn,
1195 FOP_TO_IDX(op), sm, m); 1205 FOP_TO_IDX(op), sm, m);
1196 1206
1197 except = fop(sd, sn, m, fpscr); 1207 except = fop(sd, sn, m, fpscr);
1198 pr_debug("VFP: itr%d: exceptions=%08x\n", 1208 pr_debug("VFP: itr%d: exceptions=%08x\n",
1199 vecitr >> FPSCR_LENGTH_BIT, except); 1209 vecitr >> FPSCR_LENGTH_BIT, except);
1200 1210
1201 exceptions |= except; 1211 exceptions |= except;
1202 1212
1203 /* 1213 /*
1204 * This ensures that comparisons only operate on scalars; 1214 * This ensures that comparisons only operate on scalars;
1205 * comparisons always return with one FPSCR status bit set. 1215 * comparisons always return with one FPSCR status bit set.
1206 */ 1216 */
1207 if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) 1217 if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
1208 break; 1218 break;
1209 1219
1210 /* 1220 /*
1211 * CHECK: It appears to be undefined whether we stop when 1221 * CHECK: It appears to be undefined whether we stop when
1212 * we encounter an exception. We continue. 1222 * we encounter an exception. We continue.
1213 */ 1223 */
1214 1224
1215 sd = FREG_BANK(sd) + ((FREG_IDX(sd) + vecstride) & 7); 1225 sd = FREG_BANK(sd) + ((FREG_IDX(sd) + vecstride) & 7);
1216 sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7); 1226 sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
1217 if (FREG_BANK(sm) != 0) 1227 if (FREG_BANK(sm) != 0)
1218 sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7); 1228 sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
1219 } 1229 }
1220 return exceptions; 1230 return exceptions;
1221 1231
1222 invalid: 1232 invalid:
1223 return (u32)-1; 1233 return (u32)-1;
1224 } 1234 }
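The loop above implements short-vector operation: the FPSCR LEN field sets the iteration count, (veclen >> FPSCR_LENGTH_BIT) + 1, STRIDE sets the register step, and each operand advances within its bank of eight registers, except that an sm in bank 0 stays fixed as a scalar. A worked example (editorial; register numbers chosen for illustration):

	/*
	 * sd = s8, sn = s16, sm = s0, vecstride = 1, four iterations:
	 *
	 *   itr0:  s8  = s16 op s0
	 *   itr1:  s9  = s17 op s0   (sd and sn step; sm is in bank 0)
	 *   itr2:  s10 = s18 op s0
	 *   itr3:  s11 = s19 op s0
	 *
	 * With sm = s24 instead, it would step through s24..s27 as well.
	 */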
1225 1235