Commit 438a76167959061e371025f727fabec2ad9e70a7
Committed by Russell King
1 parent b3402cf50e
Exists in master and in 7 other branches

[PATCH] ARM: Fix VFP to use do_div()

VFP used __divdi3 for 64-bit division needlessly. Convert it to use our
64-bit by 32-bit division instead.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

Showing 3 changed files with 27 additions and 4 deletions (inline diff)
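The conversion hinges on the kernel's do_div() macro from <asm/div64.h>: it divides a 64-bit dividend by a 32-bit divisor in place and returns the remainder, so the compiler never has to emit a call to a libgcc 64-by-64 division helper. A minimal usage sketch (the values are illustrative):

    #include <asm/div64.h>

    u64 n = 0x123456789ULL;        /* dividend, modified in place */
    u32 base = 1000;               /* divisor must fit in 32 bits */
    u32 rem = do_div(n, base);     /* n becomes the quotient; returns the remainder */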
arch/arm/vfp/vfp.h
1 | /* | 1 | /* |
2 | * linux/arch/arm/vfp/vfp.h | 2 | * linux/arch/arm/vfp/vfp.h |
3 | * | 3 | * |
4 | * Copyright (C) 2004 ARM Limited. | 4 | * Copyright (C) 2004 ARM Limited. |
5 | * Written by Deep Blue Solutions Limited. | 5 | * Written by Deep Blue Solutions Limited. |
6 | * | 6 | * |
7 | * This program is free software; you can redistribute it and/or modify | 7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License version 2 as | 8 | * it under the terms of the GNU General Public License version 2 as |
9 | * published by the Free Software Foundation. | 9 | * published by the Free Software Foundation. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | static inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift) | 12 | static inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift) |
13 | { | 13 | { |
14 | if (shift) { | 14 | if (shift) { |
15 | if (shift < 32) | 15 | if (shift < 32) |
16 | val = val >> shift | ((val << (32 - shift)) != 0); | 16 | val = val >> shift | ((val << (32 - shift)) != 0); |
17 | else | 17 | else |
18 | val = val != 0; | 18 | val = val != 0; |
19 | } | 19 | } |
20 | return val; | 20 | return val; |
21 | } | 21 | } |
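A right shift with "jamming" is a sticky shift: any bits shifted out are ORed back into bit 0, so the result still records that precision was lost, which the rounding code needs later. For example, with an illustrative value:

    /* vfp_shiftright32jamming(0x80000001, 8) == 0x00800001:
     * the discarded low set bit is "jammed" into bit 0. */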
22 | 22 | ||
23 | static inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift) | 23 | static inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift) |
24 | { | 24 | { |
25 | if (shift) { | 25 | if (shift) { |
26 | if (shift < 64) | 26 | if (shift < 64) |
27 | val = val >> shift | ((val << (64 - shift)) != 0); | 27 | val = val >> shift | ((val << (64 - shift)) != 0); |
28 | else | 28 | else |
29 | val = val != 0; | 29 | val = val != 0; |
30 | } | 30 | } |
31 | return val; | 31 | return val; |
32 | } | 32 | } |
33 | 33 | ||
34 | static inline u32 vfp_hi64to32jamming(u64 val) | 34 | static inline u32 vfp_hi64to32jamming(u64 val) |
35 | { | 35 | { |
36 | u32 v; | 36 | u32 v; |
37 | 37 | ||
38 | asm( | 38 | asm( |
39 | "cmp %Q1, #1 @ vfp_hi64to32jamming\n\t" | 39 | "cmp %Q1, #1 @ vfp_hi64to32jamming\n\t" |
40 | "movcc %0, %R1\n\t" | 40 | "movcc %0, %R1\n\t" |
41 | "orrcs %0, %R1, #1" | 41 | "orrcs %0, %R1, #1" |
42 | : "=r" (v) : "r" (val) : "cc"); | 42 | : "=r" (v) : "r" (val) : "cc"); |
43 | 43 | ||
44 | return v; | 44 | return v; |
45 | } | 45 | } |
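In ARM inline assembly, %Q and %R select the low and high 32-bit halves of a 64-bit operand, and "cmp low, #1" sets carry exactly when the low word is non-zero. A hypothetical C equivalent of what the assembly computes:

    static inline u32 hi64to32jamming_c(u64 val)
    {
        /* take the high word, jamming any non-zero low bits into bit 0 */
        return (u32)(val >> 32) | ((u32)val != 0);
    }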
46 | 46 | ||
47 | static inline void add128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml) | 47 | static inline void add128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml) |
48 | { | 48 | { |
49 | asm( "adds %Q0, %Q2, %Q4\n\t" | 49 | asm( "adds %Q0, %Q2, %Q4\n\t" |
50 | "adcs %R0, %R2, %R4\n\t" | 50 | "adcs %R0, %R2, %R4\n\t" |
51 | "adcs %Q1, %Q3, %Q5\n\t" | 51 | "adcs %Q1, %Q3, %Q5\n\t" |
52 | "adc %R1, %R3, %R5" | 52 | "adc %R1, %R3, %R5" |
53 | : "=r" (nl), "=r" (nh) | 53 | : "=r" (nl), "=r" (nh) |
54 | : "0" (nl), "1" (nh), "r" (ml), "r" (mh) | 54 | : "0" (nl), "1" (nh), "r" (ml), "r" (mh) |
55 | : "cc"); | 55 | : "cc"); |
56 | *resh = nh; | 56 | *resh = nh; |
57 | *resl = nl; | 57 | *resl = nl; |
58 | } | 58 | } |
59 | 59 | ||
60 | static inline void sub128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml) | 60 | static inline void sub128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml) |
61 | { | 61 | { |
62 | asm( "subs %Q0, %Q2, %Q4\n\t" | 62 | asm( "subs %Q0, %Q2, %Q4\n\t" |
63 | "sbcs %R0, %R2, %R4\n\t" | 63 | "sbcs %R0, %R2, %R4\n\t" |
64 | "sbcs %Q1, %Q3, %Q5\n\t" | 64 | "sbcs %Q1, %Q3, %Q5\n\t" |
65 | "sbc %R1, %R3, %R5\n\t" | 65 | "sbc %R1, %R3, %R5\n\t" |
66 | : "=r" (nl), "=r" (nh) | 66 | : "=r" (nl), "=r" (nh) |
67 | : "0" (nl), "1" (nh), "r" (ml), "r" (mh) | 67 | : "0" (nl), "1" (nh), "r" (ml), "r" (mh) |
68 | : "cc"); | 68 | : "cc"); |
69 | *resh = nh; | 69 | *resh = nh; |
70 | *resl = nl; | 70 | *resl = nl; |
71 | } | 71 | } |
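Both helpers ripple a carry (or borrow) through the four 32-bit halves of a 128-bit value. A portable C sketch of the same 128-bit addition, using the usual unsigned-wraparound test to detect carry out of the low word:

    /* hypothetical portable equivalent of add128() */
    static inline void add128_c(u64 *resh, u64 *resl,
                                u64 nh, u64 nl, u64 mh, u64 ml)
    {
        u64 lo = nl + ml;
        *resh = nh + mh + (lo < nl);   /* carry out of the low 64 bits */
        *resl = lo;
    }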
72 | 72 | ||
73 | static inline void mul64to128(u64 *resh, u64 *resl, u64 n, u64 m) | 73 | static inline void mul64to128(u64 *resh, u64 *resl, u64 n, u64 m) |
74 | { | 74 | { |
75 | u32 nh, nl, mh, ml; | 75 | u32 nh, nl, mh, ml; |
76 | u64 rh, rma, rmb, rl; | 76 | u64 rh, rma, rmb, rl; |
77 | 77 | ||
78 | nl = n; | 78 | nl = n; |
79 | ml = m; | 79 | ml = m; |
80 | rl = (u64)nl * ml; | 80 | rl = (u64)nl * ml; |
81 | 81 | ||
82 | nh = n >> 32; | 82 | nh = n >> 32; |
83 | rma = (u64)nh * ml; | 83 | rma = (u64)nh * ml; |
84 | 84 | ||
85 | mh = m >> 32; | 85 | mh = m >> 32; |
86 | rmb = (u64)nl * mh; | 86 | rmb = (u64)nl * mh; |
87 | rma += rmb; | 87 | rma += rmb; |
88 | 88 | ||
89 | rh = (u64)nh * mh; | 89 | rh = (u64)nh * mh; |
90 | rh += ((u64)(rma < rmb) << 32) + (rma >> 32); | 90 | rh += ((u64)(rma < rmb) << 32) + (rma >> 32); |
91 | 91 | ||
92 | rma <<= 32; | 92 | rma <<= 32; |
93 | rl += rma; | 93 | rl += rma; |
94 | rh += (rl < rma); | 94 | rh += (rl < rma); |
95 | 95 | ||
96 | *resl = rl; | 96 | *resl = rl; |
97 | *resh = rh; | 97 | *resh = rh; |
98 | } | 98 | } |
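mul64to128() is the schoolbook method on 32-bit halves: four partial products, with the (rma < rmb) and (rl < rma) comparisons detecting the carries between them. A quick sanity check:

    /* (2^64 - 1)^2 = 2^128 - 2^65 + 1 */
    u64 hi, lo;
    mul64to128(&hi, &lo, ~0ULL, ~0ULL);
    /* hi == 0xfffffffffffffffeULL, lo == 1 */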
99 | 99 | ||
100 | static inline void shift64left(u64 *resh, u64 *resl, u64 n) | 100 | static inline void shift64left(u64 *resh, u64 *resl, u64 n) |
101 | { | 101 | { |
102 | *resh = n >> 63; | 102 | *resh = n >> 63; |
103 | *resl = n << 1; | 103 | *resl = n << 1; |
104 | } | 104 | } |
105 | 105 | ||
106 | static inline u64 vfp_hi64multiply64(u64 n, u64 m) | 106 | static inline u64 vfp_hi64multiply64(u64 n, u64 m) |
107 | { | 107 | { |
108 | u64 rh, rl; | 108 | u64 rh, rl; |
109 | mul64to128(&rh, &rl, n, m); | 109 | mul64to128(&rh, &rl, n, m); |
110 | return rh | (rl != 0); | 110 | return rh | (rl != 0); |
111 | } | 111 | } |
112 | 112 | ||
113 | static inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m) | 113 | static inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m) |
114 | { | 114 | { |
115 | u64 mh, ml, remh, reml, termh, terml, z; | 115 | u64 mh, ml, remh, reml, termh, terml, z; |
116 | 116 | ||
117 | if (nh >= m) | 117 | if (nh >= m) |
118 | return ~0ULL; | 118 | return ~0ULL; |
119 | mh = m >> 32; | 119 | mh = m >> 32; |
120 | z = (mh << 32 <= nh) ? 0xffffffff00000000ULL : (nh / mh) << 32; | 120 | if (mh << 32 <= nh) { |
121 | z = 0xffffffff00000000ULL; | ||
122 | } else { | ||
123 | z = nh; | ||
124 | do_div(z, mh); | ||
125 | z <<= 32; | ||
126 | } | ||
121 | mul64to128(&termh, &terml, m, z); | 127 | mul64to128(&termh, &terml, m, z); |
122 | sub128(&remh, &reml, nh, nl, termh, terml); | 128 | sub128(&remh, &reml, nh, nl, termh, terml); |
123 | ml = m << 32; | 129 | ml = m << 32; |
124 | while ((s64)remh < 0) { | 130 | while ((s64)remh < 0) { |
125 | z -= 0x100000000ULL; | 131 | z -= 0x100000000ULL; |
126 | add128(&remh, &reml, remh, reml, mh, ml); | 132 | add128(&remh, &reml, remh, reml, mh, ml); |
127 | } | 133 | } |
128 | remh = (remh << 32) | (reml >> 32); | 134 | remh = (remh << 32) | (reml >> 32); |
129 | z |= (mh << 32 <= remh) ? 0xffffffff : remh / mh; | 135 | if (mh << 32 <= remh) { |
136 | z |= 0xffffffff; | ||
137 | } else { | ||
138 | do_div(remh, mh); | ||
139 | z |= remh; | ||
140 | } | ||
130 | return z; | 141 | return z; |
131 | } | 142 | } |
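This hunk is the point of the commit: the old code divided two u64 values with "/", which the compiler expands to a libgcc 64-by-64 helper (the __divdi3 the commit message refers to). Since mh = m >> 32 always fits in 32 bits, the kernel's 64-by-32 do_div() suffices; and because do_div() modifies its dividend in place, the dividend is loaded into z (or reused in remh) before the divide:

    /* before: z = (nh / mh) << 32;              64-by-64 "/", libgcc call
     * after:  z = nh; do_div(z, mh); z <<= 32;  64-by-32 divide, no call */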
132 | 143 | ||
133 | /* | 144 | /* |
134 | * Operations on unpacked elements | 145 | * Operations on unpacked elements |
135 | */ | 146 | */ |
136 | #define vfp_sign_negate(sign) (sign ^ 0x8000) | 147 | #define vfp_sign_negate(sign) (sign ^ 0x8000) |
137 | 148 | ||
138 | /* | 149 | /* |
139 | * Single-precision | 150 | * Single-precision |
140 | */ | 151 | */ |
141 | struct vfp_single { | 152 | struct vfp_single { |
142 | s16 exponent; | 153 | s16 exponent; |
143 | u16 sign; | 154 | u16 sign; |
144 | u32 significand; | 155 | u32 significand; |
145 | }; | 156 | }; |
146 | 157 | ||
147 | extern s32 vfp_get_float(unsigned int reg); | 158 | extern s32 vfp_get_float(unsigned int reg); |
148 | extern void vfp_put_float(unsigned int reg, s32 val); | 159 | extern void vfp_put_float(unsigned int reg, s32 val); |
149 | 160 | ||
150 | /* | 161 | /* |
151 | * VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa | 162 | * VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa |
152 | * VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent | 163 | * VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent |
153 | * VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand | 164 | * VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand |
154 | * which are not propagated to the float upon packing. | 165 | * which are not propagated to the float upon packing. |
155 | */ | 166 | */ |
156 | #define VFP_SINGLE_MANTISSA_BITS (23) | 167 | #define VFP_SINGLE_MANTISSA_BITS (23) |
157 | #define VFP_SINGLE_EXPONENT_BITS (8) | 168 | #define VFP_SINGLE_EXPONENT_BITS (8) |
158 | #define VFP_SINGLE_LOW_BITS (32 - VFP_SINGLE_MANTISSA_BITS - 2) | 169 | #define VFP_SINGLE_LOW_BITS (32 - VFP_SINGLE_MANTISSA_BITS - 2) |
159 | #define VFP_SINGLE_LOW_BITS_MASK ((1 << VFP_SINGLE_LOW_BITS) - 1) | 170 | #define VFP_SINGLE_LOW_BITS_MASK ((1 << VFP_SINGLE_LOW_BITS) - 1) |
160 | 171 | ||
161 | /* | 172 | /* |
162 | * The bit in an unpacked float which indicates that it is a quiet NaN | 173 | * The bit in an unpacked float which indicates that it is a quiet NaN |
163 | */ | 174 | */ |
164 | #define VFP_SINGLE_SIGNIFICAND_QNAN (1 << (VFP_SINGLE_MANTISSA_BITS - 1 + VFP_SINGLE_LOW_BITS)) | 175 | #define VFP_SINGLE_SIGNIFICAND_QNAN (1 << (VFP_SINGLE_MANTISSA_BITS - 1 + VFP_SINGLE_LOW_BITS)) |
165 | 176 | ||
166 | /* | 177 | /* |
167 | * Operations on packed single-precision numbers | 178 | * Operations on packed single-precision numbers |
168 | */ | 179 | */ |
169 | #define vfp_single_packed_sign(v) ((v) & 0x80000000) | 180 | #define vfp_single_packed_sign(v) ((v) & 0x80000000) |
170 | #define vfp_single_packed_negate(v) ((v) ^ 0x80000000) | 181 | #define vfp_single_packed_negate(v) ((v) ^ 0x80000000) |
171 | #define vfp_single_packed_abs(v) ((v) & ~0x80000000) | 182 | #define vfp_single_packed_abs(v) ((v) & ~0x80000000) |
172 | #define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1)) | 183 | #define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1)) |
173 | #define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1)) | 184 | #define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1)) |
174 | 185 | ||
175 | /* | 186 | /* |
176 | * Unpack a single-precision float. Note that this returns the magnitude | 187 | * Unpack a single-precision float. Note that this returns the magnitude |
177 | * of the single-precision float mantissa with the 1. if necessary, | 188 | * of the single-precision float mantissa with the 1. if necessary, |
178 | * aligned to bit 30. | 189 | * aligned to bit 30. |
179 | */ | 190 | */ |
180 | static inline void vfp_single_unpack(struct vfp_single *s, s32 val) | 191 | static inline void vfp_single_unpack(struct vfp_single *s, s32 val) |
181 | { | 192 | { |
182 | u32 significand; | 193 | u32 significand; |
183 | 194 | ||
184 | s->sign = vfp_single_packed_sign(val) >> 16, | 195 | s->sign = vfp_single_packed_sign(val) >> 16, |
185 | s->exponent = vfp_single_packed_exponent(val); | 196 | s->exponent = vfp_single_packed_exponent(val); |
186 | 197 | ||
187 | significand = (u32) val; | 198 | significand = (u32) val; |
188 | significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2; | 199 | significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2; |
189 | if (s->exponent && s->exponent != 255) | 200 | if (s->exponent && s->exponent != 255) |
190 | significand |= 0x40000000; | 201 | significand |= 0x40000000; |
191 | s->significand = significand; | 202 | s->significand = significand; |
192 | } | 203 | } |
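A worked example of the unpacking, traced through the code above: 1.0f is packed as 0x3f800000, so the sign is 0, the exponent field is 127, and the all-zero mantissa lands at bits 29..7 with the implicit leading 1 ORed in at bit 30:

    struct vfp_single vs;
    vfp_single_unpack(&vs, 0x3f800000);
    /* vs.sign == 0, vs.exponent == 127, vs.significand == 0x40000000 */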
193 | 204 | ||
194 | /* | 205 | /* |
195 | * Re-pack a single-precision float. This assumes that the float is | 206 | * Re-pack a single-precision float. This assumes that the float is |
196 | * already normalised such that the MSB is bit 30, _not_ bit 31. | 207 | * already normalised such that the MSB is bit 30, _not_ bit 31. |
197 | */ | 208 | */ |
198 | static inline s32 vfp_single_pack(struct vfp_single *s) | 209 | static inline s32 vfp_single_pack(struct vfp_single *s) |
199 | { | 210 | { |
200 | u32 val; | 211 | u32 val; |
201 | val = (s->sign << 16) + | 212 | val = (s->sign << 16) + |
202 | (s->exponent << VFP_SINGLE_MANTISSA_BITS) + | 213 | (s->exponent << VFP_SINGLE_MANTISSA_BITS) + |
203 | (s->significand >> VFP_SINGLE_LOW_BITS); | 214 | (s->significand >> VFP_SINGLE_LOW_BITS); |
204 | return (s32)val; | 215 | return (s32)val; |
205 | } | 216 | } |
206 | 217 | ||
207 | #define VFP_NUMBER (1<<0) | 218 | #define VFP_NUMBER (1<<0) |
208 | #define VFP_ZERO (1<<1) | 219 | #define VFP_ZERO (1<<1) |
209 | #define VFP_DENORMAL (1<<2) | 220 | #define VFP_DENORMAL (1<<2) |
210 | #define VFP_INFINITY (1<<3) | 221 | #define VFP_INFINITY (1<<3) |
211 | #define VFP_NAN (1<<4) | 222 | #define VFP_NAN (1<<4) |
212 | #define VFP_NAN_SIGNAL (1<<5) | 223 | #define VFP_NAN_SIGNAL (1<<5) |
213 | 224 | ||
214 | #define VFP_QNAN (VFP_NAN) | 225 | #define VFP_QNAN (VFP_NAN) |
215 | #define VFP_SNAN (VFP_NAN|VFP_NAN_SIGNAL) | 226 | #define VFP_SNAN (VFP_NAN|VFP_NAN_SIGNAL) |
216 | 227 | ||
217 | static inline int vfp_single_type(struct vfp_single *s) | 228 | static inline int vfp_single_type(struct vfp_single *s) |
218 | { | 229 | { |
219 | int type = VFP_NUMBER; | 230 | int type = VFP_NUMBER; |
220 | if (s->exponent == 255) { | 231 | if (s->exponent == 255) { |
221 | if (s->significand == 0) | 232 | if (s->significand == 0) |
222 | type = VFP_INFINITY; | 233 | type = VFP_INFINITY; |
223 | else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN) | 234 | else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN) |
224 | type = VFP_QNAN; | 235 | type = VFP_QNAN; |
225 | else | 236 | else |
226 | type = VFP_SNAN; | 237 | type = VFP_SNAN; |
227 | } else if (s->exponent == 0) { | 238 | } else if (s->exponent == 0) { |
228 | if (s->significand == 0) | 239 | if (s->significand == 0) |
229 | type |= VFP_ZERO; | 240 | type |= VFP_ZERO; |
230 | else | 241 | else |
231 | type |= VFP_DENORMAL; | 242 | type |= VFP_DENORMAL; |
232 | } | 243 | } |
233 | return type; | 244 | return type; |
234 | } | 245 | } |
235 | 246 | ||
236 | #ifndef DEBUG | 247 | #ifndef DEBUG |
237 | #define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except) | 248 | #define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except) |
238 | u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions); | 249 | u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions); |
239 | #else | 250 | #else |
240 | u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func); | 251 | u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func); |
241 | #endif | 252 | #endif |
242 | 253 | ||
243 | /* | 254 | /* |
244 | * Double-precision | 255 | * Double-precision |
245 | */ | 256 | */ |
246 | struct vfp_double { | 257 | struct vfp_double { |
247 | s16 exponent; | 258 | s16 exponent; |
248 | u16 sign; | 259 | u16 sign; |
249 | u64 significand; | 260 | u64 significand; |
250 | }; | 261 | }; |
251 | 262 | ||
252 | /* | 263 | /* |
253 | * VFP_REG_ZERO is a special register number for vfp_get_double | 264 | * VFP_REG_ZERO is a special register number for vfp_get_double |
254 | * which returns (double)0.0. This is useful for the compare with | 265 | * which returns (double)0.0. This is useful for the compare with |
255 | * zero instructions. | 266 | * zero instructions. |
256 | */ | 267 | */ |
257 | #define VFP_REG_ZERO 16 | 268 | #define VFP_REG_ZERO 16 |
258 | extern u64 vfp_get_double(unsigned int reg); | 269 | extern u64 vfp_get_double(unsigned int reg); |
259 | extern void vfp_put_double(unsigned int reg, u64 val); | 270 | extern void vfp_put_double(unsigned int reg, u64 val); |
260 | 271 | ||
261 | #define VFP_DOUBLE_MANTISSA_BITS (52) | 272 | #define VFP_DOUBLE_MANTISSA_BITS (52) |
262 | #define VFP_DOUBLE_EXPONENT_BITS (11) | 273 | #define VFP_DOUBLE_EXPONENT_BITS (11) |
263 | #define VFP_DOUBLE_LOW_BITS (64 - VFP_DOUBLE_MANTISSA_BITS - 2) | 274 | #define VFP_DOUBLE_LOW_BITS (64 - VFP_DOUBLE_MANTISSA_BITS - 2) |
264 | #define VFP_DOUBLE_LOW_BITS_MASK ((1 << VFP_DOUBLE_LOW_BITS) - 1) | 275 | #define VFP_DOUBLE_LOW_BITS_MASK ((1 << VFP_DOUBLE_LOW_BITS) - 1) |
265 | 276 | ||
266 | /* | 277 | /* |
267 | * The bit in an unpacked double which indicates that it is a quiet NaN | 278 | * The bit in an unpacked double which indicates that it is a quiet NaN |
268 | */ | 279 | */ |
269 | #define VFP_DOUBLE_SIGNIFICAND_QNAN (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS)) | 280 | #define VFP_DOUBLE_SIGNIFICAND_QNAN (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS)) |
270 | 281 | ||
271 | /* | 282 | /* |
272 | * Operations on packed double-precision numbers | 283 | * Operations on packed double-precision numbers |
273 | */ | 284 | */ |
274 | #define vfp_double_packed_sign(v) ((v) & (1ULL << 63)) | 285 | #define vfp_double_packed_sign(v) ((v) & (1ULL << 63)) |
275 | #define vfp_double_packed_negate(v) ((v) ^ (1ULL << 63)) | 286 | #define vfp_double_packed_negate(v) ((v) ^ (1ULL << 63)) |
276 | #define vfp_double_packed_abs(v) ((v) & ~(1ULL << 63)) | 287 | #define vfp_double_packed_abs(v) ((v) & ~(1ULL << 63)) |
277 | #define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1)) | 288 | #define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1)) |
278 | #define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1)) | 289 | #define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1)) |
279 | 290 | ||
280 | /* | 291 | /* |
281 | * Unpack a double-precision float. Note that this returns the magnitude | 292 | * Unpack a double-precision float. Note that this returns the magnitude |
282 | * of the double-precision float mantissa with the 1. if necessary, | 293 | * of the double-precision float mantissa with the 1. if necessary, |
283 | * aligned to bit 62. | 294 | * aligned to bit 62. |
284 | */ | 295 | */ |
285 | static inline void vfp_double_unpack(struct vfp_double *s, s64 val) | 296 | static inline void vfp_double_unpack(struct vfp_double *s, s64 val) |
286 | { | 297 | { |
287 | u64 significand; | 298 | u64 significand; |
288 | 299 | ||
289 | s->sign = vfp_double_packed_sign(val) >> 48; | 300 | s->sign = vfp_double_packed_sign(val) >> 48; |
290 | s->exponent = vfp_double_packed_exponent(val); | 301 | s->exponent = vfp_double_packed_exponent(val); |
291 | 302 | ||
292 | significand = (u64) val; | 303 | significand = (u64) val; |
293 | significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2; | 304 | significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2; |
294 | if (s->exponent && s->exponent != 2047) | 305 | if (s->exponent && s->exponent != 2047) |
295 | significand |= (1ULL << 62); | 306 | significand |= (1ULL << 62); |
296 | s->significand = significand; | 307 | s->significand = significand; |
297 | } | 308 | } |
298 | 309 | ||
299 | /* | 310 | /* |
300 | * Re-pack a double-precision float. This assumes that the float is | 311 | * Re-pack a double-precision float. This assumes that the float is |
301 | * already normalised such that the MSB is bit 62, _not_ bit 63. | 312 | * already normalised such that the MSB is bit 62, _not_ bit 63. |
302 | */ | 313 | */ |
303 | static inline s64 vfp_double_pack(struct vfp_double *s) | 314 | static inline s64 vfp_double_pack(struct vfp_double *s) |
304 | { | 315 | { |
305 | u64 val; | 316 | u64 val; |
306 | val = ((u64)s->sign << 48) + | 317 | val = ((u64)s->sign << 48) + |
307 | ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) + | 318 | ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) + |
308 | (s->significand >> VFP_DOUBLE_LOW_BITS); | 319 | (s->significand >> VFP_DOUBLE_LOW_BITS); |
309 | return (s64)val; | 320 | return (s64)val; |
310 | } | 321 | } |
311 | 322 | ||
312 | static inline int vfp_double_type(struct vfp_double *s) | 323 | static inline int vfp_double_type(struct vfp_double *s) |
313 | { | 324 | { |
314 | int type = VFP_NUMBER; | 325 | int type = VFP_NUMBER; |
315 | if (s->exponent == 2047) { | 326 | if (s->exponent == 2047) { |
316 | if (s->significand == 0) | 327 | if (s->significand == 0) |
317 | type = VFP_INFINITY; | 328 | type = VFP_INFINITY; |
318 | else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN) | 329 | else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN) |
319 | type = VFP_QNAN; | 330 | type = VFP_QNAN; |
320 | else | 331 | else |
321 | type = VFP_SNAN; | 332 | type = VFP_SNAN; |
322 | } else if (s->exponent == 0) { | 333 | } else if (s->exponent == 0) { |
323 | if (s->significand == 0) | 334 | if (s->significand == 0) |
324 | type |= VFP_ZERO; | 335 | type |= VFP_ZERO; |
325 | else | 336 | else |
326 | type |= VFP_DENORMAL; | 337 | type |= VFP_DENORMAL; |
327 | } | 338 | } |
328 | return type; | 339 | return type; |
329 | } | 340 | } |
330 | 341 | ||
331 | u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func); | 342 | u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func); |
332 | 343 | ||
333 | /* | 344 | /* |
334 | * System registers | 345 | * System registers |
335 | */ | 346 | */ |
336 | extern u32 vfp_get_sys(unsigned int reg); | 347 | extern u32 vfp_get_sys(unsigned int reg); |
337 | extern void vfp_put_sys(unsigned int reg, u32 val); | 348 | extern void vfp_put_sys(unsigned int reg, u32 val); |
338 | 349 | ||
339 | u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand); | 350 | u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand); |
340 | 351 | ||
341 | /* | 352 | /* |
342 | * A special flag to tell the normalisation code not to normalise. | 353 | * A special flag to tell the normalisation code not to normalise. |
343 | */ | 354 | */ |
344 | #define VFP_NAN_FLAG 0x100 | 355 | #define VFP_NAN_FLAG 0x100 |
345 | 356 |
arch/arm/vfp/vfpdouble.c
1 | /* | 1 | /* |
2 | * linux/arch/arm/vfp/vfpdouble.c | 2 | * linux/arch/arm/vfp/vfpdouble.c |
3 | * | 3 | * |
4 | * This code is derived in part from John R. Hauser's softfloat library, which | 4 | * This code is derived in part from John R. Hauser's softfloat library, which |
5 | * carries the following notice: | 5 | * carries the following notice: |
6 | * | 6 | * |
7 | * =========================================================================== | 7 | * =========================================================================== |
8 | * This C source file is part of the SoftFloat IEC/IEEE Floating-point | 8 | * This C source file is part of the SoftFloat IEC/IEEE Floating-point |
9 | * Arithmetic Package, Release 2. | 9 | * Arithmetic Package, Release 2. |
10 | * | 10 | * |
11 | * Written by John R. Hauser. This work was made possible in part by the | 11 | * Written by John R. Hauser. This work was made possible in part by the |
12 | * International Computer Science Institute, located at Suite 600, 1947 Center | 12 | * International Computer Science Institute, located at Suite 600, 1947 Center |
13 | * Street, Berkeley, California 94704. Funding was partially provided by the | 13 | * Street, Berkeley, California 94704. Funding was partially provided by the |
14 | * National Science Foundation under grant MIP-9311980. The original version | 14 | * National Science Foundation under grant MIP-9311980. The original version |
15 | * of this code was written as part of a project to build a fixed-point vector | 15 | * of this code was written as part of a project to build a fixed-point vector |
16 | * processor in collaboration with the University of California at Berkeley, | 16 | * processor in collaboration with the University of California at Berkeley, |
17 | * overseen by Profs. Nelson Morgan and John Wawrzynek. More information | 17 | * overseen by Profs. Nelson Morgan and John Wawrzynek. More information |
18 | * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ | 18 | * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ |
19 | * arithmetic/softfloat.html'. | 19 | * arithmetic/softfloat.html'. |
20 | * | 20 | * |
21 | * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort | 21 | * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort |
22 | * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT | 22 | * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT |
23 | * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO | 23 | * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO |
24 | * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY | 24 | * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY |
25 | * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. | 25 | * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. |
26 | * | 26 | * |
27 | * Derivative works are acceptable, even for commercial purposes, so long as | 27 | * Derivative works are acceptable, even for commercial purposes, so long as |
28 | * (1) they include prominent notice that the work is derivative, and (2) they | 28 | * (1) they include prominent notice that the work is derivative, and (2) they |
29 | * include prominent notice akin to these three paragraphs for those parts of | 29 | * include prominent notice akin to these three paragraphs for those parts of |
30 | * this code that are retained. | 30 | * this code that are retained. |
31 | * =========================================================================== | 31 | * =========================================================================== |
32 | */ | 32 | */ |
33 | #include <linux/kernel.h> | 33 | #include <linux/kernel.h> |
34 | #include <linux/bitops.h> | 34 | #include <linux/bitops.h> |
35 | |||
36 | #include <asm/div64.h> | ||
35 | #include <asm/ptrace.h> | 37 | #include <asm/ptrace.h> |
36 | #include <asm/vfp.h> | 38 | #include <asm/vfp.h> |
37 | 39 | ||
38 | #include "vfpinstr.h" | 40 | #include "vfpinstr.h" |
39 | #include "vfp.h" | 41 | #include "vfp.h" |
40 | 42 | ||
41 | static struct vfp_double vfp_double_default_qnan = { | 43 | static struct vfp_double vfp_double_default_qnan = { |
42 | .exponent = 2047, | 44 | .exponent = 2047, |
43 | .sign = 0, | 45 | .sign = 0, |
44 | .significand = VFP_DOUBLE_SIGNIFICAND_QNAN, | 46 | .significand = VFP_DOUBLE_SIGNIFICAND_QNAN, |
45 | }; | 47 | }; |
46 | 48 | ||
47 | static void vfp_double_dump(const char *str, struct vfp_double *d) | 49 | static void vfp_double_dump(const char *str, struct vfp_double *d) |
48 | { | 50 | { |
49 | pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n", | 51 | pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n", |
50 | str, d->sign != 0, d->exponent, d->significand); | 52 | str, d->sign != 0, d->exponent, d->significand); |
51 | } | 53 | } |
52 | 54 | ||
53 | static void vfp_double_normalise_denormal(struct vfp_double *vd) | 55 | static void vfp_double_normalise_denormal(struct vfp_double *vd) |
54 | { | 56 | { |
55 | int bits = 31 - fls(vd->significand >> 32); | 57 | int bits = 31 - fls(vd->significand >> 32); |
56 | if (bits == 31) | 58 | if (bits == 31) |
57 | bits = 62 - fls(vd->significand); | 59 | bits = 62 - fls(vd->significand); |
58 | 60 | ||
59 | vfp_double_dump("normalise_denormal: in", vd); | 61 | vfp_double_dump("normalise_denormal: in", vd); |
60 | 62 | ||
61 | if (bits) { | 63 | if (bits) { |
62 | vd->exponent -= bits - 1; | 64 | vd->exponent -= bits - 1; |
63 | vd->significand <<= bits; | 65 | vd->significand <<= bits; |
64 | } | 66 | } |
65 | 67 | ||
66 | vfp_double_dump("normalise_denormal: out", vd); | 68 | vfp_double_dump("normalise_denormal: out", vd); |
67 | } | 69 | } |
68 | 70 | ||
69 | u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func) | 71 | u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func) |
70 | { | 72 | { |
71 | u64 significand, incr; | 73 | u64 significand, incr; |
72 | int exponent, shift, underflow; | 74 | int exponent, shift, underflow; |
73 | u32 rmode; | 75 | u32 rmode; |
74 | 76 | ||
75 | vfp_double_dump("pack: in", vd); | 77 | vfp_double_dump("pack: in", vd); |
76 | 78 | ||
77 | /* | 79 | /* |
78 | * Infinities and NaNs are a special case. | 80 | * Infinities and NaNs are a special case. |
79 | */ | 81 | */ |
80 | if (vd->exponent == 2047 && (vd->significand == 0 || exceptions)) | 82 | if (vd->exponent == 2047 && (vd->significand == 0 || exceptions)) |
81 | goto pack; | 83 | goto pack; |
82 | 84 | ||
83 | /* | 85 | /* |
84 | * Special-case zero. | 86 | * Special-case zero. |
85 | */ | 87 | */ |
86 | if (vd->significand == 0) { | 88 | if (vd->significand == 0) { |
87 | vd->exponent = 0; | 89 | vd->exponent = 0; |
88 | goto pack; | 90 | goto pack; |
89 | } | 91 | } |
90 | 92 | ||
91 | exponent = vd->exponent; | 93 | exponent = vd->exponent; |
92 | significand = vd->significand; | 94 | significand = vd->significand; |
93 | 95 | ||
94 | shift = 32 - fls(significand >> 32); | 96 | shift = 32 - fls(significand >> 32); |
95 | if (shift == 32) | 97 | if (shift == 32) |
96 | shift = 64 - fls(significand); | 98 | shift = 64 - fls(significand); |
97 | if (shift) { | 99 | if (shift) { |
98 | exponent -= shift; | 100 | exponent -= shift; |
99 | significand <<= shift; | 101 | significand <<= shift; |
100 | } | 102 | } |
101 | 103 | ||
102 | #ifdef DEBUG | 104 | #ifdef DEBUG |
103 | vd->exponent = exponent; | 105 | vd->exponent = exponent; |
104 | vd->significand = significand; | 106 | vd->significand = significand; |
105 | vfp_double_dump("pack: normalised", vd); | 107 | vfp_double_dump("pack: normalised", vd); |
106 | #endif | 108 | #endif |
107 | 109 | ||
108 | /* | 110 | /* |
109 | * Tiny number? | 111 | * Tiny number? |
110 | */ | 112 | */ |
111 | underflow = exponent < 0; | 113 | underflow = exponent < 0; |
112 | if (underflow) { | 114 | if (underflow) { |
113 | significand = vfp_shiftright64jamming(significand, -exponent); | 115 | significand = vfp_shiftright64jamming(significand, -exponent); |
114 | exponent = 0; | 116 | exponent = 0; |
115 | #ifdef DEBUG | 117 | #ifdef DEBUG |
116 | vd->exponent = exponent; | 118 | vd->exponent = exponent; |
117 | vd->significand = significand; | 119 | vd->significand = significand; |
118 | vfp_double_dump("pack: tiny number", vd); | 120 | vfp_double_dump("pack: tiny number", vd); |
119 | #endif | 121 | #endif |
120 | if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1))) | 122 | if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1))) |
121 | underflow = 0; | 123 | underflow = 0; |
122 | } | 124 | } |
123 | 125 | ||
124 | /* | 126 | /* |
125 | * Select rounding increment. | 127 | * Select rounding increment. |
126 | */ | 128 | */ |
127 | incr = 0; | 129 | incr = 0; |
128 | rmode = fpscr & FPSCR_RMODE_MASK; | 130 | rmode = fpscr & FPSCR_RMODE_MASK; |
129 | 131 | ||
130 | if (rmode == FPSCR_ROUND_NEAREST) { | 132 | if (rmode == FPSCR_ROUND_NEAREST) { |
131 | incr = 1ULL << VFP_DOUBLE_LOW_BITS; | 133 | incr = 1ULL << VFP_DOUBLE_LOW_BITS; |
132 | if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0) | 134 | if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0) |
133 | incr -= 1; | 135 | incr -= 1; |
134 | } else if (rmode == FPSCR_ROUND_TOZERO) { | 136 | } else if (rmode == FPSCR_ROUND_TOZERO) { |
135 | incr = 0; | 137 | incr = 0; |
136 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0)) | 138 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0)) |
137 | incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1; | 139 | incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1; |
138 | 140 | ||
139 | pr_debug("VFP: rounding increment = 0x%08llx\n", incr); | 141 | pr_debug("VFP: rounding increment = 0x%08llx\n", incr); |
140 | 142 | ||
141 | /* | 143 | /* |
142 | * Is our rounding going to overflow? | 144 | * Is our rounding going to overflow? |
143 | */ | 145 | */ |
144 | if ((significand + incr) < significand) { | 146 | if ((significand + incr) < significand) { |
145 | exponent += 1; | 147 | exponent += 1; |
146 | significand = (significand >> 1) | (significand & 1); | 148 | significand = (significand >> 1) | (significand & 1); |
147 | incr >>= 1; | 149 | incr >>= 1; |
148 | #ifdef DEBUG | 150 | #ifdef DEBUG |
149 | vd->exponent = exponent; | 151 | vd->exponent = exponent; |
150 | vd->significand = significand; | 152 | vd->significand = significand; |
151 | vfp_double_dump("pack: overflow", vd); | 153 | vfp_double_dump("pack: overflow", vd); |
152 | #endif | 154 | #endif |
153 | } | 155 | } |
154 | 156 | ||
155 | /* | 157 | /* |
156 | * If any of the low bits (which will be shifted out of the | 158 | * If any of the low bits (which will be shifted out of the |
157 | * number) are non-zero, the result is inexact. | 159 | * number) are non-zero, the result is inexact. |
158 | */ | 160 | */ |
159 | if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1)) | 161 | if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1)) |
160 | exceptions |= FPSCR_IXC; | 162 | exceptions |= FPSCR_IXC; |
161 | 163 | ||
162 | /* | 164 | /* |
163 | * Do our rounding. | 165 | * Do our rounding. |
164 | */ | 166 | */ |
165 | significand += incr; | 167 | significand += incr; |
166 | 168 | ||
167 | /* | 169 | /* |
168 | * Infinity? | 170 | * Infinity? |
169 | */ | 171 | */ |
170 | if (exponent >= 2046) { | 172 | if (exponent >= 2046) { |
171 | exceptions |= FPSCR_OFC | FPSCR_IXC; | 173 | exceptions |= FPSCR_OFC | FPSCR_IXC; |
172 | if (incr == 0) { | 174 | if (incr == 0) { |
173 | vd->exponent = 2045; | 175 | vd->exponent = 2045; |
174 | vd->significand = 0x7fffffffffffffffULL; | 176 | vd->significand = 0x7fffffffffffffffULL; |
175 | } else { | 177 | } else { |
176 | vd->exponent = 2047; /* infinity */ | 178 | vd->exponent = 2047; /* infinity */ |
177 | vd->significand = 0; | 179 | vd->significand = 0; |
178 | } | 180 | } |
179 | } else { | 181 | } else { |
180 | if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0) | 182 | if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0) |
181 | exponent = 0; | 183 | exponent = 0; |
182 | if (exponent || significand > 0x8000000000000000ULL) | 184 | if (exponent || significand > 0x8000000000000000ULL) |
183 | underflow = 0; | 185 | underflow = 0; |
184 | if (underflow) | 186 | if (underflow) |
185 | exceptions |= FPSCR_UFC; | 187 | exceptions |= FPSCR_UFC; |
186 | vd->exponent = exponent; | 188 | vd->exponent = exponent; |
187 | vd->significand = significand >> 1; | 189 | vd->significand = significand >> 1; |
188 | } | 190 | } |
189 | 191 | ||
190 | pack: | 192 | pack: |
191 | vfp_double_dump("pack: final", vd); | 193 | vfp_double_dump("pack: final", vd); |
192 | { | 194 | { |
193 | s64 d = vfp_double_pack(vd); | 195 | s64 d = vfp_double_pack(vd); |
194 | pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func, | 196 | pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func, |
195 | dd, d, exceptions); | 197 | dd, d, exceptions); |
196 | vfp_put_double(dd, d); | 198 | vfp_put_double(dd, d); |
197 | } | 199 | } |
198 | return exceptions & ~VFP_NAN_FLAG; | 200 | return exceptions & ~VFP_NAN_FLAG; |
199 | } | 201 | } |
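The FPSCR_ROUND_NEAREST case implements ties-to-even: the increment is half a ULP, reduced by one when the would-be result's LSB is already 0 so that an exact tie does not round up. A standalone sketch of the same idea with 3 guard bits (illustrative, not the kernel code):

    static unsigned int round_nearest_even(unsigned int sig)
    {
        unsigned int incr = 0x4;          /* half a ULP (guard MSB) */
        if ((sig & 0x8) == 0)             /* result LSB clear... */
            incr -= 1;                    /* ...so a tie rounds down */
        return (sig + incr) >> 3;         /* round, drop the guard bits */
    }
    /* round_nearest_even(0xc) == 2  (1.5 -> 2, tie rounds up to even)
     * round_nearest_even(0x14) == 2 (2.5 -> 2, tie rounds down to even) */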
200 | 202 | ||
201 | /* | 203 | /* |
202 | * Propagate the NaN, setting exceptions if it is signalling. | 204 | * Propagate the NaN, setting exceptions if it is signalling. |
203 | * 'n' is always a NaN. 'm' may be a number, NaN or infinity. | 205 | * 'n' is always a NaN. 'm' may be a number, NaN or infinity. |
204 | */ | 206 | */ |
205 | static u32 | 207 | static u32 |
206 | vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn, | 208 | vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn, |
207 | struct vfp_double *vdm, u32 fpscr) | 209 | struct vfp_double *vdm, u32 fpscr) |
208 | { | 210 | { |
209 | struct vfp_double *nan; | 211 | struct vfp_double *nan; |
210 | int tn, tm = 0; | 212 | int tn, tm = 0; |
211 | 213 | ||
212 | tn = vfp_double_type(vdn); | 214 | tn = vfp_double_type(vdn); |
213 | 215 | ||
214 | if (vdm) | 216 | if (vdm) |
215 | tm = vfp_double_type(vdm); | 217 | tm = vfp_double_type(vdm); |
216 | 218 | ||
217 | if (fpscr & FPSCR_DEFAULT_NAN) | 219 | if (fpscr & FPSCR_DEFAULT_NAN) |
218 | /* | 220 | /* |
219 | * Default NaN mode - always returns a quiet NaN | 221 | * Default NaN mode - always returns a quiet NaN |
220 | */ | 222 | */ |
221 | nan = &vfp_double_default_qnan; | 223 | nan = &vfp_double_default_qnan; |
222 | else { | 224 | else { |
223 | /* | 225 | /* |
224 | * Contemporary mode - select the first signalling | 226 | * Contemporary mode - select the first signalling |
225 | * NAN, or if neither are signalling, the first | 227 | * NAN, or if neither are signalling, the first |
226 | * quiet NAN. | 228 | * quiet NAN. |
227 | */ | 229 | */ |
228 | if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN)) | 230 | if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN)) |
229 | nan = vdn; | 231 | nan = vdn; |
230 | else | 232 | else |
231 | nan = vdm; | 233 | nan = vdm; |
232 | /* | 234 | /* |
233 | * Make the NaN quiet. | 235 | * Make the NaN quiet. |
234 | */ | 236 | */ |
235 | nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; | 237 | nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; |
236 | } | 238 | } |
237 | 239 | ||
238 | *vdd = *nan; | 240 | *vdd = *nan; |
239 | 241 | ||
240 | /* | 242 | /* |
241 | * If one was a signalling NAN, raise invalid operation. | 243 | * If one was a signalling NAN, raise invalid operation. |
242 | */ | 244 | */ |
243 | return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG; | 245 | return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG; |
244 | } | 246 | } |
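Quieting a NaN means setting the quiet-NaN bit in its unpacked significand. Worked from the definitions above, as an illustration:

    /* the double SNaN 0x7ff0000000000001, unpacked, quieted with
     * significand |= VFP_DOUBLE_SIGNIFICAND_QNAN (bit 61), and repacked,
     * becomes the QNaN 0x7ff8000000000001 (mantissa bit 51 set) */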
245 | 247 | ||
246 | /* | 248 | /* |
247 | * Extended operations | 249 | * Extended operations |
248 | */ | 250 | */ |
249 | static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr) | 251 | static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr) |
250 | { | 252 | { |
251 | vfp_put_double(dd, vfp_double_packed_abs(vfp_get_double(dm))); | 253 | vfp_put_double(dd, vfp_double_packed_abs(vfp_get_double(dm))); |
252 | return 0; | 254 | return 0; |
253 | } | 255 | } |
254 | 256 | ||
255 | static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr) | 257 | static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr) |
256 | { | 258 | { |
257 | vfp_put_double(dd, vfp_get_double(dm)); | 259 | vfp_put_double(dd, vfp_get_double(dm)); |
258 | return 0; | 260 | return 0; |
259 | } | 261 | } |
260 | 262 | ||
261 | static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr) | 263 | static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr) |
262 | { | 264 | { |
263 | vfp_put_double(dd, vfp_double_packed_negate(vfp_get_double(dm))); | 265 | vfp_put_double(dd, vfp_double_packed_negate(vfp_get_double(dm))); |
264 | return 0; | 266 | return 0; |
265 | } | 267 | } |
266 | 268 | ||
267 | static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr) | 269 | static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr) |
268 | { | 270 | { |
269 | struct vfp_double vdm, vdd; | 271 | struct vfp_double vdm, vdd; |
270 | int ret, tm; | 272 | int ret, tm; |
271 | 273 | ||
272 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 274 | vfp_double_unpack(&vdm, vfp_get_double(dm)); |
273 | tm = vfp_double_type(&vdm); | 275 | tm = vfp_double_type(&vdm); |
274 | if (tm & (VFP_NAN|VFP_INFINITY)) { | 276 | if (tm & (VFP_NAN|VFP_INFINITY)) { |
275 | struct vfp_double *vdp = &vdd; | 277 | struct vfp_double *vdp = &vdd; |
276 | 278 | ||
277 | if (tm & VFP_NAN) | 279 | if (tm & VFP_NAN) |
278 | ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr); | 280 | ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr); |
279 | else if (vdm.sign == 0) { | 281 | else if (vdm.sign == 0) { |
280 | sqrt_copy: | 282 | sqrt_copy: |
281 | vdp = &vdm; | 283 | vdp = &vdm; |
282 | ret = 0; | 284 | ret = 0; |
283 | } else { | 285 | } else { |
284 | sqrt_invalid: | 286 | sqrt_invalid: |
285 | vdp = &vfp_double_default_qnan; | 287 | vdp = &vfp_double_default_qnan; |
286 | ret = FPSCR_IOC; | 288 | ret = FPSCR_IOC; |
287 | } | 289 | } |
288 | vfp_put_double(dd, vfp_double_pack(vdp)); | 290 | vfp_put_double(dd, vfp_double_pack(vdp)); |
289 | return ret; | 291 | return ret; |
290 | } | 292 | } |
291 | 293 | ||
292 | /* | 294 | /* |
293 | * sqrt(+/- 0) == +/- 0 | 295 | * sqrt(+/- 0) == +/- 0 |
294 | */ | 296 | */ |
295 | if (tm & VFP_ZERO) | 297 | if (tm & VFP_ZERO) |
296 | goto sqrt_copy; | 298 | goto sqrt_copy; |
297 | 299 | ||
298 | /* | 300 | /* |
299 | * Normalise a denormalised number | 301 | * Normalise a denormalised number |
300 | */ | 302 | */ |
301 | if (tm & VFP_DENORMAL) | 303 | if (tm & VFP_DENORMAL) |
302 | vfp_double_normalise_denormal(&vdm); | 304 | vfp_double_normalise_denormal(&vdm); |
303 | 305 | ||
304 | /* | 306 | /* |
305 | * sqrt(<0) = invalid | 307 | * sqrt(<0) = invalid |
306 | */ | 308 | */ |
307 | if (vdm.sign) | 309 | if (vdm.sign) |
308 | goto sqrt_invalid; | 310 | goto sqrt_invalid; |
309 | 311 | ||
310 | vfp_double_dump("sqrt", &vdm); | 312 | vfp_double_dump("sqrt", &vdm); |
311 | 313 | ||
312 | /* | 314 | /* |
313 | * Estimate the square root. | 315 | * Estimate the square root. |
314 | */ | 316 | */ |
315 | vdd.sign = 0; | 317 | vdd.sign = 0; |
316 | vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023; | 318 | vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023; |
317 | vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31; | 319 | vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31; |
318 | 320 | ||
319 | vfp_double_dump("sqrt estimate1", &vdd); | 321 | vfp_double_dump("sqrt estimate1", &vdd); |
320 | 322 | ||
321 | vdm.significand >>= 1 + (vdm.exponent & 1); | 323 | vdm.significand >>= 1 + (vdm.exponent & 1); |
322 | vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand); | 324 | vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand); |
323 | 325 | ||
324 | vfp_double_dump("sqrt estimate2", &vdd); | 326 | vfp_double_dump("sqrt estimate2", &vdd); |
325 | 327 | ||
326 | /* | 328 | /* |
327 | * And now adjust. | 329 | * And now adjust. |
328 | */ | 330 | */ |
329 | if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) { | 331 | if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) { |
330 | if (vdd.significand < 2) { | 332 | if (vdd.significand < 2) { |
331 | vdd.significand = ~0ULL; | 333 | vdd.significand = ~0ULL; |
332 | } else { | 334 | } else { |
333 | u64 termh, terml, remh, reml; | 335 | u64 termh, terml, remh, reml; |
334 | vdm.significand <<= 2; | 336 | vdm.significand <<= 2; |
335 | mul64to128(&termh, &terml, vdd.significand, vdd.significand); | 337 | mul64to128(&termh, &terml, vdd.significand, vdd.significand); |
336 | sub128(&remh, &reml, vdm.significand, 0, termh, terml); | 338 | sub128(&remh, &reml, vdm.significand, 0, termh, terml); |
337 | while ((s64)remh < 0) { | 339 | while ((s64)remh < 0) { |
338 | vdd.significand -= 1; | 340 | vdd.significand -= 1; |
339 | shift64left(&termh, &terml, vdd.significand); | 341 | shift64left(&termh, &terml, vdd.significand); |
340 | terml |= 1; | 342 | terml |= 1; |
341 | add128(&remh, &reml, remh, reml, termh, terml); | 343 | add128(&remh, &reml, remh, reml, termh, terml); |
342 | } | 344 | } |
343 | vdd.significand |= (remh | reml) != 0; | 345 | vdd.significand |= (remh | reml) != 0; |
344 | } | 346 | } |
345 | } | 347 | } |
346 | vdd.significand = vfp_shiftright64jamming(vdd.significand, 1); | 348 | vdd.significand = vfp_shiftright64jamming(vdd.significand, 1); |
347 | 349 | ||
348 | return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt"); | 350 | return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt"); |
349 | } | 351 | } |
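The initial estimate halves the exponent, since sqrt(2^e * f) = 2^(e/2) * sqrt(f), and then refines the significand with the 128-by-64 division estimate plus the remainder-driven adjustment loop. A sanity check of the exponent line, worked by hand:

    /* 4.0 has biased exponent 1025:
     * ((1025 - 1023) >> 1) + 1023 == 1024, the biased exponent of 2.0 */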
350 | 352 | ||
351 | /* | 353 | /* |
352 | * Equal := ZC | 354 | * Equal := ZC |
353 | * Less than := N | 355 | * Less than := N |
354 | * Greater than := C | 356 | * Greater than := C |
355 | * Unordered := CV | 357 | * Unordered := CV |
356 | */ | 358 | */ |
357 | static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr) | 359 | static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr) |
358 | { | 360 | { |
359 | s64 d, m; | 361 | s64 d, m; |
360 | u32 ret = 0; | 362 | u32 ret = 0; |
361 | 363 | ||
362 | m = vfp_get_double(dm); | 364 | m = vfp_get_double(dm); |
363 | if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) { | 365 | if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) { |
364 | ret |= FPSCR_C | FPSCR_V; | 366 | ret |= FPSCR_C | FPSCR_V; |
365 | if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) | 367 | if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) |
366 | /* | 368 | /* |
367 | * Signalling NaN, or signalling on quiet NaN | 369 | * Signalling NaN, or signalling on quiet NaN |
368 | */ | 370 | */ |
369 | ret |= FPSCR_IOC; | 371 | ret |= FPSCR_IOC; |
370 | } | 372 | } |
371 | 373 | ||
372 | d = vfp_get_double(dd); | 374 | d = vfp_get_double(dd); |
373 | if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) { | 375 | if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) { |
374 | ret |= FPSCR_C | FPSCR_V; | 376 | ret |= FPSCR_C | FPSCR_V; |
375 | if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) | 377 | if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) |
376 | /* | 378 | /* |
377 | * Signalling NaN, or signalling on quiet NaN | 379 | * Signalling NaN, or signalling on quiet NaN |
378 | */ | 380 | */ |
379 | ret |= FPSCR_IOC; | 381 | ret |= FPSCR_IOC; |
380 | } | 382 | } |
381 | 383 | ||
382 | if (ret == 0) { | 384 | if (ret == 0) { |
383 | if (d == m || vfp_double_packed_abs(d | m) == 0) { | 385 | if (d == m || vfp_double_packed_abs(d | m) == 0) { |
384 | /* | 386 | /* |
385 | * equal | 387 | * equal |
386 | */ | 388 | */ |
387 | ret |= FPSCR_Z | FPSCR_C; | 389 | ret |= FPSCR_Z | FPSCR_C; |
388 | } else if (vfp_double_packed_sign(d ^ m)) { | 390 | } else if (vfp_double_packed_sign(d ^ m)) { |
389 | /* | 391 | /* |
390 | * different signs | 392 | * different signs |
391 | */ | 393 | */ |
392 | if (vfp_double_packed_sign(d)) | 394 | if (vfp_double_packed_sign(d)) |
393 | /* | 395 | /* |
394 | * d is negative, so d < m | 396 | * d is negative, so d < m |
395 | */ | 397 | */ |
396 | ret |= FPSCR_N; | 398 | ret |= FPSCR_N; |
397 | else | 399 | else |
398 | /* | 400 | /* |
399 | * d is positive, so d > m | 401 | * d is positive, so d > m |
400 | */ | 402 | */ |
401 | ret |= FPSCR_C; | 403 | ret |= FPSCR_C; |
402 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) { | 404 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) { |
403 | /* | 405 | /* |
404 | * d < m | 406 | * d < m |
405 | */ | 407 | */ |
406 | ret |= FPSCR_N; | 408 | ret |= FPSCR_N; |
407 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) { | 409 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) { |
408 | /* | 410 | /* |
409 | * d > m | 411 | * d > m |
410 | */ | 412 | */ |
411 | ret |= FPSCR_C; | 413 | ret |= FPSCR_C; |
412 | } | 414 | } |
413 | } | 415 | } |
414 | 416 | ||
415 | return ret; | 417 | return ret; |
416 | } | 418 | } |
417 | 419 | ||
418 | static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr) | 420 | static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr) |
419 | { | 421 | { |
420 | return vfp_compare(dd, 0, dm, fpscr); | 422 | return vfp_compare(dd, 0, dm, fpscr); |
421 | } | 423 | } |
422 | 424 | ||
423 | static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr) | 425 | static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr) |
424 | { | 426 | { |
425 | return vfp_compare(dd, 1, dm, fpscr); | 427 | return vfp_compare(dd, 1, dm, fpscr); |
426 | } | 428 | } |
427 | 429 | ||
428 | static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr) | 430 | static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr) |
429 | { | 431 | { |
430 | return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr); | 432 | return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr); |
431 | } | 433 | } |
432 | 434 | ||
433 | static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr) | 435 | static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr) |
434 | { | 436 | { |
435 | return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr); | 437 | return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr); |
436 | } | 438 | } |
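Worked flag examples for vfp_compare(), following the comment above the function (values are illustrative):

    /* fcmpd 1.0, 2.0  -> N set    ("less than": same sign, d < m)
     * fcmpd 1.0, 1.0  -> Z|C set  ("equal"; +0.0 vs -0.0 also compares equal)
     * any NaN operand -> C|V set  ("unordered") */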
437 | 439 | ||
438 | static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr) | 440 | static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr) |
439 | { | 441 | { |
440 | struct vfp_double vdm; | 442 | struct vfp_double vdm; |
441 | struct vfp_single vsd; | 443 | struct vfp_single vsd; |
442 | int tm; | 444 | int tm; |
443 | u32 exceptions = 0; | 445 | u32 exceptions = 0; |
444 | 446 | ||
445 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 447 | vfp_double_unpack(&vdm, vfp_get_double(dm)); |
446 | 448 | ||
447 | tm = vfp_double_type(&vdm); | 449 | tm = vfp_double_type(&vdm); |
448 | 450 | ||
449 | /* | 451 | /* |
450 | * If we have a signalling NaN, signal invalid operation. | 452 | * If we have a signalling NaN, signal invalid operation. |
451 | */ | 453 | */ |
452 | if (tm == VFP_SNAN) | 454 | if (tm == VFP_SNAN) |
453 | exceptions = FPSCR_IOC; | 455 | exceptions = FPSCR_IOC; |
454 | 456 | ||
455 | if (tm & VFP_DENORMAL) | 457 | if (tm & VFP_DENORMAL) |
456 | vfp_double_normalise_denormal(&vdm); | 458 | vfp_double_normalise_denormal(&vdm); |
457 | 459 | ||
458 | vsd.sign = vdm.sign; | 460 | vsd.sign = vdm.sign; |
459 | vsd.significand = vfp_hi64to32jamming(vdm.significand); | 461 | vsd.significand = vfp_hi64to32jamming(vdm.significand); |
460 | 462 | ||
461 | /* | 463 | /* |
462 | * If we have an infinity or a NaN, the exponent must be 255 | 464 | * If we have an infinity or a NaN, the exponent must be 255 |
463 | */ | 465 | */ |
464 | if (tm & (VFP_INFINITY|VFP_NAN)) { | 466 | if (tm & (VFP_INFINITY|VFP_NAN)) { |
465 | vsd.exponent = 255; | 467 | vsd.exponent = 255; |
466 | if (tm & VFP_NAN) | 468 | if (tm & VFP_NAN) |
467 | vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN; | 469 | vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN; |
468 | goto pack_nan; | 470 | goto pack_nan; |
469 | } else if (tm & VFP_ZERO) | 471 | } else if (tm & VFP_ZERO) |
470 | vsd.exponent = 0; | 472 | vsd.exponent = 0; |
471 | else | 473 | else |
472 | vsd.exponent = vdm.exponent - (1023 - 127); | 474 | vsd.exponent = vdm.exponent - (1023 - 127); |
473 | 475 | ||
474 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts"); | 476 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts"); |
475 | 477 | ||
476 | pack_nan: | 478 | pack_nan: |
477 | vfp_put_float(sd, vfp_single_pack(&vsd)); | 479 | vfp_put_float(sd, vfp_single_pack(&vsd)); |
478 | return exceptions; | 480 | return exceptions; |
479 | } | 481 | } |
480 | 482 | ||
481 | static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr) | 483 | static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr) |
482 | { | 484 | { |
483 | struct vfp_double vdm; | 485 | struct vfp_double vdm; |
484 | u32 m = vfp_get_float(dm); | 486 | u32 m = vfp_get_float(dm); |
485 | 487 | ||
486 | vdm.sign = 0; | 488 | vdm.sign = 0; |
487 | vdm.exponent = 1023 + 63 - 1; | 489 | vdm.exponent = 1023 + 63 - 1; |
488 | vdm.significand = (u64)m; | 490 | vdm.significand = (u64)m; |
489 | 491 | ||
490 | return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito"); | 492 | return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito"); |
491 | } | 493 | } |
492 | 494 | ||
493 | static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr) | 495 | static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr) |
494 | { | 496 | { |
495 | struct vfp_double vdm; | 497 | struct vfp_double vdm; |
496 | u32 m = vfp_get_float(dm); | 498 | u32 m = vfp_get_float(dm); |
497 | 499 | ||
498 | vdm.sign = (m & 0x80000000) >> 16; | 500 | vdm.sign = (m & 0x80000000) >> 16; |
499 | vdm.exponent = 1023 + 63 - 1; | 501 | vdm.exponent = 1023 + 63 - 1; |
500 | vdm.significand = vdm.sign ? -m : m; | 502 | vdm.significand = vdm.sign ? -m : m; |
501 | 503 | ||
502 | return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito"); | 504 | return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito"); |
503 | } | 505 | } |
504 | 506 | ||
505 | static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr) | 507 | static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr) |
506 | { | 508 | { |
507 | struct vfp_double vdm; | 509 | struct vfp_double vdm; |
508 | u32 d, exceptions = 0; | 510 | u32 d, exceptions = 0; |
509 | int rmode = fpscr & FPSCR_RMODE_MASK; | 511 | int rmode = fpscr & FPSCR_RMODE_MASK; |
510 | int tm; | 512 | int tm; |
511 | 513 | ||
512 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 514 | vfp_double_unpack(&vdm, vfp_get_double(dm)); |
513 | 515 | ||
514 | /* | 516 | /* |
515 | * Do we have a denormalised number? | 517 | * Do we have a denormalised number? |
516 | */ | 518 | */ |
517 | tm = vfp_double_type(&vdm); | 519 | tm = vfp_double_type(&vdm); |
518 | if (tm & VFP_DENORMAL) | 520 | if (tm & VFP_DENORMAL) |
519 | exceptions |= FPSCR_IDC; | 521 | exceptions |= FPSCR_IDC; |
520 | 522 | ||
521 | if (tm & VFP_NAN) | 523 | if (tm & VFP_NAN) |
522 | vdm.sign = 0; | 524 | vdm.sign = 0; |
523 | 525 | ||
524 | if (vdm.exponent >= 1023 + 32) { | 526 | if (vdm.exponent >= 1023 + 32) { |
525 | d = vdm.sign ? 0 : 0xffffffff; | 527 | d = vdm.sign ? 0 : 0xffffffff; |
526 | exceptions = FPSCR_IOC; | 528 | exceptions = FPSCR_IOC; |
527 | } else if (vdm.exponent >= 1023 - 1) { | 529 | } else if (vdm.exponent >= 1023 - 1) { |
528 | int shift = 1023 + 63 - vdm.exponent; | 530 | int shift = 1023 + 63 - vdm.exponent; |
529 | u64 rem, incr = 0; | 531 | u64 rem, incr = 0; |
530 | 532 | ||
531 | /* | 533 | /* |
532 | * 2^0 <= m < 2^32-2^8 | 534 | * 2^0 <= m < 2^32-2^8 |
533 | */ | 535 | */ |
534 | d = (vdm.significand << 1) >> shift; | 536 | d = (vdm.significand << 1) >> shift; |
535 | rem = vdm.significand << (65 - shift); | 537 | rem = vdm.significand << (65 - shift); |
536 | 538 | ||
537 | if (rmode == FPSCR_ROUND_NEAREST) { | 539 | if (rmode == FPSCR_ROUND_NEAREST) { |
538 | incr = 0x8000000000000000ULL; | 540 | incr = 0x8000000000000000ULL; |
539 | if ((d & 1) == 0) | 541 | if ((d & 1) == 0) |
540 | incr -= 1; | 542 | incr -= 1; |
541 | } else if (rmode == FPSCR_ROUND_TOZERO) { | 543 | } else if (rmode == FPSCR_ROUND_TOZERO) { |
542 | incr = 0; | 544 | incr = 0; |
543 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) { | 545 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) { |
544 | incr = ~0ULL; | 546 | incr = ~0ULL; |
545 | } | 547 | } |
546 | 548 | ||
547 | if ((rem + incr) < rem) { | 549 | if ((rem + incr) < rem) { |
548 | if (d < 0xffffffff) | 550 | if (d < 0xffffffff) |
549 | d += 1; | 551 | d += 1; |
550 | else | 552 | else |
551 | exceptions |= FPSCR_IOC; | 553 | exceptions |= FPSCR_IOC; |
552 | } | 554 | } |
553 | 555 | ||
554 | if (d && vdm.sign) { | 556 | if (d && vdm.sign) { |
555 | d = 0; | 557 | d = 0; |
556 | exceptions |= FPSCR_IOC; | 558 | exceptions |= FPSCR_IOC; |
557 | } else if (rem) | 559 | } else if (rem) |
558 | exceptions |= FPSCR_IXC; | 560 | exceptions |= FPSCR_IXC; |
559 | } else { | 561 | } else { |
560 | d = 0; | 562 | d = 0; |
561 | if (vdm.exponent | vdm.significand) { | 563 | if (vdm.exponent | vdm.significand) { |
562 | exceptions |= FPSCR_IXC; | 564 | exceptions |= FPSCR_IXC; |
563 | if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0) | 565 | if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0) |
564 | d = 1; | 566 | d = 1; |
565 | else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) { | 567 | else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) { |
566 | d = 0; | 568 | d = 0; |
567 | exceptions |= FPSCR_IOC; | 569 | exceptions |= FPSCR_IOC; |
568 | } | 570 | } |
569 | } | 571 | } |
570 | } | 572 | } |
571 | 573 | ||
572 | pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); | 574 | pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); |
573 | 575 | ||
574 | vfp_put_float(sd, d); | 576 | vfp_put_float(sd, d); |
575 | 577 | ||
576 | return exceptions; | 578 | return exceptions; |
577 | } | 579 | } |
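The rounding above relies on a carry trick worth spelling out: rem holds the discarded fraction bits left-aligned in a full 64-bit word, incr is the threshold for the selected rounding mode, and a wrap-around in the 64-bit addition rem + incr is exactly the "round up" condition. A minimal standalone sketch of the round-to-nearest-even case (hypothetical helper, not part of the patch):

#include <stdint.h>

/*
 * Model of the ftoui rounding step: the value being converted is
 * d + rem/2^64, with rem holding the discarded fraction bits.
 */
static uint32_t round_nearest_even(uint32_t d, uint64_t rem)
{
        uint64_t incr = 0x8000000000000000ULL;  /* exactly half */

        /*
         * Ties go to even: when d is already even, require
         * strictly more than half (incr - 1) to round up.
         */
        if ((d & 1) == 0)
                incr -= 1;

        /* Carry out of the 64-bit add == round up. */
        if (rem + incr < rem)
                d += 1;
        return d;
}

For a fraction of exactly one half (rem = 1ULL << 63), this bumps d = 3 up to 4 but leaves d = 4 unchanged, which is the round-half-to-even behaviour the FPSCR default mode requires.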
578 | 580 | ||
579 | static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr) | 581 | static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr) |
580 | { | 582 | { |
581 | return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO); | 583 | return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO); |
582 | } | 584 | } |
583 | 585 | ||
584 | static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr) | 586 | static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr) |
585 | { | 587 | { |
586 | struct vfp_double vdm; | 588 | struct vfp_double vdm; |
587 | u32 d, exceptions = 0; | 589 | u32 d, exceptions = 0; |
588 | int rmode = fpscr & FPSCR_RMODE_MASK; | 590 | int rmode = fpscr & FPSCR_RMODE_MASK; |
589 | 591 | ||
590 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 592 | vfp_double_unpack(&vdm, vfp_get_double(dm)); |
591 | vfp_double_dump("VDM", &vdm); | 593 | vfp_double_dump("VDM", &vdm); |
592 | 594 | ||
593 | /* | 595 | /* |
594 | * Do we have a denormalised number? | 596 | * Do we have a denormalised number? |
595 | */ | 597 | */ |
596 | if (vfp_double_type(&vdm) & VFP_DENORMAL) | 598 | if (vfp_double_type(&vdm) & VFP_DENORMAL) |
597 | exceptions |= FPSCR_IDC; | 599 | exceptions |= FPSCR_IDC; |
598 | 600 | ||
599 | if (vdm.exponent >= 1023 + 32) { | 601 | if (vdm.exponent >= 1023 + 32) { |
600 | d = 0x7fffffff; | 602 | d = 0x7fffffff; |
601 | if (vdm.sign) | 603 | if (vdm.sign) |
602 | d = ~d; | 604 | d = ~d; |
603 | exceptions |= FPSCR_IOC; | 605 | exceptions |= FPSCR_IOC; |
604 | } else if (vdm.exponent >= 1023 - 1) { | 606 | } else if (vdm.exponent >= 1023 - 1) { |
605 | int shift = 1023 + 63 - vdm.exponent; /* 58 */ | 607 | int shift = 1023 + 63 - vdm.exponent; /* 58 */ |
606 | u64 rem, incr = 0; | 608 | u64 rem, incr = 0; |
607 | 609 | ||
608 | d = (vdm.significand << 1) >> shift; | 610 | d = (vdm.significand << 1) >> shift; |
609 | rem = vdm.significand << (65 - shift); | 611 | rem = vdm.significand << (65 - shift); |
610 | 612 | ||
611 | if (rmode == FPSCR_ROUND_NEAREST) { | 613 | if (rmode == FPSCR_ROUND_NEAREST) { |
612 | incr = 0x8000000000000000ULL; | 614 | incr = 0x8000000000000000ULL; |
613 | if ((d & 1) == 0) | 615 | if ((d & 1) == 0) |
614 | incr -= 1; | 616 | incr -= 1; |
615 | } else if (rmode == FPSCR_ROUND_TOZERO) { | 617 | } else if (rmode == FPSCR_ROUND_TOZERO) { |
616 | incr = 0; | 618 | incr = 0; |
617 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) { | 619 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) { |
618 | incr = ~0ULL; | 620 | incr = ~0ULL; |
619 | } | 621 | } |
620 | 622 | ||
621 | if ((rem + incr) < rem && d < 0xffffffff) | 623 | if ((rem + incr) < rem && d < 0xffffffff) |
622 | d += 1; | 624 | d += 1; |
623 | if (d > 0x7fffffff + (vdm.sign != 0)) { | 625 | if (d > 0x7fffffff + (vdm.sign != 0)) { |
624 | d = 0x7fffffff + (vdm.sign != 0); | 626 | d = 0x7fffffff + (vdm.sign != 0); |
625 | exceptions |= FPSCR_IOC; | 627 | exceptions |= FPSCR_IOC; |
626 | } else if (rem) | 628 | } else if (rem) |
627 | exceptions |= FPSCR_IXC; | 629 | exceptions |= FPSCR_IXC; |
628 | 630 | ||
629 | if (vdm.sign) | 631 | if (vdm.sign) |
630 | d = -d; | 632 | d = -d; |
631 | } else { | 633 | } else { |
632 | d = 0; | 634 | d = 0; |
633 | if (vdm.exponent | vdm.significand) { | 635 | if (vdm.exponent | vdm.significand) { |
634 | exceptions |= FPSCR_IXC; | 636 | exceptions |= FPSCR_IXC; |
635 | if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0) | 637 | if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0) |
636 | d = 1; | 638 | d = 1; |
637 | else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) | 639 | else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) |
638 | d = -1; | 640 | d = -1; |
639 | } | 641 | } |
640 | } | 642 | } |
641 | 643 | ||
642 | pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); | 644 | pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); |
643 | 645 | ||
644 | vfp_put_float(sd, (s32)d); | 646 | vfp_put_float(sd, (s32)d); |
645 | 647 | ||
646 | return exceptions; | 648 | return exceptions; |
647 | } | 649 | } |
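Note the asymmetric clamp in the signed conversion: the signed 32-bit range has one more value on the negative side (-2^31) than on the positive (2^31 - 1), and the expression d > 0x7fffffff + (vdm.sign != 0) buys exactly that extra count when the sign bit is set. A standalone model with hypothetical helper names:

#include <stdint.h>

/*
 * Saturate an unsigned magnitude into the signed 32-bit range,
 * mirroring the clamp and the unsigned negate (d = -d) above.
 * Returns nonzero when the clamp fired (invalid operation).
 */
static int saturate_s32(uint32_t mag, int negative, int32_t *out)
{
        uint32_t limit = 0x7fffffffu + (negative != 0);
        int overflow = mag > limit;

        if (overflow)
                mag = limit;
        if (negative)
                mag = -mag;     /* unsigned negate, like d = -d above */
        *out = (int32_t)mag;
        return overflow;
}

With negative set, a magnitude of 0x80000000 passes the clamp and negates to itself, landing on INT_MIN.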
648 | 650 | ||
649 | static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr) | 651 | static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr) |
650 | { | 652 | { |
651 | return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO); | 653 | return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO); |
652 | } | 654 | } |
653 | 655 | ||
654 | 656 | ||
655 | static u32 (* const fop_extfns[32])(int dd, int unused, int dm, u32 fpscr) = { | 657 | static u32 (* const fop_extfns[32])(int dd, int unused, int dm, u32 fpscr) = { |
656 | [FEXT_TO_IDX(FEXT_FCPY)] = vfp_double_fcpy, | 658 | [FEXT_TO_IDX(FEXT_FCPY)] = vfp_double_fcpy, |
657 | [FEXT_TO_IDX(FEXT_FABS)] = vfp_double_fabs, | 659 | [FEXT_TO_IDX(FEXT_FABS)] = vfp_double_fabs, |
658 | [FEXT_TO_IDX(FEXT_FNEG)] = vfp_double_fneg, | 660 | [FEXT_TO_IDX(FEXT_FNEG)] = vfp_double_fneg, |
659 | [FEXT_TO_IDX(FEXT_FSQRT)] = vfp_double_fsqrt, | 661 | [FEXT_TO_IDX(FEXT_FSQRT)] = vfp_double_fsqrt, |
660 | [FEXT_TO_IDX(FEXT_FCMP)] = vfp_double_fcmp, | 662 | [FEXT_TO_IDX(FEXT_FCMP)] = vfp_double_fcmp, |
661 | [FEXT_TO_IDX(FEXT_FCMPE)] = vfp_double_fcmpe, | 663 | [FEXT_TO_IDX(FEXT_FCMPE)] = vfp_double_fcmpe, |
662 | [FEXT_TO_IDX(FEXT_FCMPZ)] = vfp_double_fcmpz, | 664 | [FEXT_TO_IDX(FEXT_FCMPZ)] = vfp_double_fcmpz, |
663 | [FEXT_TO_IDX(FEXT_FCMPEZ)] = vfp_double_fcmpez, | 665 | [FEXT_TO_IDX(FEXT_FCMPEZ)] = vfp_double_fcmpez, |
664 | [FEXT_TO_IDX(FEXT_FCVT)] = vfp_double_fcvts, | 666 | [FEXT_TO_IDX(FEXT_FCVT)] = vfp_double_fcvts, |
665 | [FEXT_TO_IDX(FEXT_FUITO)] = vfp_double_fuito, | 667 | [FEXT_TO_IDX(FEXT_FUITO)] = vfp_double_fuito, |
666 | [FEXT_TO_IDX(FEXT_FSITO)] = vfp_double_fsito, | 668 | [FEXT_TO_IDX(FEXT_FSITO)] = vfp_double_fsito, |
667 | [FEXT_TO_IDX(FEXT_FTOUI)] = vfp_double_ftoui, | 669 | [FEXT_TO_IDX(FEXT_FTOUI)] = vfp_double_ftoui, |
668 | [FEXT_TO_IDX(FEXT_FTOUIZ)] = vfp_double_ftouiz, | 670 | [FEXT_TO_IDX(FEXT_FTOUIZ)] = vfp_double_ftouiz, |
669 | [FEXT_TO_IDX(FEXT_FTOSI)] = vfp_double_ftosi, | 671 | [FEXT_TO_IDX(FEXT_FTOSI)] = vfp_double_ftosi, |
670 | [FEXT_TO_IDX(FEXT_FTOSIZ)] = vfp_double_ftosiz, | 672 | [FEXT_TO_IDX(FEXT_FTOSIZ)] = vfp_double_ftosiz, |
671 | }; | 673 | }; |
672 | 674 | ||
673 | 675 | ||
674 | 676 | ||
675 | 677 | ||
676 | static u32 | 678 | static u32 |
677 | vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn, | 679 | vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn, |
678 | struct vfp_double *vdm, u32 fpscr) | 680 | struct vfp_double *vdm, u32 fpscr) |
679 | { | 681 | { |
680 | struct vfp_double *vdp; | 682 | struct vfp_double *vdp; |
681 | u32 exceptions = 0; | 683 | u32 exceptions = 0; |
682 | int tn, tm; | 684 | int tn, tm; |
683 | 685 | ||
684 | tn = vfp_double_type(vdn); | 686 | tn = vfp_double_type(vdn); |
685 | tm = vfp_double_type(vdm); | 687 | tm = vfp_double_type(vdm); |
686 | 688 | ||
687 | if (tn & tm & VFP_INFINITY) { | 689 | if (tn & tm & VFP_INFINITY) { |
688 | /* | 690 | /* |
689 | * Two infinities. Do they have different signs? | 691 | * Two infinities. Do they have different signs? |
690 | */ | 692 | */ |
691 | if (vdn->sign ^ vdm->sign) { | 693 | if (vdn->sign ^ vdm->sign) { |
692 | /* | 694 | /* |
693 | * different signs -> invalid | 695 | * different signs -> invalid |
694 | */ | 696 | */ |
695 | exceptions = FPSCR_IOC; | 697 | exceptions = FPSCR_IOC; |
696 | vdp = &vfp_double_default_qnan; | 698 | vdp = &vfp_double_default_qnan; |
697 | } else { | 699 | } else { |
698 | /* | 700 | /* |
699 | * same signs -> valid | 701 | * same signs -> valid |
700 | */ | 702 | */ |
701 | vdp = vdn; | 703 | vdp = vdn; |
702 | } | 704 | } |
703 | } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) { | 705 | } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) { |
704 | /* | 706 | /* |
705 | * One infinity and one number -> infinity | 707 | * One infinity and one number -> infinity |
706 | */ | 708 | */ |
707 | vdp = vdn; | 709 | vdp = vdn; |
708 | } else { | 710 | } else { |
709 | /* | 711 | /* |
710 | * 'n' is a NaN of some type | 712 | * 'n' is a NaN of some type |
711 | */ | 713 | */ |
712 | return vfp_propagate_nan(vdd, vdn, vdm, fpscr); | 714 | return vfp_propagate_nan(vdd, vdn, vdm, fpscr); |
713 | } | 715 | } |
714 | *vdd = *vdp; | 716 | *vdd = *vdp; |
715 | return exceptions; | 717 | return exceptions; |
716 | } | 718 | } |
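These branches are the IEEE 754 addition rules for non-numbers: like-signed infinities add to themselves, opposite-signed infinities are an invalid operation producing the default quiet NaN, and an infinity absorbs any finite operand. The same rules can be observed with host doubles on any IEEE machine (illustrative only, not part of the patch):

#include <math.h>
#include <stdio.h>

int main(void)
{
        double pinf = INFINITY, ninf = -INFINITY;

        printf("%f\n", pinf + pinf);    /* inf: same signs are valid   */
        printf("%f\n", pinf + ninf);    /* nan: opposite signs invalid */
        printf("%f\n", pinf + 42.0);    /* inf: infinity absorbs 42.0  */
        return 0;
}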
717 | 719 | ||
718 | static u32 | 720 | static u32 |
719 | vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn, | 721 | vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn, |
720 | struct vfp_double *vdm, u32 fpscr) | 722 | struct vfp_double *vdm, u32 fpscr) |
721 | { | 723 | { |
722 | u32 exp_diff; | 724 | u32 exp_diff; |
723 | u64 m_sig; | 725 | u64 m_sig; |
724 | 726 | ||
725 | if (vdn->significand & (1ULL << 63) || | 727 | if (vdn->significand & (1ULL << 63) || |
726 | vdm->significand & (1ULL << 63)) { | 728 | vdm->significand & (1ULL << 63)) { |
727 | pr_info("VFP: bad FP values in %s\n", __func__); | 729 | pr_info("VFP: bad FP values in %s\n", __func__); |
728 | vfp_double_dump("VDN", vdn); | 730 | vfp_double_dump("VDN", vdn); |
729 | vfp_double_dump("VDM", vdm); | 731 | vfp_double_dump("VDM", vdm); |
730 | } | 732 | } |
731 | 733 | ||
732 | /* | 734 | /* |
733 | * Ensure that 'n' is the number with the larger magnitude. Note that | 735 | * Ensure that 'n' is the number with the larger magnitude. Note that |
734 | * if 'n' and 'm' have equal exponents, we do not swap them. | 736 | * if 'n' and 'm' have equal exponents, we do not swap them. |
735 | * This ensures that NaN propagation works correctly. | 737 | * This ensures that NaN propagation works correctly. |
736 | */ | 738 | */ |
737 | if (vdn->exponent < vdm->exponent) { | 739 | if (vdn->exponent < vdm->exponent) { |
738 | struct vfp_double *t = vdn; | 740 | struct vfp_double *t = vdn; |
739 | vdn = vdm; | 741 | vdn = vdm; |
740 | vdm = t; | 742 | vdm = t; |
741 | } | 743 | } |
742 | 744 | ||
743 | /* | 745 | /* |
744 | * Is 'n' an infinity or a NaN? Note that 'm' may be a number, | 746 | * Is 'n' an infinity or a NaN? Note that 'm' may be a number, |
745 | * infinity or a NaN here. | 747 | * infinity or a NaN here. |
746 | */ | 748 | */ |
747 | if (vdn->exponent == 2047) | 749 | if (vdn->exponent == 2047) |
748 | return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr); | 750 | return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr); |
749 | 751 | ||
750 | /* | 752 | /* |
751 | * We have two proper numbers, where 'vdn' is the larger magnitude. | 753 | * We have two proper numbers, where 'vdn' is the larger magnitude. |
752 | * | 754 | * |
753 | * Copy 'n' to 'd' before doing the arithmetic. | 755 | * Copy 'n' to 'd' before doing the arithmetic. |
754 | */ | 756 | */ |
755 | *vdd = *vdn; | 757 | *vdd = *vdn; |
756 | 758 | ||
757 | /* | 759 | /* |
758 | * Align 'm' with the result. | 760 | * Align 'm' with the result. |
759 | */ | 761 | */ |
760 | exp_diff = vdn->exponent - vdm->exponent; | 762 | exp_diff = vdn->exponent - vdm->exponent; |
761 | m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff); | 763 | m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff); |
762 | 764 | ||
763 | /* | 765 | /* |
764 | * If the signs are different, we are really subtracting. | 766 | * If the signs are different, we are really subtracting. |
765 | */ | 767 | */ |
766 | if (vdn->sign ^ vdm->sign) { | 768 | if (vdn->sign ^ vdm->sign) { |
767 | m_sig = vdn->significand - m_sig; | 769 | m_sig = vdn->significand - m_sig; |
768 | if ((s64)m_sig < 0) { | 770 | if ((s64)m_sig < 0) { |
769 | vdd->sign = vfp_sign_negate(vdd->sign); | 771 | vdd->sign = vfp_sign_negate(vdd->sign); |
770 | m_sig = -m_sig; | 772 | m_sig = -m_sig; |
771 | } | 773 | } |
772 | } else { | 774 | } else { |
773 | m_sig += vdn->significand; | 775 | m_sig += vdn->significand; |
774 | } | 776 | } |
775 | vdd->significand = m_sig; | 777 | vdd->significand = m_sig; |
776 | 778 | ||
777 | return 0; | 779 | return 0; |
778 | } | 780 | } |
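The alignment step is where precision is preserved: vfp_shiftright64jamming() ORs every bit shifted out into bit 0, so a nonzero discarded tail still forces an inexact result at rounding time. After alignment, the add reduces to sign-magnitude arithmetic; a sketch of the sign handling (the 0/1 sign flag shown is hypothetical):

#include <stdint.h>

/*
 * Sign-magnitude combine of two aligned significands, as in the
 * branch above: n_sig belongs to the operand with the larger
 * exponent. When the signs differ and the subtraction crosses
 * zero, the result's sign flips and the magnitude is negated.
 */
static uint64_t sig_combine(uint64_t n_sig, uint64_t m_sig,
                            int signs_differ, unsigned int *sign)
{
        if (!signs_differ)
                return n_sig + m_sig;

        m_sig = n_sig - m_sig;
        if ((int64_t)m_sig < 0) {       /* 'm' was actually larger */
                *sign ^= 1;             /* hypothetical sign flag  */
                m_sig = -m_sig;
        }
        return m_sig;
}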
779 | 781 | ||
780 | static u32 | 782 | static u32 |
781 | vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn, | 783 | vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn, |
782 | struct vfp_double *vdm, u32 fpscr) | 784 | struct vfp_double *vdm, u32 fpscr) |
783 | { | 785 | { |
784 | vfp_double_dump("VDN", vdn); | 786 | vfp_double_dump("VDN", vdn); |
785 | vfp_double_dump("VDM", vdm); | 787 | vfp_double_dump("VDM", vdm); |
786 | 788 | ||
787 | /* | 789 | /* |
788 | * Ensure that 'n' is the number with the larger magnitude. Note that | 790 | * Ensure that 'n' is the number with the larger magnitude. Note that |
789 | * if 'n' and 'm' have equal exponents, we do not swap them. | 791 | * if 'n' and 'm' have equal exponents, we do not swap them. |
790 | * This ensures that NaN propagation works correctly. | 792 | * This ensures that NaN propagation works correctly. |
791 | */ | 793 | */ |
792 | if (vdn->exponent < vdm->exponent) { | 794 | if (vdn->exponent < vdm->exponent) { |
793 | struct vfp_double *t = vdn; | 795 | struct vfp_double *t = vdn; |
794 | vdn = vdm; | 796 | vdn = vdm; |
795 | vdm = t; | 797 | vdm = t; |
796 | pr_debug("VFP: swapping M <-> N\n"); | 798 | pr_debug("VFP: swapping M <-> N\n"); |
797 | } | 799 | } |
798 | 800 | ||
799 | vdd->sign = vdn->sign ^ vdm->sign; | 801 | vdd->sign = vdn->sign ^ vdm->sign; |
800 | 802 | ||
801 | /* | 803 | /* |
802 | * If 'n' is an infinity or NaN, handle it. 'm' may be anything. | 804 | * If 'n' is an infinity or NaN, handle it. 'm' may be anything. |
803 | */ | 805 | */ |
804 | if (vdn->exponent == 2047) { | 806 | if (vdn->exponent == 2047) { |
805 | if (vdn->significand || (vdm->exponent == 2047 && vdm->significand)) | 807 | if (vdn->significand || (vdm->exponent == 2047 && vdm->significand)) |
806 | return vfp_propagate_nan(vdd, vdn, vdm, fpscr); | 808 | return vfp_propagate_nan(vdd, vdn, vdm, fpscr); |
807 | if ((vdm->exponent | vdm->significand) == 0) { | 809 | if ((vdm->exponent | vdm->significand) == 0) { |
808 | *vdd = vfp_double_default_qnan; | 810 | *vdd = vfp_double_default_qnan; |
809 | return FPSCR_IOC; | 811 | return FPSCR_IOC; |
810 | } | 812 | } |
811 | vdd->exponent = vdn->exponent; | 813 | vdd->exponent = vdn->exponent; |
812 | vdd->significand = 0; | 814 | vdd->significand = 0; |
813 | return 0; | 815 | return 0; |
814 | } | 816 | } |
815 | 817 | ||
816 | /* | 818 | /* |
817 | * If 'm' is zero, the result is always zero. In this case, | 819 | * If 'm' is zero, the result is always zero. In this case, |
818 | * 'n' may be zero or a number, but it doesn't matter which. | 820 | * 'n' may be zero or a number, but it doesn't matter which. |
819 | */ | 821 | */ |
820 | if ((vdm->exponent | vdm->significand) == 0) { | 822 | if ((vdm->exponent | vdm->significand) == 0) { |
821 | vdd->exponent = 0; | 823 | vdd->exponent = 0; |
822 | vdd->significand = 0; | 824 | vdd->significand = 0; |
823 | return 0; | 825 | return 0; |
824 | } | 826 | } |
825 | 827 | ||
826 | /* | 828 | /* |
827 | * We add 2 to the destination exponent for the same reason | 829 | * We add 2 to the destination exponent for the same reason |
828 | * as the addition case - though this time we have +1 from | 830 | * as the addition case - though this time we have +1 from |
829 | * each input operand. | 831 | * each input operand. |
830 | */ | 832 | */ |
831 | vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2; | 833 | vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2; |
832 | vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand); | 834 | vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand); |
833 | 835 | ||
834 | vfp_double_dump("VDD", vdd); | 836 | vfp_double_dump("VDD", vdd); |
835 | return 0; | 837 | return 0; |
836 | } | 838 | } |
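vfp_hi64multiply64() is assumed here to return the top 64 bits of the full 128-bit product of the two significands (with the low half folded in as a sticky bit, in the library's usual style). A portable model of the high-half multiply built from 32-bit partial products, sketch only:

#include <stdint.h>

/* High 64 bits of a 64x64-bit multiply via 32-bit halves. */
static uint64_t mul64_hi(uint64_t n, uint64_t m)
{
        uint64_t nl = (uint32_t)n, nh = n >> 32;
        uint64_t ml = (uint32_t)m, mh = m >> 32;

        uint64_t lo   = nl * ml;
        uint64_t mid1 = nh * ml + (lo >> 32);
        uint64_t mid2 = nl * mh + (uint32_t)mid1;

        return nh * mh + (mid1 >> 32) + (mid2 >> 32);
}

Since the product of two normalised mantissas in [1, 2) lies in [1, 4), the leading bit can land in either of two positions; the normalise/round step afterwards fixes up whichever one it is.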
837 | 839 | ||
838 | #define NEG_MULTIPLY (1 << 0) | 840 | #define NEG_MULTIPLY (1 << 0) |
839 | #define NEG_SUBTRACT (1 << 1) | 841 | #define NEG_SUBTRACT (1 << 1) |
840 | 842 | ||
841 | static u32 | 843 | static u32 |
842 | vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func) | 844 | vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func) |
843 | { | 845 | { |
844 | struct vfp_double vdd, vdp, vdn, vdm; | 846 | struct vfp_double vdd, vdp, vdn, vdm; |
845 | u32 exceptions; | 847 | u32 exceptions; |
846 | 848 | ||
847 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | 849 | vfp_double_unpack(&vdn, vfp_get_double(dn)); |
848 | if (vdn.exponent == 0 && vdn.significand) | 850 | if (vdn.exponent == 0 && vdn.significand) |
849 | vfp_double_normalise_denormal(&vdn); | 851 | vfp_double_normalise_denormal(&vdn); |
850 | 852 | ||
851 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 853 | vfp_double_unpack(&vdm, vfp_get_double(dm)); |
852 | if (vdm.exponent == 0 && vdm.significand) | 854 | if (vdm.exponent == 0 && vdm.significand) |
853 | vfp_double_normalise_denormal(&vdm); | 855 | vfp_double_normalise_denormal(&vdm); |
854 | 856 | ||
855 | exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr); | 857 | exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr); |
856 | if (negate & NEG_MULTIPLY) | 858 | if (negate & NEG_MULTIPLY) |
857 | vdp.sign = vfp_sign_negate(vdp.sign); | 859 | vdp.sign = vfp_sign_negate(vdp.sign); |
858 | 860 | ||
859 | vfp_double_unpack(&vdn, vfp_get_double(dd)); | 861 | vfp_double_unpack(&vdn, vfp_get_double(dd)); |
860 | if (negate & NEG_SUBTRACT) | 862 | if (negate & NEG_SUBTRACT) |
861 | vdn.sign = vfp_sign_negate(vdn.sign); | 863 | vdn.sign = vfp_sign_negate(vdn.sign); |
862 | 864 | ||
863 | exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr); | 865 | exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr); |
864 | 866 | ||
865 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func); | 867 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func); |
866 | } | 868 | } |
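The four fused variants below differ only in these two flag bits: NEG_MULTIPLY flips the sign of the product before accumulation, and NEG_SUBTRACT flips the sign of the value read back from dd. A scalar model of the flag handling (illustrative only):

#include <stdint.h>

#define NEG_MULTIPLY    (1 << 0)
#define NEG_SUBTRACT    (1 << 1)

/* p is the product n * m; a is the accumulator read from dd. */
static double mac_model(double a, double p, uint32_t negate)
{
        if (negate & NEG_MULTIPLY)
                p = -p;         /* fnmac, fnmsc */
        if (negate & NEG_SUBTRACT)
                a = -a;         /* fmsc, fnmsc  */
        return a + p;
}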
867 | 869 | ||
868 | /* | 870 | /* |
869 | * Standard operations | 871 | * Standard operations |
870 | */ | 872 | */ |
871 | 873 | ||
872 | /* | 874 | /* |
873 | * dd = dd + (dn * dm) | 875 | * dd = dd + (dn * dm) |
874 | */ | 876 | */ |
875 | static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr) | 877 | static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr) |
876 | { | 878 | { |
877 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac"); | 879 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac"); |
878 | } | 880 | } |
879 | 881 | ||
880 | /* | 882 | /* |
881 | * dd = dd - (dn * dm) | 883 | * dd = dd - (dn * dm) |
882 | */ | 884 | */ |
883 | static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr) | 885 | static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr) |
884 | { | 886 | { |
885 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac"); | 887 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac"); |
886 | } | 888 | } |
887 | 889 | ||
888 | /* | 890 | /* |
889 | * dd = -dd + (dn * dm) | 891 | * dd = -dd + (dn * dm) |
890 | */ | 892 | */ |
891 | static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr) | 893 | static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr) |
892 | { | 894 | { |
893 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc"); | 895 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc"); |
894 | } | 896 | } |
895 | 897 | ||
896 | /* | 898 | /* |
897 | * dd = -dd - (dn * dm) | 899 | * dd = -dd - (dn * dm) |
898 | */ | 900 | */ |
899 | static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr) | 901 | static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr) |
900 | { | 902 | { |
901 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc"); | 903 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc"); |
902 | } | 904 | } |
903 | 905 | ||
904 | /* | 906 | /* |
905 | * dd = dn * dm | 907 | * dd = dn * dm |
906 | */ | 908 | */ |
907 | static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr) | 909 | static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr) |
908 | { | 910 | { |
909 | struct vfp_double vdd, vdn, vdm; | 911 | struct vfp_double vdd, vdn, vdm; |
910 | u32 exceptions; | 912 | u32 exceptions; |
911 | 913 | ||
912 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | 914 | vfp_double_unpack(&vdn, vfp_get_double(dn)); |
913 | if (vdn.exponent == 0 && vdn.significand) | 915 | if (vdn.exponent == 0 && vdn.significand) |
914 | vfp_double_normalise_denormal(&vdn); | 916 | vfp_double_normalise_denormal(&vdn); |
915 | 917 | ||
916 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 918 | vfp_double_unpack(&vdm, vfp_get_double(dm)); |
917 | if (vdm.exponent == 0 && vdm.significand) | 919 | if (vdm.exponent == 0 && vdm.significand) |
918 | vfp_double_normalise_denormal(&vdm); | 920 | vfp_double_normalise_denormal(&vdm); |
919 | 921 | ||
920 | exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr); | 922 | exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr); |
921 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul"); | 923 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul"); |
922 | } | 924 | } |
923 | 925 | ||
924 | /* | 926 | /* |
925 | * dd = -(dn * dm) | 927 | * dd = -(dn * dm) |
926 | */ | 928 | */ |
927 | static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr) | 929 | static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr) |
928 | { | 930 | { |
929 | struct vfp_double vdd, vdn, vdm; | 931 | struct vfp_double vdd, vdn, vdm; |
930 | u32 exceptions; | 932 | u32 exceptions; |
931 | 933 | ||
932 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | 934 | vfp_double_unpack(&vdn, vfp_get_double(dn)); |
933 | if (vdn.exponent == 0 && vdn.significand) | 935 | if (vdn.exponent == 0 && vdn.significand) |
934 | vfp_double_normalise_denormal(&vdn); | 936 | vfp_double_normalise_denormal(&vdn); |
935 | 937 | ||
936 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 938 | vfp_double_unpack(&vdm, vfp_get_double(dm)); |
937 | if (vdm.exponent == 0 && vdm.significand) | 939 | if (vdm.exponent == 0 && vdm.significand) |
938 | vfp_double_normalise_denormal(&vdm); | 940 | vfp_double_normalise_denormal(&vdm); |
939 | 941 | ||
940 | exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr); | 942 | exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr); |
941 | vdd.sign = vfp_sign_negate(vdd.sign); | 943 | vdd.sign = vfp_sign_negate(vdd.sign); |
942 | 944 | ||
943 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul"); | 945 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul"); |
944 | } | 946 | } |
945 | 947 | ||
946 | /* | 948 | /* |
947 | * dd = dn + dm | 949 | * dd = dn + dm |
948 | */ | 950 | */ |
949 | static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr) | 951 | static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr) |
950 | { | 952 | { |
951 | struct vfp_double vdd, vdn, vdm; | 953 | struct vfp_double vdd, vdn, vdm; |
952 | u32 exceptions; | 954 | u32 exceptions; |
953 | 955 | ||
954 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | 956 | vfp_double_unpack(&vdn, vfp_get_double(dn)); |
955 | if (vdn.exponent == 0 && vdn.significand) | 957 | if (vdn.exponent == 0 && vdn.significand) |
956 | vfp_double_normalise_denormal(&vdn); | 958 | vfp_double_normalise_denormal(&vdn); |
957 | 959 | ||
958 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 960 | vfp_double_unpack(&vdm, vfp_get_double(dm)); |
959 | if (vdm.exponent == 0 && vdm.significand) | 961 | if (vdm.exponent == 0 && vdm.significand) |
960 | vfp_double_normalise_denormal(&vdm); | 962 | vfp_double_normalise_denormal(&vdm); |
961 | 963 | ||
962 | exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr); | 964 | exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr); |
963 | 965 | ||
964 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd"); | 966 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd"); |
965 | } | 967 | } |
966 | 968 | ||
967 | /* | 969 | /* |
968 | * dd = dn - dm | 970 | * dd = dn - dm |
969 | */ | 971 | */ |
970 | static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr) | 972 | static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr) |
971 | { | 973 | { |
972 | struct vfp_double vdd, vdn, vdm; | 974 | struct vfp_double vdd, vdn, vdm; |
973 | u32 exceptions; | 975 | u32 exceptions; |
974 | 976 | ||
975 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | 977 | vfp_double_unpack(&vdn, vfp_get_double(dn)); |
976 | if (vdn.exponent == 0 && vdn.significand) | 978 | if (vdn.exponent == 0 && vdn.significand) |
977 | vfp_double_normalise_denormal(&vdn); | 979 | vfp_double_normalise_denormal(&vdn); |
978 | 980 | ||
979 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 981 | vfp_double_unpack(&vdm, vfp_get_double(dm)); |
980 | if (vdm.exponent == 0 && vdm.significand) | 982 | if (vdm.exponent == 0 && vdm.significand) |
981 | vfp_double_normalise_denormal(&vdm); | 983 | vfp_double_normalise_denormal(&vdm); |
982 | 984 | ||
983 | /* | 985 | /* |
984 | * Subtraction is like addition, but with a negated operand. | 986 | * Subtraction is like addition, but with a negated operand. |
985 | */ | 987 | */ |
986 | vdm.sign = vfp_sign_negate(vdm.sign); | 988 | vdm.sign = vfp_sign_negate(vdm.sign); |
987 | 989 | ||
988 | exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr); | 990 | exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr); |
989 | 991 | ||
990 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub"); | 992 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub"); |
991 | } | 993 | } |
992 | 994 | ||
993 | /* | 995 | /* |
994 | * dd = dn / dm | 996 | * dd = dn / dm |
995 | */ | 997 | */ |
996 | static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr) | 998 | static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr) |
997 | { | 999 | { |
998 | struct vfp_double vdd, vdn, vdm; | 1000 | struct vfp_double vdd, vdn, vdm; |
999 | u32 exceptions = 0; | 1001 | u32 exceptions = 0; |
1000 | int tm, tn; | 1002 | int tm, tn; |
1001 | 1003 | ||
1002 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | 1004 | vfp_double_unpack(&vdn, vfp_get_double(dn)); |
1003 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | 1005 | vfp_double_unpack(&vdm, vfp_get_double(dm)); |
1004 | 1006 | ||
1005 | vdd.sign = vdn.sign ^ vdm.sign; | 1007 | vdd.sign = vdn.sign ^ vdm.sign; |
1006 | 1008 | ||
1007 | tn = vfp_double_type(&vdn); | 1009 | tn = vfp_double_type(&vdn); |
1008 | tm = vfp_double_type(&vdm); | 1010 | tm = vfp_double_type(&vdm); |
1009 | 1011 | ||
1010 | /* | 1012 | /* |
1011 | * Is n a NaN? | 1013 | * Is n a NaN? |
1012 | */ | 1014 | */ |
1013 | if (tn & VFP_NAN) | 1015 | if (tn & VFP_NAN) |
1014 | goto vdn_nan; | 1016 | goto vdn_nan; |
1015 | 1017 | ||
1016 | /* | 1018 | /* |
1017 | * Is m a NaN? | 1019 | * Is m a NaN? |
1018 | */ | 1020 | */ |
1019 | if (tm & VFP_NAN) | 1021 | if (tm & VFP_NAN) |
1020 | goto vdm_nan; | 1022 | goto vdm_nan; |
1021 | 1023 | ||
1022 | /* | 1024 | /* |
1023 | * If n and m are infinity, the result is invalid | 1025 | * If n and m are infinity, the result is invalid |
1024 | * If n and m are zero, the result is invalid | 1026 | * If n and m are zero, the result is invalid |
1025 | */ | 1027 | */ |
1026 | if (tm & tn & (VFP_INFINITY|VFP_ZERO)) | 1028 | if (tm & tn & (VFP_INFINITY|VFP_ZERO)) |
1027 | goto invalid; | 1029 | goto invalid; |
1028 | 1030 | ||
1029 | /* | 1031 | /* |
1030 | * If n is infinity, the result is infinity | 1032 | * If n is infinity, the result is infinity |
1031 | */ | 1033 | */ |
1032 | if (tn & VFP_INFINITY) | 1034 | if (tn & VFP_INFINITY) |
1033 | goto infinity; | 1035 | goto infinity; |
1034 | 1036 | ||
1035 | /* | 1037 | /* |
1036 | * If m is zero, raise a division-by-zero exception | 1038 | * If m is zero, raise a division-by-zero exception |
1037 | */ | 1039 | */ |
1038 | if (tm & VFP_ZERO) | 1040 | if (tm & VFP_ZERO) |
1039 | goto divzero; | 1041 | goto divzero; |
1040 | 1042 | ||
1041 | /* | 1043 | /* |
1042 | * If m is infinity, or n is zero, the result is zero | 1044 | * If m is infinity, or n is zero, the result is zero |
1043 | */ | 1045 | */ |
1044 | if (tm & VFP_INFINITY || tn & VFP_ZERO) | 1046 | if (tm & VFP_INFINITY || tn & VFP_ZERO) |
1045 | goto zero; | 1047 | goto zero; |
1046 | 1048 | ||
1047 | if (tn & VFP_DENORMAL) | 1049 | if (tn & VFP_DENORMAL) |
1048 | vfp_double_normalise_denormal(&vdn); | 1050 | vfp_double_normalise_denormal(&vdn); |
1049 | if (tm & VFP_DENORMAL) | 1051 | if (tm & VFP_DENORMAL) |
1050 | vfp_double_normalise_denormal(&vdm); | 1052 | vfp_double_normalise_denormal(&vdm); |
1051 | 1053 | ||
1052 | /* | 1054 | /* |
1053 | * OK, we have two numbers; we can perform the division. | 1055 | * OK, we have two numbers; we can perform the division. |
1054 | */ | 1056 | */ |
1055 | vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1; | 1057 | vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1; |
1056 | vdm.significand <<= 1; | 1058 | vdm.significand <<= 1; |
1057 | if (vdm.significand <= (2 * vdn.significand)) { | 1059 | if (vdm.significand <= (2 * vdn.significand)) { |
1058 | vdn.significand >>= 1; | 1060 | vdn.significand >>= 1; |
1059 | vdd.exponent++; | 1061 | vdd.exponent++; |
1060 | } | 1062 | } |
1061 | vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand); | 1063 | vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand); |
1062 | if ((vdd.significand & 0x1ff) <= 2) { | 1064 | if ((vdd.significand & 0x1ff) <= 2) { |
1063 | u64 termh, terml, remh, reml; | 1065 | u64 termh, terml, remh, reml; |
1064 | mul64to128(&termh, &terml, vdm.significand, vdd.significand); | 1066 | mul64to128(&termh, &terml, vdm.significand, vdd.significand); |
1065 | sub128(&remh, &reml, vdn.significand, 0, termh, terml); | 1067 | sub128(&remh, &reml, vdn.significand, 0, termh, terml); |
1066 | while ((s64)remh < 0) { | 1068 | while ((s64)remh < 0) { |
1067 | vdd.significand -= 1; | 1069 | vdd.significand -= 1; |
1068 | add128(&remh, &reml, remh, reml, 0, vdm.significand); | 1070 | add128(&remh, &reml, remh, reml, 0, vdm.significand); |
1069 | } | 1071 | } |
1070 | vdd.significand |= (reml != 0); | 1072 | vdd.significand |= (reml != 0); |
1071 | } | 1073 | } |
1072 | return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv"); | 1074 | return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv"); |
1073 | 1075 | ||
1074 | vdn_nan: | 1076 | vdn_nan: |
1075 | exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr); | 1077 | exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr); |
1076 | pack: | 1078 | pack: |
1077 | vfp_put_double(dd, vfp_double_pack(&vdd)); | 1079 | vfp_put_double(dd, vfp_double_pack(&vdd)); |
1078 | return exceptions; | 1080 | return exceptions; |
1079 | 1081 | ||
1080 | vdm_nan: | 1082 | vdm_nan: |
1081 | exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr); | 1083 | exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr); |
1082 | goto pack; | 1084 | goto pack; |
1083 | 1085 | ||
1084 | zero: | 1086 | zero: |
1085 | vdd.exponent = 0; | 1087 | vdd.exponent = 0; |
1086 | vdd.significand = 0; | 1088 | vdd.significand = 0; |
1087 | goto pack; | 1089 | goto pack; |
1088 | 1090 | ||
1089 | divzero: | 1091 | divzero: |
1090 | exceptions = FPSCR_DZC; | 1092 | exceptions = FPSCR_DZC; |
1091 | infinity: | 1093 | infinity: |
1092 | vdd.exponent = 2047; | 1094 | vdd.exponent = 2047; |
1093 | vdd.significand = 0; | 1095 | vdd.significand = 0; |
1094 | goto pack; | 1096 | goto pack; |
1095 | 1097 | ||
1096 | invalid: | 1098 | invalid: |
1097 | vfp_put_double(dd, vfp_double_pack(&vfp_double_default_qnan)); | 1099 | vfp_put_double(dd, vfp_double_pack(&vfp_double_default_qnan)); |
1098 | return FPSCR_IOC; | 1100 | return FPSCR_IOC; |
1099 | } | 1101 | } |
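The quotient from vfp_estimate_div128to64() can come out a count or two too high, so the code verifies it: multiply the estimate back out, walk it down while the remainder is negative, then jam any leftover remainder into bit 0 as a sticky bit. The guard (vdd.significand & 0x1ff) <= 2 runs that expensive check only when the low bits suggest the estimate may sit right on a rounding boundary. A small-scale model of the correction loop (sketch, not the 128-bit original; q is assumed close to n / d):

#include <stdint.h>

/*
 * Given an estimate q of n / d that may be slightly too large,
 * multiply back, step down until the remainder is non-negative,
 * and jam the remainder into bit 0 so rounding sees "inexact".
 */
static uint32_t div_fixup(uint32_t n, uint32_t d, uint32_t q)
{
        int64_t rem = (int64_t)n - (int64_t)q * d;

        while (rem < 0) {
                q -= 1;
                rem += d;
        }
        return q | (rem != 0);
}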
1100 | 1102 | ||
1101 | static u32 (* const fop_fns[16])(int dd, int dn, int dm, u32 fpscr) = { | 1103 | static u32 (* const fop_fns[16])(int dd, int dn, int dm, u32 fpscr) = { |
1102 | [FOP_TO_IDX(FOP_FMAC)] = vfp_double_fmac, | 1104 | [FOP_TO_IDX(FOP_FMAC)] = vfp_double_fmac, |
1103 | [FOP_TO_IDX(FOP_FNMAC)] = vfp_double_fnmac, | 1105 | [FOP_TO_IDX(FOP_FNMAC)] = vfp_double_fnmac, |
1104 | [FOP_TO_IDX(FOP_FMSC)] = vfp_double_fmsc, | 1106 | [FOP_TO_IDX(FOP_FMSC)] = vfp_double_fmsc, |
1105 | [FOP_TO_IDX(FOP_FNMSC)] = vfp_double_fnmsc, | 1107 | [FOP_TO_IDX(FOP_FNMSC)] = vfp_double_fnmsc, |
1106 | [FOP_TO_IDX(FOP_FMUL)] = vfp_double_fmul, | 1108 | [FOP_TO_IDX(FOP_FMUL)] = vfp_double_fmul, |
1107 | [FOP_TO_IDX(FOP_FNMUL)] = vfp_double_fnmul, | 1109 | [FOP_TO_IDX(FOP_FNMUL)] = vfp_double_fnmul, |
1108 | [FOP_TO_IDX(FOP_FADD)] = vfp_double_fadd, | 1110 | [FOP_TO_IDX(FOP_FADD)] = vfp_double_fadd, |
1109 | [FOP_TO_IDX(FOP_FSUB)] = vfp_double_fsub, | 1111 | [FOP_TO_IDX(FOP_FSUB)] = vfp_double_fsub, |
1110 | [FOP_TO_IDX(FOP_FDIV)] = vfp_double_fdiv, | 1112 | [FOP_TO_IDX(FOP_FDIV)] = vfp_double_fdiv, |
1111 | }; | 1113 | }; |
1112 | 1114 | ||
1113 | #define FREG_BANK(x) ((x) & 0x0c) | 1115 | #define FREG_BANK(x) ((x) & 0x0c) |
1114 | #define FREG_IDX(x) ((x) & 3) | 1116 | #define FREG_IDX(x) ((x) & 3) |
1115 | 1117 | ||
1116 | u32 vfp_double_cpdo(u32 inst, u32 fpscr) | 1118 | u32 vfp_double_cpdo(u32 inst, u32 fpscr) |
1117 | { | 1119 | { |
1118 | u32 op = inst & FOP_MASK; | 1120 | u32 op = inst & FOP_MASK; |
1119 | u32 exceptions = 0; | 1121 | u32 exceptions = 0; |
1120 | unsigned int dd = vfp_get_sd(inst); | 1122 | unsigned int dd = vfp_get_sd(inst); |
1121 | unsigned int dn = vfp_get_sn(inst); | 1123 | unsigned int dn = vfp_get_sn(inst); |
1122 | unsigned int dm = vfp_get_sm(inst); | 1124 | unsigned int dm = vfp_get_sm(inst); |
1123 | unsigned int vecitr, veclen, vecstride; | 1125 | unsigned int vecitr, veclen, vecstride; |
1124 | u32 (*fop)(int, int, s32, u32); | 1126 | u32 (*fop)(int, int, s32, u32); |
1125 | 1127 | ||
1126 | veclen = fpscr & FPSCR_LENGTH_MASK; | 1128 | veclen = fpscr & FPSCR_LENGTH_MASK; |
1127 | vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK)) * 2; | 1129 | vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK)) * 2; |
1128 | 1130 | ||
1129 | /* | 1131 | /* |
1130 | * If the destination bank is zero, the vector length is always '1'. | 1132 | * If the destination bank is zero, the vector length is always '1'. |
1131 | * ARM DDI0100F C5.1.3, C5.3.2. | 1133 | * ARM DDI0100F C5.1.3, C5.3.2. |
1132 | */ | 1134 | */ |
1133 | if (FREG_BANK(dd) == 0) | 1135 | if (FREG_BANK(dd) == 0) |
1134 | veclen = 0; | 1136 | veclen = 0; |
1135 | 1137 | ||
1136 | pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride, | 1138 | pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride, |
1137 | (veclen >> FPSCR_LENGTH_BIT) + 1); | 1139 | (veclen >> FPSCR_LENGTH_BIT) + 1); |
1138 | 1140 | ||
1139 | fop = (op == FOP_EXT) ? fop_extfns[dn] : fop_fns[FOP_TO_IDX(op)]; | 1141 | fop = (op == FOP_EXT) ? fop_extfns[dn] : fop_fns[FOP_TO_IDX(op)]; |
1140 | if (!fop) | 1142 | if (!fop) |
1141 | goto invalid; | 1143 | goto invalid; |
1142 | 1144 | ||
1143 | for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) { | 1145 | for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) { |
1144 | u32 except; | 1146 | u32 except; |
1145 | 1147 | ||
1146 | if (op == FOP_EXT) | 1148 | if (op == FOP_EXT) |
1147 | pr_debug("VFP: itr%d (d%u.%u) = op[%u] (d%u.%u)\n", | 1149 | pr_debug("VFP: itr%d (d%u.%u) = op[%u] (d%u.%u)\n", |
1148 | vecitr >> FPSCR_LENGTH_BIT, | 1150 | vecitr >> FPSCR_LENGTH_BIT, |
1149 | dd >> 1, dd & 1, dn, | 1151 | dd >> 1, dd & 1, dn, |
1150 | dm >> 1, dm & 1); | 1152 | dm >> 1, dm & 1); |
1151 | else | 1153 | else |
1152 | pr_debug("VFP: itr%d (d%u.%u) = (d%u.%u) op[%u] (d%u.%u)\n", | 1154 | pr_debug("VFP: itr%d (d%u.%u) = (d%u.%u) op[%u] (d%u.%u)\n", |
1153 | vecitr >> FPSCR_LENGTH_BIT, | 1155 | vecitr >> FPSCR_LENGTH_BIT, |
1154 | dd >> 1, dd & 1, | 1156 | dd >> 1, dd & 1, |
1155 | dn >> 1, dn & 1, | 1157 | dn >> 1, dn & 1, |
1156 | FOP_TO_IDX(op), | 1158 | FOP_TO_IDX(op), |
1157 | dm >> 1, dm & 1); | 1159 | dm >> 1, dm & 1); |
1158 | 1160 | ||
1159 | except = fop(dd, dn, dm, fpscr); | 1161 | except = fop(dd, dn, dm, fpscr); |
1160 | pr_debug("VFP: itr%d: exceptions=%08x\n", | 1162 | pr_debug("VFP: itr%d: exceptions=%08x\n", |
1161 | vecitr >> FPSCR_LENGTH_BIT, except); | 1163 | vecitr >> FPSCR_LENGTH_BIT, except); |
1162 | 1164 | ||
1163 | exceptions |= except; | 1165 | exceptions |= except; |
1164 | 1166 | ||
1165 | /* | 1167 | /* |
1166 | * This ensures that comparisons only operate on scalars; | 1168 | * This ensures that comparisons only operate on scalars; |
1167 | * comparisons always return with one FPSCR status bit set. | 1169 | * comparisons always return with one FPSCR status bit set. |
1168 | */ | 1170 | */ |
1169 | if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) | 1171 | if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) |
1170 | break; | 1172 | break; |
1171 | 1173 | ||
1172 | /* | 1174 | /* |
1173 | * CHECK: It appears to be undefined whether we stop when | 1175 | * CHECK: It appears to be undefined whether we stop when |
1174 | * we encounter an exception. We continue. | 1176 | * we encounter an exception. We continue. |
1175 | */ | 1177 | */ |
1176 | 1178 | ||
1177 | dd = FREG_BANK(dd) + ((FREG_IDX(dd) + vecstride) & 6); | 1179 | dd = FREG_BANK(dd) + ((FREG_IDX(dd) + vecstride) & 6); |
1178 | dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 6); | 1180 | dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 6); |
1179 | if (FREG_BANK(dm) != 0) | 1181 | if (FREG_BANK(dm) != 0) |
1180 | dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 6); | 1182 | dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 6); |
1181 | } | 1183 | } |
1182 | return exceptions; | 1184 | return exceptions; |
1183 | 1185 | ||
1184 | invalid: | 1186 | invalid: |
1185 | return ~0; | 1187 | return ~0; |
1186 | } | 1188 | } |
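In this function, register numbers are in single-precision units (double dN appears as 2N, hence the dd >> 1 in the debug output), which is why vecstride is doubled to 2 or 4; FREG_BANK and FREG_IDX then split the number into bank and index fields for the per-iteration step. A quick trace of the update expression for one plausible starting point (illustrative only):

#include <stdio.h>

#define FREG_BANK(x)    ((x) & 0x0c)
#define FREG_IDX(x)     ((x) & 3)

int main(void)
{
        unsigned int dd = 8;            /* d4 in single-precision units */
        unsigned int vecstride = 2;     /* FPSCR stride of 1, doubled   */
        int i;

        for (i = 0; i < 4; i++) {
                printf("iteration %d: dd=%u (d%u)\n", i, dd, dd >> 1);
                dd = FREG_BANK(dd) + ((FREG_IDX(dd) + vecstride) & 6);
        }
        return 0;
}

For this start register and stride, the walk visits d4, d5, d6, d7.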
1187 | 1189 |
arch/arm/vfp/vfpsingle.c
1 | /* | 1 | /* |
2 | * linux/arch/arm/vfp/vfpsingle.c | 2 | * linux/arch/arm/vfp/vfpsingle.c |
3 | * | 3 | * |
4 | * This code is derived in part from John R. Hauser's SoftFloat library, which | 4 | * This code is derived in part from John R. Hauser's SoftFloat library, which |
5 | * carries the following notice: | 5 | * carries the following notice: |
6 | * | 6 | * |
7 | * =========================================================================== | 7 | * =========================================================================== |
8 | * This C source file is part of the SoftFloat IEC/IEEE Floating-point | 8 | * This C source file is part of the SoftFloat IEC/IEEE Floating-point |
9 | * Arithmetic Package, Release 2. | 9 | * Arithmetic Package, Release 2. |
10 | * | 10 | * |
11 | * Written by John R. Hauser. This work was made possible in part by the | 11 | * Written by John R. Hauser. This work was made possible in part by the |
12 | * International Computer Science Institute, located at Suite 600, 1947 Center | 12 | * International Computer Science Institute, located at Suite 600, 1947 Center |
13 | * Street, Berkeley, California 94704. Funding was partially provided by the | 13 | * Street, Berkeley, California 94704. Funding was partially provided by the |
14 | * National Science Foundation under grant MIP-9311980. The original version | 14 | * National Science Foundation under grant MIP-9311980. The original version |
15 | * of this code was written as part of a project to build a fixed-point vector | 15 | * of this code was written as part of a project to build a fixed-point vector |
16 | * processor in collaboration with the University of California at Berkeley, | 16 | * processor in collaboration with the University of California at Berkeley, |
17 | * overseen by Profs. Nelson Morgan and John Wawrzynek. More information | 17 | * overseen by Profs. Nelson Morgan and John Wawrzynek. More information |
18 | * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ | 18 | * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ |
19 | * arithmetic/softfloat.html'. | 19 | * arithmetic/softfloat.html'. |
20 | * | 20 | * |
21 | * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort | 21 | * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort |
22 | * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT | 22 | * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT |
23 | * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO | 23 | * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO |
24 | * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY | 24 | * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY |
25 | * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. | 25 | * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. |
26 | * | 26 | * |
27 | * Derivative works are acceptable, even for commercial purposes, so long as | 27 | * Derivative works are acceptable, even for commercial purposes, so long as |
28 | * (1) they include prominent notice that the work is derivative, and (2) they | 28 | * (1) they include prominent notice that the work is derivative, and (2) they |
29 | * include prominent notice akin to these three paragraphs for those parts of | 29 | * include prominent notice akin to these three paragraphs for those parts of |
30 | * this code that are retained. | 30 | * this code that are retained. |
31 | * =========================================================================== | 31 | * =========================================================================== |
32 | */ | 32 | */ |
33 | #include <linux/kernel.h> | 33 | #include <linux/kernel.h> |
34 | #include <linux/bitops.h> | 34 | #include <linux/bitops.h> |
35 | |||
36 | #include <asm/div64.h> | ||
35 | #include <asm/ptrace.h> | 37 | #include <asm/ptrace.h> |
36 | #include <asm/vfp.h> | 38 | #include <asm/vfp.h> |
37 | 39 | ||
38 | #include "vfpinstr.h" | 40 | #include "vfpinstr.h" |
39 | #include "vfp.h" | 41 | #include "vfp.h" |
40 | 42 | ||
41 | static struct vfp_single vfp_single_default_qnan = { | 43 | static struct vfp_single vfp_single_default_qnan = { |
42 | .exponent = 255, | 44 | .exponent = 255, |
43 | .sign = 0, | 45 | .sign = 0, |
44 | .significand = VFP_SINGLE_SIGNIFICAND_QNAN, | 46 | .significand = VFP_SINGLE_SIGNIFICAND_QNAN, |
45 | }; | 47 | }; |
46 | 48 | ||
47 | static void vfp_single_dump(const char *str, struct vfp_single *s) | 49 | static void vfp_single_dump(const char *str, struct vfp_single *s) |
48 | { | 50 | { |
49 | pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n", | 51 | pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n", |
50 | str, s->sign != 0, s->exponent, s->significand); | 52 | str, s->sign != 0, s->exponent, s->significand); |
51 | } | 53 | } |
52 | 54 | ||
53 | static void vfp_single_normalise_denormal(struct vfp_single *vs) | 55 | static void vfp_single_normalise_denormal(struct vfp_single *vs) |
54 | { | 56 | { |
55 | int bits = 31 - fls(vs->significand); | 57 | int bits = 31 - fls(vs->significand); |
56 | 58 | ||
57 | vfp_single_dump("normalise_denormal: in", vs); | 59 | vfp_single_dump("normalise_denormal: in", vs); |
58 | 60 | ||
59 | if (bits) { | 61 | if (bits) { |
60 | vs->exponent -= bits - 1; | 62 | vs->exponent -= bits - 1; |
61 | vs->significand <<= bits; | 63 | vs->significand <<= bits; |
62 | } | 64 | } |
63 | 65 | ||
64 | vfp_single_dump("normalise_denormal: out", vs); | 66 | vfp_single_dump("normalise_denormal: out", vs); |
65 | } | 67 | } |
66 | 68 | ||
67 | #ifndef DEBUG | 69 | #ifndef DEBUG |
68 | #define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except) | 70 | #define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except) |
69 | u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions) | 71 | u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions) |
70 | #else | 72 | #else |
71 | u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func) | 73 | u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func) |
72 | #endif | 74 | #endif |
73 | { | 75 | { |
74 | u32 significand, incr, rmode; | 76 | u32 significand, incr, rmode; |
75 | int exponent, shift, underflow; | 77 | int exponent, shift, underflow; |
76 | 78 | ||
77 | vfp_single_dump("pack: in", vs); | 79 | vfp_single_dump("pack: in", vs); |
78 | 80 | ||
79 | /* | 81 | /* |
80 | * Infinities and NaNs are a special case. | 82 | * Infinities and NaNs are a special case. |
81 | */ | 83 | */ |
82 | if (vs->exponent == 255 && (vs->significand == 0 || exceptions)) | 84 | if (vs->exponent == 255 && (vs->significand == 0 || exceptions)) |
83 | goto pack; | 85 | goto pack; |
84 | 86 | ||
85 | /* | 87 | /* |
86 | * Special-case zero. | 88 | * Special-case zero. |
87 | */ | 89 | */ |
88 | if (vs->significand == 0) { | 90 | if (vs->significand == 0) { |
89 | vs->exponent = 0; | 91 | vs->exponent = 0; |
90 | goto pack; | 92 | goto pack; |
91 | } | 93 | } |
92 | 94 | ||
93 | exponent = vs->exponent; | 95 | exponent = vs->exponent; |
94 | significand = vs->significand; | 96 | significand = vs->significand; |
95 | 97 | ||
96 | /* | 98 | /* |
97 | * Normalise first. Note that we shift the significand up to | 99 | * Normalise first. Note that we shift the significand up to |
98 | * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least | 100 | * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least |
99 | * significant bit. | 101 | * significant bit. |
100 | */ | 102 | */ |
101 | shift = 32 - fls(significand); | 103 | shift = 32 - fls(significand); |
102 | if (shift < 32 && shift) { | 104 | if (shift < 32 && shift) { |
103 | exponent -= shift; | 105 | exponent -= shift; |
104 | significand <<= shift; | 106 | significand <<= shift; |
105 | } | 107 | } |
106 | 108 | ||
107 | #ifdef DEBUG | 109 | #ifdef DEBUG |
108 | vs->exponent = exponent; | 110 | vs->exponent = exponent; |
109 | vs->significand = significand; | 111 | vs->significand = significand; |
110 | vfp_single_dump("pack: normalised", vs); | 112 | vfp_single_dump("pack: normalised", vs); |
111 | #endif | 113 | #endif |
112 | 114 | ||
113 | /* | 115 | /* |
114 | * Tiny number? | 116 | * Tiny number? |
115 | */ | 117 | */ |
116 | underflow = exponent < 0; | 118 | underflow = exponent < 0; |
117 | if (underflow) { | 119 | if (underflow) { |
118 | significand = vfp_shiftright32jamming(significand, -exponent); | 120 | significand = vfp_shiftright32jamming(significand, -exponent); |
119 | exponent = 0; | 121 | exponent = 0; |
120 | #ifdef DEBUG | 122 | #ifdef DEBUG |
121 | vs->exponent = exponent; | 123 | vs->exponent = exponent; |
122 | vs->significand = significand; | 124 | vs->significand = significand; |
123 | vfp_single_dump("pack: tiny number", vs); | 125 | vfp_single_dump("pack: tiny number", vs); |
124 | #endif | 126 | #endif |
125 | if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))) | 127 | if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))) |
126 | underflow = 0; | 128 | underflow = 0; |
127 | } | 129 | } |
128 | 130 | ||
129 | /* | 131 | /* |
130 | * Select rounding increment. | 132 | * Select rounding increment. |
131 | */ | 133 | */ |
132 | incr = 0; | 134 | incr = 0; |
133 | rmode = fpscr & FPSCR_RMODE_MASK; | 135 | rmode = fpscr & FPSCR_RMODE_MASK; |
134 | 136 | ||
135 | if (rmode == FPSCR_ROUND_NEAREST) { | 137 | if (rmode == FPSCR_ROUND_NEAREST) { |
136 | incr = 1 << VFP_SINGLE_LOW_BITS; | 138 | incr = 1 << VFP_SINGLE_LOW_BITS; |
137 | if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0) | 139 | if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0) |
138 | incr -= 1; | 140 | incr -= 1; |
139 | } else if (rmode == FPSCR_ROUND_TOZERO) { | 141 | } else if (rmode == FPSCR_ROUND_TOZERO) { |
140 | incr = 0; | 142 | incr = 0; |
141 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0)) | 143 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0)) |
142 | incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1; | 144 | incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1; |
143 | 145 | ||
144 | pr_debug("VFP: rounding increment = 0x%08x\n", incr); | 146 | pr_debug("VFP: rounding increment = 0x%08x\n", incr); |
145 | 147 | ||
146 | /* | 148 | /* |
147 | * Is our rounding going to overflow? | 149 | * Is our rounding going to overflow? |
148 | */ | 150 | */ |
149 | if ((significand + incr) < significand) { | 151 | if ((significand + incr) < significand) { |
150 | exponent += 1; | 152 | exponent += 1; |
151 | significand = (significand >> 1) | (significand & 1); | 153 | significand = (significand >> 1) | (significand & 1); |
152 | incr >>= 1; | 154 | incr >>= 1; |
153 | #ifdef DEBUG | 155 | #ifdef DEBUG |
154 | vs->exponent = exponent; | 156 | vs->exponent = exponent; |
155 | vs->significand = significand; | 157 | vs->significand = significand; |
156 | vfp_single_dump("pack: overflow", vs); | 158 | vfp_single_dump("pack: overflow", vs); |
157 | #endif | 159 | #endif |
158 | } | 160 | } |
159 | 161 | ||
160 | /* | 162 | /* |
161 | * If any of the low bits (which will be shifted out of the | 163 | * If any of the low bits (which will be shifted out of the |
162 | * number) are non-zero, the result is inexact. | 164 | * number) are non-zero, the result is inexact. |
163 | */ | 165 | */ |
164 | if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)) | 166 | if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)) |
165 | exceptions |= FPSCR_IXC; | 167 | exceptions |= FPSCR_IXC; |
166 | 168 | ||
167 | /* | 169 | /* |
168 | * Do our rounding. | 170 | * Do our rounding. |
169 | */ | 171 | */ |
170 | significand += incr; | 172 | significand += incr; |
171 | 173 | ||
172 | /* | 174 | /* |
173 | * Infinity? | 175 | * Infinity? |
174 | */ | 176 | */ |
175 | if (exponent >= 254) { | 177 | if (exponent >= 254) { |
176 | exceptions |= FPSCR_OFC | FPSCR_IXC; | 178 | exceptions |= FPSCR_OFC | FPSCR_IXC; |
177 | if (incr == 0) { | 179 | if (incr == 0) { |
178 | vs->exponent = 253; | 180 | vs->exponent = 253; |
179 | vs->significand = 0x7fffffff; | 181 | vs->significand = 0x7fffffff; |
180 | } else { | 182 | } else { |
181 | vs->exponent = 255; /* infinity */ | 183 | vs->exponent = 255; /* infinity */ |
182 | vs->significand = 0; | 184 | vs->significand = 0; |
183 | } | 185 | } |
184 | } else { | 186 | } else { |
185 | if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0) | 187 | if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0) |
186 | exponent = 0; | 188 | exponent = 0; |
187 | if (exponent || significand > 0x80000000) | 189 | if (exponent || significand > 0x80000000) |
188 | underflow = 0; | 190 | underflow = 0; |
189 | if (underflow) | 191 | if (underflow) |
190 | exceptions |= FPSCR_UFC; | 192 | exceptions |= FPSCR_UFC; |
191 | vs->exponent = exponent; | 193 | vs->exponent = exponent; |
192 | vs->significand = significand >> 1; | 194 | vs->significand = significand >> 1; |
193 | } | 195 | } |
194 | 196 | ||
195 | pack: | 197 | pack: |
196 | vfp_single_dump("pack: final", vs); | 198 | vfp_single_dump("pack: final", vs); |
197 | { | 199 | { |
198 | s32 d = vfp_single_pack(vs); | 200 | s32 d = vfp_single_pack(vs); |
199 | pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func, | 201 | pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func, |
200 | sd, d, exceptions); | 202 | sd, d, exceptions); |
201 | vfp_put_float(sd, d); | 203 | vfp_put_float(sd, d); |
202 | } | 204 | } |
203 | 205 | ||
204 | return exceptions & ~VFP_NAN_FLAG; | 206 | return exceptions & ~VFP_NAN_FLAG; |
205 | } | 207 | } |
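The increment selection above is the whole of IEEE rounding in three cases: half an LSB (biased down by one for ties-to-even) in round-to-nearest, zero in round-toward-zero, and an almost-full LSB when rounding away from zero (plus-infinity mode on positive values, minus-infinity mode on negative ones). A condensed sketch; LOW_BITS stands in for VFP_SINGLE_LOW_BITS and the mode constants are hypothetical:

#include <stdint.h>

#define LOW_BITS        7       /* stand-in for VFP_SINGLE_LOW_BITS */

enum rmode { ROUND_NEAREST, ROUND_TOZERO, ROUND_PLUSINF, ROUND_MINUSINF };

/* The significand carries LOW_BITS + 1 guard bits below the
 * final LSB; incr is added before those bits are truncated. */
static uint32_t select_incr(uint32_t significand, enum rmode rmode, int sign)
{
        uint32_t incr = 0;

        if (rmode == ROUND_NEAREST) {
                incr = 1 << LOW_BITS;                   /* half an LSB */
                if ((significand & (1 << (LOW_BITS + 1))) == 0)
                        incr -= 1;                      /* ties to even */
        } else if (rmode != ROUND_TOZERO &&
                   ((rmode == ROUND_PLUSINF) ^ (sign != 0)))
                incr = (1 << (LOW_BITS + 1)) - 1;       /* away from zero */

        return incr;
}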
206 | 208 | ||
207 | /* | 209 | /* |
208 | * Propagate the NaN, setting exceptions if it is signalling. | 210 | * Propagate the NaN, setting exceptions if it is signalling. |
209 | * 'n' is always a NaN. 'm' may be a number, NaN or infinity. | 211 | * 'n' is always a NaN. 'm' may be a number, NaN or infinity. |
210 | */ | 212 | */ |
211 | static u32 | 213 | static u32 |
212 | vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn, | 214 | vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn, |
213 | struct vfp_single *vsm, u32 fpscr) | 215 | struct vfp_single *vsm, u32 fpscr) |
214 | { | 216 | { |
215 | struct vfp_single *nan; | 217 | struct vfp_single *nan; |
216 | int tn, tm = 0; | 218 | int tn, tm = 0; |
217 | 219 | ||
218 | tn = vfp_single_type(vsn); | 220 | tn = vfp_single_type(vsn); |
219 | 221 | ||
220 | if (vsm) | 222 | if (vsm) |
221 | tm = vfp_single_type(vsm); | 223 | tm = vfp_single_type(vsm); |
222 | 224 | ||
223 | if (fpscr & FPSCR_DEFAULT_NAN) | 225 | if (fpscr & FPSCR_DEFAULT_NAN) |
224 | /* | 226 | /* |
225 | * Default NaN mode - always returns a quiet NaN | 227 | * Default NaN mode - always returns a quiet NaN |
226 | */ | 228 | */ |
227 | nan = &vfp_single_default_qnan; | 229 | nan = &vfp_single_default_qnan; |
228 | else { | 230 | else { |
229 | /* | 231 | /* |
230 | * Contemporary mode - select the first signalling | 232 | * Contemporary mode - select the first signalling |
231 | * NaN, or if neither are signalling, the first | 233 | * NaN, or if neither are signalling, the first |
232 | * quiet NaN. | 234 | * quiet NaN. |
233 | */ | 235 | */ |
234 | if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN)) | 236 | if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN)) |
235 | nan = vsn; | 237 | nan = vsn; |
236 | else | 238 | else |
237 | nan = vsm; | 239 | nan = vsm; |
238 | /* | 240 | /* |
239 | * Make the NaN quiet. | 241 | * Make the NaN quiet. |
240 | */ | 242 | */ |
241 | nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN; | 243 | nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN; |
242 | } | 244 | } |
243 | 245 | ||
244 | *vsd = *nan; | 246 | *vsd = *nan; |
245 | 247 | ||
246 | /* | 248 | /* |
247 | * If one was a signalling NaN, raise invalid operation. | 249 | * If one was a signalling NaN, raise invalid operation. |
248 | */ | 250 | */ |
249 | return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG; | 251 | return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG; |
250 | } | 252 | } |
251 | 253 | ||
252 | 254 | ||
253 | /* | 255 | /* |
254 | * Extended operations | 256 | * Extended operations |
255 | */ | 257 | */ |
256 | static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr) | 258 | static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr) |
257 | { | 259 | { |
258 | vfp_put_float(sd, vfp_single_packed_abs(m)); | 260 | vfp_put_float(sd, vfp_single_packed_abs(m)); |
259 | return 0; | 261 | return 0; |
260 | } | 262 | } |
261 | 263 | ||
262 | static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr) | 264 | static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr) |
263 | { | 265 | { |
264 | vfp_put_float(sd, m); | 266 | vfp_put_float(sd, m); |
265 | return 0; | 267 | return 0; |
266 | } | 268 | } |
267 | 269 | ||
268 | static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr) | 270 | static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr) |
269 | { | 271 | { |
270 | vfp_put_float(sd, vfp_single_packed_negate(m)); | 272 | vfp_put_float(sd, vfp_single_packed_negate(m)); |
271 | return 0; | 273 | return 0; |
272 | } | 274 | } |
273 | 275 | ||
274 | static const u16 sqrt_oddadjust[] = { | 276 | static const u16 sqrt_oddadjust[] = { |
275 | 0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0, | 277 | 0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0, |
276 | 0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67 | 278 | 0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67 |
277 | }; | 279 | }; |
278 | 280 | ||
279 | static const u16 sqrt_evenadjust[] = { | 281 | static const u16 sqrt_evenadjust[] = { |
280 | 0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e, | 282 | 0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e, |
281 | 0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002 | 283 | 0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002 |
282 | }; | 284 | }; |
283 | 285 | ||
284 | u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand) | 286 | u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand) |
285 | { | 287 | { |
286 | int index; | 288 | int index; |
287 | u32 z, a; | 289 | u32 z, a; |
288 | 290 | ||
289 | if ((significand & 0xc0000000) != 0x40000000) { | 291 | if ((significand & 0xc0000000) != 0x40000000) { |
290 | printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n"); | 292 | printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n"); |
291 | } | 293 | } |
292 | 294 | ||
293 | a = significand << 1; | 295 | a = significand << 1; |
294 | index = (a >> 27) & 15; | 296 | index = (a >> 27) & 15; |
295 | if (exponent & 1) { | 297 | if (exponent & 1) { |
296 | z = 0x4000 + (a >> 17) - sqrt_oddadjust[index]; | 298 | z = 0x4000 + (a >> 17) - sqrt_oddadjust[index]; |
297 | z = ((a / z) << 14) + (z << 15); | 299 | z = ((a / z) << 14) + (z << 15); |
298 | a >>= 1; | 300 | a >>= 1; |
299 | } else { | 301 | } else { |
300 | z = 0x8000 + (a >> 17) - sqrt_evenadjust[index]; | 302 | z = 0x8000 + (a >> 17) - sqrt_evenadjust[index]; |
301 | z = a / z + z; | 303 | z = a / z + z; |
302 | z = (z >= 0x20000) ? 0xffff8000 : (z << 15); | 304 | z = (z >= 0x20000) ? 0xffff8000 : (z << 15); |
303 | if (z <= a) | 305 | if (z <= a) |
304 | return (s32)a >> 1; | 306 | return (s32)a >> 1; |
305 | } | 307 | } |
306 | return (u32)(((u64)a << 31) / z) + (z >> 1); | 308 | { |
309 | u64 v = (u64)a << 31; | ||
310 | do_div(v, z); | ||
311 | return v + (z >> 1); | ||
312 | } | ||
307 | } | 313 | } |
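
The hunk above is the point of this commit: the old expression divided a 64-bit value by a 32-bit one with `/`, which the compiler lowers to a libgcc 64-bit division helper. do_div() instead divides the 64-bit dividend in place by a 32-bit divisor and evaluates to the remainder. A user-space stand-in with the same shape (the real kernel macro uses an optimised 64-by-32 routine):

#include <stdint.h>

#define do_div(n, base) ({			\
	uint32_t __rem = (n) % (base);		\
	(n) /= (base);				\
	__rem;					\
})

static uint32_t final_estimate(uint32_t a, uint32_t z)
{
	uint64_t v = (uint64_t)a << 31;

	do_div(v, z);			/* v = (a << 31) / z */
	return (uint32_t)v + (z >> 1);
}
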
308 | 314 | ||
309 | static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr) | 315 | static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr) |
310 | { | 316 | { |
311 | struct vfp_single vsm, vsd; | 317 | struct vfp_single vsm, vsd; |
312 | int ret, tm; | 318 | int ret, tm; |
313 | 319 | ||
314 | vfp_single_unpack(&vsm, m); | 320 | vfp_single_unpack(&vsm, m); |
315 | tm = vfp_single_type(&vsm); | 321 | tm = vfp_single_type(&vsm); |
316 | if (tm & (VFP_NAN|VFP_INFINITY)) { | 322 | if (tm & (VFP_NAN|VFP_INFINITY)) { |
317 | struct vfp_single *vsp = &vsd; | 323 | struct vfp_single *vsp = &vsd; |
318 | 324 | ||
319 | if (tm & VFP_NAN) | 325 | if (tm & VFP_NAN) |
320 | ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr); | 326 | ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr); |
321 | else if (vsm.sign == 0) { | 327 | else if (vsm.sign == 0) { |
322 | sqrt_copy: | 328 | sqrt_copy: |
323 | vsp = &vsm; | 329 | vsp = &vsm; |
324 | ret = 0; | 330 | ret = 0; |
325 | } else { | 331 | } else { |
326 | sqrt_invalid: | 332 | sqrt_invalid: |
327 | vsp = &vfp_single_default_qnan; | 333 | vsp = &vfp_single_default_qnan; |
328 | ret = FPSCR_IOC; | 334 | ret = FPSCR_IOC; |
329 | } | 335 | } |
330 | vfp_put_float(sd, vfp_single_pack(vsp)); | 336 | vfp_put_float(sd, vfp_single_pack(vsp)); |
331 | return ret; | 337 | return ret; |
332 | } | 338 | } |
333 | 339 | ||
334 | /* | 340 | /* |
335 | * sqrt(+/- 0) == +/- 0 | 341 | * sqrt(+/- 0) == +/- 0 |
336 | */ | 342 | */ |
337 | if (tm & VFP_ZERO) | 343 | if (tm & VFP_ZERO) |
338 | goto sqrt_copy; | 344 | goto sqrt_copy; |
339 | 345 | ||
340 | /* | 346 | /* |
341 | * Normalise a denormalised number | 347 | * Normalise a denormalised number |
342 | */ | 348 | */ |
343 | if (tm & VFP_DENORMAL) | 349 | if (tm & VFP_DENORMAL) |
344 | vfp_single_normalise_denormal(&vsm); | 350 | vfp_single_normalise_denormal(&vsm); |
345 | 351 | ||
346 | /* | 352 | /* |
347 | * sqrt(<0) = invalid | 353 | * sqrt(<0) = invalid |
348 | */ | 354 | */ |
349 | if (vsm.sign) | 355 | if (vsm.sign) |
350 | goto sqrt_invalid; | 356 | goto sqrt_invalid; |
351 | 357 | ||
352 | vfp_single_dump("sqrt", &vsm); | 358 | vfp_single_dump("sqrt", &vsm); |
353 | 359 | ||
354 | /* | 360 | /* |
355 | * Estimate the square root. | 361 | * Estimate the square root. |
356 | */ | 362 | */ |
357 | vsd.sign = 0; | 363 | vsd.sign = 0; |
358 | vsd.exponent = ((vsm.exponent - 127) >> 1) + 127; | 364 | vsd.exponent = ((vsm.exponent - 127) >> 1) + 127; |
359 | vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2; | 365 | vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2; |
360 | 366 | ||
361 | vfp_single_dump("sqrt estimate", &vsd); | 367 | vfp_single_dump("sqrt estimate", &vsd); |
362 | 368 | ||
363 | /* | 369 | /* |
364 | * And now adjust. | 370 | * And now adjust. |
365 | */ | 371 | */ |
366 | if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) { | 372 | if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) { |
367 | if (vsd.significand < 2) { | 373 | if (vsd.significand < 2) { |
368 | vsd.significand = 0xffffffff; | 374 | vsd.significand = 0xffffffff; |
369 | } else { | 375 | } else { |
370 | u64 term; | 376 | u64 term; |
371 | s64 rem; | 377 | s64 rem; |
372 | vsm.significand <<= !(vsm.exponent & 1); | 378 | vsm.significand <<= !(vsm.exponent & 1); |
373 | term = (u64)vsd.significand * vsd.significand; | 379 | term = (u64)vsd.significand * vsd.significand; |
374 | rem = ((u64)vsm.significand << 32) - term; | 380 | rem = ((u64)vsm.significand << 32) - term; |
375 | 381 | ||
376 | pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem); | 382 | pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem); |
377 | 383 | ||
378 | while (rem < 0) { | 384 | while (rem < 0) { |
379 | vsd.significand -= 1; | 385 | vsd.significand -= 1; |
380 | rem += ((u64)vsd.significand << 1) | 1; | 386 | rem += ((u64)vsd.significand << 1) | 1; |
381 | } | 387 | } |
382 | vsd.significand |= rem != 0; | 388 | vsd.significand |= rem != 0; |
383 | } | 389 | } |
384 | } | 390 | } |
385 | vsd.significand = vfp_shiftright32jamming(vsd.significand, 1); | 391 | vsd.significand = vfp_shiftright32jamming(vsd.significand, 1); |
386 | 392 | ||
387 | return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt"); | 393 | return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt"); |
388 | } | 394 | } |
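
Why the correction loop above converges: with z the estimate and rem = m * 2^32 - z^2, the identity (z-1)^2 = z^2 - (2*(z-1) + 1) lets rem be updated incrementally as z walks down. The same step in isolation (assuming z is already a close estimate, so the signed difference fits in 64 bits):

#include <stdint.h>

static uint32_t sqrt_correct(uint32_t m_sig, uint32_t z)
{
	int64_t rem = (int64_t)(((uint64_t)m_sig << 32) - (uint64_t)z * z);

	while (rem < 0) {
		z -= 1;
		rem += ((uint64_t)z << 1) | 1;	/* add back 2*z + 1 */
	}
	return z | (rem != 0);			/* sticky bit for rounding */
}
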
389 | 395 | ||
390 | /* | 396 | /* |
391 | * Equal := ZC | 397 | * Equal := ZC |
392 | * Less than := N | 398 | * Less than := N |
393 | * Greater than := C | 399 | * Greater than := C |
394 | * Unordered := CV | 400 | * Unordered := CV |
395 | */ | 401 | */ |
396 | static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr) | 402 | static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr) |
397 | { | 403 | { |
398 | s32 d; | 404 | s32 d; |
399 | u32 ret = 0; | 405 | u32 ret = 0; |
400 | 406 | ||
401 | d = vfp_get_float(sd); | 407 | d = vfp_get_float(sd); |
402 | if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) { | 408 | if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) { |
403 | ret |= FPSCR_C | FPSCR_V; | 409 | ret |= FPSCR_C | FPSCR_V; |
404 | if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) | 410 | if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) |
405 | /* | 411 | /* |
406 | * Signalling NaN, or signalling on quiet NaN | 412 | * Signalling NaN, or signalling on quiet NaN |
407 | */ | 413 | */ |
408 | ret |= FPSCR_IOC; | 414 | ret |= FPSCR_IOC; |
409 | } | 415 | } |
410 | 416 | ||
411 | if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) { | 417 | if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) { |
412 | ret |= FPSCR_C | FPSCR_V; | 418 | ret |= FPSCR_C | FPSCR_V; |
413 | if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) | 419 | if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) |
414 | /* | 420 | /* |
415 | * Signalling NaN, or signalling on quiet NaN | 421 | * Signalling NaN, or signalling on quiet NaN |
416 | */ | 422 | */ |
417 | ret |= FPSCR_IOC; | 423 | ret |= FPSCR_IOC; |
418 | } | 424 | } |
419 | 425 | ||
420 | if (ret == 0) { | 426 | if (ret == 0) { |
421 | if (d == m || vfp_single_packed_abs(d | m) == 0) { | 427 | if (d == m || vfp_single_packed_abs(d | m) == 0) { |
422 | /* | 428 | /* |
423 | * equal | 429 | * equal |
424 | */ | 430 | */ |
425 | ret |= FPSCR_Z | FPSCR_C; | 431 | ret |= FPSCR_Z | FPSCR_C; |
426 | } else if (vfp_single_packed_sign(d ^ m)) { | 432 | } else if (vfp_single_packed_sign(d ^ m)) { |
427 | /* | 433 | /* |
428 | * different signs | 434 | * different signs |
429 | */ | 435 | */ |
430 | if (vfp_single_packed_sign(d)) | 436 | if (vfp_single_packed_sign(d)) |
431 | /* | 437 | /* |
432 | * d is negative, so d < m | 438 | * d is negative, so d < m |
433 | */ | 439 | */ |
434 | ret |= FPSCR_N; | 440 | ret |= FPSCR_N; |
435 | else | 441 | else |
436 | /* | 442 | /* |
437 | * d is positive, so d > m | 443 | * d is positive, so d > m |
438 | */ | 444 | */ |
439 | ret |= FPSCR_C; | 445 | ret |= FPSCR_C; |
440 | } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) { | 446 | } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) { |
441 | /* | 447 | /* |
442 | * d < m | 448 | * d < m |
443 | */ | 449 | */ |
444 | ret |= FPSCR_N; | 450 | ret |= FPSCR_N; |
445 | } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) { | 451 | } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) { |
446 | /* | 452 | /* |
447 | * d > m | 453 | * d > m |
448 | */ | 454 | */ |
449 | ret |= FPSCR_C; | 455 | ret |= FPSCR_C; |
450 | } | 456 | } |
451 | } | 457 | } |
452 | return ret; | 458 | return ret; |
453 | } | 459 | } |
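
The flag encodings named in the comment block above, spelled out against the FPSCR condition bit positions (N=31, Z=30, C=29, V=28 per ARM DDI0100; assumed to match the FPSCR_* constants used here):

#define FPSCR_N (1u << 31)
#define FPSCR_Z (1u << 30)
#define FPSCR_C (1u << 29)
#define FPSCR_V (1u << 28)

enum {
	CMP_EQUAL     = FPSCR_Z | FPSCR_C,	/* Equal        := ZC */
	CMP_LESS      = FPSCR_N,		/* Less than    := N  */
	CMP_GREATER   = FPSCR_C,		/* Greater than := C  */
	CMP_UNORDERED = FPSCR_C | FPSCR_V,	/* Unordered    := CV */
};
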
454 | 460 | ||
455 | static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr) | 461 | static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr) |
456 | { | 462 | { |
457 | return vfp_compare(sd, 0, m, fpscr); | 463 | return vfp_compare(sd, 0, m, fpscr); |
458 | } | 464 | } |
459 | 465 | ||
460 | static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr) | 466 | static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr) |
461 | { | 467 | { |
462 | return vfp_compare(sd, 1, m, fpscr); | 468 | return vfp_compare(sd, 1, m, fpscr); |
463 | } | 469 | } |
464 | 470 | ||
465 | static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr) | 471 | static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr) |
466 | { | 472 | { |
467 | return vfp_compare(sd, 0, 0, fpscr); | 473 | return vfp_compare(sd, 0, 0, fpscr); |
468 | } | 474 | } |
469 | 475 | ||
470 | static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr) | 476 | static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr) |
471 | { | 477 | { |
472 | return vfp_compare(sd, 1, 0, fpscr); | 478 | return vfp_compare(sd, 1, 0, fpscr); |
473 | } | 479 | } |
474 | 480 | ||
475 | static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr) | 481 | static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr) |
476 | { | 482 | { |
477 | struct vfp_single vsm; | 483 | struct vfp_single vsm; |
478 | struct vfp_double vdd; | 484 | struct vfp_double vdd; |
479 | int tm; | 485 | int tm; |
480 | u32 exceptions = 0; | 486 | u32 exceptions = 0; |
481 | 487 | ||
482 | vfp_single_unpack(&vsm, m); | 488 | vfp_single_unpack(&vsm, m); |
483 | 489 | ||
484 | tm = vfp_single_type(&vsm); | 490 | tm = vfp_single_type(&vsm); |
485 | 491 | ||
486 | /* | 492 | /* |
487 | * If we have a signalling NaN, signal invalid operation. | 493 | * If we have a signalling NaN, signal invalid operation. |
488 | */ | 494 | */ |
489 | if (tm == VFP_SNAN) | 495 | if (tm == VFP_SNAN) |
490 | exceptions = FPSCR_IOC; | 496 | exceptions = FPSCR_IOC; |
491 | 497 | ||
492 | if (tm & VFP_DENORMAL) | 498 | if (tm & VFP_DENORMAL) |
493 | vfp_single_normalise_denormal(&vsm); | 499 | vfp_single_normalise_denormal(&vsm); |
494 | 500 | ||
495 | vdd.sign = vsm.sign; | 501 | vdd.sign = vsm.sign; |
496 | vdd.significand = (u64)vsm.significand << 32; | 502 | vdd.significand = (u64)vsm.significand << 32; |
497 | 503 | ||
498 | /* | 504 | /* |
499 | * If we have an infinity or NaN, the exponent must be 2047. | 505 | * If we have an infinity or NaN, the exponent must be 2047. |
500 | */ | 506 | */ |
501 | if (tm & (VFP_INFINITY|VFP_NAN)) { | 507 | if (tm & (VFP_INFINITY|VFP_NAN)) { |
502 | vdd.exponent = 2047; | 508 | vdd.exponent = 2047; |
503 | if (tm & VFP_NAN) | 509 | if (tm & VFP_NAN) |
504 | vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; | 510 | vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; |
505 | goto pack_nan; | 511 | goto pack_nan; |
506 | } else if (tm & VFP_ZERO) | 512 | } else if (tm & VFP_ZERO) |
507 | vdd.exponent = 0; | 513 | vdd.exponent = 0; |
508 | else | 514 | else |
509 | vdd.exponent = vsm.exponent + (1023 - 127); | 515 | vdd.exponent = vsm.exponent + (1023 - 127); |
510 | 516 | ||
511 | /* | 517 | /* |
512 | * Technically, if bit 0 of dd is set, this is an invalid | 518 | * Technically, if bit 0 of dd is set, this is an invalid |
513 | * instruction. However, we ignore this for efficiency. | 519 | * instruction. However, we ignore this for efficiency. |
514 | */ | 520 | */ |
515 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd"); | 521 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd"); |
516 | 522 | ||
517 | pack_nan: | 523 | pack_nan: |
518 | vfp_put_double(dd, vfp_double_pack(&vdd)); | 524 | vfp_put_double(dd, vfp_double_pack(&vdd)); |
519 | return exceptions; | 525 | return exceptions; |
520 | } | 526 | } |
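
The exponent arithmetic in fcvtd is a straight rebias: a finite, normalised single with biased exponent e becomes a double with biased exponent e + (1023 - 127), and the 23-bit fraction is left-aligned into the 52-bit field. A sketch for that case only (zeros, denormals, NaNs and infinities need the special paths above):

#include <stdint.h>

static uint64_t single_to_double_bits(uint32_t s)
{
	uint64_t sign = (uint64_t)(s >> 31) << 63;
	uint32_t exp  = (s >> 23) & 0xffu;
	uint64_t frac = (uint64_t)(s & 0x7fffffu) << (52 - 23);

	return sign | ((uint64_t)(exp + (1023 - 127)) << 52) | frac;
}
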
521 | 527 | ||
522 | static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr) | 528 | static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr) |
523 | { | 529 | { |
524 | struct vfp_single vs; | 530 | struct vfp_single vs; |
525 | 531 | ||
526 | vs.sign = 0; | 532 | vs.sign = 0; |
527 | vs.exponent = 127 + 31 - 1; | 533 | vs.exponent = 127 + 31 - 1; |
528 | vs.significand = (u32)m; | 534 | vs.significand = (u32)m; |
529 | 535 | ||
530 | return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito"); | 536 | return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito"); |
531 | } | 537 | } |
532 | 538 | ||
533 | static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr) | 539 | static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr) |
534 | { | 540 | { |
535 | struct vfp_single vs; | 541 | struct vfp_single vs; |
536 | 542 | ||
537 | vs.sign = (m & 0x80000000) >> 16; | 543 | vs.sign = (m & 0x80000000) >> 16; |
538 | vs.exponent = 127 + 31 - 1; | 544 | vs.exponent = 127 + 31 - 1; |
539 | vs.significand = vs.sign ? -m : m; | 545 | vs.significand = vs.sign ? -m : m; |
540 | 546 | ||
541 | return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito"); | 547 | return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito"); |
542 | } | 548 | } |
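
Both integer-to-float paths load the 32-bit value straight into the significand with exponent 127 + 31 - 1 and leave the shifting and rounding to vfp_single_normaliseround(); fsito additionally splits sign from magnitude first. A sketch of that split, using the unpacked-sign convention seen elsewhere in this file (0x8000 negative, 0 positive):

#include <stdint.h>

struct unpacked { uint16_t sign; int exponent; uint32_t significand; };

static struct unpacked fsito_unpack(int32_t m)
{
	struct unpacked v;

	v.sign        = ((uint32_t)m & 0x80000000u) >> 16;	/* 0x8000 if negative */
	v.exponent    = 127 + 31 - 1;
	v.significand = v.sign ? 0u - (uint32_t)m : (uint32_t)m;
	return v;
}
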
543 | 549 | ||
544 | static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr) | 550 | static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr) |
545 | { | 551 | { |
546 | struct vfp_single vsm; | 552 | struct vfp_single vsm; |
547 | u32 d, exceptions = 0; | 553 | u32 d, exceptions = 0; |
548 | int rmode = fpscr & FPSCR_RMODE_MASK; | 554 | int rmode = fpscr & FPSCR_RMODE_MASK; |
549 | int tm; | 555 | int tm; |
550 | 556 | ||
551 | vfp_single_unpack(&vsm, m); | 557 | vfp_single_unpack(&vsm, m); |
552 | vfp_single_dump("VSM", &vsm); | 558 | vfp_single_dump("VSM", &vsm); |
553 | 559 | ||
554 | /* | 560 | /* |
555 | * Do we have a denormalised number? | 561 | * Do we have a denormalised number? |
556 | */ | 562 | */ |
557 | tm = vfp_single_type(&vsm); | 563 | tm = vfp_single_type(&vsm); |
558 | if (tm & VFP_DENORMAL) | 564 | if (tm & VFP_DENORMAL) |
559 | exceptions |= FPSCR_IDC; | 565 | exceptions |= FPSCR_IDC; |
560 | 566 | ||
561 | if (tm & VFP_NAN) | 567 | if (tm & VFP_NAN) |
562 | vsm.sign = 0; | 568 | vsm.sign = 0; |
563 | 569 | ||
564 | if (vsm.exponent >= 127 + 32) { | 570 | if (vsm.exponent >= 127 + 32) { |
565 | d = vsm.sign ? 0 : 0xffffffff; | 571 | d = vsm.sign ? 0 : 0xffffffff; |
566 | exceptions = FPSCR_IOC; | 572 | exceptions = FPSCR_IOC; |
567 | } else if (vsm.exponent >= 127 - 1) { | 573 | } else if (vsm.exponent >= 127 - 1) { |
568 | int shift = 127 + 31 - vsm.exponent; | 574 | int shift = 127 + 31 - vsm.exponent; |
569 | u32 rem, incr = 0; | 575 | u32 rem, incr = 0; |
570 | 576 | ||
571 | /* | 577 | /* |
572 | * 2^0 <= m < 2^32-2^8 | 578 | * 2^0 <= m < 2^32-2^8 |
573 | */ | 579 | */ |
574 | d = (vsm.significand << 1) >> shift; | 580 | d = (vsm.significand << 1) >> shift; |
575 | rem = vsm.significand << (33 - shift); | 581 | rem = vsm.significand << (33 - shift); |
576 | 582 | ||
577 | if (rmode == FPSCR_ROUND_NEAREST) { | 583 | if (rmode == FPSCR_ROUND_NEAREST) { |
578 | incr = 0x80000000; | 584 | incr = 0x80000000; |
579 | if ((d & 1) == 0) | 585 | if ((d & 1) == 0) |
580 | incr -= 1; | 586 | incr -= 1; |
581 | } else if (rmode == FPSCR_ROUND_TOZERO) { | 587 | } else if (rmode == FPSCR_ROUND_TOZERO) { |
582 | incr = 0; | 588 | incr = 0; |
583 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) { | 589 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) { |
584 | incr = ~0; | 590 | incr = ~0; |
585 | } | 591 | } |
586 | 592 | ||
587 | if ((rem + incr) < rem) { | 593 | if ((rem + incr) < rem) { |
588 | if (d < 0xffffffff) | 594 | if (d < 0xffffffff) |
589 | d += 1; | 595 | d += 1; |
590 | else | 596 | else |
591 | exceptions |= FPSCR_IOC; | 597 | exceptions |= FPSCR_IOC; |
592 | } | 598 | } |
593 | 599 | ||
594 | if (d && vsm.sign) { | 600 | if (d && vsm.sign) { |
595 | d = 0; | 601 | d = 0; |
596 | exceptions |= FPSCR_IOC; | 602 | exceptions |= FPSCR_IOC; |
597 | } else if (rem) | 603 | } else if (rem) |
598 | exceptions |= FPSCR_IXC; | 604 | exceptions |= FPSCR_IXC; |
599 | } else { | 605 | } else { |
600 | d = 0; | 606 | d = 0; |
601 | if (vsm.exponent | vsm.significand) { | 607 | if (vsm.exponent | vsm.significand) { |
602 | exceptions |= FPSCR_IXC; | 608 | exceptions |= FPSCR_IXC; |
603 | if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0) | 609 | if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0) |
604 | d = 1; | 610 | d = 1; |
605 | else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) { | 611 | else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) { |
606 | d = 0; | 612 | d = 0; |
607 | exceptions |= FPSCR_IOC; | 613 | exceptions |= FPSCR_IOC; |
608 | } | 614 | } |
609 | } | 615 | } |
610 | } | 616 | } |
611 | 617 | ||
612 | pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); | 618 | pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); |
613 | 619 | ||
614 | vfp_put_float(sd, d); | 620 | vfp_put_float(sd, d); |
615 | 621 | ||
616 | return exceptions; | 622 | return exceptions; |
617 | } | 623 | } |
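
The rounding in ftoui (and in ftosi below) relies on an overflow trick: rem holds the discarded fraction scaled so that 0x80000000 is exactly half a unit in the last place, and carry-out of rem + incr means "round up". Subtracting 1 from incr when the quotient is even makes exact ties land on even values. In isolation:

#include <stdint.h>

static uint32_t round_nearest_even(uint32_t d, uint32_t rem)
{
	uint32_t incr = 0x80000000u;

	if ((d & 1) == 0)
		incr -= 1;		/* exact ties stay on even d */
	if (rem + incr < rem)		/* unsigned overflow == carry out */
		d += 1;
	return d;
}
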
618 | 624 | ||
619 | static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr) | 625 | static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr) |
620 | { | 626 | { |
621 | return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO); | 627 | return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO); |
622 | } | 628 | } |
623 | 629 | ||
624 | static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr) | 630 | static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr) |
625 | { | 631 | { |
626 | struct vfp_single vsm; | 632 | struct vfp_single vsm; |
627 | u32 d, exceptions = 0; | 633 | u32 d, exceptions = 0; |
628 | int rmode = fpscr & FPSCR_RMODE_MASK; | 634 | int rmode = fpscr & FPSCR_RMODE_MASK; |
629 | 635 | ||
630 | vfp_single_unpack(&vsm, m); | 636 | vfp_single_unpack(&vsm, m); |
631 | vfp_single_dump("VSM", &vsm); | 637 | vfp_single_dump("VSM", &vsm); |
632 | 638 | ||
633 | /* | 639 | /* |
634 | * Do we have a denormalised number? | 640 | * Do we have a denormalised number? |
635 | */ | 641 | */ |
636 | if (vfp_single_type(&vsm) & VFP_DENORMAL) | 642 | if (vfp_single_type(&vsm) & VFP_DENORMAL) |
637 | exceptions |= FPSCR_IDC; | 643 | exceptions |= FPSCR_IDC; |
638 | 644 | ||
639 | if (vsm.exponent >= 127 + 32) { | 645 | if (vsm.exponent >= 127 + 32) { |
640 | /* | 646 | /* |
641 | * m >= 2^31-2^7: invalid | 647 | * m >= 2^31-2^7: invalid |
642 | */ | 648 | */ |
643 | d = 0x7fffffff; | 649 | d = 0x7fffffff; |
644 | if (vsm.sign) | 650 | if (vsm.sign) |
645 | d = ~d; | 651 | d = ~d; |
646 | exceptions |= FPSCR_IOC; | 652 | exceptions |= FPSCR_IOC; |
647 | } else if (vsm.exponent >= 127 - 1) { | 653 | } else if (vsm.exponent >= 127 - 1) { |
648 | int shift = 127 + 31 - vsm.exponent; | 654 | int shift = 127 + 31 - vsm.exponent; |
649 | u32 rem, incr = 0; | 655 | u32 rem, incr = 0; |
650 | 656 | ||
651 | /* 2^0 <= m <= 2^31-2^7 */ | 657 | /* 2^0 <= m <= 2^31-2^7 */ |
652 | d = (vsm.significand << 1) >> shift; | 658 | d = (vsm.significand << 1) >> shift; |
653 | rem = vsm.significand << (33 - shift); | 659 | rem = vsm.significand << (33 - shift); |
654 | 660 | ||
655 | if (rmode == FPSCR_ROUND_NEAREST) { | 661 | if (rmode == FPSCR_ROUND_NEAREST) { |
656 | incr = 0x80000000; | 662 | incr = 0x80000000; |
657 | if ((d & 1) == 0) | 663 | if ((d & 1) == 0) |
658 | incr -= 1; | 664 | incr -= 1; |
659 | } else if (rmode == FPSCR_ROUND_TOZERO) { | 665 | } else if (rmode == FPSCR_ROUND_TOZERO) { |
660 | incr = 0; | 666 | incr = 0; |
661 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) { | 667 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) { |
662 | incr = ~0; | 668 | incr = ~0; |
663 | } | 669 | } |
664 | 670 | ||
665 | if ((rem + incr) < rem && d < 0xffffffff) | 671 | if ((rem + incr) < rem && d < 0xffffffff) |
666 | d += 1; | 672 | d += 1; |
667 | if (d > 0x7fffffff + (vsm.sign != 0)) { | 673 | if (d > 0x7fffffff + (vsm.sign != 0)) { |
668 | d = 0x7fffffff + (vsm.sign != 0); | 674 | d = 0x7fffffff + (vsm.sign != 0); |
669 | exceptions |= FPSCR_IOC; | 675 | exceptions |= FPSCR_IOC; |
670 | } else if (rem) | 676 | } else if (rem) |
671 | exceptions |= FPSCR_IXC; | 677 | exceptions |= FPSCR_IXC; |
672 | 678 | ||
673 | if (vsm.sign) | 679 | if (vsm.sign) |
674 | d = -d; | 680 | d = -d; |
675 | } else { | 681 | } else { |
676 | d = 0; | 682 | d = 0; |
677 | if (vsm.exponent | vsm.significand) { | 683 | if (vsm.exponent | vsm.significand) { |
678 | exceptions |= FPSCR_IXC; | 684 | exceptions |= FPSCR_IXC; |
679 | if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0) | 685 | if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0) |
680 | d = 1; | 686 | d = 1; |
681 | else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) | 687 | else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) |
682 | d = -1; | 688 | d = -1; |
683 | } | 689 | } |
684 | } | 690 | } |
685 | 691 | ||
686 | pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); | 692 | pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); |
687 | 693 | ||
688 | vfp_put_float(sd, (s32)d); | 694 | vfp_put_float(sd, (s32)d); |
689 | 695 | ||
690 | return exceptions; | 696 | return exceptions; |
691 | } | 697 | } |
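
The clamp in ftosi accounts for two's-complement asymmetry: the largest positive result is 0x7fffffff but the largest negative magnitude is 0x80000000, hence the "+ (vsm.sign != 0)" in the bound. A sketch, keeping the arithmetic unsigned until the final cast as the code above does:

#include <stdint.h>

static int32_t clamp_signed(uint32_t d, int negative, uint32_t *exceptions)
{
	uint32_t limit = 0x7fffffffu + (negative != 0);

	if (d > limit) {
		d = limit;
		*exceptions |= 1;	/* stand-in for FPSCR_IOC */
	}
	return (int32_t)(negative ? 0u - d : d);
}
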
692 | 698 | ||
693 | static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr) | 699 | static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr) |
694 | { | 700 | { |
695 | return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO); | 701 | return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO); |
696 | } | 702 | } |
697 | 703 | ||
698 | static u32 (* const fop_extfns[32])(int sd, int unused, s32 m, u32 fpscr) = { | 704 | static u32 (* const fop_extfns[32])(int sd, int unused, s32 m, u32 fpscr) = { |
699 | [FEXT_TO_IDX(FEXT_FCPY)] = vfp_single_fcpy, | 705 | [FEXT_TO_IDX(FEXT_FCPY)] = vfp_single_fcpy, |
700 | [FEXT_TO_IDX(FEXT_FABS)] = vfp_single_fabs, | 706 | [FEXT_TO_IDX(FEXT_FABS)] = vfp_single_fabs, |
701 | [FEXT_TO_IDX(FEXT_FNEG)] = vfp_single_fneg, | 707 | [FEXT_TO_IDX(FEXT_FNEG)] = vfp_single_fneg, |
702 | [FEXT_TO_IDX(FEXT_FSQRT)] = vfp_single_fsqrt, | 708 | [FEXT_TO_IDX(FEXT_FSQRT)] = vfp_single_fsqrt, |
703 | [FEXT_TO_IDX(FEXT_FCMP)] = vfp_single_fcmp, | 709 | [FEXT_TO_IDX(FEXT_FCMP)] = vfp_single_fcmp, |
704 | [FEXT_TO_IDX(FEXT_FCMPE)] = vfp_single_fcmpe, | 710 | [FEXT_TO_IDX(FEXT_FCMPE)] = vfp_single_fcmpe, |
705 | [FEXT_TO_IDX(FEXT_FCMPZ)] = vfp_single_fcmpz, | 711 | [FEXT_TO_IDX(FEXT_FCMPZ)] = vfp_single_fcmpz, |
706 | [FEXT_TO_IDX(FEXT_FCMPEZ)] = vfp_single_fcmpez, | 712 | [FEXT_TO_IDX(FEXT_FCMPEZ)] = vfp_single_fcmpez, |
707 | [FEXT_TO_IDX(FEXT_FCVT)] = vfp_single_fcvtd, | 713 | [FEXT_TO_IDX(FEXT_FCVT)] = vfp_single_fcvtd, |
708 | [FEXT_TO_IDX(FEXT_FUITO)] = vfp_single_fuito, | 714 | [FEXT_TO_IDX(FEXT_FUITO)] = vfp_single_fuito, |
709 | [FEXT_TO_IDX(FEXT_FSITO)] = vfp_single_fsito, | 715 | [FEXT_TO_IDX(FEXT_FSITO)] = vfp_single_fsito, |
710 | [FEXT_TO_IDX(FEXT_FTOUI)] = vfp_single_ftoui, | 716 | [FEXT_TO_IDX(FEXT_FTOUI)] = vfp_single_ftoui, |
711 | [FEXT_TO_IDX(FEXT_FTOUIZ)] = vfp_single_ftouiz, | 717 | [FEXT_TO_IDX(FEXT_FTOUIZ)] = vfp_single_ftouiz, |
712 | [FEXT_TO_IDX(FEXT_FTOSI)] = vfp_single_ftosi, | 718 | [FEXT_TO_IDX(FEXT_FTOSI)] = vfp_single_ftosi, |
713 | [FEXT_TO_IDX(FEXT_FTOSIZ)] = vfp_single_ftosiz, | 719 | [FEXT_TO_IDX(FEXT_FTOSIZ)] = vfp_single_ftosiz, |
714 | }; | 720 | }; |
715 | 721 | ||
716 | 722 | ||
717 | 723 | ||
718 | 724 | ||
719 | 725 | ||
720 | static u32 | 726 | static u32 |
721 | vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn, | 727 | vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn, |
722 | struct vfp_single *vsm, u32 fpscr) | 728 | struct vfp_single *vsm, u32 fpscr) |
723 | { | 729 | { |
724 | struct vfp_single *vsp; | 730 | struct vfp_single *vsp; |
725 | u32 exceptions = 0; | 731 | u32 exceptions = 0; |
726 | int tn, tm; | 732 | int tn, tm; |
727 | 733 | ||
728 | tn = vfp_single_type(vsn); | 734 | tn = vfp_single_type(vsn); |
729 | tm = vfp_single_type(vsm); | 735 | tm = vfp_single_type(vsm); |
730 | 736 | ||
731 | if (tn & tm & VFP_INFINITY) { | 737 | if (tn & tm & VFP_INFINITY) { |
732 | /* | 738 | /* |
733 | * Two infinities. Are they different signs? | 739 | * Two infinities. Are they different signs? |
734 | */ | 740 | */ |
735 | if (vsn->sign ^ vsm->sign) { | 741 | if (vsn->sign ^ vsm->sign) { |
736 | /* | 742 | /* |
737 | * different signs -> invalid | 743 | * different signs -> invalid |
738 | */ | 744 | */ |
739 | exceptions = FPSCR_IOC; | 745 | exceptions = FPSCR_IOC; |
740 | vsp = &vfp_single_default_qnan; | 746 | vsp = &vfp_single_default_qnan; |
741 | } else { | 747 | } else { |
742 | /* | 748 | /* |
743 | * same signs -> valid | 749 | * same signs -> valid |
744 | */ | 750 | */ |
745 | vsp = vsn; | 751 | vsp = vsn; |
746 | } | 752 | } |
747 | } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) { | 753 | } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) { |
748 | /* | 754 | /* |
749 | * One infinity and one number -> infinity | 755 | * One infinity and one number -> infinity |
750 | */ | 756 | */ |
751 | vsp = vsn; | 757 | vsp = vsn; |
752 | } else { | 758 | } else { |
753 | /* | 759 | /* |
754 | * 'n' is a NaN of some type | 760 | * 'n' is a NaN of some type |
755 | */ | 761 | */ |
756 | return vfp_propagate_nan(vsd, vsn, vsm, fpscr); | 762 | return vfp_propagate_nan(vsd, vsn, vsm, fpscr); |
757 | } | 763 | } |
758 | *vsd = *vsp; | 764 | *vsd = *vsp; |
759 | return exceptions; | 765 | return exceptions; |
760 | } | 766 | } |
761 | 767 | ||
762 | static u32 | 768 | static u32 |
763 | vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn, | 769 | vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn, |
764 | struct vfp_single *vsm, u32 fpscr) | 770 | struct vfp_single *vsm, u32 fpscr) |
765 | { | 771 | { |
766 | u32 exp_diff, m_sig; | 772 | u32 exp_diff, m_sig; |
767 | 773 | ||
768 | if (vsn->significand & 0x80000000 || | 774 | if (vsn->significand & 0x80000000 || |
769 | vsm->significand & 0x80000000) { | 775 | vsm->significand & 0x80000000) { |
770 | pr_info("VFP: bad FP values in %s\n", __func__); | 776 | pr_info("VFP: bad FP values in %s\n", __func__); |
771 | vfp_single_dump("VSN", vsn); | 777 | vfp_single_dump("VSN", vsn); |
772 | vfp_single_dump("VSM", vsm); | 778 | vfp_single_dump("VSM", vsm); |
773 | } | 779 | } |
774 | 780 | ||
775 | /* | 781 | /* |
776 | * Ensure that 'n' is the largest magnitude number. Note that | 782 | * Ensure that 'n' is the largest magnitude number. Note that |
777 | * if 'n' and 'm' have equal exponents, we do not swap them. | 783 | * if 'n' and 'm' have equal exponents, we do not swap them. |
778 | * This ensures that NaN propagation works correctly. | 784 | * This ensures that NaN propagation works correctly. |
779 | */ | 785 | */ |
780 | if (vsn->exponent < vsm->exponent) { | 786 | if (vsn->exponent < vsm->exponent) { |
781 | struct vfp_single *t = vsn; | 787 | struct vfp_single *t = vsn; |
782 | vsn = vsm; | 788 | vsn = vsm; |
783 | vsm = t; | 789 | vsm = t; |
784 | } | 790 | } |
785 | 791 | ||
786 | /* | 792 | /* |
787 | * Is 'n' an infinity or a NaN? Note that 'm' may be a number, | 793 | * Is 'n' an infinity or a NaN? Note that 'm' may be a number, |
788 | * infinity or a NaN here. | 794 | * infinity or a NaN here. |
789 | */ | 795 | */ |
790 | if (vsn->exponent == 255) | 796 | if (vsn->exponent == 255) |
791 | return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr); | 797 | return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr); |
792 | 798 | ||
793 | /* | 799 | /* |
794 | * We have two proper numbers, where 'vsn' is the larger magnitude. | 800 | * We have two proper numbers, where 'vsn' is the larger magnitude. |
795 | * | 801 | * |
796 | * Copy 'n' to 'd' before doing the arithmetic. | 802 | * Copy 'n' to 'd' before doing the arithmetic. |
797 | */ | 803 | */ |
798 | *vsd = *vsn; | 804 | *vsd = *vsn; |
799 | 805 | ||
800 | /* | 806 | /* |
801 | * Align both numbers. | 807 | * Align both numbers. |
802 | */ | 808 | */ |
803 | exp_diff = vsn->exponent - vsm->exponent; | 809 | exp_diff = vsn->exponent - vsm->exponent; |
804 | m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff); | 810 | m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff); |
805 | 811 | ||
806 | /* | 812 | /* |
807 | * If the signs are different, we are really subtracting. | 813 | * If the signs are different, we are really subtracting. |
808 | */ | 814 | */ |
809 | if (vsn->sign ^ vsm->sign) { | 815 | if (vsn->sign ^ vsm->sign) { |
810 | m_sig = vsn->significand - m_sig; | 816 | m_sig = vsn->significand - m_sig; |
811 | if ((s32)m_sig < 0) { | 817 | if ((s32)m_sig < 0) { |
812 | vsd->sign = vfp_sign_negate(vsd->sign); | 818 | vsd->sign = vfp_sign_negate(vsd->sign); |
813 | m_sig = -m_sig; | 819 | m_sig = -m_sig; |
814 | } else if (m_sig == 0) { | 820 | } else if (m_sig == 0) { |
815 | vsd->sign = (fpscr & FPSCR_RMODE_MASK) == | 821 | vsd->sign = (fpscr & FPSCR_RMODE_MASK) == |
816 | FPSCR_ROUND_MINUSINF ? 0x8000 : 0; | 822 | FPSCR_ROUND_MINUSINF ? 0x8000 : 0; |
817 | } | 823 | } |
818 | } else { | 824 | } else { |
819 | m_sig = vsn->significand + m_sig; | 825 | m_sig = vsn->significand + m_sig; |
820 | } | 826 | } |
821 | vsd->significand = m_sig; | 827 | vsd->significand = m_sig; |
822 | 828 | ||
823 | return 0; | 829 | return 0; |
824 | } | 830 | } |
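
The m_sig == 0 branch above encodes an IEEE-754 corner case: an exact x + (-x) is +0 in every rounding mode except round-toward-minus-infinity, where it is -0. The same behaviour can be observed on a host FPU (assuming it honours fesetround(); some compilers need -frounding-math to keep the additions at run time):

#include <fenv.h>
#include <math.h>
#include <stdio.h>

int main(void)
{
	volatile float x = 1.5f;

	fesetround(FE_DOWNWARD);
	printf("%d\n", !!signbit(x + (-x)));	/* 1: result is -0 */
	fesetround(FE_TONEAREST);
	printf("%d\n", !!signbit(x + (-x)));	/* 0: result is +0 */
	return 0;
}
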
825 | 831 | ||
826 | static u32 | 832 | static u32 |
827 | vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr) | 833 | vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr) |
828 | { | 834 | { |
829 | vfp_single_dump("VSN", vsn); | 835 | vfp_single_dump("VSN", vsn); |
830 | vfp_single_dump("VSM", vsm); | 836 | vfp_single_dump("VSM", vsm); |
831 | 837 | ||
832 | /* | 838 | /* |
833 | * Ensure that 'n' is the largest magnitude number. Note that | 839 | * Ensure that 'n' is the largest magnitude number. Note that |
834 | * if 'n' and 'm' have equal exponents, we do not swap them. | 840 | * if 'n' and 'm' have equal exponents, we do not swap them. |
835 | * This ensures that NaN propagation works correctly. | 841 | * This ensures that NaN propagation works correctly. |
836 | */ | 842 | */ |
837 | if (vsn->exponent < vsm->exponent) { | 843 | if (vsn->exponent < vsm->exponent) { |
838 | struct vfp_single *t = vsn; | 844 | struct vfp_single *t = vsn; |
839 | vsn = vsm; | 845 | vsn = vsm; |
840 | vsm = t; | 846 | vsm = t; |
841 | pr_debug("VFP: swapping M <-> N\n"); | 847 | pr_debug("VFP: swapping M <-> N\n"); |
842 | } | 848 | } |
843 | 849 | ||
844 | vsd->sign = vsn->sign ^ vsm->sign; | 850 | vsd->sign = vsn->sign ^ vsm->sign; |
845 | 851 | ||
846 | /* | 852 | /* |
847 | * If 'n' is an infinity or NaN, handle it. 'm' may be anything. | 853 | * If 'n' is an infinity or NaN, handle it. 'm' may be anything. |
848 | */ | 854 | */ |
849 | if (vsn->exponent == 255) { | 855 | if (vsn->exponent == 255) { |
850 | if (vsn->significand || (vsm->exponent == 255 && vsm->significand)) | 856 | if (vsn->significand || (vsm->exponent == 255 && vsm->significand)) |
851 | return vfp_propagate_nan(vsd, vsn, vsm, fpscr); | 857 | return vfp_propagate_nan(vsd, vsn, vsm, fpscr); |
852 | if ((vsm->exponent | vsm->significand) == 0) { | 858 | if ((vsm->exponent | vsm->significand) == 0) { |
853 | *vsd = vfp_single_default_qnan; | 859 | *vsd = vfp_single_default_qnan; |
854 | return FPSCR_IOC; | 860 | return FPSCR_IOC; |
855 | } | 861 | } |
856 | vsd->exponent = vsn->exponent; | 862 | vsd->exponent = vsn->exponent; |
857 | vsd->significand = 0; | 863 | vsd->significand = 0; |
858 | return 0; | 864 | return 0; |
859 | } | 865 | } |
860 | 866 | ||
861 | /* | 867 | /* |
862 | * If 'm' is zero, the result is always zero. In this case, | 868 | * If 'm' is zero, the result is always zero. In this case, |
863 | * 'n' may be zero or a number, but it doesn't matter which. | 869 | * 'n' may be zero or a number, but it doesn't matter which. |
864 | */ | 870 | */ |
865 | if ((vsm->exponent | vsm->significand) == 0) { | 871 | if ((vsm->exponent | vsm->significand) == 0) { |
866 | vsd->exponent = 0; | 872 | vsd->exponent = 0; |
867 | vsd->significand = 0; | 873 | vsd->significand = 0; |
868 | return 0; | 874 | return 0; |
869 | } | 875 | } |
870 | 876 | ||
871 | /* | 877 | /* |
872 | * We add 2 to the destination exponent for the same reason as | 878 | * We add 2 to the destination exponent for the same reason as |
873 | * the addition case - though this time we have +1 from each | 879 | * the addition case - though this time we have +1 from each |
874 | * input operand. | 880 | * input operand. |
875 | */ | 881 | */ |
876 | vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2; | 882 | vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2; |
877 | vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand); | 883 | vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand); |
878 | 884 | ||
879 | vfp_single_dump("VSD", vsd); | 885 | vfp_single_dump("VSD", vsd); |
880 | return 0; | 886 | return 0; |
881 | } | 887 | } |
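
The significand step in fmul is a 32x32 -> 64-bit widening multiply reduced back to 32 bits: only the high word is kept, with any non-zero low bits jammed into bit 0 as a sticky bit so later rounding still sees the inexactness. A portable sketch of that reduction (assumed equivalent to the assembler helper used above):

#include <stdint.h>

static uint32_t mul_hi_sticky(uint32_t n, uint32_t m)
{
	uint64_t p = (uint64_t)n * m;

	return (uint32_t)(p >> 32) | ((uint32_t)p != 0);
}
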
882 | 888 | ||
883 | #define NEG_MULTIPLY (1 << 0) | 889 | #define NEG_MULTIPLY (1 << 0) |
884 | #define NEG_SUBTRACT (1 << 1) | 890 | #define NEG_SUBTRACT (1 << 1) |
885 | 891 | ||
886 | static u32 | 892 | static u32 |
887 | vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func) | 893 | vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func) |
888 | { | 894 | { |
889 | struct vfp_single vsd, vsp, vsn, vsm; | 895 | struct vfp_single vsd, vsp, vsn, vsm; |
890 | u32 exceptions; | 896 | u32 exceptions; |
891 | s32 v; | 897 | s32 v; |
892 | 898 | ||
893 | v = vfp_get_float(sn); | 899 | v = vfp_get_float(sn); |
894 | pr_debug("VFP: s%u = %08x\n", sn, v); | 900 | pr_debug("VFP: s%u = %08x\n", sn, v); |
895 | vfp_single_unpack(&vsn, v); | 901 | vfp_single_unpack(&vsn, v); |
896 | if (vsn.exponent == 0 && vsn.significand) | 902 | if (vsn.exponent == 0 && vsn.significand) |
897 | vfp_single_normalise_denormal(&vsn); | 903 | vfp_single_normalise_denormal(&vsn); |
898 | 904 | ||
899 | vfp_single_unpack(&vsm, m); | 905 | vfp_single_unpack(&vsm, m); |
900 | if (vsm.exponent == 0 && vsm.significand) | 906 | if (vsm.exponent == 0 && vsm.significand) |
901 | vfp_single_normalise_denormal(&vsm); | 907 | vfp_single_normalise_denormal(&vsm); |
902 | 908 | ||
903 | exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr); | 909 | exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr); |
904 | if (negate & NEG_MULTIPLY) | 910 | if (negate & NEG_MULTIPLY) |
905 | vsp.sign = vfp_sign_negate(vsp.sign); | 911 | vsp.sign = vfp_sign_negate(vsp.sign); |
906 | 912 | ||
907 | v = vfp_get_float(sd); | 913 | v = vfp_get_float(sd); |
908 | pr_debug("VFP: s%u = %08x\n", sd, v); | 914 | pr_debug("VFP: s%u = %08x\n", sd, v); |
909 | vfp_single_unpack(&vsn, v); | 915 | vfp_single_unpack(&vsn, v); |
910 | if (negate & NEG_SUBTRACT) | 916 | if (negate & NEG_SUBTRACT) |
911 | vsn.sign = vfp_sign_negate(vsn.sign); | 917 | vsn.sign = vfp_sign_negate(vsn.sign); |
912 | 918 | ||
913 | exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr); | 919 | exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr); |
914 | 920 | ||
915 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func); | 921 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func); |
916 | } | 922 | } |
917 | 923 | ||
918 | /* | 924 | /* |
919 | * Standard operations | 925 | * Standard operations |
920 | */ | 926 | */ |
921 | 927 | ||
922 | /* | 928 | /* |
923 | * sd = sd + (sn * sm) | 929 | * sd = sd + (sn * sm) |
924 | */ | 930 | */ |
925 | static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr) | 931 | static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr) |
926 | { | 932 | { |
927 | return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac"); | 933 | return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac"); |
928 | } | 934 | } |
929 | 935 | ||
930 | /* | 936 | /* |
931 | * sd = sd - (sn * sm) | 937 | * sd = sd - (sn * sm) |
932 | */ | 938 | */ |
933 | static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr) | 939 | static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr) |
934 | { | 940 | { |
935 | return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac"); | 941 | return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac"); |
936 | } | 942 | } |
937 | 943 | ||
938 | /* | 944 | /* |
939 | * sd = -sd + (sn * sm) | 945 | * sd = -sd + (sn * sm) |
940 | */ | 946 | */ |
941 | static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr) | 947 | static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr) |
942 | { | 948 | { |
943 | return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc"); | 949 | return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc"); |
944 | } | 950 | } |
945 | 951 | ||
946 | /* | 952 | /* |
947 | * sd = -sd - (sn * sm) | 953 | * sd = -sd - (sn * sm) |
948 | */ | 954 | */ |
949 | static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr) | 955 | static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr) |
950 | { | 956 | { |
951 | return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc"); | 957 | return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc"); |
952 | } | 958 | } |
953 | 959 | ||
954 | /* | 960 | /* |
955 | * sd = sn * sm | 961 | * sd = sn * sm |
956 | */ | 962 | */ |
957 | static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr) | 963 | static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr) |
958 | { | 964 | { |
959 | struct vfp_single vsd, vsn, vsm; | 965 | struct vfp_single vsd, vsn, vsm; |
960 | u32 exceptions; | 966 | u32 exceptions; |
961 | s32 n = vfp_get_float(sn); | 967 | s32 n = vfp_get_float(sn); |
962 | 968 | ||
963 | pr_debug("VFP: s%u = %08x\n", sn, n); | 969 | pr_debug("VFP: s%u = %08x\n", sn, n); |
964 | 970 | ||
965 | vfp_single_unpack(&vsn, n); | 971 | vfp_single_unpack(&vsn, n); |
966 | if (vsn.exponent == 0 && vsn.significand) | 972 | if (vsn.exponent == 0 && vsn.significand) |
967 | vfp_single_normalise_denormal(&vsn); | 973 | vfp_single_normalise_denormal(&vsn); |
968 | 974 | ||
969 | vfp_single_unpack(&vsm, m); | 975 | vfp_single_unpack(&vsm, m); |
970 | if (vsm.exponent == 0 && vsm.significand) | 976 | if (vsm.exponent == 0 && vsm.significand) |
971 | vfp_single_normalise_denormal(&vsm); | 977 | vfp_single_normalise_denormal(&vsm); |
972 | 978 | ||
973 | exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr); | 979 | exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr); |
974 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul"); | 980 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul"); |
975 | } | 981 | } |
976 | 982 | ||
977 | /* | 983 | /* |
978 | * sd = -(sn * sm) | 984 | * sd = -(sn * sm) |
979 | */ | 985 | */ |
980 | static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr) | 986 | static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr) |
981 | { | 987 | { |
982 | struct vfp_single vsd, vsn, vsm; | 988 | struct vfp_single vsd, vsn, vsm; |
983 | u32 exceptions; | 989 | u32 exceptions; |
984 | s32 n = vfp_get_float(sn); | 990 | s32 n = vfp_get_float(sn); |
985 | 991 | ||
986 | pr_debug("VFP: s%u = %08x\n", sn, n); | 992 | pr_debug("VFP: s%u = %08x\n", sn, n); |
987 | 993 | ||
988 | vfp_single_unpack(&vsn, n); | 994 | vfp_single_unpack(&vsn, n); |
989 | if (vsn.exponent == 0 && vsn.significand) | 995 | if (vsn.exponent == 0 && vsn.significand) |
990 | vfp_single_normalise_denormal(&vsn); | 996 | vfp_single_normalise_denormal(&vsn); |
991 | 997 | ||
992 | vfp_single_unpack(&vsm, m); | 998 | vfp_single_unpack(&vsm, m); |
993 | if (vsm.exponent == 0 && vsm.significand) | 999 | if (vsm.exponent == 0 && vsm.significand) |
994 | vfp_single_normalise_denormal(&vsm); | 1000 | vfp_single_normalise_denormal(&vsm); |
995 | 1001 | ||
996 | exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr); | 1002 | exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr); |
997 | vsd.sign = vfp_sign_negate(vsd.sign); | 1003 | vsd.sign = vfp_sign_negate(vsd.sign); |
998 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul"); | 1004 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul"); |
999 | } | 1005 | } |
1000 | 1006 | ||
1001 | /* | 1007 | /* |
1002 | * sd = sn + sm | 1008 | * sd = sn + sm |
1003 | */ | 1009 | */ |
1004 | static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr) | 1010 | static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr) |
1005 | { | 1011 | { |
1006 | struct vfp_single vsd, vsn, vsm; | 1012 | struct vfp_single vsd, vsn, vsm; |
1007 | u32 exceptions; | 1013 | u32 exceptions; |
1008 | s32 n = vfp_get_float(sn); | 1014 | s32 n = vfp_get_float(sn); |
1009 | 1015 | ||
1010 | pr_debug("VFP: s%u = %08x\n", sn, n); | 1016 | pr_debug("VFP: s%u = %08x\n", sn, n); |
1011 | 1017 | ||
1012 | /* | 1018 | /* |
1013 | * Unpack and normalise denormals. | 1019 | * Unpack and normalise denormals. |
1014 | */ | 1020 | */ |
1015 | vfp_single_unpack(&vsn, n); | 1021 | vfp_single_unpack(&vsn, n); |
1016 | if (vsn.exponent == 0 && vsn.significand) | 1022 | if (vsn.exponent == 0 && vsn.significand) |
1017 | vfp_single_normalise_denormal(&vsn); | 1023 | vfp_single_normalise_denormal(&vsn); |
1018 | 1024 | ||
1019 | vfp_single_unpack(&vsm, m); | 1025 | vfp_single_unpack(&vsm, m); |
1020 | if (vsm.exponent == 0 && vsm.significand) | 1026 | if (vsm.exponent == 0 && vsm.significand) |
1021 | vfp_single_normalise_denormal(&vsm); | 1027 | vfp_single_normalise_denormal(&vsm); |
1022 | 1028 | ||
1023 | exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr); | 1029 | exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr); |
1024 | 1030 | ||
1025 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd"); | 1031 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd"); |
1026 | } | 1032 | } |
1027 | 1033 | ||
1028 | /* | 1034 | /* |
1029 | * sd = sn - sm | 1035 | * sd = sn - sm |
1030 | */ | 1036 | */ |
1031 | static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr) | 1037 | static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr) |
1032 | { | 1038 | { |
1033 | /* | 1039 | /* |
1034 | * Subtraction is addition with one sign inverted. | 1040 | * Subtraction is addition with one sign inverted. |
1035 | */ | 1041 | */ |
1036 | return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr); | 1042 | return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr); |
1037 | } | 1043 | } |
1038 | 1044 | ||
1039 | /* | 1045 | /* |
1040 | * sd = sn / sm | 1046 | * sd = sn / sm |
1041 | */ | 1047 | */ |
1042 | static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr) | 1048 | static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr) |
1043 | { | 1049 | { |
1044 | struct vfp_single vsd, vsn, vsm; | 1050 | struct vfp_single vsd, vsn, vsm; |
1045 | u32 exceptions = 0; | 1051 | u32 exceptions = 0; |
1046 | s32 n = vfp_get_float(sn); | 1052 | s32 n = vfp_get_float(sn); |
1047 | int tm, tn; | 1053 | int tm, tn; |
1048 | 1054 | ||
1049 | pr_debug("VFP: s%u = %08x\n", sn, n); | 1055 | pr_debug("VFP: s%u = %08x\n", sn, n); |
1050 | 1056 | ||
1051 | vfp_single_unpack(&vsn, n); | 1057 | vfp_single_unpack(&vsn, n); |
1052 | vfp_single_unpack(&vsm, m); | 1058 | vfp_single_unpack(&vsm, m); |
1053 | 1059 | ||
1054 | vsd.sign = vsn.sign ^ vsm.sign; | 1060 | vsd.sign = vsn.sign ^ vsm.sign; |
1055 | 1061 | ||
1056 | tn = vfp_single_type(&vsn); | 1062 | tn = vfp_single_type(&vsn); |
1057 | tm = vfp_single_type(&vsm); | 1063 | tm = vfp_single_type(&vsm); |
1058 | 1064 | ||
1059 | /* | 1065 | /* |
1060 | * Is n a NAN? | 1066 | * Is n a NAN? |
1061 | */ | 1067 | */ |
1062 | if (tn & VFP_NAN) | 1068 | if (tn & VFP_NAN) |
1063 | goto vsn_nan; | 1069 | goto vsn_nan; |
1064 | 1070 | ||
1065 | /* | 1071 | /* |
1066 | * Is m a NAN? | 1072 | * Is m a NAN? |
1067 | */ | 1073 | */ |
1068 | if (tm & VFP_NAN) | 1074 | if (tm & VFP_NAN) |
1069 | goto vsm_nan; | 1075 | goto vsm_nan; |
1070 | 1076 | ||
1071 | /* | 1077 | /* |
1072 | * If n and m are infinity, the result is invalid | 1078 | * If n and m are infinity, the result is invalid |
1073 | * If n and m are zero, the result is invalid | 1079 | * If n and m are zero, the result is invalid |
1074 | */ | 1080 | */ |
1075 | if (tm & tn & (VFP_INFINITY|VFP_ZERO)) | 1081 | if (tm & tn & (VFP_INFINITY|VFP_ZERO)) |
1076 | goto invalid; | 1082 | goto invalid; |
1077 | 1083 | ||
1078 | /* | 1084 | /* |
1079 | * If n is infinity, the result is infinity | 1085 | * If n is infinity, the result is infinity |
1080 | */ | 1086 | */ |
1081 | if (tn & VFP_INFINITY) | 1087 | if (tn & VFP_INFINITY) |
1082 | goto infinity; | 1088 | goto infinity; |
1083 | 1089 | ||
1084 | /* | 1090 | /* |
1085 | * If m is zero, raise div0 exception | 1091 | * If m is zero, raise div0 exception |
1086 | */ | 1092 | */ |
1087 | if (tm & VFP_ZERO) | 1093 | if (tm & VFP_ZERO) |
1088 | goto divzero; | 1094 | goto divzero; |
1089 | 1095 | ||
1090 | /* | 1096 | /* |
1091 | * If m is infinity, or n is zero, the result is zero | 1097 | * If m is infinity, or n is zero, the result is zero |
1092 | */ | 1098 | */ |
1093 | if (tm & VFP_INFINITY || tn & VFP_ZERO) | 1099 | if (tm & VFP_INFINITY || tn & VFP_ZERO) |
1094 | goto zero; | 1100 | goto zero; |
1095 | 1101 | ||
1096 | if (tn & VFP_DENORMAL) | 1102 | if (tn & VFP_DENORMAL) |
1097 | vfp_single_normalise_denormal(&vsn); | 1103 | vfp_single_normalise_denormal(&vsn); |
1098 | if (tm & VFP_DENORMAL) | 1104 | if (tm & VFP_DENORMAL) |
1099 | vfp_single_normalise_denormal(&vsm); | 1105 | vfp_single_normalise_denormal(&vsm); |
1100 | 1106 | ||
1101 | /* | 1107 | /* |
1102 | * Ok, we have two numbers, we can perform division. | 1108 | * Ok, we have two numbers, we can perform division. |
1103 | */ | 1109 | */ |
1104 | vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1; | 1110 | vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1; |
1105 | vsm.significand <<= 1; | 1111 | vsm.significand <<= 1; |
1106 | if (vsm.significand <= (2 * vsn.significand)) { | 1112 | if (vsm.significand <= (2 * vsn.significand)) { |
1107 | vsn.significand >>= 1; | 1113 | vsn.significand >>= 1; |
1108 | vsd.exponent++; | 1114 | vsd.exponent++; |
1109 | } | 1115 | } |
1110 | vsd.significand = ((u64)vsn.significand << 32) / vsm.significand; | 1116 | { |
1117 | u64 significand = (u64)vsn.significand << 32; | ||
1118 | do_div(significand, vsm.significand); | ||
1119 | vsd.significand = significand; | ||
1120 | } | ||
1111 | if ((vsd.significand & 0x3f) == 0) | 1121 | if ((vsd.significand & 0x3f) == 0) |
1112 | vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32); | 1122 | vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32); |
1113 | 1123 | ||
1114 | return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv"); | 1124 | return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv"); |
1115 | 1125 | ||
1116 | vsn_nan: | 1126 | vsn_nan: |
1117 | exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr); | 1127 | exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr); |
1118 | pack: | 1128 | pack: |
1119 | vfp_put_float(sd, vfp_single_pack(&vsd)); | 1129 | vfp_put_float(sd, vfp_single_pack(&vsd)); |
1120 | return exceptions; | 1130 | return exceptions; |
1121 | 1131 | ||
1122 | vsm_nan: | 1132 | vsm_nan: |
1123 | exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr); | 1133 | exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr); |
1124 | goto pack; | 1134 | goto pack; |
1125 | 1135 | ||
1126 | zero: | 1136 | zero: |
1127 | vsd.exponent = 0; | 1137 | vsd.exponent = 0; |
1128 | vsd.significand = 0; | 1138 | vsd.significand = 0; |
1129 | goto pack; | 1139 | goto pack; |
1130 | 1140 | ||
1131 | divzero: | 1141 | divzero: |
1132 | exceptions = FPSCR_DZC; | 1142 | exceptions = FPSCR_DZC; |
1133 | infinity: | 1143 | infinity: |
1134 | vsd.exponent = 255; | 1144 | vsd.exponent = 255; |
1135 | vsd.significand = 0; | 1145 | vsd.significand = 0; |
1136 | goto pack; | 1146 | goto pack; |
1137 | 1147 | ||
1138 | invalid: | 1148 | invalid: |
1139 | vfp_put_float(sd, vfp_single_pack(&vfp_single_default_qnan)); | 1149 | vfp_put_float(sd, vfp_single_pack(&vfp_single_default_qnan)); |
1140 | return FPSCR_IOC; | 1150 | return FPSCR_IOC; |
1141 | } | 1151 | } |
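
This hunk is the commit's second conversion: the quotient of the 64-bit widened dividend by the 32-bit divisor previously went through the libgcc 64-bit division helper; do_div() performs it in place instead. The exactness check that follows is equivalent to testing the remainder, as in this user-space stand-in:

#include <stdint.h>

static uint32_t fdiv_significand(uint32_t n_sig, uint32_t m_sig)
{
	uint64_t q = (uint64_t)n_sig << 32;
	uint32_t rem = (uint32_t)(q % m_sig);	/* do_div(q, m_sig) in-kernel */

	q /= m_sig;
	if (((uint32_t)q & 0x3f) == 0)
		q |= (rem != 0);		/* sticky bit when inexact */
	return (uint32_t)q;
}
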
1142 | 1152 | ||
1143 | static u32 (* const fop_fns[16])(int sd, int sn, s32 m, u32 fpscr) = { | 1153 | static u32 (* const fop_fns[16])(int sd, int sn, s32 m, u32 fpscr) = { |
1144 | [FOP_TO_IDX(FOP_FMAC)] = vfp_single_fmac, | 1154 | [FOP_TO_IDX(FOP_FMAC)] = vfp_single_fmac, |
1145 | [FOP_TO_IDX(FOP_FNMAC)] = vfp_single_fnmac, | 1155 | [FOP_TO_IDX(FOP_FNMAC)] = vfp_single_fnmac, |
1146 | [FOP_TO_IDX(FOP_FMSC)] = vfp_single_fmsc, | 1156 | [FOP_TO_IDX(FOP_FMSC)] = vfp_single_fmsc, |
1147 | [FOP_TO_IDX(FOP_FNMSC)] = vfp_single_fnmsc, | 1157 | [FOP_TO_IDX(FOP_FNMSC)] = vfp_single_fnmsc, |
1148 | [FOP_TO_IDX(FOP_FMUL)] = vfp_single_fmul, | 1158 | [FOP_TO_IDX(FOP_FMUL)] = vfp_single_fmul, |
1149 | [FOP_TO_IDX(FOP_FNMUL)] = vfp_single_fnmul, | 1159 | [FOP_TO_IDX(FOP_FNMUL)] = vfp_single_fnmul, |
1150 | [FOP_TO_IDX(FOP_FADD)] = vfp_single_fadd, | 1160 | [FOP_TO_IDX(FOP_FADD)] = vfp_single_fadd, |
1151 | [FOP_TO_IDX(FOP_FSUB)] = vfp_single_fsub, | 1161 | [FOP_TO_IDX(FOP_FSUB)] = vfp_single_fsub, |
1152 | [FOP_TO_IDX(FOP_FDIV)] = vfp_single_fdiv, | 1162 | [FOP_TO_IDX(FOP_FDIV)] = vfp_single_fdiv, |
1153 | }; | 1163 | }; |
1154 | 1164 | ||
1155 | #define FREG_BANK(x) ((x) & 0x18) | 1165 | #define FREG_BANK(x) ((x) & 0x18) |
1156 | #define FREG_IDX(x) ((x) & 7) | 1166 | #define FREG_IDX(x) ((x) & 7) |
1157 | 1167 | ||
1158 | u32 vfp_single_cpdo(u32 inst, u32 fpscr) | 1168 | u32 vfp_single_cpdo(u32 inst, u32 fpscr) |
1159 | { | 1169 | { |
1160 | u32 op = inst & FOP_MASK; | 1170 | u32 op = inst & FOP_MASK; |
1161 | u32 exceptions = 0; | 1171 | u32 exceptions = 0; |
1162 | unsigned int sd = vfp_get_sd(inst); | 1172 | unsigned int sd = vfp_get_sd(inst); |
1163 | unsigned int sn = vfp_get_sn(inst); | 1173 | unsigned int sn = vfp_get_sn(inst); |
1164 | unsigned int sm = vfp_get_sm(inst); | 1174 | unsigned int sm = vfp_get_sm(inst); |
1165 | unsigned int vecitr, veclen, vecstride; | 1175 | unsigned int vecitr, veclen, vecstride; |
1166 | u32 (*fop)(int, int, s32, u32); | 1176 | u32 (*fop)(int, int, s32, u32); |
1167 | 1177 | ||
1168 | veclen = fpscr & FPSCR_LENGTH_MASK; | 1178 | veclen = fpscr & FPSCR_LENGTH_MASK; |
1169 | vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK); | 1179 | vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK); |
1170 | 1180 | ||
1171 | /* | 1181 | /* |
1172 | * If destination bank is zero, vector length is always '1'. | 1182 | * If destination bank is zero, vector length is always '1'. |
1173 | * ARM DDI0100F C5.1.3, C5.3.2. | 1183 | * ARM DDI0100F C5.1.3, C5.3.2. |
1174 | */ | 1184 | */ |
1175 | if (FREG_BANK(sd) == 0) | 1185 | if (FREG_BANK(sd) == 0) |
1176 | veclen = 0; | 1186 | veclen = 0; |
1177 | 1187 | ||
1178 | pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride, | 1188 | pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride, |
1179 | (veclen >> FPSCR_LENGTH_BIT) + 1); | 1189 | (veclen >> FPSCR_LENGTH_BIT) + 1); |
1180 | 1190 | ||
1181 | fop = (op == FOP_EXT) ? fop_extfns[sn] : fop_fns[FOP_TO_IDX(op)]; | 1191 | fop = (op == FOP_EXT) ? fop_extfns[sn] : fop_fns[FOP_TO_IDX(op)]; |
1182 | if (!fop) | 1192 | if (!fop) |
1183 | goto invalid; | 1193 | goto invalid; |
1184 | 1194 | ||
1185 | for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) { | 1195 | for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) { |
1186 | s32 m = vfp_get_float(sm); | 1196 | s32 m = vfp_get_float(sm); |
1187 | u32 except; | 1197 | u32 except; |
1188 | 1198 | ||
1189 | if (op == FOP_EXT) | 1199 | if (op == FOP_EXT) |
1190 | pr_debug("VFP: itr%d (s%u) = op[%u] (s%u=%08x)\n", | 1200 | pr_debug("VFP: itr%d (s%u) = op[%u] (s%u=%08x)\n", |
1191 | vecitr >> FPSCR_LENGTH_BIT, sd, sn, sm, m); | 1201 | vecitr >> FPSCR_LENGTH_BIT, sd, sn, sm, m); |
1192 | else | 1202 | else |
1193 | pr_debug("VFP: itr%d (s%u) = (s%u) op[%u] (s%u=%08x)\n", | 1203 | pr_debug("VFP: itr%d (s%u) = (s%u) op[%u] (s%u=%08x)\n", |
1194 | vecitr >> FPSCR_LENGTH_BIT, sd, sn, | 1204 | vecitr >> FPSCR_LENGTH_BIT, sd, sn, |
1195 | FOP_TO_IDX(op), sm, m); | 1205 | FOP_TO_IDX(op), sm, m); |
1196 | 1206 | ||
1197 | except = fop(sd, sn, m, fpscr); | 1207 | except = fop(sd, sn, m, fpscr); |
1198 | pr_debug("VFP: itr%d: exceptions=%08x\n", | 1208 | pr_debug("VFP: itr%d: exceptions=%08x\n", |
1199 | vecitr >> FPSCR_LENGTH_BIT, except); | 1209 | vecitr >> FPSCR_LENGTH_BIT, except); |
1200 | 1210 | ||
1201 | exceptions |= except; | 1211 | exceptions |= except; |
1202 | 1212 | ||
1203 | /* | 1213 | /* |
1204 | * This ensures that comparisons only operate on scalars; | 1214 | * This ensures that comparisons only operate on scalars; |
1205 | * comparisons always return with one FPSCR status bit set. | 1215 | * comparisons always return with one FPSCR status bit set. |
1206 | */ | 1216 | */ |
1207 | if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) | 1217 | if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) |
1208 | break; | 1218 | break; |
1209 | 1219 | ||
1210 | /* | 1220 | /* |
1211 | * CHECK: It appears to be undefined whether we stop when | 1221 | * CHECK: It appears to be undefined whether we stop when |
1212 | * we encounter an exception. We continue. | 1222 | * we encounter an exception. We continue. |
1213 | */ | 1223 | */ |
1214 | 1224 | ||
1215 | sd = FREG_BANK(sd) + ((FREG_IDX(sd) + vecstride) & 7); | 1225 | sd = FREG_BANK(sd) + ((FREG_IDX(sd) + vecstride) & 7); |
1216 | sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7); | 1226 | sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7); |
1217 | if (FREG_BANK(sm) != 0) | 1227 | if (FREG_BANK(sm) != 0) |
1218 | sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7); | 1228 | sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7); |
1219 | } | 1229 | } |
1220 | return exceptions; | 1230 | return exceptions; |
1221 | 1231 | ||
1222 | invalid: | 1232 | invalid: |
1223 | return (u32)-1; | 1233 | return (u32)-1; |
1224 | } | 1234 | } |
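
The register stepping at the bottom of the vector loop wraps indices inside an 8-register bank while the bank bits stay fixed, which is what gives FPSCR short-vector operations their circular-bank behaviour. A small demonstration using the FREG_BANK/FREG_IDX definitions above:

#include <stdio.h>

#define FREG_BANK(x) ((x) & 0x18)
#define FREG_IDX(x)  ((x) & 7)

int main(void)
{
	unsigned int sd = 14, stride = 2, i;	/* bank s8-s15, start at s14 */

	for (i = 0; i < 4; i++) {
		printf("s%u ", sd);		/* prints: s14 s8 s10 s12 */
		sd = FREG_BANK(sd) + ((FREG_IDX(sd) + stride) & 7);
	}
	printf("\n");
	return 0;
}
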
1225 | 1235 |