Commit a366064c3ff46c985a3c7243468be197d29874dc

Authored by Grant Grundler
Committed by Kyle McMartin
1 parent 2464212f68

[PARISC] Update bitops from parisc tree

Optimize ext2_find_next_zero_bit. Gives about a 25% perf improvement in an
rsync test on ext3.

Signed-off-by: Randolph Chung <tausq@parisc-linux.org>

Fix ext3 performance - ext2_find_next_zero_bit() was the culprit.
Kudos to jejb for pointing out the possibility that ext2_test_bit()
and ext2_find_next_zero_bit() may in fact not be enumerating bits in
the bitmap correctly because of endianness. Took the sparc64
implementation and adapted it to our tree. I suspect the real problem
is that ffz() wants an unsigned long and was getting garbage in the
top half when handed an unsigned int. Not confirmed, but that's what
I suspect.
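
A rough sketch of that suspected failure mode (illustrative only, not code
from this patch; ffz_demo() is a stand-in for the kernel's ffz()): if the
word handed to ffz() is built from a 32-bit read, the top half of the
64-bit long carries no bitmap data, so ffz() can report a "free" bit that
lies outside the word that was actually read.

    #include <stdio.h>

    /* Stand-in for the kernel's ffz(): index of the first zero bit,
     * scanning the full width of an unsigned long. */
    static unsigned long ffz_demo(unsigned long word)
    {
        unsigned long bit = 0;

        while (word & 1UL) {
            word >>= 1;
            bit++;
        }
        return bit;
    }

    int main(void)
    {
        unsigned int word32 = 0xffffffffU; /* fully-used 32-bit bitmap word */
        unsigned long w = word32;          /* zero-extended: bits 32..63 hold
                                              no bitmap data on a 64-bit box */

        /* On LP64 this prints 32: a "free" bit beyond the 32-bit word. */
        printf("ffz() = %lu\n", ffz_demo(w));
        return 0;
    }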

Signed-off-by: Grant Grundler <grundler@parisc-linux.org>

Fix find_next_bit for 32-bit
Make masking consistent for bitops

From: Joel Soete <soete.joel@tiscali.be>
Signed-off-by: Randolph Chung <tausq@parisc-linux.org>

Add back incorrectly removed ext2_find_first_zero_bit definition

Signed-off-by: James Bottomley <jejb@parisc-linux.org>

Fixup bitops.h to use volatile for *_bit() ops

Based on this email thread:
       http://marc.theaimsgroup.com/?t=108826637900003

In a nutshell:
        *_bit() want use of volatile.
        __*_bit() are "relaxed" and don't use spinlock or volatile.
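
A short usage sketch of that split (kernel context; dev_flags and dev_lock
are made-up names, not part of this patch):

    #include <linux/bitops.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>

    static DECLARE_BITMAP(dev_flags, 64);
    static DEFINE_SPINLOCK(dev_lock);

    static void mark_busy(int nr)
    {
        set_bit(nr, dev_flags);        /* atomic: hashed spinlock, volatile */
    }

    static void mark_idle(int nr)
    {
        spin_lock(&dev_lock);
        __clear_bit(nr, dev_flags);    /* relaxed: caller serializes updates */
        spin_unlock(&dev_lock);
    }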

other minor changes:
o replace the hweight64() macro with an alias to generic_hweight64() (Joel Soete)
o clean up the ext2* macros so that (a) it's obvious what the XOR magic
  is about and (b) one version works for both 32- and 64-bit (see the
  sketch after this list).
o replace 2 uses of CONFIG_64BIT with __LP64__. bitops.h used both.
  I think header files that might go to user space should use
  something userspace will know about (__LP64__).
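
The XOR magic in a standalone sketch (illustrative; it only reuses the
LE_BYTE_ADDR expression this patch adds): ext2 numbers bits as if the
bitmap were little-endian, so XOR-ing the bit number with
(sizeof(unsigned long) - 1) << 3 flips the byte index within the native
big-endian long while leaving the bit-within-byte offset alone. The same
expression evaluates to 0x18 on 32-bit and 0x38 on 64-bit, which is why
one definition now covers both.

    #include <stdio.h>

    #define LE_BYTE_ADDR ((sizeof(unsigned long) - 1) << 3) /* 0x18 or 0x38 */

    int main(void)
    {
        unsigned long nr;

        printf("LE_BYTE_ADDR = %#lx\n", (unsigned long) LE_BYTE_ADDR);
        for (nr = 0; nr < 16; nr++)
            printf("ext2 bit %2lu -> native bit %2lu (bit %lu of byte %lu)\n",
                   nr, (unsigned long) (nr ^ LE_BYTE_ADDR),
                   nr & 7UL, nr >> 3);
        return 0;
    }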

Signed-off-by: Grant Grundler <grundler@parisc-linux.org>

Move SHIFT_PER_LONG to the standard location for BITS_PER_LONG (asm/types.h)
and ditch the second definition of BITS_PER_LONG in bitops.h.

Signed-off-by: Grant Grundler <grundler@parisc-linux.org>

Signed-off-by: Kyle McMartin <kyle@parisc-linux.org>

Showing 2 changed files with 134 additions and 158 deletions

include/asm-parisc/bitops.h
... ... @@ -2,7 +2,7 @@
2 2 #define _PARISC_BITOPS_H
3 3  
4 4 #include <linux/compiler.h>
5   -#include <asm/spinlock.h>
  5 +#include <asm/types.h> /* for BITS_PER_LONG/SHIFT_PER_LONG */
6 6 #include <asm/byteorder.h>
7 7 #include <asm/atomic.h>
8 8  
... ... @@ -12,194 +12,158 @@
12 12 * to include/asm-i386/bitops.h or kerneldoc
13 13 */
14 14  
15   -#ifdef __LP64__
16   -# define SHIFT_PER_LONG 6
17   -#ifndef BITS_PER_LONG
18   -# define BITS_PER_LONG 64
19   -#endif
20   -#else
21   -# define SHIFT_PER_LONG 5
22   -#ifndef BITS_PER_LONG
23   -# define BITS_PER_LONG 32
24   -#endif
25   -#endif
  15 +#define CHOP_SHIFTCOUNT(x) (((unsigned long) (x)) & (BITS_PER_LONG - 1))
26 16  
27   -#define CHOP_SHIFTCOUNT(x) ((x) & (BITS_PER_LONG - 1))
28 17  
29   -
30 18 #define smp_mb__before_clear_bit() smp_mb()
31 19 #define smp_mb__after_clear_bit() smp_mb()
32 20  
33   -static __inline__ void set_bit(int nr, volatile unsigned long * address)
  21 +/* See http://marc.theaimsgroup.com/?t=108826637900003 for discussion
  22 + * on use of volatile and __*_bit() (set/clear/change):
  23 + * *_bit() want use of volatile.
  24 + * __*_bit() are "relaxed" and don't use spinlock or volatile.
  25 + */
  26 +
  27 +static __inline__ void set_bit(int nr, volatile unsigned long * addr)
34 28 {
35   - unsigned long mask;
36   - unsigned long *addr = (unsigned long *) address;
  29 + unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
37 30 unsigned long flags;
38 31  
39 32 addr += (nr >> SHIFT_PER_LONG);
40   - mask = 1L << CHOP_SHIFTCOUNT(nr);
41 33 _atomic_spin_lock_irqsave(addr, flags);
42 34 *addr |= mask;
43 35 _atomic_spin_unlock_irqrestore(addr, flags);
44 36 }
45 37  
46   -static __inline__ void __set_bit(int nr, volatile unsigned long * address)
  38 +static __inline__ void __set_bit(unsigned long nr, volatile unsigned long * addr)
47 39 {
48   - unsigned long mask;
49   - unsigned long *addr = (unsigned long *) address;
  40 + unsigned long *m = (unsigned long *) addr + (nr >> SHIFT_PER_LONG);
50 41  
51   - addr += (nr >> SHIFT_PER_LONG);
52   - mask = 1L << CHOP_SHIFTCOUNT(nr);
53   - *addr |= mask;
  42 + *m |= 1UL << CHOP_SHIFTCOUNT(nr);
54 43 }
55 44  
56   -static __inline__ void clear_bit(int nr, volatile unsigned long * address)
  45 +static __inline__ void clear_bit(int nr, volatile unsigned long * addr)
57 46 {
58   - unsigned long mask;
59   - unsigned long *addr = (unsigned long *) address;
  47 + unsigned long mask = ~(1UL << CHOP_SHIFTCOUNT(nr));
60 48 unsigned long flags;
61 49  
62 50 addr += (nr >> SHIFT_PER_LONG);
63   - mask = 1L << CHOP_SHIFTCOUNT(nr);
64 51 _atomic_spin_lock_irqsave(addr, flags);
65   - *addr &= ~mask;
  52 + *addr &= mask;
66 53 _atomic_spin_unlock_irqrestore(addr, flags);
67 54 }
68 55  
69   -static __inline__ void __clear_bit(unsigned long nr, volatile unsigned long * address)
  56 +static __inline__ void __clear_bit(unsigned long nr, volatile unsigned long * addr)
70 57 {
71   - unsigned long mask;
72   - unsigned long *addr = (unsigned long *) address;
  58 + unsigned long *m = (unsigned long *) addr + (nr >> SHIFT_PER_LONG);
73 59  
74   - addr += (nr >> SHIFT_PER_LONG);
75   - mask = 1L << CHOP_SHIFTCOUNT(nr);
76   - *addr &= ~mask;
  60 + *m &= ~(1UL << CHOP_SHIFTCOUNT(nr));
77 61 }
78 62  
79   -static __inline__ void change_bit(int nr, volatile unsigned long * address)
  63 +static __inline__ void change_bit(int nr, volatile unsigned long * addr)
80 64 {
81   - unsigned long mask;
82   - unsigned long *addr = (unsigned long *) address;
  65 + unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
83 66 unsigned long flags;
84 67  
85 68 addr += (nr >> SHIFT_PER_LONG);
86   - mask = 1L << CHOP_SHIFTCOUNT(nr);
87 69 _atomic_spin_lock_irqsave(addr, flags);
88 70 *addr ^= mask;
89 71 _atomic_spin_unlock_irqrestore(addr, flags);
90 72 }
91 73  
92   -static __inline__ void __change_bit(int nr, volatile unsigned long * address)
  74 +static __inline__ void __change_bit(unsigned long nr, volatile unsigned long * addr)
93 75 {
94   - unsigned long mask;
95   - unsigned long *addr = (unsigned long *) address;
  76 + unsigned long *m = (unsigned long *) addr + (nr >> SHIFT_PER_LONG);
96 77  
97   - addr += (nr >> SHIFT_PER_LONG);
98   - mask = 1L << CHOP_SHIFTCOUNT(nr);
99   - *addr ^= mask;
  78 + *m ^= 1UL << CHOP_SHIFTCOUNT(nr);
100 79 }
101 80  
102   -static __inline__ int test_and_set_bit(int nr, volatile unsigned long * address)
  81 +static __inline__ int test_and_set_bit(int nr, volatile unsigned long * addr)
103 82 {
104   - unsigned long mask;
105   - unsigned long *addr = (unsigned long *) address;
106   - int oldbit;
  83 + unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
  84 + unsigned long oldbit;
107 85 unsigned long flags;
108 86  
109 87 addr += (nr >> SHIFT_PER_LONG);
110   - mask = 1L << CHOP_SHIFTCOUNT(nr);
111 88 _atomic_spin_lock_irqsave(addr, flags);
112   - oldbit = (*addr & mask) ? 1 : 0;
113   - *addr |= mask;
  89 + oldbit = *addr;
  90 + *addr = oldbit | mask;
114 91 _atomic_spin_unlock_irqrestore(addr, flags);
115 92  
116   - return oldbit;
  93 + return (oldbit & mask) ? 1 : 0;
117 94 }
118 95  
119 96 static __inline__ int __test_and_set_bit(int nr, volatile unsigned long * address)
120 97 {
121   - unsigned long mask;
122   - unsigned long *addr = (unsigned long *) address;
123   - int oldbit;
  98 + unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
  99 + unsigned long oldbit;
  100 + unsigned long *addr = (unsigned long *)address + (nr >> SHIFT_PER_LONG);
124 101  
125   - addr += (nr >> SHIFT_PER_LONG);
126   - mask = 1L << CHOP_SHIFTCOUNT(nr);
127   - oldbit = (*addr & mask) ? 1 : 0;
128   - *addr |= mask;
  102 + oldbit = *addr;
  103 + *addr = oldbit | mask;
129 104  
130   - return oldbit;
  105 + return (oldbit & mask) ? 1 : 0;
131 106 }
132 107  
133   -static __inline__ int test_and_clear_bit(int nr, volatile unsigned long * address)
  108 +static __inline__ int test_and_clear_bit(int nr, volatile unsigned long * addr)
134 109 {
135   - unsigned long mask;
136   - unsigned long *addr = (unsigned long *) address;
137   - int oldbit;
  110 + unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
  111 + unsigned long oldbit;
138 112 unsigned long flags;
139 113  
140 114 addr += (nr >> SHIFT_PER_LONG);
141   - mask = 1L << CHOP_SHIFTCOUNT(nr);
142 115 _atomic_spin_lock_irqsave(addr, flags);
143   - oldbit = (*addr & mask) ? 1 : 0;
144   - *addr &= ~mask;
  116 + oldbit = *addr;
  117 + *addr = oldbit & ~mask;
145 118 _atomic_spin_unlock_irqrestore(addr, flags);
146 119  
147   - return oldbit;
  120 + return (oldbit & mask) ? 1 : 0;
148 121 }
149 122  
150 123 static __inline__ int __test_and_clear_bit(int nr, volatile unsigned long * address)
151 124 {
152   - unsigned long mask;
153   - unsigned long *addr = (unsigned long *) address;
154   - int oldbit;
  125 + unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
  126 + unsigned long *addr = (unsigned long *)address + (nr >> SHIFT_PER_LONG);
  127 + unsigned long oldbit;
155 128  
156   - addr += (nr >> SHIFT_PER_LONG);
157   - mask = 1L << CHOP_SHIFTCOUNT(nr);
158   - oldbit = (*addr & mask) ? 1 : 0;
159   - *addr &= ~mask;
  129 + oldbit = *addr;
  130 + *addr = oldbit & ~mask;
160 131  
161   - return oldbit;
  132 + return (oldbit & mask) ? 1 : 0;
162 133 }
163 134  
164   -static __inline__ int test_and_change_bit(int nr, volatile unsigned long * address)
  135 +static __inline__ int test_and_change_bit(int nr, volatile unsigned long * addr)
165 136 {
166   - unsigned long mask;
167   - unsigned long *addr = (unsigned long *) address;
168   - int oldbit;
  137 + unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
  138 + unsigned long oldbit;
169 139 unsigned long flags;
170 140  
171 141 addr += (nr >> SHIFT_PER_LONG);
172   - mask = 1L << CHOP_SHIFTCOUNT(nr);
173 142 _atomic_spin_lock_irqsave(addr, flags);
174   - oldbit = (*addr & mask) ? 1 : 0;
175   - *addr ^= mask;
  143 + oldbit = *addr;
  144 + *addr = oldbit ^ mask;
176 145 _atomic_spin_unlock_irqrestore(addr, flags);
177 146  
178   - return oldbit;
  147 + return (oldbit & mask) ? 1 : 0;
179 148 }
180 149  
181 150 static __inline__ int __test_and_change_bit(int nr, volatile unsigned long * address)
182 151 {
183   - unsigned long mask;
184   - unsigned long *addr = (unsigned long *) address;
185   - int oldbit;
  152 + unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
  153 + unsigned long *addr = (unsigned long *)address + (nr >> SHIFT_PER_LONG);
  154 + unsigned long oldbit;
186 155  
187   - addr += (nr >> SHIFT_PER_LONG);
188   - mask = 1L << CHOP_SHIFTCOUNT(nr);
189   - oldbit = (*addr & mask) ? 1 : 0;
190   - *addr ^= mask;
  156 + oldbit = *addr;
  157 + *addr = oldbit ^ mask;
191 158  
192   - return oldbit;
  159 + return (oldbit & mask) ? 1 : 0;
193 160 }
194 161  
195 162 static __inline__ int test_bit(int nr, const volatile unsigned long *address)
196 163 {
197   - unsigned long mask;
198   - const unsigned long *addr = (const unsigned long *)address;
  164 + unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr);
  165 + const unsigned long *addr = (const unsigned long *)address + (nr >> SHIFT_PER_LONG);
199 166  
200   - addr += (nr >> SHIFT_PER_LONG);
201   - mask = 1L << CHOP_SHIFTCOUNT(nr);
202   -
203 167 return !!(*addr & mask);
204 168 }
205 169  
... ... @@ -229,7 +193,7 @@
229 193 unsigned long ret;
230 194  
231 195 __asm__(
232   -#if BITS_PER_LONG > 32
  196 +#ifdef __LP64__
233 197 " ldi 63,%1\n"
234 198 " extrd,u,*<> %0,63,32,%%r0\n"
235 199 " extrd,u,*TR %0,31,32,%0\n" /* move top 32-bits down */
... ... @@ -304,14 +268,7 @@
304 268 * hweightN: returns the hamming weight (i.e. the number
305 269 * of bits set) of a N-bit word
306 270 */
307   -#define hweight64(x) \
308   -({ \
309   - unsigned long __x = (x); \
310   - unsigned int __w; \
311   - __w = generic_hweight32((unsigned int) __x); \
312   - __w += generic_hweight32((unsigned int) (__x>>32)); \
313   - __w; \
314   -})
  271 +#define hweight64(x) generic_hweight64(x)
315 272 #define hweight32(x) generic_hweight32(x)
316 273 #define hweight16(x) generic_hweight16(x)
317 274 #define hweight8(x) generic_hweight8(x)
... ... @@ -324,24 +281,22 @@
324 281 */
325 282 static inline int sched_find_first_bit(const unsigned long *b)
326 283 {
327   -#ifndef __LP64__
  284 +#ifdef __LP64__
328 285 if (unlikely(b[0]))
329 286 return __ffs(b[0]);
330 287 if (unlikely(b[1]))
  288 + return __ffs(b[1]) + 64;
  289 + return __ffs(b[2]) + 128;
  290 +#else
  291 + if (unlikely(b[0]))
  292 + return __ffs(b[0]);
  293 + if (unlikely(b[1]))
331 294 return __ffs(b[1]) + 32;
332 295 if (unlikely(b[2]))
333 296 return __ffs(b[2]) + 64;
334 297 if (b[3])
335 298 return __ffs(b[3]) + 96;
336 299 return __ffs(b[4]) + 128;
337   -#else
338   - if (unlikely(b[0]))
339   - return __ffs(b[0]);
340   - if (unlikely(((unsigned int)b[1])))
341   - return __ffs(b[1]) + 64;
342   - if (b[1] >> 32)
343   - return __ffs(b[1] >> 32) + 96;
344   - return __ffs(b[2]) + 128;
345 300 #endif
346 301 }
347 302  
... ... @@ -391,7 +346,7 @@
391 346  
392 347 static __inline__ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset)
393 348 {
394   - const unsigned long *p = addr + (offset >> 6);
  349 + const unsigned long *p = addr + (offset >> SHIFT_PER_LONG);
395 350 unsigned long result = offset & ~(BITS_PER_LONG-1);
396 351 unsigned long tmp;
397 352  
... ... @@ -445,70 +400,89 @@
445 400 * test_and_{set,clear}_bit guarantee atomicity without
446 401 * disabling interrupts.
447 402 */
448   -#ifdef __LP64__
449   -#define ext2_set_bit(nr, addr) __test_and_set_bit((nr) ^ 0x38, (unsigned long *)addr)
450   -#define ext2_set_bit_atomic(l,nr,addr) test_and_set_bit((nr) ^ 0x38, (unsigned long *)addr)
451   -#define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x38, (unsigned long *)addr)
452   -#define ext2_clear_bit_atomic(l,nr,addr) test_and_clear_bit((nr) ^ 0x38, (unsigned long *)addr)
453   -#else
454   -#define ext2_set_bit(nr, addr) __test_and_set_bit((nr) ^ 0x18, (unsigned long *)addr)
455   -#define ext2_set_bit_atomic(l,nr,addr) test_and_set_bit((nr) ^ 0x18, (unsigned long *)addr)
456   -#define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x18, (unsigned long *)addr)
457   -#define ext2_clear_bit_atomic(l,nr,addr) test_and_clear_bit((nr) ^ 0x18, (unsigned long *)addr)
458   -#endif
459 403  
460   -#endif /* __KERNEL__ */
  404 +/* '3' is bits per byte */
  405 +#define LE_BYTE_ADDR ((sizeof(unsigned long) - 1) << 3)
461 406  
462   -static __inline__ int ext2_test_bit(int nr, __const__ void * addr)
463   -{
464   - __const__ unsigned char *ADDR = (__const__ unsigned char *) addr;
  407 +#define ext2_test_bit(nr, addr) \
  408 + test_bit((nr) ^ LE_BYTE_ADDR, (unsigned long *)addr)
  409 +#define ext2_set_bit(nr, addr) \
  410 + __test_and_set_bit((nr) ^ LE_BYTE_ADDR, (unsigned long *)addr)
  411 +#define ext2_clear_bit(nr, addr) \
  412 + __test_and_clear_bit((nr) ^ LE_BYTE_ADDR, (unsigned long *)addr)
465 413  
466   - return (ADDR[nr >> 3] >> (nr & 7)) & 1;
467   -}
  414 +#define ext2_set_bit_atomic(l,nr,addr) \
  415 + test_and_set_bit((nr) ^ LE_BYTE_ADDR, (unsigned long *)addr)
  416 +#define ext2_clear_bit_atomic(l,nr,addr) \
  417 + test_and_clear_bit( (nr) ^ LE_BYTE_ADDR, (unsigned long *)addr)
468 418  
469   -/*
470   - * This implementation of ext2_find_{first,next}_zero_bit was stolen from
471   - * Linus' asm-alpha/bitops.h and modified for a big-endian machine.
472   - */
  419 +#endif /* __KERNEL__ */
473 420  
  421 +
474 422 #define ext2_find_first_zero_bit(addr, size) \
475   - ext2_find_next_zero_bit((addr), (size), 0)
  423 + ext2_find_next_zero_bit((addr), (size), 0)
476 424  
477   -extern __inline__ unsigned long ext2_find_next_zero_bit(void *addr,
478   - unsigned long size, unsigned long offset)
  425 +/* include/linux/byteorder does not support "unsigned long" type */
  426 +static inline unsigned long ext2_swabp(unsigned long * x)
479 427 {
480   - unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
481   - unsigned int result = offset & ~31UL;
482   - unsigned int tmp;
  428 +#ifdef __LP64__
  429 + return (unsigned long) __swab64p((u64 *) x);
  430 +#else
  431 + return (unsigned long) __swab32p((u32 *) x);
  432 +#endif
  433 +}
483 434  
  435 +/* include/linux/byteorder doesn't support "unsigned long" type */
  436 +static inline unsigned long ext2_swab(unsigned long y)
  437 +{
  438 +#ifdef __LP64__
  439 + return (unsigned long) __swab64((u64) y);
  440 +#else
  441 + return (unsigned long) __swab32((u32) y);
  442 +#endif
  443 +}
  444 +
  445 +static __inline__ unsigned long ext2_find_next_zero_bit(void *addr, unsigned long size, unsigned long offset)
  446 +{
  447 + unsigned long *p = (unsigned long *) addr + (offset >> SHIFT_PER_LONG);
  448 + unsigned long result = offset & ~(BITS_PER_LONG - 1);
  449 + unsigned long tmp;
  450 +
484 451 if (offset >= size)
485 452 return size;
486 453 size -= result;
487   - offset &= 31UL;
  454 + offset &= (BITS_PER_LONG - 1UL);
488 455 if (offset) {
489   - tmp = cpu_to_le32p(p++);
490   - tmp |= ~0UL >> (32-offset);
491   - if (size < 32)
  456 + tmp = ext2_swabp(p++);
  457 + tmp |= (~0UL >> (BITS_PER_LONG - offset));
  458 + if (size < BITS_PER_LONG)
492 459 goto found_first;
493   - if (tmp != ~0U)
  460 + if (~tmp)
494 461 goto found_middle;
495   - size -= 32;
496   - result += 32;
  462 + size -= BITS_PER_LONG;
  463 + result += BITS_PER_LONG;
497 464 }
498   - while (size >= 32) {
499   - if ((tmp = cpu_to_le32p(p++)) != ~0U)
500   - goto found_middle;
501   - result += 32;
502   - size -= 32;
  465 +
  466 + while (size & ~(BITS_PER_LONG - 1)) {
  467 + if (~(tmp = *(p++)))
  468 + goto found_middle_swap;
  469 + result += BITS_PER_LONG;
  470 + size -= BITS_PER_LONG;
503 471 }
504 472 if (!size)
505 473 return result;
506   - tmp = cpu_to_le32p(p);
  474 + tmp = ext2_swabp(p);
507 475 found_first:
508   - tmp |= ~0U << size;
  476 + tmp |= ~0UL << size;
  477 + if (tmp == ~0UL) /* Are any bits zero? */
  478 + return result + size; /* Nope. Skip ffz */
509 479 found_middle:
510 480 return result + ffz(tmp);
  481 +
  482 +found_middle_swap:
  483 + return result + ffz(ext2_swab(tmp));
511 484 }
  485 +
512 486  
513 487 /* Bitmap functions for the minix filesystem. */
514 488 #define minix_test_and_set_bit(nr,addr) ext2_set_bit(nr,addr)
include/asm-parisc/types.h
... ... @@ -33,8 +33,10 @@
33 33  
34 34 #ifdef __LP64__
35 35 #define BITS_PER_LONG 64
  36 +#define SHIFT_PER_LONG 6
36 37 #else
37 38 #define BITS_PER_LONG 32
  39 +#define SHIFT_PER_LONG 5
38 40 #endif
39 41  
40 42 #ifndef __ASSEMBLY__