Commit efb80e7e097d0888e59fbbe4ded2ac5a256f556d

Authored by Kyle McMartin
Committed by Kyle McMartin
1 parent 6f7d998e94

[PARISC] import necessary bits of libgcc.a

Currently we're hacking libs-y to include libgcc.a, but this has
unforeseen consequences since the userspace libgcc is linked with fpregs
enabled. The kernel needs to stop using fpregs in an uncontrolled manner
before lazy FPU state saving can be implemented.

Signed-off-by: Kyle McMartin <kyle@mcmartin.ca>

Showing 31 changed files with 4628 additions and 24 deletions

arch/parisc/Makefile
... ... @@ -69,7 +69,7 @@
69 69 kernel-$(CONFIG_HPUX) += hpux/
70 70  
71 71 core-y += $(addprefix arch/parisc/, $(kernel-y))
72   -libs-y += arch/parisc/lib/ `$(CC) -print-libgcc-file-name`
  72 +libs-y += arch/parisc/lib/
73 73  
74 74 drivers-$(CONFIG_OPROFILE) += arch/parisc/oprofile/
75 75  
arch/parisc/kernel/parisc_ksyms.c
... ... @@ -122,30 +122,8 @@
122 122 EXPORT_SYMBOL($$divI_14);
123 123 EXPORT_SYMBOL($$divI_15);
124 124  
125   -extern void __ashrdi3(void);
126   -extern void __ashldi3(void);
127   -extern void __lshrdi3(void);
128   -extern void __muldi3(void);
129   -
130   -EXPORT_SYMBOL(__ashrdi3);
131   -EXPORT_SYMBOL(__ashldi3);
132   -EXPORT_SYMBOL(__lshrdi3);
133   -EXPORT_SYMBOL(__muldi3);
134   -
135 125 asmlinkage void * __canonicalize_funcptr_for_compare(void *);
136 126 EXPORT_SYMBOL(__canonicalize_funcptr_for_compare);
137   -
138   -#ifdef CONFIG_64BIT
139   -extern void __divdi3(void);
140   -extern void __udivdi3(void);
141   -extern void __umoddi3(void);
142   -extern void __moddi3(void);
143   -
144   -EXPORT_SYMBOL(__divdi3);
145   -EXPORT_SYMBOL(__udivdi3);
146   -EXPORT_SYMBOL(__umoddi3);
147   -EXPORT_SYMBOL(__moddi3);
148   -#endif
149 127  
150 128 #ifndef CONFIG_64BIT
151 129 extern void $$dyncall(void);
arch/parisc/lib/Makefile
... ... @@ -4,5 +4,5 @@
4 4  
5 5 lib-y := lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o
6 6  
7   -obj-y := iomap.o
  7 +obj-y := libgcc/ milli/ iomap.o
arch/parisc/lib/libgcc/Makefile
  1 +obj-y := __ashldi3.o __ashrdi3.o __clzsi2.o __divdi3.o __divsi3.o \
  2 + __lshrdi3.o __moddi3.o __modsi3.o __udivdi3.o \
  3 + __udivmoddi4.o __udivmodsi4.o __udivsi3.o \
  4 + __umoddi3.o __umodsi3.o __muldi3.o __umulsidi3.o
arch/parisc/lib/libgcc/__ashldi3.c
  1 +#include "libgcc.h"
  2 +
  3 +u64 __ashldi3(u64 v, int cnt)
  4 +{
  5 + int c = cnt & 31;
  6 + u32 vl = (u32) v;
  7 + u32 vh = (u32) (v >> 32);
  8 +
  9 + if (cnt & 32) {
  10 + vh = (vl << c);
  11 + vl = 0;
  12 + } else {
  13 + vh = (vh << c) + (vl >> (32 - c));
  14 + vl = (vl << c);
  15 + }
  16 +
  17 + return ((u64) vh << 32) + vl;
  18 +}
  19 +EXPORT_SYMBOL(__ashldi3);
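
All three 64-bit shift helpers (__ashldi3 here, __ashrdi3 and __lshrdi3 below) follow the same pattern: split the value into 32-bit halves, handle counts of 32 or more by moving one half into the other, and otherwise combine a shift of each half. Note that the helpers assume a nonzero count: for cnt == 0 the sub-expression vl >> (32 - c) would shift a 32-bit value by 32, which C leaves undefined. A minimal host-side sketch for checking them against native shifts (illustrative only, not part of the commit):

    #include <assert.h>
    #include <stdint.h>

    typedef uint64_t u64;

    u64 __ashldi3(u64 v, int cnt);

    /* Compare the helper against the compiler's native 64-bit shift
     * for counts 1..63; cnt == 0 is excluded for the reason above. */
    static void check_shift_helpers(void)
    {
        u64 v = 0x0123456789abcdefULL;
        int cnt;

        for (cnt = 1; cnt < 64; cnt++)
            assert(__ashldi3(v, cnt) == v << cnt);
    }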
arch/parisc/lib/libgcc/__ashrdi3.c
  1 +#include "libgcc.h"
  2 +
  3 +u64 __ashrdi3(u64 v, int cnt)
  4 +{
  5 + int c = cnt & 31;
  6 + u32 vl = (u32) v;
  7 + u32 vh = (u32) (v >> 32);
  8 +
  9 + if (cnt & 32) {
  10 + vl = ((s32) vh >> c);
  11 + vh = (s32) vh >> 31;
  12 + } else {
  13 + vl = (vl >> c) + (vh << (32 - c));
  14 + vh = ((s32) vh >> c);
  15 + }
  16 +
  17 + return ((u64) vh << 32) + vl;
  18 +}
  19 +EXPORT_SYMBOL(__ashrdi3);
arch/parisc/lib/libgcc/__clzsi2.c
  1 +#include "libgcc.h"
  2 +
  3 +u32 __clzsi2(u32 v)
  4 +{
  5 + int p = 31;
  6 +
  7 + if (v & 0xffff0000) {
  8 + p -= 16;
  9 + v >>= 16;
  10 + }
  11 + if (v & 0xff00) {
  12 + p -= 8;
  13 + v >>= 8;
  14 + }
  15 + if (v & 0xf0) {
  16 + p -= 4;
  17 + v >>= 4;
  18 + }
  19 + if (v & 0xc) {
  20 + p -= 2;
  21 + v >>= 2;
  22 + }
  23 + if (v & 0x2) {
  24 + p -= 1;
  25 + v >>= 1;
  26 + }
  27 +
  28 + return p;
  29 +}
  30 +EXPORT_SYMBOL(__clzsi2);
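
__clzsi2 narrows down the highest set bit by binary search, halving the remaining width at each test (16, 8, 4, 2, 1 bits), so it finishes in five constant-time steps; as with the libgcc original, the result for v == 0 (it comes out as 31 here) should not be relied on. A naive reference to check it against, as an illustrative sketch:

    #include <assert.h>
    #include <stdint.h>

    typedef uint32_t u32;

    u32 __clzsi2(u32 v);

    /* Naive reference: shift left until the top bit is reached.
     * Like the helper above, only meaningful for v != 0. */
    static u32 clz_ref(u32 v)
    {
        u32 n = 0;

        while (!(v & 0x80000000u)) {
            v <<= 1;
            n++;
        }
        return n;
    }

    static void check_clzsi2(void)
    {
        u32 v;

        for (v = 1; v != 0; v <<= 1)
            assert(__clzsi2(v) == clz_ref(v));
    }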
arch/parisc/lib/libgcc/__divdi3.c
  1 +#include "libgcc.h"
  2 +
  3 +s64 __divdi3(s64 num, s64 den)
  4 +{
  5 + int minus = 0;
  6 + s64 v;
  7 +
  8 + if (num < 0) {
  9 + num = -num;
  10 + minus = 1;
  11 + }
  12 + if (den < 0) {
  13 + den = -den;
  14 + minus ^= 1;
  15 + }
  16 +
  17 + v = __udivmoddi4(num, den, NULL);
  18 + if (minus)
  19 + v = -v;
  20 +
  21 + return v;
  22 +}
  23 +EXPORT_SYMBOL(__divdi3);
arch/parisc/lib/libgcc/__divsi3.c
  1 +#include "libgcc.h"
  2 +
  3 +s32 __divsi3(s32 num, s32 den)
  4 +{
  5 + int minus = 0;
  6 + s32 v;
  7 +
  8 + if (num < 0) {
  9 + num = -num;
  10 + minus = 1;
  11 + }
  12 + if (den < 0) {
  13 + den = -den;
  14 + minus ^= 1;
  15 + }
  16 +
  17 + v = __udivmodsi4(num, den, NULL);
  18 + if (minus)
  19 + v = -v;
  20 +
  21 + return v;
  22 +}
  23 +EXPORT_SYMBOL(__divsi3);
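
The four signed helpers (__divdi3 and __divsi3 above, __moddi3 and __modsi3 below) all reduce to the unsigned divide-with-remainder routines: take absolute values, divide, and negate the result when exactly one operand was negative, which yields truncation toward zero. One caveat: the remainder helpers also negate on the XOR of the operand signs, whereas C's % operator takes the sign of the dividend alone, so the two conventions agree except when only the divisor is negative. An illustrative check of the sign handling (hypothetical, not part of the commit):

    #include <assert.h>
    #include <stdint.h>

    typedef int32_t s32;

    s32 __divsi3(s32 num, s32 den);
    s32 __modsi3(s32 num, s32 den);

    static void check_signed_div(void)
    {
        assert(__divsi3(-7, 2) == -3);  /* not -4: truncates toward 0 */
        assert(__modsi3(-7, 2) == -1);  /* remainder follows num here */
        /* the identity (a/b)*b + a%b == a: (-3)*2 + (-1) == -7 */
        assert(__divsi3(-7, 2) * 2 + __modsi3(-7, 2) == -7);
    }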
arch/parisc/lib/libgcc/__lshrdi3.c
  1 +#include "libgcc.h"
  2 +
  3 +u64 __lshrdi3(u64 v, int cnt)
  4 +{
  5 + int c = cnt & 31;
  6 + u32 vl = (u32) v;
  7 + u32 vh = (u32) (v >> 32);
  8 +
  9 + if (cnt & 32) {
  10 + vl = (vh >> c);
  11 + vh = 0;
  12 + } else {
  13 + vl = (vl >> c) + (vh << (32 - c));
  14 + vh = (vh >> c);
  15 + }
  16 +
  17 + return ((u64) vh << 32) + vl;
  18 +}
  19 +EXPORT_SYMBOL(__lshrdi3);
arch/parisc/lib/libgcc/__moddi3.c
  1 +#include "libgcc.h"
  2 +
  3 +s64 __moddi3(s64 num, s64 den)
  4 +{
  5 + int minus = 0;
  6 + s64 v;
  7 +
  8 + if (num < 0) {
  9 + num = -num;
  10 + minus = 1;
  11 + }
  12 + if (den < 0) {
  13 + den = -den;
  14 + minus ^= 1;
  15 + }
  16 +
  17 + (void)__udivmoddi4(num, den, (u64 *) & v);
  18 + if (minus)
  19 + v = -v;
  20 +
  21 + return v;
  22 +}
  23 +EXPORT_SYMBOL(__moddi3);
arch/parisc/lib/libgcc/__modsi3.c
  1 +#include "libgcc.h"
  2 +
  3 +s32 __modsi3(s32 num, s32 den)
  4 +{
  5 + int minus = 0;
  6 + s32 v;
  7 +
  8 + if (num < 0) {
  9 + num = -num;
  10 + minus = 1;
  11 + }
  12 + if (den < 0) {
  13 + den = -den;
  14 + minus ^= 1;
  15 + }
  16 +
  17 + (void)__udivmodsi4(num, den, (u32 *) & v);
  18 + if (minus)
  19 + v = -v;
  20 +
  21 + return v;
  22 +}
  23 +EXPORT_SYMBOL(__modsi3);
arch/parisc/lib/libgcc/__muldi3.c
  1 +#include "libgcc.h"
  2 +
  3 +union DWunion {
  4 + struct {
  5 + s32 high;
  6 + s32 low;
  7 + } s;
  8 + s64 ll;
  9 +};
  10 +
  11 +s64 __muldi3(s64 u, s64 v)
  12 +{
  13 + const union DWunion uu = { .ll = u };
  14 + const union DWunion vv = { .ll = v };
  15 + union DWunion w = { .ll = __umulsidi3(uu.s.low, vv.s.low) };
  16 +
  17 + w.s.high += ((u32)uu.s.low * (u32)vv.s.high
  18 + + (u32)uu.s.high * (u32)vv.s.low);
  19 +
  20 + return w.ll;
  21 +}
  22 +EXPORT_SYMBOL(__muldi3);
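
__muldi3 needs only the low 64 bits of the 128-bit product, so it takes the full 64-bit product of the low words from __umulsidi3 and adds the two cross products into the high word; the high-times-high term lies entirely above bit 63 and is dropped. The DWunion layout (high word first) matches PA-RISC's big-endian word order. The identity, as an illustrative C reference (hypothetical name):

    #include <stdint.h>

    typedef uint32_t u32;
    typedef uint64_t u64;

    /* With u = uh*2^32 + ul and v = vh*2^32 + vl:
     *
     *   u*v mod 2^64 = ul*vl + ((ul*vh + uh*vl) mod 2^32) * 2^32
     *
     * The uh*vh term lies entirely above bit 63 and disappears. */
    static u64 muldi3_ref(u64 u, u64 v)
    {
        u32 ul = (u32)u, uh = (u32)(u >> 32);
        u32 vl = (u32)v, vh = (u32)(v >> 32);

        return (u64)ul * vl + ((u64)(u32)(ul * vh + uh * vl) << 32);
    }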
arch/parisc/lib/libgcc/__udivdi3.c
  1 +#include "libgcc.h"
  2 +
  3 +u64 __udivdi3(u64 num, u64 den)
  4 +{
  5 + return __udivmoddi4(num, den, NULL);
  6 +}
  7 +EXPORT_SYMBOL(__udivdi3);
arch/parisc/lib/libgcc/__udivmoddi4.c
  1 +#include "libgcc.h"
  2 +
  3 +u64 __udivmoddi4(u64 num, u64 den, u64 * rem_p)
  4 +{
  5 + u64 quot = 0, qbit = 1;
  6 +
  7 + if (den == 0) {
  8 + BUG();
  9 + }
  10 +
  11 + /* Left-justify denominator and count shift */
  12 + while ((s64) den >= 0) {
  13 + den <<= 1;
  14 + qbit <<= 1;
  15 + }
  16 +
  17 + while (qbit) {
  18 + if (den <= num) {
  19 + num -= den;
  20 + quot += qbit;
  21 + }
  22 + den >>= 1;
  23 + qbit >>= 1;
  24 + }
  25 +
  26 + if (rem_p)
  27 + *rem_p = num;
  28 +
  29 + return quot;
  30 +}
  31 +EXPORT_SYMBOL(__udivmoddi4);
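
__udivmoddi4 is textbook restoring shift-and-subtract division: the divisor is left-justified until its top bit is set while qbit records the matching quotient bit, then both are walked back down, subtracting the divisor from the running remainder wherever it fits; division by zero hits BUG() instead of a hardware trap. An illustrative trace and check (not part of the commit):

    #include <assert.h>
    #include <stdint.h>

    typedef uint64_t u64;

    u64 __udivmoddi4(u64 num, u64 den, u64 *rem_p);

    /* Trace of __udivmoddi4(100, 7, &r): den = 7 is shifted up
     * 61 places until bit 63 is set, with qbit = 1 << 61; walking
     * back down, den fits the remainder exactly at qbit = 8, 4
     * and 2, so quot = 14 and the remainder ends at 2. */
    static void check_udivmoddi4(void)
    {
        u64 r;

        assert(__udivmoddi4(100, 7, &r) == 14);
        assert(r == 2);  /* 14 * 7 + 2 == 100 */
    }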
arch/parisc/lib/libgcc/__udivmodsi4.c
  1 +#include "libgcc.h"
  2 +
  3 +u32 __udivmodsi4(u32 num, u32 den, u32 * rem_p)
  4 +{
  5 + u32 quot = 0, qbit = 1;
  6 +
  7 + if (den == 0) {
  8 + BUG();
  9 + }
  10 +
  11 + /* Left-justify denominator and count shift */
  12 + while ((s32) den >= 0) {
  13 + den <<= 1;
  14 + qbit <<= 1;
  15 + }
  16 +
  17 + while (qbit) {
  18 + if (den <= num) {
  19 + num -= den;
  20 + quot += qbit;
  21 + }
  22 + den >>= 1;
  23 + qbit >>= 1;
  24 + }
  25 +
  26 + if (rem_p)
  27 + *rem_p = num;
  28 +
  29 + return quot;
  30 +}
  31 +EXPORT_SYMBOL(__udivmodsi4);
arch/parisc/lib/libgcc/__udivsi3.c
  1 +#include "libgcc.h"
  2 +
  3 +u32 __udivsi3(u32 num, u32 den)
  4 +{
  5 + return __udivmodsi4(num, den, NULL);
  6 +}
  7 +EXPORT_SYMBOL(__udivsi3);
arch/parisc/lib/libgcc/__umoddi3.c
  1 +#include "libgcc.h"
  2 +
  3 +u64 __umoddi3(u64 num, u64 den)
  4 +{
  5 + u64 v;
  6 +
  7 + (void)__udivmoddi4(num, den, &v);
  8 + return v;
  9 +}
  10 +EXPORT_SYMBOL(__umoddi3);
arch/parisc/lib/libgcc/__umodsi3.c
  1 +#include "libgcc.h"
  2 +
  3 +u32 __umodsi3(u32 num, u32 den)
  4 +{
  5 + u32 v;
  6 +
  7 + (void)__udivmodsi4(num, den, &v);
  8 + return v;
  9 +}
  10 +EXPORT_SYMBOL(__umodsi3);
arch/parisc/lib/libgcc/__umulsidi3.c
  1 +#include "libgcc.h"
  2 +
  3 +#define __ll_B ((u32) 1 << (32 / 2))
  4 +#define __ll_lowpart(t) ((u32) (t) & (__ll_B - 1))
  5 +#define __ll_highpart(t) ((u32) (t) >> 16)
  6 +
  7 +#define umul_ppmm(w1, w0, u, v) \
  8 + do { \
  9 + u32 __x0, __x1, __x2, __x3; \
  10 + u16 __ul, __vl, __uh, __vh; \
  11 + \
  12 + __ul = __ll_lowpart (u); \
  13 + __uh = __ll_highpart (u); \
  14 + __vl = __ll_lowpart (v); \
  15 + __vh = __ll_highpart (v); \
  16 + \
  17 + __x0 = (u32) __ul * __vl; \
  18 + __x1 = (u32) __ul * __vh; \
  19 + __x2 = (u32) __uh * __vl; \
  20 + __x3 = (u32) __uh * __vh; \
  21 + \
  22 + __x1 += __ll_highpart (__x0);/* this can't give carry */ \
  23 + __x1 += __x2; /* but this indeed can */ \
  24 + if (__x1 < __x2) /* did we get it? */ \
  25 + __x3 += __ll_B; /* yes, add it in the proper pos. */ \
  26 + \
  27 + (w1) = __x3 + __ll_highpart (__x1); \
  28 + (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \
  29 + } while (0)
  30 +
  31 +union DWunion {
  32 + struct {
  33 + s32 high;
  34 + s32 low;
  35 + } s;
  36 + s64 ll;
  37 +};
  38 +
  39 +u64 __umulsidi3(u32 u, u32 v)
  40 +{
  41 + union DWunion __w;
  42 +
  43 + umul_ppmm(__w.s.high, __w.s.low, u, v);
  44 +
  45 + return __w.ll;
  46 +}
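
umul_ppmm builds the 32x32->64 product from four 16x16->32 partial products in the style of GMP's longlong.h. The one subtlety is the carry out of adding __x2 into __x1, detected by the unsigned-wraparound comparison; since __x1 and __x2 are both scaled by 2^16, that carry is worth 2^48, which is __ll_B (2^16) at the 2^32 scale of __x3. An illustrative check of the identity (not part of the commit):

    #include <assert.h>
    #include <stdint.h>

    typedef uint32_t u32;
    typedef uint64_t u64;

    u64 __umulsidi3(u32 u, u32 v);

    /* With u = uh*2^16 + ul and v = vh*2^16 + vl:
     *   u*v = uh*vh*2^32 + (uh*vl + ul*vh)*2^16 + ul*vl */
    static void check_umulsidi3(void)
    {
        assert(__umulsidi3(0xffffffffu, 0xffffffffu) ==
               0xfffffffe00000001ULL);
        assert(__umulsidi3(0x12345678, 0x9abcdef0) ==
               (u64)0x12345678 * 0x9abcdef0);
    }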
arch/parisc/lib/libgcc/libgcc.h
  1 +#ifndef _PA_LIBGCC_H_
  2 +#define _PA_LIBGCC_H_
  3 +
  4 +#include <linux/types.h>
  5 +#include <linux/module.h>
  6 +
  7 +/* Cribbed from klibc/libgcc/ */
  8 +u64 __ashldi3(u64 v, int cnt);
  9 +u64 __ashrdi3(u64 v, int cnt);
  10 +
  11 +u32 __clzsi2(u32 v);
  12 +
  13 +s64 __divdi3(s64 num, s64 den);
  14 +s32 __divsi3(s32 num, s32 den);
  15 +
  16 +u64 __lshrdi3(u64 v, int cnt);
  17 +
  18 +s64 __moddi3(s64 num, s64 den);
  19 +s32 __modsi3(s32 num, s32 den);
  20 +
  21 +u64 __udivdi3(u64 num, u64 den);
  22 +u32 __udivsi3(u32 num, u32 den);
  23 +
  24 +u64 __udivmoddi4(u64 num, u64 den, u64 * rem_p);
  25 +u32 __udivmodsi4(u32 num, u32 den, u32 * rem_p);
  26 +
  27 +u64 __umulsidi3(u32 u, u32 v);
  28 +
  29 +u64 __umoddi3(u64 num, u64 den);
  30 +u32 __umodsi3(u32 num, u32 den);
  31 +
  32 +#endif /*_PA_LIBGCC_H_*/
arch/parisc/lib/milli/Makefile
  1 +obj-y := dyncall.o divI.o divU.o remI.o remU.o div_const.o mulI.o
arch/parisc/lib/milli/divI.S
  1 +/* 32 and 64-bit millicode, original author Hewlett-Packard
  2 + adapted for gcc by Paul Bame <bame@debian.org>
  3 + and Alan Modra <alan@linuxcare.com.au>.
  4 +
  5 + Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
  6 +
  7 + This file is part of GCC and is released under the terms
  8 + of the GNU General Public License as published by the Free Software
  9 + Foundation; either version 2, or (at your option) any later version.
  10 + See the file COPYING in the top-level GCC source directory for a copy
  11 + of the license. */
  12 +
  13 +#include "milli.h"
  14 +
  15 +#ifdef L_divI
  16 +/* ROUTINES: $$divI, $$divoI
  17 +
  18 + Single precision divide for signed binary integers.
  19 +
  20 + The quotient is truncated towards zero.
  21 + The sign of the quotient is the XOR of the signs of the dividend and
  22 + divisor.
  23 + Divide by zero is trapped.
  24 + Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI.
  25 +
  26 + INPUT REGISTERS:
  27 + . arg0 == dividend
  28 + . arg1 == divisor
  29 + . mrp == return pc
  30 + . sr0 == return space when called externally
  31 +
  32 + OUTPUT REGISTERS:
  33 + . arg0 = undefined
  34 + . arg1 = undefined
  35 + . ret1 = quotient
  36 +
  37 + OTHER REGISTERS AFFECTED:
  38 + . r1 = undefined
  39 +
  40 + SIDE EFFECTS:
  41 + . Causes a trap under the following conditions:
  42 + . divisor is zero (traps with ADDIT,= 0,25,0)
  43 + . dividend==-2**31 and divisor==-1 and routine is $$divoI
  44 + . (traps with ADDO 26,25,0)
  45 + . Changes memory at the following places:
  46 + . NONE
  47 +
  48 + PERMISSIBLE CONTEXT:
  49 + . Unwindable.
  50 + . Suitable for internal or external millicode.
  51 + . Assumes the special millicode register conventions.
  52 +
  53 + DISCUSSION:
  54 + . Branches to other millicode routines using BE
  55 + . $$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15
  56 + .
  57 + . For selected divisors, calls a divide by constant routine written by
  58 + . Karl Pettis. Eligible divisors are 1..15 excluding 11 and 13.
  59 + .
  60 + . The only overflow case is -2**31 divided by -1.
  61 + . Both routines return -2**31 but only $$divoI traps. */
  62 +
  63 +RDEFINE(temp,r1)
  64 +RDEFINE(retreg,ret1) /* r29 */
  65 +RDEFINE(temp1,arg0)
  66 + SUBSPA_MILLI_DIV
  67 + ATTR_MILLI
  68 + .import $$divI_2,millicode
  69 + .import $$divI_3,millicode
  70 + .import $$divI_4,millicode
  71 + .import $$divI_5,millicode
  72 + .import $$divI_6,millicode
  73 + .import $$divI_7,millicode
  74 + .import $$divI_8,millicode
  75 + .import $$divI_9,millicode
  76 + .import $$divI_10,millicode
  77 + .import $$divI_12,millicode
  78 + .import $$divI_14,millicode
  79 + .import $$divI_15,millicode
  80 + .export $$divI,millicode
  81 + .export $$divoI,millicode
  82 + .proc
  83 + .callinfo millicode
  84 + .entry
  85 +GSYM($$divoI)
  86 + comib,=,n -1,arg1,LREF(negative1) /* when divisor == -1 */
  87 +GSYM($$divI)
  88 + ldo -1(arg1),temp /* is there at most one bit set ? */
  89 + and,<> arg1,temp,r0 /* if not, don't use power of 2 divide */
  90 + addi,> 0,arg1,r0 /* if divisor > 0, use power of 2 divide */
  91 + b,n LREF(neg_denom)
  92 +LSYM(pow2)
  93 + addi,>= 0,arg0,retreg /* if numerator is negative, add the */
  94 + add arg0,temp,retreg /* (denominator -1) to correct for shifts */
  95 + extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */
  96 + extrs retreg,15,16,retreg /* retreg = retreg >> 16 */
  97 + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */
  98 + ldi 0xcc,temp1 /* setup 0xcc in temp1 */
  99 + extru,= arg1,23,8,temp /* test denominator with 0xff00 */
  100 + extrs retreg,23,24,retreg /* retreg = retreg >> 8 */
  101 + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */
  102 + ldi 0xaa,temp /* setup 0xaa in temp */
  103 + extru,= arg1,27,4,r0 /* test denominator with 0xf0 */
  104 + extrs retreg,27,28,retreg /* retreg = retreg >> 4 */
  105 + and,= arg1,temp1,r0 /* test denominator with 0xcc */
  106 + extrs retreg,29,30,retreg /* retreg = retreg >> 2 */
  107 + and,= arg1,temp,r0 /* test denominator with 0xaa */
  108 + extrs retreg,30,31,retreg /* retreg = retreg >> 1 */
  109 + MILLIRETN
  110 +LSYM(neg_denom)
  111 + addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power of 2 */
  112 + b,n LREF(regular_seq)
  113 + sub r0,arg1,temp /* make denominator positive */
  114 + comb,=,n arg1,temp,LREF(regular_seq) /* test against 0x80000000 and 0 */
  115 + ldo -1(temp),retreg /* is there at most one bit set ? */
  116 + and,= temp,retreg,r0 /* if so, the denominator is power of 2 */
  117 + b,n LREF(regular_seq)
  118 + sub r0,arg0,retreg /* negate numerator */
  119 + comb,=,n arg0,retreg,LREF(regular_seq) /* test against 0x80000000 */
  120 + copy retreg,arg0 /* set up arg0, arg1 and temp */
  121 + copy temp,arg1 /* before branching to pow2 */
  122 + b LREF(pow2)
  123 + ldo -1(arg1),temp
  124 +LSYM(regular_seq)
  125 + comib,>>=,n 15,arg1,LREF(small_divisor)
  126 + add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
  127 +LSYM(normal)
  128 + subi 0,retreg,retreg /* make it positive */
  129 + sub 0,arg1,temp /* clear carry, */
  130 + /* negate the divisor */
  131 + ds 0,temp,0 /* set V-bit to the comple- */
  132 + /* ment of the divisor sign */
  133 + add retreg,retreg,retreg /* shift msb bit into carry */
  134 + ds r0,arg1,temp /* 1st divide step, if no carry */
  135 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  136 + ds temp,arg1,temp /* 2nd divide step */
  137 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  138 + ds temp,arg1,temp /* 3rd divide step */
  139 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  140 + ds temp,arg1,temp /* 4th divide step */
  141 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  142 + ds temp,arg1,temp /* 5th divide step */
  143 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  144 + ds temp,arg1,temp /* 6th divide step */
  145 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  146 + ds temp,arg1,temp /* 7th divide step */
  147 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  148 + ds temp,arg1,temp /* 8th divide step */
  149 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  150 + ds temp,arg1,temp /* 9th divide step */
  151 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  152 + ds temp,arg1,temp /* 10th divide step */
  153 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  154 + ds temp,arg1,temp /* 11th divide step */
  155 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  156 + ds temp,arg1,temp /* 12th divide step */
  157 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  158 + ds temp,arg1,temp /* 13th divide step */
  159 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  160 + ds temp,arg1,temp /* 14th divide step */
  161 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  162 + ds temp,arg1,temp /* 15th divide step */
  163 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  164 + ds temp,arg1,temp /* 16th divide step */
  165 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  166 + ds temp,arg1,temp /* 17th divide step */
  167 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  168 + ds temp,arg1,temp /* 18th divide step */
  169 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  170 + ds temp,arg1,temp /* 19th divide step */
  171 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  172 + ds temp,arg1,temp /* 20th divide step */
  173 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  174 + ds temp,arg1,temp /* 21st divide step */
  175 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  176 + ds temp,arg1,temp /* 22nd divide step */
  177 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  178 + ds temp,arg1,temp /* 23rd divide step */
  179 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  180 + ds temp,arg1,temp /* 24th divide step */
  181 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  182 + ds temp,arg1,temp /* 25th divide step */
  183 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  184 + ds temp,arg1,temp /* 26th divide step */
  185 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  186 + ds temp,arg1,temp /* 27th divide step */
  187 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  188 + ds temp,arg1,temp /* 28th divide step */
  189 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  190 + ds temp,arg1,temp /* 29th divide step */
  191 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  192 + ds temp,arg1,temp /* 30th divide step */
  193 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  194 + ds temp,arg1,temp /* 31st divide step */
  195 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  196 + ds temp,arg1,temp /* 32nd divide step, */
  197 + addc retreg,retreg,retreg /* shift last retreg bit into retreg */
  198 + xor,>= arg0,arg1,0 /* get correct sign of quotient */
  199 + sub 0,retreg,retreg /* based on operand signs */
  200 + MILLIRETN
  201 + nop
  202 +
  203 +LSYM(small_divisor)
  204 +
  205 +#if defined(CONFIG_64BIT)
  206 +/* Clear the upper 32 bits of the arg1 register. We are working with */
  207 +/* small divisors (and 32-bit integers). We must not be misled */
  208 +/* by "1" bits left in the upper 32 bits. */
  209 + depd %r0,31,32,%r25
  210 +#endif
  211 + blr,n arg1,r0
  212 + nop
  213 +/* table for divisor == 0,1, ... ,15 */
  214 + addit,= 0,arg1,r0 /* trap if divisor == 0 */
  215 + nop
  216 + MILLIRET /* divisor == 1 */
  217 + copy arg0,retreg
  218 + MILLI_BEN($$divI_2) /* divisor == 2 */
  219 + nop
  220 + MILLI_BEN($$divI_3) /* divisor == 3 */
  221 + nop
  222 + MILLI_BEN($$divI_4) /* divisor == 4 */
  223 + nop
  224 + MILLI_BEN($$divI_5) /* divisor == 5 */
  225 + nop
  226 + MILLI_BEN($$divI_6) /* divisor == 6 */
  227 + nop
  228 + MILLI_BEN($$divI_7) /* divisor == 7 */
  229 + nop
  230 + MILLI_BEN($$divI_8) /* divisor == 8 */
  231 + nop
  232 + MILLI_BEN($$divI_9) /* divisor == 9 */
  233 + nop
  234 + MILLI_BEN($$divI_10) /* divisor == 10 */
  235 + nop
  236 + b LREF(normal) /* divisor == 11 */
  237 + add,>= 0,arg0,retreg
  238 + MILLI_BEN($$divI_12) /* divisor == 12 */
  239 + nop
  240 + b LREF(normal) /* divisor == 13 */
  241 + add,>= 0,arg0,retreg
  242 + MILLI_BEN($$divI_14) /* divisor == 14 */
  243 + nop
  244 + MILLI_BEN($$divI_15) /* divisor == 15 */
  245 + nop
  246 +
  247 +LSYM(negative1)
  248 + sub 0,arg0,retreg /* result is negation of dividend */
  249 + MILLIRET
  250 + addo arg0,arg1,r0 /* trap iff dividend==0x80000000 && divisor==-1 */
  251 + .exit
  252 + .procend
  253 + .end
  254 +#endif
arch/parisc/lib/milli/divU.S
  1 +/* 32 and 64-bit millicode, original author Hewlett-Packard
  2 + adapted for gcc by Paul Bame <bame@debian.org>
  3 + and Alan Modra <alan@linuxcare.com.au>.
  4 +
  5 + Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
  6 +
  7 + This file is part of GCC and is released under the terms
  8 + of the GNU General Public License as published by the Free Software
  9 + Foundation; either version 2, or (at your option) any later version.
  10 + See the file COPYING in the top-level GCC source directory for a copy
  11 + of the license. */
  12 +
  13 +#include "milli.h"
  14 +
  15 +#ifdef L_divU
  16 +/* ROUTINE: $$divU
  17 + .
  18 + . Single precision divide for unsigned integers.
  19 + .
  20 + . Quotient is truncated towards zero.
  21 + . Traps on divide by zero.
  22 +
  23 + INPUT REGISTERS:
  24 + . arg0 == dividend
  25 + . arg1 == divisor
  26 + . mrp == return pc
  27 + . sr0 == return space when called externally
  28 +
  29 + OUTPUT REGISTERS:
  30 + . arg0 = undefined
  31 + . arg1 = undefined
  32 + . ret1 = quotient
  33 +
  34 + OTHER REGISTERS AFFECTED:
  35 + . r1 = undefined
  36 +
  37 + SIDE EFFECTS:
  38 + . Causes a trap under the following conditions:
  39 + . divisor is zero
  40 + . Changes memory at the following places:
  41 + . NONE
  42 +
  43 + PERMISSIBLE CONTEXT:
  44 + . Unwindable.
  45 + . Does not create a stack frame.
  46 + . Suitable for internal or external millicode.
  47 + . Assumes the special millicode register conventions.
  48 +
  49 + DISCUSSION:
  50 + . Branches to other millicode routines using BE:
  51 + . $$divU_# for 3,5,6,7,9,10,12,14,15
  52 + .
  53 + . For selected small divisors calls the special divide by constant
  54 + . routines written by Karl Pettis. These are: 3,5,6,7,9,10,12,14,15. */
  55 +
  56 +RDEFINE(temp,r1)
  57 +RDEFINE(retreg,ret1) /* r29 */
  58 +RDEFINE(temp1,arg0)
  59 + SUBSPA_MILLI_DIV
  60 + ATTR_MILLI
  61 + .export $$divU,millicode
  62 + .import $$divU_3,millicode
  63 + .import $$divU_5,millicode
  64 + .import $$divU_6,millicode
  65 + .import $$divU_7,millicode
  66 + .import $$divU_9,millicode
  67 + .import $$divU_10,millicode
  68 + .import $$divU_12,millicode
  69 + .import $$divU_14,millicode
  70 + .import $$divU_15,millicode
  71 + .proc
  72 + .callinfo millicode
  73 + .entry
  74 +GSYM($$divU)
  75 +/* The subtract is not nullified since it does no harm and can be used
  76 + by the two cases that branch back to "normal". */
  77 + ldo -1(arg1),temp /* is there at most one bit set ? */
  78 + and,= arg1,temp,r0 /* if so, denominator is power of 2 */
  79 + b LREF(regular_seq)
  80 + addit,= 0,arg1,0 /* trap for zero dvr */
  81 + copy arg0,retreg
  82 + extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */
  83 + extru retreg,15,16,retreg /* retreg = retreg >> 16 */
  84 + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */
  85 + ldi 0xcc,temp1 /* setup 0xcc in temp1 */
  86 + extru,= arg1,23,8,temp /* test denominator with 0xff00 */
  87 + extru retreg,23,24,retreg /* retreg = retreg >> 8 */
  88 + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */
  89 + ldi 0xaa,temp /* setup 0xaa in temp */
  90 + extru,= arg1,27,4,r0 /* test denominator with 0xf0 */
  91 + extru retreg,27,28,retreg /* retreg = retreg >> 4 */
  92 + and,= arg1,temp1,r0 /* test denominator with 0xcc */
  93 + extru retreg,29,30,retreg /* retreg = retreg >> 2 */
  94 + and,= arg1,temp,r0 /* test denominator with 0xaa */
  95 + extru retreg,30,31,retreg /* retreg = retreg >> 1 */
  96 + MILLIRETN
  97 + nop
  98 +LSYM(regular_seq)
  99 + comib,>= 15,arg1,LREF(special_divisor)
  100 + subi 0,arg1,temp /* clear carry, negate the divisor */
  101 + ds r0,temp,r0 /* set V-bit to 1 */
  102 +LSYM(normal)
  103 + add arg0,arg0,retreg /* shift msb bit into carry */
  104 + ds r0,arg1,temp /* 1st divide step, if no carry */
  105 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  106 + ds temp,arg1,temp /* 2nd divide step */
  107 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  108 + ds temp,arg1,temp /* 3rd divide step */
  109 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  110 + ds temp,arg1,temp /* 4th divide step */
  111 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  112 + ds temp,arg1,temp /* 5th divide step */
  113 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  114 + ds temp,arg1,temp /* 6th divide step */
  115 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  116 + ds temp,arg1,temp /* 7th divide step */
  117 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  118 + ds temp,arg1,temp /* 8th divide step */
  119 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  120 + ds temp,arg1,temp /* 9th divide step */
  121 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  122 + ds temp,arg1,temp /* 10th divide step */
  123 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  124 + ds temp,arg1,temp /* 11th divide step */
  125 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  126 + ds temp,arg1,temp /* 12th divide step */
  127 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  128 + ds temp,arg1,temp /* 13th divide step */
  129 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  130 + ds temp,arg1,temp /* 14th divide step */
  131 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  132 + ds temp,arg1,temp /* 15th divide step */
  133 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  134 + ds temp,arg1,temp /* 16th divide step */
  135 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  136 + ds temp,arg1,temp /* 17th divide step */
  137 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  138 + ds temp,arg1,temp /* 18th divide step */
  139 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  140 + ds temp,arg1,temp /* 19th divide step */
  141 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  142 + ds temp,arg1,temp /* 20th divide step */
  143 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  144 + ds temp,arg1,temp /* 21st divide step */
  145 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  146 + ds temp,arg1,temp /* 22nd divide step */
  147 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  148 + ds temp,arg1,temp /* 23rd divide step */
  149 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  150 + ds temp,arg1,temp /* 24th divide step */
  151 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  152 + ds temp,arg1,temp /* 25th divide step */
  153 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  154 + ds temp,arg1,temp /* 26th divide step */
  155 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  156 + ds temp,arg1,temp /* 27th divide step */
  157 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  158 + ds temp,arg1,temp /* 28th divide step */
  159 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  160 + ds temp,arg1,temp /* 29th divide step */
  161 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  162 + ds temp,arg1,temp /* 30th divide step */
  163 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  164 + ds temp,arg1,temp /* 31st divide step */
  165 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  166 + ds temp,arg1,temp /* 32nd divide step, */
  167 + MILLIRET
  168 + addc retreg,retreg,retreg /* shift last retreg bit into retreg */
  169 +
  170 +/* Handle the cases where divisor is a small constant or has high bit on. */
  171 +LSYM(special_divisor)
  172 +/* blr arg1,r0 */
  173 +/* comib,>,n 0,arg1,LREF(big_divisor) ; nullify previous instruction */
  174 +
  175 +/* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from
  176 + generating such a blr, comib sequence. A problem in nullification. So I
  177 + rewrote this code. */
  178 +
  179 +#if defined(CONFIG_64BIT)
  180 +/* Clear the upper 32 bits of the arg1 register. We are working with
  181 + small divisors (and 32-bit unsigned integers). We must not be misled
  182 + by "1" bits left in the upper 32 bits. */
  183 + depd %r0,31,32,%r25
  184 +#endif
  185 + comib,> 0,arg1,LREF(big_divisor)
  186 + nop
  187 + blr arg1,r0
  188 + nop
  189 +
  190 +LSYM(zero_divisor) /* this label is here to provide external visibility */
  191 + addit,= 0,arg1,0 /* trap for zero dvr */
  192 + nop
  193 + MILLIRET /* divisor == 1 */
  194 + copy arg0,retreg
  195 + MILLIRET /* divisor == 2 */
  196 + extru arg0,30,31,retreg
  197 + MILLI_BEN($$divU_3) /* divisor == 3 */
  198 + nop
  199 + MILLIRET /* divisor == 4 */
  200 + extru arg0,29,30,retreg
  201 + MILLI_BEN($$divU_5) /* divisor == 5 */
  202 + nop
  203 + MILLI_BEN($$divU_6) /* divisor == 6 */
  204 + nop
  205 + MILLI_BEN($$divU_7) /* divisor == 7 */
  206 + nop
  207 + MILLIRET /* divisor == 8 */
  208 + extru arg0,28,29,retreg
  209 + MILLI_BEN($$divU_9) /* divisor == 9 */
  210 + nop
  211 + MILLI_BEN($$divU_10) /* divisor == 10 */
  212 + nop
  213 + b LREF(normal) /* divisor == 11 */
  214 + ds r0,temp,r0 /* set V-bit to 1 */
  215 + MILLI_BEN($$divU_12) /* divisor == 12 */
  216 + nop
  217 + b LREF(normal) /* divisor == 13 */
  218 + ds r0,temp,r0 /* set V-bit to 1 */
  219 + MILLI_BEN($$divU_14) /* divisor == 14 */
  220 + nop
  221 + MILLI_BEN($$divU_15) /* divisor == 15 */
  222 + nop
  223 +
  224 +/* Handle the case where the high bit is on in the divisor.
  225 + Compute: if (dividend >= divisor) quotient = 1; else quotient = 0;
  226 + Note: dividend >= divisor iff dividend-divisor does not borrow
  227 + and not borrow iff carry. */
  228 +LSYM(big_divisor)
  229 + sub arg0,arg1,r0
  230 + MILLIRET
  231 + addc r0,r0,retreg
  232 + .exit
  233 + .procend
  234 + .end
  235 +#endif
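
The big_divisor exit is worth spelling out: once the divisor's top bit is set, the quotient can only be 0 or 1, and dividend >= divisor exactly when the subtraction does not borrow, i.e. when it produces a carry, so sub followed by addc r0,r0,retreg materializes the quotient straight from the carry bit. The C equivalent, as an illustrative sketch:

    #include <stdint.h>

    typedef uint32_t u32;

    /* When den has its top bit set, 2*den no longer fits in 32
     * bits, so the quotient is 0 or 1 and no divide steps are
     * needed: num >= den is exactly the carry out of num - den. */
    static u32 divU_big(u32 num, u32 den)
    {
        return num >= den;
    }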
arch/parisc/lib/milli/div_const.S
  1 +/* 32 and 64-bit millicode, original author Hewlett-Packard
  2 + adapted for gcc by Paul Bame <bame@debian.org>
  3 + and Alan Modra <alan@linuxcare.com.au>.
  4 +
  5 + Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
  6 +
  7 + This file is part of GCC and is released under the terms
  8 + of the GNU General Public License as published by the Free Software
  9 + Foundation; either version 2, or (at your option) any later version.
  10 + See the file COPYING in the top-level GCC source directory for a copy
  11 + of the license. */
  12 +
  13 +#include "milli.h"
  14 +
  15 +#ifdef L_div_const
  16 +/* ROUTINE: $$divI_2
  17 + . $$divI_3 $$divU_3
  18 + . $$divI_4
  19 + . $$divI_5 $$divU_5
  20 + . $$divI_6 $$divU_6
  21 + . $$divI_7 $$divU_7
  22 + . $$divI_8
  23 + . $$divI_9 $$divU_9
  24 + . $$divI_10 $$divU_10
  25 + .
  26 + . $$divI_12 $$divU_12
  27 + .
  28 + . $$divI_14 $$divU_14
  29 + . $$divI_15 $$divU_15
  30 + . $$divI_16
  31 + . $$divI_17 $$divU_17
  32 + .
  33 + . Divide by selected constants for single precision binary integers.
  34 +
  35 + INPUT REGISTERS:
  36 + . arg0 == dividend
  37 + . mrp == return pc
  38 + . sr0 == return space when called externally
  39 +
  40 + OUTPUT REGISTERS:
  41 + . arg0 = undefined
  42 + . arg1 = undefined
  43 + . ret1 = quotient
  44 +
  45 + OTHER REGISTERS AFFECTED:
  46 + . r1 = undefined
  47 +
  48 + SIDE EFFECTS:
  49 + . Causes a trap under the following conditions: NONE
  50 + . Changes memory at the following places: NONE
  51 +
  52 + PERMISSIBLE CONTEXT:
  53 + . Unwindable.
  54 + . Does not create a stack frame.
  55 + . Suitable for internal or external millicode.
  56 + . Assumes the special millicode register conventions.
  57 +
  58 + DISCUSSION:
  59 + . Calls other millicode routines using mrp: NONE
  60 + . Calls other millicode routines: NONE */
  61 +
  62 +
  63 +/* TRUNCATED DIVISION BY SMALL INTEGERS
  64 +
  65 + We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
  66 + (with y fixed).
  67 +
  68 + Let a = floor(z/y), for some choice of z. Note that z will be
  69 + chosen so that division by z is cheap.
  70 +
  71 + Let r be the remainder(z/y). In other words, r = z - ay.
  72 +
  73 + Now, our method is to choose a value for b such that
  74 +
  75 + q'(x) = floor((ax+b)/z)
  76 +
  77 + is equal to q(x) over as large a range of x as possible. If the
  78 + two are equal over a sufficiently large range, and if it is easy to
  79 + form the product (ax), and it is easy to divide by z, then we can
  80 + perform the division much faster than the general division algorithm.
  81 +
  82 + So, we want the following to be true:
  83 +
  84 + . For x in the following range:
  85 + .
  86 + . ky <= x < (k+1)y
  87 + .
  88 + . implies that
  89 + .
  90 + . k <= (ax+b)/z < (k+1)
  91 +
  92 + We want to determine b such that this is true for all k in the
  93 + range {0..K} for some maximum K.
  94 +
  95 + Since (ax+b) is an increasing function of x, we can take each
  96 + bound separately to determine the "best" value for b.
  97 +
  98 + (ax+b)/z < (k+1) implies
  99 +
  100 + (a((k+1)y-1)+b) < (k+1)z implies
  101 +
  102 + b < a + (k+1)(z-ay) implies
  103 +
  104 + b < a + (k+1)r
  105 +
  106 + This needs to be true for all k in the range {0..K}. In
  107 + particular, it is true for k = 0 and this leads to a maximum
  108 + acceptable value for b.
  109 +
  110 + b < a+r or b <= a+r-1
  111 +
  112 + Taking the other bound, we have
  113 +
  114 + k <= (ax+b)/z implies
  115 +
  116 + k <= (aky+b)/z implies
  117 +
  118 + k(z-ay) <= b implies
  119 +
  120 + kr <= b
  121 +
  122 + Clearly, the largest range for k will be achieved by maximizing b,
  123 + when r is not zero. When r is zero, then the simplest choice for b
  124 + is 0. When r is not 0, set
  125 +
  126 + . b = a+r-1
  127 +
  128 + Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
  129 + for all x in the range:
  130 +
  131 + . 0 <= x < (K+1)y
  132 +
  133 + We need to determine what K is. Of our two bounds,
  134 +
  135 + . b < a+(k+1)r is satisfied for all k >= 0, by construction.
  136 +
  137 + The other bound is
  138 +
  139 + . kr <= b
  140 +
  141 + This is always true if r = 0. If r is not 0 (the usual case), then
  142 + K = floor((a+r-1)/r) is the maximum value for k.
  143 +
  144 + Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
  145 + answer for q(x) = floor(x/y) when x is in the range
  146 +
  147 + (0, (K+1)y-1), where K = floor((a+r-1)/r)
  148 +
  149 + To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
  150 + the formula for q'(x) yields the correct value of q(x) for all x
  151 + representable by a single word in HPPA.
  152 +
  153 + We are also constrained in that computing the product (ax), adding
  154 + b, and dividing by z must all be done quickly, otherwise we will be
  155 + better off going through the general algorithm using the DS
  156 + instruction, which uses approximately 70 cycles.
  157 +
  158 + For each y, there is a choice of z which satisfies the constraints
  159 + for (K+1)y >= 2**32. We may not, however, be able to satisfy the
  160 + timing constraints for arbitrary y. It seems that z being equal to
  161 + a power of 2 or a power of 2 minus 1 is as good as we can do, since
  162 + it minimizes the time to do division by z. We want the choice of z
  163 + to also result in a value for (a) that minimizes the computation of
  164 + the product (ax). This is best achieved if (a) has a regular bit
  165 + pattern (so the multiplication can be done with shifts and adds).
  166 + The value of (a) also needs to be less than 2**32 so the product is
  167 + always guaranteed to fit in 2 words.
  168 +
  169 + In actual practice, the following should be done:
  170 +
  171 + 1) For negative x, you should take the absolute value and remember
  172 + . the fact so that the result can be negated. This obviously does
  173 + . not apply in the unsigned case.
  174 + 2) For even y, you should factor out the power of 2 that divides y
  175 + . and divide x by it. You can then proceed by dividing by the
  176 + . odd factor of y.
  177 +
  178 + Here is a table of some odd values of y, and corresponding choices
  179 + for z which are "good".
  180 +
  181 + y z r a (hex) max x (hex)
  182 +
  183 + 3 2**32 1 55555555 100000001
  184 + 5 2**32 1 33333333 100000003
  185 + 7 2**24-1 0 249249 (infinite)
  186 + 9 2**24-1 0 1c71c7 (infinite)
  187 + 11 2**20-1 0 1745d (infinite)
  188 + 13 2**24-1 0 13b13b (infinite)
  189 + 15 2**32 1 11111111 10000000d
  190 + 17 2**32 1 f0f0f0f 10000000f
  191 +
  192 + If r is 1, then b = a+r-1 = a. This simplifies the computation
  193 + of (ax+b), since you can compute (x+1)(a) instead. If r is 0,
  194 + then b = 0 is ok to use which simplifies (ax+b).
  195 +
  196 + The bit patterns for 55555555, 33333333, and 11111111 are obviously
  197 + very regular. The bit patterns for the other values of a above are:
  198 +
  199 + y (hex) (binary)
  200 +
  201 + 7 249249 001001001001001001001001 << regular >>
  202 + 9 1c71c7 000111000111000111000111 << regular >>
  203 + 11 1745d 000000010111010001011101 << irregular >>
  204 + 13 13b13b 000100111011000100111011 << irregular >>
  205 +
  206 + The bit patterns for (a) corresponding to (y) of 11 and 13 may be
  207 + too irregular to warrant using this method.
  208 +
  209 + When z is a power of 2 minus 1, then the division by z is slightly
  210 + more complicated, involving an iterative solution.
  211 +
  212 + The code presented here solves division by 1 through 17, except for
  213 + 11 and 13. There are algorithms for both signed and unsigned
  214 + quantities given.
  215 +
  216 + TIMINGS (cycles)
  217 +
  218 + divisor positive negative unsigned
  219 +
  220 + . 1 2 2 2
  221 + . 2 4 4 2
  222 + . 3 19 21 19
  223 + . 4 4 4 2
  224 + . 5 18 22 19
  225 + . 6 19 22 19
  226 + . 8 4 4 2
  227 + . 10 18 19 17
  228 + . 12 18 20 18
  229 + . 15 16 18 16
  230 + . 16 4 4 2
  231 + . 17 16 18 16
  232 +
  233 + Now, the algorithm for 7, 9, and 14 is an iterative one. That is,
  234 + a loop body is executed until the tentative quotient is 0. The
  235 + number of times the loop body is executed varies depending on the
  236 + dividend, but is never more than two times. If the dividend is
  237 + less than the divisor, then the loop body is not executed at all.
  238 + Each iteration adds 4 cycles to the timings.
  239 +
  240 + divisor positive negative unsigned
  241 +
  242 + . 7 19+4n 20+4n 20+4n n = number of iterations
  243 + . 9 21+4n 22+4n 21+4n
  244 + . 14 21+4n 22+4n 20+4n
  245 +
  246 + To give an idea of how the number of iterations varies, here is a
  247 + table of dividend versus number of iterations when dividing by 7.
  248 +
  249 + smallest largest required
  250 + dividend dividend iterations
  251 +
  252 + . 0 6 0
  253 + . 7 0x6ffffff 1
  254 + 0x1000006 0xffffffff 2
  255 +
  256 + There is some overlap in the range of numbers requiring 1 and 2
  257 + iterations. */
  258 +
  259 +RDEFINE(t2,r1)
  260 +RDEFINE(x2,arg0) /* r26 */
  261 +RDEFINE(t1,arg1) /* r25 */
  262 +RDEFINE(x1,ret1) /* r29 */
  263 +
  264 + SUBSPA_MILLI_DIV
  265 + ATTR_MILLI
  266 +
  267 + .proc
  268 + .callinfo millicode
  269 + .entry
  270 +/* NONE of these routines require a stack frame
  271 + ALL of these routines are unwindable from millicode */
  272 +
  273 +GSYM($$divide_by_constant)
  274 + .export $$divide_by_constant,millicode
  275 +/* Provides a "nice" label for the code covered by the unwind descriptor
  276 + for things like gprof. */
  277 +
  278 +/* DIVISION BY 2 (shift by 1) */
  279 +GSYM($$divI_2)
  280 + .export $$divI_2,millicode
  281 + comclr,>= arg0,0,0
  282 + addi 1,arg0,arg0
  283 + MILLIRET
  284 + extrs arg0,30,31,ret1
  285 +
  286 +
  287 +/* DIVISION BY 4 (shift by 2) */
  288 +GSYM($$divI_4)
  289 + .export $$divI_4,millicode
  290 + comclr,>= arg0,0,0
  291 + addi 3,arg0,arg0
  292 + MILLIRET
  293 + extrs arg0,29,30,ret1
  294 +
  295 +
  296 +/* DIVISION BY 8 (shift by 3) */
  297 +GSYM($$divI_8)
  298 + .export $$divI_8,millicode
  299 + comclr,>= arg0,0,0
  300 + addi 7,arg0,arg0
  301 + MILLIRET
  302 + extrs arg0,28,29,ret1
  303 +
  304 +/* DIVISION BY 16 (shift by 4) */
  305 +GSYM($$divI_16)
  306 + .export $$divI_16,millicode
  307 + comclr,>= arg0,0,0
  308 + addi 15,arg0,arg0
  309 + MILLIRET
  310 + extrs arg0,27,28,ret1
  311 +
  312 +/****************************************************************************
  313 +*
  314 +* DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
  315 +*
  316 +* includes 3,5,15,17 and also 6,10,12
  317 +*
  318 +****************************************************************************/
  319 +
  320 +/* DIVISION BY 3 (use z = 2**32; a = 55555555) */
  321 +
  322 +GSYM($$divI_3)
  323 + .export $$divI_3,millicode
  324 + comb,<,N x2,0,LREF(neg3)
  325 +
  326 + addi 1,x2,x2 /* this cannot overflow */
  327 + extru x2,1,2,x1 /* multiply by 5 to get started */
  328 + sh2add x2,x2,x2
  329 + b LREF(pos)
  330 + addc x1,0,x1
  331 +
  332 +LSYM(neg3)
  333 + subi 1,x2,x2 /* this cannot overflow */
  334 + extru x2,1,2,x1 /* multiply by 5 to get started */
  335 + sh2add x2,x2,x2
  336 + b LREF(neg)
  337 + addc x1,0,x1
  338 +
  339 +GSYM($$divU_3)
  340 + .export $$divU_3,millicode
  341 + addi 1,x2,x2 /* this CAN overflow */
  342 + addc 0,0,x1
  343 + shd x1,x2,30,t1 /* multiply by 5 to get started */
  344 + sh2add x2,x2,x2
  345 + b LREF(pos)
  346 + addc x1,t1,x1
  347 +
  348 +/* DIVISION BY 5 (use z = 2**32; a = 33333333) */
  349 +
  350 +GSYM($$divI_5)
  351 + .export $$divI_5,millicode
  352 + comb,<,N x2,0,LREF(neg5)
  353 +
  354 + addi 3,x2,t1 /* this cannot overflow */
  355 + sh1add x2,t1,x2 /* multiply by 3 to get started */
  356 + b LREF(pos)
  357 + addc 0,0,x1
  358 +
  359 +LSYM(neg5)
  360 + sub 0,x2,x2 /* negate x2 */
  361 + addi 1,x2,x2 /* this cannot overflow */
  362 + shd 0,x2,31,x1 /* get top bit (can be 1) */
  363 + sh1add x2,x2,x2 /* multiply by 3 to get started */
  364 + b LREF(neg)
  365 + addc x1,0,x1
  366 +
  367 +GSYM($$divU_5)
  368 + .export $$divU_5,millicode
  369 + addi 1,x2,x2 /* this CAN overflow */
  370 + addc 0,0,x1
  371 + shd x1,x2,31,t1 /* multiply by 3 to get started */
  372 + sh1add x2,x2,x2
  373 + b LREF(pos)
  374 + addc t1,x1,x1
  375 +
  376 +/* DIVISION BY 6 (shift to divide by 2 then divide by 3) */
  377 +GSYM($$divI_6)
  378 + .export $$divI_6,millicode
  379 + comb,<,N x2,0,LREF(neg6)
  380 + extru x2,30,31,x2 /* divide by 2 */
  381 + addi 5,x2,t1 /* compute 5*(x2+1) = 5*x2+5 */
  382 + sh2add x2,t1,x2 /* multiply by 5 to get started */
  383 + b LREF(pos)
  384 + addc 0,0,x1
  385 +
  386 +LSYM(neg6)
  387 + subi 2,x2,x2 /* negate, divide by 2, and add 1 */
  388 + /* negation and adding 1 are done */
  389 + /* at the same time by the SUBI */
  390 + extru x2,30,31,x2
  391 + shd 0,x2,30,x1
  392 + sh2add x2,x2,x2 /* multiply by 5 to get started */
  393 + b LREF(neg)
  394 + addc x1,0,x1
  395 +
  396 +GSYM($$divU_6)
  397 + .export $$divU_6,millicode
  398 + extru x2,30,31,x2 /* divide by 2 */
  399 + addi 1,x2,x2 /* cannot carry */
  400 + shd 0,x2,30,x1 /* multiply by 5 to get started */
  401 + sh2add x2,x2,x2
  402 + b LREF(pos)
  403 + addc x1,0,x1
  404 +
  405 +/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
  406 +GSYM($$divU_10)
  407 + .export $$divU_10,millicode
  408 + extru x2,30,31,x2 /* divide by 2 */
  409 + addi 3,x2,t1 /* compute 3*(x2+1) = (3*x2)+3 */
  410 + sh1add x2,t1,x2 /* multiply by 3 to get started */
  411 + addc 0,0,x1
  412 +LSYM(pos)
  413 + shd x1,x2,28,t1 /* multiply by 0x11 */
  414 + shd x2,0,28,t2
  415 + add x2,t2,x2
  416 + addc x1,t1,x1
  417 +LSYM(pos_for_17)
  418 + shd x1,x2,24,t1 /* multiply by 0x101 */
  419 + shd x2,0,24,t2
  420 + add x2,t2,x2
  421 + addc x1,t1,x1
  422 +
  423 + shd x1,x2,16,t1 /* multiply by 0x10001 */
  424 + shd x2,0,16,t2
  425 + add x2,t2,x2
  426 + MILLIRET
  427 + addc x1,t1,x1
  428 +
  429 +GSYM($$divI_10)
  430 + .export $$divI_10,millicode
  431 + comb,< x2,0,LREF(neg10)
  432 + copy 0,x1
  433 + extru x2,30,31,x2 /* divide by 2 */
  434 + addib,TR 1,x2,LREF(pos) /* add 1 (cannot overflow) */
  435 + sh1add x2,x2,x2 /* multiply by 3 to get started */
  436 +
  437 +LSYM(neg10)
  438 + subi 2,x2,x2 /* negate, divide by 2, and add 1 */
  439 + /* negation and adding 1 are done */
  440 + /* at the same time by the SUBI */
  441 + extru x2,30,31,x2
  442 + sh1add x2,x2,x2 /* multiply by 3 to get started */
  443 +LSYM(neg)
  444 + shd x1,x2,28,t1 /* multiply by 0x11 */
  445 + shd x2,0,28,t2
  446 + add x2,t2,x2
  447 + addc x1,t1,x1
  448 +LSYM(neg_for_17)
  449 + shd x1,x2,24,t1 /* multiply by 0x101 */
  450 + shd x2,0,24,t2
  451 + add x2,t2,x2
  452 + addc x1,t1,x1
  453 +
  454 + shd x1,x2,16,t1 /* multiply by 0x10001 */
  455 + shd x2,0,16,t2
  456 + add x2,t2,x2
  457 + addc x1,t1,x1
  458 + MILLIRET
  459 + sub 0,x1,x1
  460 +
  461 +/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
  462 +GSYM($$divI_12)
  463 + .export $$divI_12,millicode
  464 + comb,< x2,0,LREF(neg12)
  465 + copy 0,x1
  466 + extru x2,29,30,x2 /* divide by 4 */
  467 + addib,tr 1,x2,LREF(pos) /* compute 5*(x2+1) = 5*x2+5 */
  468 + sh2add x2,x2,x2 /* multiply by 5 to get started */
  469 +
  470 +LSYM(neg12)
  471 + subi 4,x2,x2 /* negate, divide by 4, and add 1 */
  472 + /* negation and adding 1 are done */
  473 + /* at the same time by the SUBI */
  474 + extru x2,29,30,x2
  475 + b LREF(neg)
  476 + sh2add x2,x2,x2 /* multiply by 5 to get started */
  477 +
  478 +GSYM($$divU_12)
  479 + .export $$divU_12,millicode
  480 + extru x2,29,30,x2 /* divide by 4 */
  481 + addi 5,x2,t1 /* cannot carry */
  482 + sh2add x2,t1,x2 /* multiply by 5 to get started */
  483 + b LREF(pos)
  484 + addc 0,0,x1
  485 +
  486 +/* DIVISION BY 15 (use z = 2**32; a = 11111111) */
  487 +GSYM($$divI_15)
  488 + .export $$divI_15,millicode
  489 + comb,< x2,0,LREF(neg15)
  490 + copy 0,x1
  491 + addib,tr 1,x2,LREF(pos)+4
  492 + shd x1,x2,28,t1
  493 +
  494 +LSYM(neg15)
  495 + b LREF(neg)
  496 + subi 1,x2,x2
  497 +
  498 +GSYM($$divU_15)
  499 + .export $$divU_15,millicode
  500 + addi 1,x2,x2 /* this CAN overflow */
  501 + b LREF(pos)
  502 + addc 0,0,x1
  503 +
  504 +/* DIVISION BY 17 (use z = 2**32; a = f0f0f0f) */
  505 +GSYM($$divI_17)
  506 + .export $$divI_17,millicode
  507 + comb,<,n x2,0,LREF(neg17)
  508 + addi 1,x2,x2 /* this cannot overflow */
  509 + shd 0,x2,28,t1 /* multiply by 0xf to get started */
  510 + shd x2,0,28,t2
  511 + sub t2,x2,x2
  512 + b LREF(pos_for_17)
  513 + subb t1,0,x1
  514 +
  515 +LSYM(neg17)
  516 + subi 1,x2,x2 /* this cannot overflow */
  517 + shd 0,x2,28,t1 /* multiply by 0xf to get started */
  518 + shd x2,0,28,t2
  519 + sub t2,x2,x2
  520 + b LREF(neg_for_17)
  521 + subb t1,0,x1
  522 +
  523 +GSYM($$divU_17)
  524 + .export $$divU_17,millicode
  525 + addi 1,x2,x2 /* this CAN overflow */
  526 + addc 0,0,x1
  527 + shd x1,x2,28,t1 /* multiply by 0xf to get started */
  528 +LSYM(u17)
  529 + shd x2,0,28,t2
  530 + sub t2,x2,x2
  531 + b LREF(pos_for_17)
  532 + subb t1,x1,x1
  533 +
  534 +
  535 +/* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these
  536 + includes 7,9 and also 14
  537 +
  538 +
  539 + z = 2**24-1
  540 + r = z mod x = 0
  541 +
  542 + so choose b = 0
  543 +
  544 + Also, in order to divide by z = 2**24-1, we approximate by dividing
  545 + by (z+1) = 2**24 (which is easy), and then correcting.
  546 +
  547 + (ax) = (z+1)q' + r
  548 + . = zq' + (q'+r)
  549 +
  550 + So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
  551 + Then the true remainder of (ax)/z is (q'+r). Repeat the process
  552 + with this new remainder, adding the tentative quotients together,
  553 + until a tentative quotient is 0 (and then we are done). There is
  554 + one last correction to be done. It is possible that (q'+r) = z.
  555 + If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But,
  556 + in fact, we need to add 1 more to the quotient. Now, it turns
  557 + out that this happens if and only if the original value x is
  558 + an exact multiple of y. So, to avoid a three instruction test at
  559 + the end, instead use 1 instruction to add 1 to x at the beginning. */
  560 +
  561 +/* DIVISION BY 7 (use z = 2**24-1; a = 249249) */
  562 +GSYM($$divI_7)
  563 + .export $$divI_7,millicode
  564 + comb,<,n x2,0,LREF(neg7)
  565 +LSYM(7)
  566 + addi 1,x2,x2 /* cannot overflow */
  567 + shd 0,x2,29,x1
  568 + sh3add x2,x2,x2
  569 + addc x1,0,x1
  570 +LSYM(pos7)
  571 + shd x1,x2,26,t1
  572 + shd x2,0,26,t2
  573 + add x2,t2,x2
  574 + addc x1,t1,x1
  575 +
  576 + shd x1,x2,20,t1
  577 + shd x2,0,20,t2
  578 + add x2,t2,x2
  579 + addc x1,t1,t1
  580 +
  581 + /* computed <t1,x2>. Now divide it by (2**24 - 1) */
  582 +
  583 + copy 0,x1
  584 + shd,= t1,x2,24,t1 /* tentative quotient */
  585 +LSYM(1)
  586 + addb,tr t1,x1,LREF(2) /* add to previous quotient */
  587 + extru x2,31,24,x2 /* new remainder (unadjusted) */
  588 +
  589 + MILLIRETN
  590 +
  591 +LSYM(2)
  592 + addb,tr t1,x2,LREF(1) /* adjust remainder */
  593 + extru,= x2,7,8,t1 /* new quotient */
  594 +
  595 +LSYM(neg7)
  596 + subi 1,x2,x2 /* negate x2 and add 1 */
  597 +LSYM(8)
  598 + shd 0,x2,29,x1
  599 + sh3add x2,x2,x2
  600 + addc x1,0,x1
  601 +
  602 +LSYM(neg7_shift)
  603 + shd x1,x2,26,t1
  604 + shd x2,0,26,t2
  605 + add x2,t2,x2
  606 + addc x1,t1,x1
  607 +
  608 + shd x1,x2,20,t1
  609 + shd x2,0,20,t2
  610 + add x2,t2,x2
  611 + addc x1,t1,t1
  612 +
  613 + /* computed <t1,x2>. Now divide it by (2**24 - 1) */
  614 +
  615 + copy 0,x1
  616 + shd,= t1,x2,24,t1 /* tentative quotient */
  617 +LSYM(3)
  618 + addb,tr t1,x1,LREF(4) /* add to previous quotient */
  619 + extru x2,31,24,x2 /* new remainder (unadjusted) */
  620 +
  621 + MILLIRET
  622 + sub 0,x1,x1 /* negate result */
  623 +
  624 +LSYM(4)
  625 + addb,tr t1,x2,LREF(3) /* adjust remainder */
  626 + extru,= x2,7,8,t1 /* new quotient */
  627 +
  628 +GSYM($$divU_7)
  629 + .export $$divU_7,millicode
  630 + addi 1,x2,x2 /* can carry */
  631 + addc 0,0,x1
  632 + shd x1,x2,29,t1
  633 + sh3add x2,x2,x2
  634 + b LREF(pos7)
  635 + addc t1,x1,x1
  636 +
  637 +/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */
  638 +GSYM($$divI_9)
  639 + .export $$divI_9,millicode
  640 + comb,<,n x2,0,LREF(neg9)
  641 + addi 1,x2,x2 /* cannot overflow */
  642 + shd 0,x2,29,t1
  643 + shd x2,0,29,t2
  644 + sub t2,x2,x2
  645 + b LREF(pos7)
  646 + subb t1,0,x1
  647 +
  648 +LSYM(neg9)
  649 + subi 1,x2,x2 /* negate and add 1 */
  650 + shd 0,x2,29,t1
  651 + shd x2,0,29,t2
  652 + sub t2,x2,x2
  653 + b LREF(neg7_shift)
  654 + subb t1,0,x1
  655 +
  656 +GSYM($$divU_9)
  657 + .export $$divU_9,millicode
  658 + addi 1,x2,x2 /* can carry */
  659 + addc 0,0,x1
  660 + shd x1,x2,29,t1
  661 + shd x2,0,29,t2
  662 + sub t2,x2,x2
  663 + b LREF(pos7)
  664 + subb t1,x1,x1
  665 +
  666 +/* DIVISION BY 14 (shift to divide by 2 then divide by 7) */
  667 +GSYM($$divI_14)
  668 + .export $$divI_14,millicode
  669 + comb,<,n x2,0,LREF(neg14)
  670 +GSYM($$divU_14)
  671 + .export $$divU_14,millicode
  672 + b LREF(7) /* go to 7 case */
  673 + extru x2,30,31,x2 /* divide by 2 */
  674 +
  675 +LSYM(neg14)
  676 + subi 2,x2,x2 /* negate (and add 2) */
  677 + b LREF(8)
  678 + extru x2,30,31,x2 /* divide by 2 */
  679 + .exit
  680 + .procend
  681 + .end
  682 +#endif
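
For the z = 2**32 rows of the table above, the whole method collapses to a multiply-high. Taking y = 3: a = 0x55555555 and r = 1, so b = a and q(x) = floor(a*(x+1)/2**32) holds for every 32-bit x (the table's max x of 100000001 covers the full range). An illustrative C rendering (the millicode obtains the same product through shift-and-add steps, multiplying by 5, 0x11, 0x101 and 0x10001, rather than a hardware multiply):

    #include <stdint.h>

    typedef uint32_t u32;
    typedef uint64_t u64;

    /* Divide by 3 with z = 2^32 and a = 0x55555555 = floor(z/3).
     * r = z - 3*a = 1, so b = a + r - 1 = a, and
     *   q'(x) = floor((a*x + a) / z) = floor(a*(x+1) / 2^32)
     * equals x / 3 for all x up to 0x100000001, i.e. every u32. */
    static u32 divU_3(u32 x)
    {
        return (u64)0x55555555 * ((u64)x + 1) >> 32;
    }

    /* e.g. x = 10: 0x55555555 * 11 = 0x3aaaaaaa7, >> 32 gives 3. */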
arch/parisc/lib/milli/dyncall.S
  1 +/* 32 and 64-bit millicode, original author Hewlett-Packard
  2 + adapted for gcc by Paul Bame <bame@debian.org>
  3 + and Alan Modra <alan@linuxcare.com.au>.
  4 +
  5 + Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
  6 +
  7 + This file is part of GCC and is released under the terms
  8 + of the GNU General Public License as published by the Free Software
  9 + Foundation; either version 2, or (at your option) any later version.
  10 + See the file COPYING in the top-level GCC source directory for a copy
  11 + of the license. */
  12 +
  13 +#include "milli.h"
  14 +
  15 +#ifdef L_dyncall
  16 + SUBSPA_MILLI
  17 + ATTR_DATA
  18 +GSYM($$dyncall)
  19 + .export $$dyncall,millicode
  20 + .proc
  21 + .callinfo millicode
  22 + .entry
  23 + bb,>=,n %r22,30,LREF(1) ; branch if not plabel address
  24 + depi 0,31,2,%r22 ; clear the two least significant bits
  25 + ldw 4(%r22),%r19 ; load new LTP value
  26 + ldw 0(%r22),%r22 ; load address of target
  27 +LSYM(1)
  28 + bv %r0(%r22) ; branch to the real target
  29 + stw %r2,-24(%r30) ; save return address into frame marker
  30 + .exit
  31 + .procend
  32 +#endif
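
$$dyncall dispatches an indirect call whose target may be either a plain code address or a function descriptor (plabel): PA bit 30 (the 2-bit) of the pointer marks a plabel, in which case the two low bits are cleared and the real target plus the new linkage-table pointer for %r19 are loaded from the descriptor's two words. An illustrative C view of the 32-bit layout (hypothetical struct, not part of the commit):

    #include <stdint.h>

    /* Hypothetical view of a 32-bit PA plabel as dereferenced
     * above: word 0 holds the code address, word 4 the LTP value
     * destined for %r19. */
    struct plabel {
        uint32_t target;    /* ldw 0(%r22),%r22 */
        uint32_t ltp;       /* ldw 4(%r22),%r19 */
    };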
arch/parisc/lib/milli/milli.S
  1 +/* 32 and 64-bit millicode, original author Hewlett-Packard
  2 + adapted for gcc by Paul Bame <bame@debian.org>
  3 + and Alan Modra <alan@linuxcare.com.au>.
  4 +
  5 + Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
  6 +
  7 + This file is part of GCC and is released under the terms
  8 + of the GNU General Public License as published by the Free Software
  9 + Foundation; either version 2, or (at your option) any later version.
  10 + See the file COPYING in the top-level GCC source directory for a copy
  11 + of the license. */
  12 +
  13 +#ifdef CONFIG_64BIT
  14 + .level 2.0w
  15 +#endif
  16 +
  17 +/* Hardware General Registers. */
  18 +r0: .reg %r0
  19 +r1: .reg %r1
  20 +r2: .reg %r2
  21 +r3: .reg %r3
  22 +r4: .reg %r4
  23 +r5: .reg %r5
  24 +r6: .reg %r6
  25 +r7: .reg %r7
  26 +r8: .reg %r8
  27 +r9: .reg %r9
  28 +r10: .reg %r10
  29 +r11: .reg %r11
  30 +r12: .reg %r12
  31 +r13: .reg %r13
  32 +r14: .reg %r14
  33 +r15: .reg %r15
  34 +r16: .reg %r16
  35 +r17: .reg %r17
  36 +r18: .reg %r18
  37 +r19: .reg %r19
  38 +r20: .reg %r20
  39 +r21: .reg %r21
  40 +r22: .reg %r22
  41 +r23: .reg %r23
  42 +r24: .reg %r24
  43 +r25: .reg %r25
  44 +r26: .reg %r26
  45 +r27: .reg %r27
  46 +r28: .reg %r28
  47 +r29: .reg %r29
  48 +r30: .reg %r30
  49 +r31: .reg %r31
  50 +
  51 +/* Hardware Space Registers. */
  52 +sr0: .reg %sr0
  53 +sr1: .reg %sr1
  54 +sr2: .reg %sr2
  55 +sr3: .reg %sr3
  56 +sr4: .reg %sr4
  57 +sr5: .reg %sr5
  58 +sr6: .reg %sr6
  59 +sr7: .reg %sr7
  60 +
  61 +/* Hardware Floating Point Registers. */
  62 +fr0: .reg %fr0
  63 +fr1: .reg %fr1
  64 +fr2: .reg %fr2
  65 +fr3: .reg %fr3
  66 +fr4: .reg %fr4
  67 +fr5: .reg %fr5
  68 +fr6: .reg %fr6
  69 +fr7: .reg %fr7
  70 +fr8: .reg %fr8
  71 +fr9: .reg %fr9
  72 +fr10: .reg %fr10
  73 +fr11: .reg %fr11
  74 +fr12: .reg %fr12
  75 +fr13: .reg %fr13
  76 +fr14: .reg %fr14
  77 +fr15: .reg %fr15
  78 +
  79 +/* Hardware Control Registers. */
  80 +cr11: .reg %cr11
  81 +sar: .reg %cr11 /* Shift Amount Register */
  82 +
  83 +/* Software Architecture General Registers. */
  84 +rp: .reg r2 /* return pointer */
  85 +#ifdef CONFIG_64BIT
  86 +mrp: .reg r2 /* millicode return pointer */
  87 +#else
  88 +mrp: .reg r31 /* millicode return pointer */
  89 +#endif
  90 +ret0: .reg r28 /* return value */
  91 +ret1: .reg r29 /* return value (high part of double) */
  92 +sp: .reg r30 /* stack pointer */
  93 +dp: .reg r27 /* data pointer */
  94 +arg0: .reg r26 /* argument */
  95 +arg1: .reg r25 /* argument or high part of double argument */
  96 +arg2: .reg r24 /* argument */
  97 +arg3: .reg r23 /* argument or high part of double argument */
  98 +
  99 +/* Software Architecture Space Registers. */
  100 +/* sr0 ; return link from BLE */
  101 +sret: .reg sr1 /* return value */
  102 +sarg: .reg sr1 /* argument */
  103 +/* sr4 ; PC SPACE tracker */
  104 +/* sr5 ; process private data */
  105 +
  106 +/* Frame Offsets (millicode convention!) Used when calling other
  107 + millicode routines. Stack unwinding is dependent upon these
  108 + definitions. */
  109 +r31_slot: .equ -20 /* "current RP" slot */
  110 +sr0_slot: .equ -16 /* "static link" slot */
  111 +#if defined(CONFIG_64BIT)
  112 +mrp_slot: .equ -16 /* "current RP" slot */
  113 +psp_slot: .equ -8 /* "previous SP" slot */
  114 +#else
  115 +mrp_slot: .equ -20 /* "current RP" slot (replacing "r31_slot") */
  116 +#endif
  117 +
  118 +
  119 +#define DEFINE(name,value)name: .EQU value
  120 +#define RDEFINE(name,value)name: .REG value
  121 +#ifdef milliext
  122 +#define MILLI_BE(lbl) BE lbl(sr7,r0)
  123 +#define MILLI_BEN(lbl) BE,n lbl(sr7,r0)
  124 +#define MILLI_BLE(lbl) BLE lbl(sr7,r0)
  125 +#define MILLI_BLEN(lbl) BLE,n lbl(sr7,r0)
  126 +#define MILLIRETN BE,n 0(sr0,mrp)
  127 +#define MILLIRET BE 0(sr0,mrp)
  128 +#define MILLI_RETN BE,n 0(sr0,mrp)
  129 +#define MILLI_RET BE 0(sr0,mrp)
  130 +#else
  131 +#define MILLI_BE(lbl) B lbl
  132 +#define MILLI_BEN(lbl) B,n lbl
  133 +#define MILLI_BLE(lbl) BL lbl,mrp
  134 +#define MILLI_BLEN(lbl) BL,n lbl,mrp
  135 +#define MILLIRETN BV,n 0(mrp)
  136 +#define MILLIRET BV 0(mrp)
  137 +#define MILLI_RETN BV,n 0(mrp)
  138 +#define MILLI_RET BV 0(mrp)
  139 +#endif
  140 +
  141 +#define CAT(a,b) a##b
  142 +
  143 +#define SUBSPA_MILLI .section .text
  144 +#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16
  145 +#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16
  146 +#define ATTR_MILLI
  147 +#define SUBSPA_DATA .section .data
  148 +#define ATTR_DATA
  149 +#define GLOBAL $global$
  150 +#define GSYM(sym) !sym:
  151 +#define LSYM(sym) !CAT(.L,sym:)
  152 +#define LREF(sym) CAT(.L,sym)
  153 +
  154 +#ifdef L_dyncall
  155 + SUBSPA_MILLI
  156 + ATTR_DATA
  157 +GSYM($$dyncall)
  158 + .export $$dyncall,millicode
  159 + .proc
  160 + .callinfo millicode
  161 + .entry
  162 + bb,>=,n %r22,30,LREF(1) ; branch if not plabel address
  163 + depi 0,31,2,%r22 ; clear the two least significant bits
  164 + ldw 4(%r22),%r19 ; load new LTP value
  165 + ldw 0(%r22),%r22 ; load address of target
  166 +LSYM(1)
  167 + bv %r0(%r22) ; branch to the real target
  168 + stw %r2,-24(%r30) ; save return address into frame marker
  169 + .exit
  170 + .procend
  171 +#endif
  172 +
  173 +#ifdef L_divI
  174 +/* ROUTINES: $$divI, $$divoI
  175 +
  176 + Single precision divide for signed binary integers.
  177 +
  178 + The quotient is truncated towards zero.
  179 + The sign of the quotient is the XOR of the signs of the dividend and
  180 + divisor.
  181 + Divide by zero is trapped.
  182 + Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI.
  183 +
  184 + INPUT REGISTERS:
  185 + . arg0 == dividend
  186 + . arg1 == divisor
  187 + . mrp == return pc
  188 + . sr0 == return space when called externally
  189 +
  190 + OUTPUT REGISTERS:
  191 + . arg0 = undefined
  192 + . arg1 = undefined
  193 + . ret1 = quotient
  194 +
  195 + OTHER REGISTERS AFFECTED:
  196 + . r1 = undefined
  197 +
  198 + SIDE EFFECTS:
  199 + . Causes a trap under the following conditions:
  200 + . divisor is zero (traps with ADDIT,= 0,25,0)
  201 + . dividend==-2**31 and divisor==-1 and routine is $$divoI
  202 + . (traps with ADDO 26,25,0)
  203 + . Changes memory at the following places:
  204 + . NONE
  205 +
  206 + PERMISSIBLE CONTEXT:
  207 + . Unwindable.
  208 + . Suitable for internal or external millicode.
  209 + . Assumes the special millicode register conventions.
  210 +
  211 + DISCUSSION:
  212 + . Branches to other millicode routines using BE
  213 + . $$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15
  214 + .
  215 + . For selected divisors, calls a divide by constant routine written by
  216 + . Karl Pettis. Eligible divisors are 1..15 excluding 11 and 13.
  217 + .
  218 + . The only overflow case is -2**31 divided by -1.
  219 + . Both routines return -2**31 but only $$divoI traps. */
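
A compact C model of that contract (a sketch only; it assumes d != 0, since
the millicode traps zero divisors via ADDIT, and the helper name is
illustrative):

    #include <stdint.h>

    static int32_t divI_model(int32_t n, int32_t d)
    {
            /* The lone overflow case: both entry points return
               INT32_MIN, but only $$divoI traps on it. */
            if (n == INT32_MIN && d == -1)
                    return INT32_MIN;
            return n / d;   /* C division truncates toward zero, like $$divI */
    }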
  220 +
  221 +RDEFINE(temp,r1)
  222 +RDEFINE(retreg,ret1) /* r29 */
  223 +RDEFINE(temp1,arg0)
  224 + SUBSPA_MILLI_DIV
  225 + ATTR_MILLI
  226 + .import $$divI_2,millicode
  227 + .import $$divI_3,millicode
  228 + .import $$divI_4,millicode
  229 + .import $$divI_5,millicode
  230 + .import $$divI_6,millicode
  231 + .import $$divI_7,millicode
  232 + .import $$divI_8,millicode
  233 + .import $$divI_9,millicode
  234 + .import $$divI_10,millicode
  235 + .import $$divI_12,millicode
  236 + .import $$divI_14,millicode
  237 + .import $$divI_15,millicode
  238 + .export $$divI,millicode
  239 + .export $$divoI,millicode
  240 + .proc
  241 + .callinfo millicode
  242 + .entry
  243 +GSYM($$divoI)
  244 + comib,=,n -1,arg1,LREF(negative1) /* when divisor == -1 */
  245 +GSYM($$divI)
  246 + ldo -1(arg1),temp /* is there at most one bit set ? */
  247 + and,<> arg1,temp,r0 /* if not, don't use power of 2 divide */
  248 + addi,> 0,arg1,r0 /* if divisor > 0, use power of 2 divide */
  249 + b,n LREF(neg_denom)
  250 +LSYM(pow2)
  251 + addi,>= 0,arg0,retreg /* if numerator is negative, add the */
  252 + add arg0,temp,retreg /* (denominator -1) to correct for shifts */
  253 + extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */
  254 + extrs retreg,15,16,retreg /* retreg = retreg >> 16 */
  255 + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */
  256 + ldi 0xcc,temp1 /* setup 0xcc in temp1 */
  257 + extru,= arg1,23,8,temp /* test denominator with 0xff00 */
  258 + extrs retreg,23,24,retreg /* retreg = retreg >> 8 */
  259 + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */
  260 + ldi 0xaa,temp /* setup 0xaa in temp */
  261 + extru,= arg1,27,4,r0 /* test denominator with 0xf0 */
  262 + extrs retreg,27,28,retreg /* retreg = retreg >> 4 */
  263 + and,= arg1,temp1,r0 /* test denominator with 0xcc */
  264 + extrs retreg,29,30,retreg /* retreg = retreg >> 2 */
  265 + and,= arg1,temp,r0 /* test denominator with 0xaa */
  266 + extrs retreg,30,31,retreg /* retreg = retreg >> 1 */
  267 + MILLIRETN
  268 +LSYM(neg_denom)
  269 + addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power of 2 */
  270 + b,n LREF(regular_seq)
  271 + sub r0,arg1,temp /* make denominator positive */
  272 + comb,=,n arg1,temp,LREF(regular_seq) /* test against 0x80000000 and 0 */
  273 + ldo -1(temp),retreg /* is there at most one bit set ? */
  274 + and,= temp,retreg,r0 /* if so, the denominator is power of 2 */
  275 + b,n LREF(regular_seq)
  276 + sub r0,arg0,retreg /* negate numerator */
  277 + comb,=,n arg0,retreg,LREF(regular_seq) /* test against 0x80000000 */
  278 + copy retreg,arg0 /* set up arg0, arg1 and temp */
  279 + copy temp,arg1 /* before branching to pow2 */
  280 + b LREF(pow2)
  281 + ldo -1(arg1),temp
  282 +LSYM(regular_seq)
  283 + comib,>>=,n 15,arg1,LREF(small_divisor)
  284 + add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
  285 +LSYM(normal)
  286 + subi 0,retreg,retreg /* make it positive */
  287 + sub 0,arg1,temp /* clear carry, */
  288 + /* negate the divisor */
  289 + ds 0,temp,0 /* set V-bit to the comple- */
  290 + /* ment of the divisor sign */
  291 + add retreg,retreg,retreg /* shift msb bit into carry */
  292 + ds r0,arg1,temp /* 1st divide step, if no carry */
  293 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  294 + ds temp,arg1,temp /* 2nd divide step */
  295 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  296 + ds temp,arg1,temp /* 3rd divide step */
  297 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  298 + ds temp,arg1,temp /* 4th divide step */
  299 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  300 + ds temp,arg1,temp /* 5th divide step */
  301 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  302 + ds temp,arg1,temp /* 6th divide step */
  303 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  304 + ds temp,arg1,temp /* 7th divide step */
  305 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  306 + ds temp,arg1,temp /* 8th divide step */
  307 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  308 + ds temp,arg1,temp /* 9th divide step */
  309 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  310 + ds temp,arg1,temp /* 10th divide step */
  311 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  312 + ds temp,arg1,temp /* 11th divide step */
  313 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  314 + ds temp,arg1,temp /* 12th divide step */
  315 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  316 + ds temp,arg1,temp /* 13th divide step */
  317 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  318 + ds temp,arg1,temp /* 14th divide step */
  319 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  320 + ds temp,arg1,temp /* 15th divide step */
  321 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  322 + ds temp,arg1,temp /* 16th divide step */
  323 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  324 + ds temp,arg1,temp /* 17th divide step */
  325 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  326 + ds temp,arg1,temp /* 18th divide step */
  327 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  328 + ds temp,arg1,temp /* 19th divide step */
  329 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  330 + ds temp,arg1,temp /* 20th divide step */
  331 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  332 + ds temp,arg1,temp /* 21st divide step */
  333 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  334 + ds temp,arg1,temp /* 22nd divide step */
  335 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  336 + ds temp,arg1,temp /* 23rd divide step */
  337 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  338 + ds temp,arg1,temp /* 24th divide step */
  339 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  340 + ds temp,arg1,temp /* 25th divide step */
  341 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  342 + ds temp,arg1,temp /* 26th divide step */
  343 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  344 + ds temp,arg1,temp /* 27th divide step */
  345 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  346 + ds temp,arg1,temp /* 28th divide step */
  347 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  348 + ds temp,arg1,temp /* 29th divide step */
  349 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  350 + ds temp,arg1,temp /* 30th divide step */
  351 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  352 + ds temp,arg1,temp /* 31st divide step */
  353 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  354 + ds temp,arg1,temp /* 32nd divide step, */
  355 + addc retreg,retreg,retreg /* shift last retreg bit into retreg */
  356 + xor,>= arg0,arg1,0 /* get correct sign of quotient */
  357 + sub 0,retreg,retreg /* based on operand signs */
  358 + MILLIRETN
  359 + nop
  360 +
  361 +LSYM(small_divisor)
  362 +
  363 +#if defined(CONFIG_64BIT)
  364 +/* Clear the upper 32 bits of the arg1 register. We are working with */
  365 +/* small divisors (and 32-bit integers). We must not be misled */
  366 +/* by "1" bits left in the upper 32 bits. */
  367 + depd %r0,31,32,%r25
  368 +#endif
  369 + blr,n arg1,r0
  370 + nop
  371 +/* table for divisor == 0,1, ... ,15 */
  372 + addit,= 0,arg1,r0 /* trap if divisor == 0 */
  373 + nop
  374 + MILLIRET /* divisor == 1 */
  375 + copy arg0,retreg
  376 + MILLI_BEN($$divI_2) /* divisor == 2 */
  377 + nop
  378 + MILLI_BEN($$divI_3) /* divisor == 3 */
  379 + nop
  380 + MILLI_BEN($$divI_4) /* divisor == 4 */
  381 + nop
  382 + MILLI_BEN($$divI_5) /* divisor == 5 */
  383 + nop
  384 + MILLI_BEN($$divI_6) /* divisor == 6 */
  385 + nop
  386 + MILLI_BEN($$divI_7) /* divisor == 7 */
  387 + nop
  388 + MILLI_BEN($$divI_8) /* divisor == 8 */
  389 + nop
  390 + MILLI_BEN($$divI_9) /* divisor == 9 */
  391 + nop
  392 + MILLI_BEN($$divI_10) /* divisor == 10 */
  393 + nop
  394 + b LREF(normal) /* divisor == 11 */
  395 + add,>= 0,arg0,retreg
  396 + MILLI_BEN($$divI_12) /* divisor == 12 */
  397 + nop
  398 + b LREF(normal) /* divisor == 13 */
  399 + add,>= 0,arg0,retreg
  400 + MILLI_BEN($$divI_14) /* divisor == 14 */
  401 + nop
  402 + MILLI_BEN($$divI_15) /* divisor == 15 */
  403 + nop
  404 +
  405 +LSYM(negative1)
  406 + sub 0,arg0,retreg /* result is negation of dividend */
  407 + MILLIRET
  408 + addo arg0,arg1,r0 /* trap iff dividend==0x80000000 && divisor==-1 */
  409 + .exit
  410 + .procend
  411 + .end
  412 +#endif
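
The pow2 fast path in $$divI is the standard bias-then-shift trick for
truncating signed division by 2**k: negative dividends get (2**k - 1) added
before the arithmetic shift so the rounding goes toward zero. A sketch (the
right shift of a negative value is implementation-defined in ISO C but
arithmetic with GCC on PA-RISC):

    static int div_pow2(int n, int k)
    {
            if (n < 0)
                    n += (1 << k) - 1;      /* the addi,>= / add pair */
            return n >> k;                  /* the extrs shift chain */
    }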
  413 +
  414 +#ifdef L_divU
  415 +/* ROUTINE: $$divU
  416 + .
  417 + . Single precision divide for unsigned integers.
  418 + .
  419 + . Quotient is truncated towards zero.
  420 + . Traps on divide by zero.
  421 +
  422 + INPUT REGISTERS:
  423 + . arg0 == dividend
  424 + . arg1 == divisor
  425 + . mrp == return pc
  426 + . sr0 == return space when called externally
  427 +
  428 + OUTPUT REGISTERS:
  429 + . arg0 = undefined
  430 + . arg1 = undefined
  431 + . ret1 = quotient
  432 +
  433 + OTHER REGISTERS AFFECTED:
  434 + . r1 = undefined
  435 +
  436 + SIDE EFFECTS:
  437 + . Causes a trap under the following conditions:
  438 + . divisor is zero
  439 + . Changes memory at the following places:
  440 + . NONE
  441 +
  442 + PERMISSIBLE CONTEXT:
  443 + . Unwindable.
  444 + . Does not create a stack frame.
  445 + . Suitable for internal or external millicode.
  446 + . Assumes the special millicode register conventions.
  447 +
  448 + DISCUSSION:
  449 + . Branches to other millicode routines using BE:
  450 + . $$divU_# for 3,5,6,7,9,10,12,14,15
  451 + .
  452 + . For selected small divisors calls the special divide by constant
  453 + . routines written by Karl Pettis. These are: 3,5,6,7,9,10,12,14,15. */
  454 +
  455 +RDEFINE(temp,r1)
  456 +RDEFINE(retreg,ret1) /* r29 */
  457 +RDEFINE(temp1,arg0)
  458 + SUBSPA_MILLI_DIV
  459 + ATTR_MILLI
  460 + .export $$divU,millicode
  461 + .import $$divU_3,millicode
  462 + .import $$divU_5,millicode
  463 + .import $$divU_6,millicode
  464 + .import $$divU_7,millicode
  465 + .import $$divU_9,millicode
  466 + .import $$divU_10,millicode
  467 + .import $$divU_12,millicode
  468 + .import $$divU_14,millicode
  469 + .import $$divU_15,millicode
  470 + .proc
  471 + .callinfo millicode
  472 + .entry
  473 +GSYM($$divU)
  474 +/* The subtract is not nullified since it does no harm and can be used
  475 + by the two cases that branch back to "normal". */
  476 + ldo -1(arg1),temp /* is there at most one bit set ? */
  477 + and,= arg1,temp,r0 /* if so, denominator is power of 2 */
  478 + b LREF(regular_seq)
  479 + addit,= 0,arg1,0 /* trap for zero dvr */
  480 + copy arg0,retreg
  481 + extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */
  482 + extru retreg,15,16,retreg /* retreg = retreg >> 16 */
  483 + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */
  484 + ldi 0xcc,temp1 /* setup 0xcc in temp1 */
  485 + extru,= arg1,23,8,temp /* test denominator with 0xff00 */
  486 + extru retreg,23,24,retreg /* retreg = retreg >> 8 */
  487 + or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */
  488 + ldi 0xaa,temp /* setup 0xaa in temp */
  489 + extru,= arg1,27,4,r0 /* test denominator with 0xf0 */
  490 + extru retreg,27,28,retreg /* retreg = retreg >> 4 */
  491 + and,= arg1,temp1,r0 /* test denominator with 0xcc */
  492 + extru retreg,29,30,retreg /* retreg = retreg >> 2 */
  493 + and,= arg1,temp,r0 /* test denominator with 0xaa */
  494 + extru retreg,30,31,retreg /* retreg = retreg >> 1 */
  495 + MILLIRETN
  496 + nop
  497 +LSYM(regular_seq)
  498 + comib,>= 15,arg1,LREF(special_divisor)
  499 + subi 0,arg1,temp /* clear carry, negate the divisor */
  500 + ds r0,temp,r0 /* set V-bit to 1 */
  501 +LSYM(normal)
  502 + add arg0,arg0,retreg /* shift msb bit into carry */
  503 + ds r0,arg1,temp /* 1st divide step, if no carry */
  504 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  505 + ds temp,arg1,temp /* 2nd divide step */
  506 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  507 + ds temp,arg1,temp /* 3rd divide step */
  508 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  509 + ds temp,arg1,temp /* 4th divide step */
  510 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  511 + ds temp,arg1,temp /* 5th divide step */
  512 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  513 + ds temp,arg1,temp /* 6th divide step */
  514 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  515 + ds temp,arg1,temp /* 7th divide step */
  516 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  517 + ds temp,arg1,temp /* 8th divide step */
  518 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  519 + ds temp,arg1,temp /* 9th divide step */
  520 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  521 + ds temp,arg1,temp /* 10th divide step */
  522 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  523 + ds temp,arg1,temp /* 11th divide step */
  524 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  525 + ds temp,arg1,temp /* 12th divide step */
  526 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  527 + ds temp,arg1,temp /* 13th divide step */
  528 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  529 + ds temp,arg1,temp /* 14th divide step */
  530 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  531 + ds temp,arg1,temp /* 15th divide step */
  532 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  533 + ds temp,arg1,temp /* 16th divide step */
  534 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  535 + ds temp,arg1,temp /* 17th divide step */
  536 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  537 + ds temp,arg1,temp /* 18th divide step */
  538 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  539 + ds temp,arg1,temp /* 19th divide step */
  540 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  541 + ds temp,arg1,temp /* 20th divide step */
  542 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  543 + ds temp,arg1,temp /* 21st divide step */
  544 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  545 + ds temp,arg1,temp /* 22nd divide step */
  546 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  547 + ds temp,arg1,temp /* 23rd divide step */
  548 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  549 + ds temp,arg1,temp /* 24th divide step */
  550 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  551 + ds temp,arg1,temp /* 25th divide step */
  552 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  553 + ds temp,arg1,temp /* 26th divide step */
  554 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  555 + ds temp,arg1,temp /* 27th divide step */
  556 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  557 + ds temp,arg1,temp /* 28th divide step */
  558 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  559 + ds temp,arg1,temp /* 29th divide step */
  560 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  561 + ds temp,arg1,temp /* 30th divide step */
  562 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  563 + ds temp,arg1,temp /* 31st divide step */
  564 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  565 + ds temp,arg1,temp /* 32nd divide step, */
  566 + MILLIRET
  567 + addc retreg,retreg,retreg /* shift last retreg bit into retreg */
  568 +
  569 +/* Handle the cases where divisor is a small constant or has high bit on. */
  570 +LSYM(special_divisor)
  571 +/* blr arg1,r0 */
  572 +/* comib,>,n 0,arg1,LREF(big_divisor) ; nullify previous instruction */
  573 +
  574 +/* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from
  575 + generating such a blr, comib sequence. A problem in nullification. So I
  576 + rewrote this code. */
  577 +
  578 +#if defined(CONFIG_64BIT)
  579 +/* Clear the upper 32 bits of the arg1 register. We are working with
  580 + small divisors (and 32-bit unsigned integers). We must not be misled
  581 + by "1" bits left in the upper 32 bits. */
  582 + depd %r0,31,32,%r25
  583 +#endif
  584 + comib,> 0,arg1,LREF(big_divisor)
  585 + nop
  586 + blr arg1,r0
  587 + nop
  588 +
  589 +LSYM(zero_divisor) /* this label is here to provide external visibility */
  590 + addit,= 0,arg1,0 /* trap for zero dvr */
  591 + nop
  592 + MILLIRET /* divisor == 1 */
  593 + copy arg0,retreg
  594 + MILLIRET /* divisor == 2 */
  595 + extru arg0,30,31,retreg
  596 + MILLI_BEN($$divU_3) /* divisor == 3 */
  597 + nop
  598 + MILLIRET /* divisor == 4 */
  599 + extru arg0,29,30,retreg
  600 + MILLI_BEN($$divU_5) /* divisor == 5 */
  601 + nop
  602 + MILLI_BEN($$divU_6) /* divisor == 6 */
  603 + nop
  604 + MILLI_BEN($$divU_7) /* divisor == 7 */
  605 + nop
  606 + MILLIRET /* divisor == 8 */
  607 + extru arg0,28,29,retreg
  608 + MILLI_BEN($$divU_9) /* divisor == 9 */
  609 + nop
  610 + MILLI_BEN($$divU_10) /* divisor == 10 */
  611 + nop
  612 + b LREF(normal) /* divisor == 11 */
  613 + ds r0,temp,r0 /* set V-bit to 1 */
  614 + MILLI_BEN($$divU_12) /* divisor == 12 */
  615 + nop
  616 + b LREF(normal) /* divisor == 13 */
  617 + ds r0,temp,r0 /* set V-bit to 1 */
  618 + MILLI_BEN($$divU_14) /* divisor == 14 */
  619 + nop
  620 + MILLI_BEN($$divU_15) /* divisor == 15 */
  621 + nop
  622 +
  623 +/* Handle the case where the high bit is on in the divisor.
  624 + Compute: if( dividend>=divisor) quotient=1; else quotient=0;
  625 + Note: dividend>=divisor iff dividend-divisor does not borrow
  626 + and not borrow iff carry. */
  627 +LSYM(big_divisor)
  628 + sub arg0,arg1,r0
  629 + MILLIRET
  630 + addc r0,r0,retreg
  631 + .exit
  632 + .procend
  633 + .end
  634 +#endif
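
Each ds/addc pair above retires one quotient bit, so the unrolled sequence is
32-bit shift-and-subtract long division, and the big-divisor tail collapses to
a single compare. A C model of the arithmetic only (a sketch, not of the DS
carry mechanics; assumes d != 0):

    #include <stdint.h>

    static uint32_t divU_model(uint32_t n, uint32_t d)
    {
            uint64_t r = 0;         /* 64-bit so d > 2**31 cannot overflow */
            uint32_t q = 0;
            int i;

            for (i = 31; i >= 0; i--) {
                    r = (r << 1) | ((n >> i) & 1);  /* bring down next bit */
                    q <<= 1;
                    if (r >= d) {                   /* one divide step */
                            r -= d;
                            q |= 1;
                    }
            }
            return q;       /* for d > 2**31 this is 1 iff n >= d */
    }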
  635 +
  636 +#ifdef L_remI
  637 +/* ROUTINE: $$remI
  638 +
  639 + DESCRIPTION:
  640 + . $$remI returns the remainder of the division of two signed 32-bit
  641 + . integers. The sign of the remainder is the same as the sign of
  642 + . the dividend.
  643 +
  644 +
  645 + INPUT REGISTERS:
  646 + . arg0 == dividend
  647 + . arg1 == divisor
  648 + . mrp == return pc
  649 + . sr0 == return space when called externally
  650 +
  651 + OUTPUT REGISTERS:
  652 + . arg0 = destroyed
  653 + . arg1 = destroyed
  654 + . ret1 = remainder
  655 +
  656 + OTHER REGISTERS AFFECTED:
  657 + . r1 = undefined
  658 +
  659 + SIDE EFFECTS:
  660 + . Causes a trap under the following conditions: DIVIDE BY ZERO
  661 + . Changes memory at the following places: NONE
  662 +
  663 + PERMISSIBLE CONTEXT:
  664 + . Unwindable
  665 + . Does not create a stack frame
  666 + . Is usable for internal or external millicode
  667 +
  668 + DISCUSSION:
  669 + . Calls other millicode routines via mrp: NONE
  670 + . Calls other millicode routines: NONE */
  671 +
  672 +RDEFINE(tmp,r1)
  673 +RDEFINE(retreg,ret1)
  674 +
  675 + SUBSPA_MILLI
  676 + ATTR_MILLI
  677 + .proc
  678 + .callinfo millicode
  679 + .entry
  680 +GSYM($$remI)
  681 +GSYM($$remoI)
  682 + .export $$remI,MILLICODE
  683 + .export $$remoI,MILLICODE
  684 + ldo -1(arg1),tmp /* is there at most one bit set ? */
  685 + and,<> arg1,tmp,r0 /* if not, don't use power of 2 */
  686 + addi,> 0,arg1,r0 /* if denominator > 0, use power */
  687 + /* of 2 */
  688 + b,n LREF(neg_denom)
  689 +LSYM(pow2)
  690 + comb,>,n 0,arg0,LREF(neg_num) /* is numerator < 0 ? */
  691 + and arg0,tmp,retreg /* get the result */
  692 + MILLIRETN
  693 +LSYM(neg_num)
  694 + subi 0,arg0,arg0 /* negate numerator */
  695 + and arg0,tmp,retreg /* get the result */
  696 + subi 0,retreg,retreg /* negate result */
  697 + MILLIRETN
  698 +LSYM(neg_denom)
  699 + addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power */
  700 + /* of 2 */
  701 + b,n LREF(regular_seq)
  702 + sub r0,arg1,tmp /* make denominator positive */
  703 + comb,=,n arg1,tmp,LREF(regular_seq) /* test against 0x80000000 and 0 */
  704 + ldo -1(tmp),retreg /* is there at most one bit set ? */
  705 + and,= tmp,retreg,r0 /* if not, go to regular_seq */
  706 + b,n LREF(regular_seq)
  707 + comb,>,n 0,arg0,LREF(neg_num_2) /* if arg0 < 0, negate it */
  708 + and arg0,retreg,retreg
  709 + MILLIRETN
  710 +LSYM(neg_num_2)
  711 + subi 0,arg0,tmp /* test against 0x80000000 */
  712 + and tmp,retreg,retreg
  713 + subi 0,retreg,retreg
  714 + MILLIRETN
  715 +LSYM(regular_seq)
  716 + addit,= 0,arg1,0 /* trap if div by zero */
  717 + add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
  718 + sub 0,retreg,retreg /* make it positive */
  719 + sub 0,arg1, tmp /* clear carry, */
  720 + /* negate the divisor */
  721 + ds 0, tmp,0 /* set V-bit to the comple- */
  722 + /* ment of the divisor sign */
  723 + or 0,0, tmp /* clear tmp */
  724 + add retreg,retreg,retreg /* shift msb bit into carry */
  725 + ds tmp,arg1, tmp /* 1st divide step, if no carry */
  726 + /* out, msb of quotient = 0 */
  727 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  728 +LSYM(t1)
  729 + ds tmp,arg1, tmp /* 2nd divide step */
  730 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  731 + ds tmp,arg1, tmp /* 3rd divide step */
  732 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  733 + ds tmp,arg1, tmp /* 4th divide step */
  734 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  735 + ds tmp,arg1, tmp /* 5th divide step */
  736 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  737 + ds tmp,arg1, tmp /* 6th divide step */
  738 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  739 + ds tmp,arg1, tmp /* 7th divide step */
  740 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  741 + ds tmp,arg1, tmp /* 8th divide step */
  742 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  743 + ds tmp,arg1, tmp /* 9th divide step */
  744 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  745 + ds tmp,arg1, tmp /* 10th divide step */
  746 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  747 + ds tmp,arg1, tmp /* 11th divide step */
  748 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  749 + ds tmp,arg1, tmp /* 12th divide step */
  750 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  751 + ds tmp,arg1, tmp /* 13th divide step */
  752 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  753 + ds tmp,arg1, tmp /* 14th divide step */
  754 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  755 + ds tmp,arg1, tmp /* 15th divide step */
  756 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  757 + ds tmp,arg1, tmp /* 16th divide step */
  758 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  759 + ds tmp,arg1, tmp /* 17th divide step */
  760 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  761 + ds tmp,arg1, tmp /* 18th divide step */
  762 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  763 + ds tmp,arg1, tmp /* 19th divide step */
  764 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  765 + ds tmp,arg1, tmp /* 20th divide step */
  766 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  767 + ds tmp,arg1, tmp /* 21st divide step */
  768 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  769 + ds tmp,arg1, tmp /* 22nd divide step */
  770 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  771 + ds tmp,arg1, tmp /* 23rd divide step */
  772 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  773 + ds tmp,arg1, tmp /* 24th divide step */
  774 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  775 + ds tmp,arg1, tmp /* 25th divide step */
  776 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  777 + ds tmp,arg1, tmp /* 26th divide step */
  778 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  779 + ds tmp,arg1, tmp /* 27th divide step */
  780 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  781 + ds tmp,arg1, tmp /* 28th divide step */
  782 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  783 + ds tmp,arg1, tmp /* 29th divide step */
  784 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  785 + ds tmp,arg1, tmp /* 30th divide step */
  786 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  787 + ds tmp,arg1, tmp /* 31st divide step */
  788 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  789 + ds tmp,arg1, tmp /* 32nd divide step, */
  790 + addc retreg,retreg,retreg /* shift last bit into retreg */
  791 + movb,>=,n tmp,retreg,LREF(finish) /* branch if pos. tmp */
  792 + add,< arg1,0,0 /* if arg1 > 0, add arg1 */
  793 + add,tr tmp,arg1,retreg /* for correcting remainder tmp */
  794 + sub tmp,arg1,retreg /* else add absolute value arg1 */
  795 +LSYM(finish)
  796 + add,>= arg0,0,0 /* set sign of remainder */
  797 + sub 0,retreg,retreg /* to sign of dividend */
  798 + MILLIRET
  799 + nop
  800 + .exit
  801 + .procend
  802 +#ifdef milliext
  803 + .origin 0x00000200
  804 +#endif
  805 + .end
  806 +#endif
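
In C terms the whole contract is one line; a sketch (assumes d != 0, and
ignores the INT32_MIN / -1 case, which overflows in C):

    #include <stdint.h>

    /* The remainder takes the sign of the dividend, so
       n == (n / d) * d + remI_model(n, d) always holds. */
    static int32_t remI_model(int32_t n, int32_t d)
    {
            return n - (n / d) * d;         /* same rule as C's '%' */
    }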
  807 +
  808 +#ifdef L_remU
  809 +/* ROUTINE: $$remU
  810 + . Single precision divide for remainder with unsigned binary integers.
  811 + .
  812 + . The remainder must be dividend-(dividend/divisor)*divisor.
  813 + . Divide by zero is trapped.
  814 +
  815 + INPUT REGISTERS:
  816 + . arg0 == dividend
  817 + . arg1 == divisor
  818 + . mrp == return pc
  819 + . sr0 == return space when called externally
  820 +
  821 + OUTPUT REGISTERS:
  822 + . arg0 = undefined
  823 + . arg1 = undefined
  824 + . ret1 = remainder
  825 +
  826 + OTHER REGISTERS AFFECTED:
  827 + . r1 = undefined
  828 +
  829 + SIDE EFFECTS:
  830 + . Causes a trap under the following conditions: DIVIDE BY ZERO
  831 + . Changes memory at the following places: NONE
  832 +
  833 + PERMISSIBLE CONTEXT:
  834 + . Unwindable.
  835 + . Does not create a stack frame.
  836 + . Suitable for internal or external millicode.
  837 + . Assumes the special millicode register conventions.
  838 +
  839 + DISCUSSION:
  840 + . Calls other millicode routines using mrp: NONE
  841 + . Calls other millicode routines: NONE */
  842 +
  843 +
  844 +RDEFINE(temp,r1)
  845 +RDEFINE(rmndr,ret1) /* r29 */
  846 + SUBSPA_MILLI
  847 + ATTR_MILLI
  848 + .export $$remU,millicode
  849 + .proc
  850 + .callinfo millicode
  851 + .entry
  852 +GSYM($$remU)
  853 + ldo -1(arg1),temp /* is there at most one bit set ? */
  854 + and,= arg1,temp,r0 /* if not, don't use power of 2 */
  855 + b LREF(regular_seq)
  856 + addit,= 0,arg1,r0 /* trap on div by zero */
  857 + and arg0,temp,rmndr /* get the result for power of 2 */
  858 + MILLIRETN
  859 +LSYM(regular_seq)
  860 + comib,>=,n 0,arg1,LREF(special_case)
  861 + subi 0,arg1,rmndr /* clear carry, negate the divisor */
  862 + ds r0,rmndr,r0 /* set V-bit to 1 */
  863 + add arg0,arg0,temp /* shift msb bit into carry */
  864 + ds r0,arg1,rmndr /* 1st divide step, if no carry */
  865 + addc temp,temp,temp /* shift temp with/into carry */
  866 + ds rmndr,arg1,rmndr /* 2nd divide step */
  867 + addc temp,temp,temp /* shift temp with/into carry */
  868 + ds rmndr,arg1,rmndr /* 3rd divide step */
  869 + addc temp,temp,temp /* shift temp with/into carry */
  870 + ds rmndr,arg1,rmndr /* 4th divide step */
  871 + addc temp,temp,temp /* shift temp with/into carry */
  872 + ds rmndr,arg1,rmndr /* 5th divide step */
  873 + addc temp,temp,temp /* shift temp with/into carry */
  874 + ds rmndr,arg1,rmndr /* 6th divide step */
  875 + addc temp,temp,temp /* shift temp with/into carry */
  876 + ds rmndr,arg1,rmndr /* 7th divide step */
  877 + addc temp,temp,temp /* shift temp with/into carry */
  878 + ds rmndr,arg1,rmndr /* 8th divide step */
  879 + addc temp,temp,temp /* shift temp with/into carry */
  880 + ds rmndr,arg1,rmndr /* 9th divide step */
  881 + addc temp,temp,temp /* shift temp with/into carry */
  882 + ds rmndr,arg1,rmndr /* 10th divide step */
  883 + addc temp,temp,temp /* shift temp with/into carry */
  884 + ds rmndr,arg1,rmndr /* 11th divide step */
  885 + addc temp,temp,temp /* shift temp with/into carry */
  886 + ds rmndr,arg1,rmndr /* 12th divide step */
  887 + addc temp,temp,temp /* shift temp with/into carry */
  888 + ds rmndr,arg1,rmndr /* 13th divide step */
  889 + addc temp,temp,temp /* shift temp with/into carry */
  890 + ds rmndr,arg1,rmndr /* 14th divide step */
  891 + addc temp,temp,temp /* shift temp with/into carry */
  892 + ds rmndr,arg1,rmndr /* 15th divide step */
  893 + addc temp,temp,temp /* shift temp with/into carry */
  894 + ds rmndr,arg1,rmndr /* 16th divide step */
  895 + addc temp,temp,temp /* shift temp with/into carry */
  896 + ds rmndr,arg1,rmndr /* 17th divide step */
  897 + addc temp,temp,temp /* shift temp with/into carry */
  898 + ds rmndr,arg1,rmndr /* 18th divide step */
  899 + addc temp,temp,temp /* shift temp with/into carry */
  900 + ds rmndr,arg1,rmndr /* 19th divide step */
  901 + addc temp,temp,temp /* shift temp with/into carry */
  902 + ds rmndr,arg1,rmndr /* 20th divide step */
  903 + addc temp,temp,temp /* shift temp with/into carry */
  904 + ds rmndr,arg1,rmndr /* 21st divide step */
  905 + addc temp,temp,temp /* shift temp with/into carry */
  906 + ds rmndr,arg1,rmndr /* 22nd divide step */
  907 + addc temp,temp,temp /* shift temp with/into carry */
  908 + ds rmndr,arg1,rmndr /* 23rd divide step */
  909 + addc temp,temp,temp /* shift temp with/into carry */
  910 + ds rmndr,arg1,rmndr /* 24th divide step */
  911 + addc temp,temp,temp /* shift temp with/into carry */
  912 + ds rmndr,arg1,rmndr /* 25th divide step */
  913 + addc temp,temp,temp /* shift temp with/into carry */
  914 + ds rmndr,arg1,rmndr /* 26th divide step */
  915 + addc temp,temp,temp /* shift temp with/into carry */
  916 + ds rmndr,arg1,rmndr /* 27th divide step */
  917 + addc temp,temp,temp /* shift temp with/into carry */
  918 + ds rmndr,arg1,rmndr /* 28th divide step */
  919 + addc temp,temp,temp /* shift temp with/into carry */
  920 + ds rmndr,arg1,rmndr /* 29th divide step */
  921 + addc temp,temp,temp /* shift temp with/into carry */
  922 + ds rmndr,arg1,rmndr /* 30th divide step */
  923 + addc temp,temp,temp /* shift temp with/into carry */
  924 + ds rmndr,arg1,rmndr /* 31st divide step */
  925 + addc temp,temp,temp /* shift temp with/into carry */
  926 + ds rmndr,arg1,rmndr /* 32nd divide step, */
  927 + comiclr,<= 0,rmndr,r0
  928 + add rmndr,arg1,rmndr /* correction */
  929 + MILLIRETN
  930 + nop
  931 +
  932 +/* Putting >= on the last DS and deleting COMICLR does not work! */
  933 +LSYM(special_case)
  934 + sub,>>= arg0,arg1,rmndr
  935 + copy arg0,rmndr
  936 + MILLIRETN
  937 + nop
  938 + .exit
  939 + .procend
  940 + .end
  941 +#endif
  942 +
  943 +#ifdef L_div_const
  944 +/* ROUTINE: $$divI_2
  945 + . $$divI_3 $$divU_3
  946 + . $$divI_4
  947 + . $$divI_5 $$divU_5
  948 + . $$divI_6 $$divU_6
  949 + . $$divI_7 $$divU_7
  950 + . $$divI_8
  951 + . $$divI_9 $$divU_9
  952 + . $$divI_10 $$divU_10
  953 + .
  954 + . $$divI_12 $$divU_12
  955 + .
  956 + . $$divI_14 $$divU_14
  957 + . $$divI_15 $$divU_15
  958 + . $$divI_16
  959 + . $$divI_17 $$divU_17
  960 + .
  961 + . Divide by selected constants for single precision binary integers.
  962 +
  963 + INPUT REGISTERS:
  964 + . arg0 == dividend
  965 + . mrp == return pc
  966 + . sr0 == return space when called externally
  967 +
  968 + OUTPUT REGISTERS:
  969 + . arg0 = undefined
  970 + . arg1 = undefined
  971 + . ret1 = quotient
  972 +
  973 + OTHER REGISTERS AFFECTED:
  974 + . r1 = undefined
  975 +
  976 + SIDE EFFECTS:
  977 + . Causes a trap under the following conditions: NONE
  978 + . Changes memory at the following places: NONE
  979 +
  980 + PERMISSIBLE CONTEXT:
  981 + . Unwindable.
  982 + . Does not create a stack frame.
  983 + . Suitable for internal or external millicode.
  984 + . Assumes the special millicode register conventions.
  985 +
  986 + DISCUSSION:
  987 + . Calls other millicode routines using mrp: NONE
  988 + . Calls other millicode routines: NONE */
  989 +
  990 +
  991 +/* TRUNCATED DIVISION BY SMALL INTEGERS
  992 +
  993 + We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
  994 + (with y fixed).
  995 +
  996 + Let a = floor(z/y), for some choice of z. Note that z will be
  997 + chosen so that division by z is cheap.
  998 +
  999 + Let r be the remainder(z/y). In other words, r = z - ay.
  1000 +
  1001 + Now, our method is to choose a value for b such that
  1002 +
  1003 + q'(x) = floor((ax+b)/z)
  1004 +
  1005 + is equal to q(x) over as large a range of x as possible. If the
  1006 + two are equal over a sufficiently large range, and if it is easy to
  1007 + form the product (ax), and it is easy to divide by z, then we can
  1008 + perform the division much faster than the general division algorithm.
  1009 +
  1010 + So, we want the following to be true:
  1011 +
  1012 + . For x in the following range:
  1013 + .
  1014 + . ky <= x < (k+1)y
  1015 + .
  1016 + . implies that
  1017 + .
  1018 + . k <= (ax+b)/z < (k+1)
  1019 +
  1020 + We want to determine b such that this is true for all k in the
  1021 + range {0..K} for some maximum K.
  1022 +
  1023 + Since (ax+b) is an increasing function of x, we can take each
  1024 + bound separately to determine the "best" value for b.
  1025 +
  1026 + (ax+b)/z < (k+1) implies
  1027 +
  1028 + a((k+1)y-1)+b < (k+1)z implies
  1029 +
  1030 + b < a + (k+1)(z-ay) implies
  1031 +
  1032 + b < a + (k+1)r
  1033 +
  1034 + This needs to be true for all k in the range {0..K}. In
  1035 + particular, it is true for k = 0 and this leads to a maximum
  1036 + acceptable value for b.
  1037 +
  1038 + b < a+r or b <= a+r-1
  1039 +
  1040 + Taking the other bound, we have
  1041 +
  1042 + k <= (ax+b)/z implies
  1043 +
  1044 + k <= (aky+b)/z implies
  1045 +
  1046 + k(z-ay) <= b implies
  1047 +
  1048 + kr <= b
  1049 +
  1050 + Clearly, the largest range for k will be achieved by maximizing b,
  1051 + when r is not zero. When r is zero, then the simplest choice for b
  1052 + is 0. When r is not 0, set
  1053 +
  1054 + . b = a+r-1
  1055 +
  1056 + Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
  1057 + for all x in the range:
  1058 +
  1059 + . 0 <= x < (K+1)y
  1060 +
  1061 + We need to determine what K is. Of our two bounds,
  1062 +
  1063 + . b < a+(k+1)r is satisfied for all k >= 0, by construction.
  1064 +
  1065 + The other bound is
  1066 +
  1067 + . kr <= b
  1068 +
  1069 + This is always true if r = 0. If r is not 0 (the usual case), then
  1070 + K = floor((a+r-1)/r) is the maximum value for k.
  1071 +
  1072 + Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
  1073 + answer for q(x) = floor(x/y) when x is in the range
  1074 +
  1075 + (0,(K+1)y-1) K = floor((a+r-1)/r)
  1076 +
  1077 + To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
  1078 + the formula for q'(x) yields the correct value of q(x) for all x
  1079 + representable by a single word in HPPA.
  1080 +
  1081 + We are also constrained in that computing the product (ax), adding
  1082 + b, and dividing by z must all be done quickly, otherwise we will be
  1083 + better off going through the general algorithm using the DS
  1084 + instruction, which uses approximately 70 cycles.
  1085 +
  1086 + For each y, there is a choice of z which satisfies the constraints
  1087 + for (K+1)y >= 2**32. We may not, however, be able to satisfy the
  1088 + timing constraints for arbitrary y. It seems that z being equal to
  1089 + a power of 2 or a power of 2 minus 1 is as good as we can do, since
  1090 + it minimizes the time to do division by z. We want the choice of z
  1091 + to also result in a value for (a) that minimizes the computation of
  1092 + the product (ax). This is best achieved if (a) has a regular bit
  1093 + pattern (so the multiplication can be done with shifts and adds).
  1094 + The value of (a) also needs to be less than 2**32 so the product is
  1095 + always guaranteed to fit in 2 words.
  1096 +
  1097 + In actual practice, the following should be done:
  1098 +
  1099 + 1) For negative x, you should take the absolute value and remember
  1100 + . the fact so that the result can be negated. This obviously does
  1101 + . not apply in the unsigned case.
  1102 + 2) For even y, you should factor out the power of 2 that divides y
  1103 + . and divide x by it. You can then proceed by dividing by the
  1104 + . odd factor of y.
  1105 +
  1106 + Here is a table of some odd values of y, and corresponding choices
  1107 + for z which are "good".
  1108 +
  1109 + y z r a (hex) max x (hex)
  1110 +
  1111 + 3 2**32 1 55555555 100000001
  1112 + 5 2**32 1 33333333 100000003
  1113 + 7 2**24-1 0 249249 (infinite)
  1114 + 9 2**24-1 0 1c71c7 (infinite)
  1115 + 11 2**20-1 0 1745d (infinite)
  1116 + 13 2**24-1 0 13b13b (infinite)
  1117 + 15 2**32 1 11111111 10000000d
  1118 + 17 2**32 1 f0f0f0f 10000000f
  1119 +
  1120 + If r is 1, then b = a+r-1 = a. This simplifies the computation
  1121 + of (ax+b), since you can compute (x+1)(a) instead. If r is 0,
  1122 + then b = 0 may be used, which simplifies (ax+b).
  1123 +
  1124 + The bit patterns for 55555555, 33333333, and 11111111 are obviously
  1125 + very regular. The bit patterns for the other values of a above are:
  1126 +
  1127 + y (hex) (binary)
  1128 +
  1129 + 7 249249 001001001001001001001001 << regular >>
  1130 + 9 1c71c7 000111000111000111000111 << regular >>
  1131 + 11 1745d 000000010111010001011101 << irregular >>
  1132 + 13 13b13b 000100111011000100111011 << irregular >>
  1133 +
  1134 + The bit patterns for (a) corresponding to (y) of 11 and 13 may be
  1135 + too irregular to warrant using this method.
  1136 +
  1137 + When z is a power of 2 minus 1, then the division by z is slightly
  1138 + more complicated, involving an iterative solution.
  1139 +
  1140 + The code presented here solves division by 1 through 17, except for
  1141 + 11 and 13. There are algorithms for both signed and unsigned
  1142 + quantities given.
  1143 +
  1144 + TIMINGS (cycles)
  1145 +
  1146 + divisor positive negative unsigned
  1147 +
  1148 + . 1 2 2 2
  1149 + . 2 4 4 2
  1150 + . 3 19 21 19
  1151 + . 4 4 4 2
  1152 + . 5 18 22 19
  1153 + . 6 19 22 19
  1154 + . 8 4 4 2
  1155 + . 10 18 19 17
  1156 + . 12 18 20 18
  1157 + . 15 16 18 16
  1158 + . 16 4 4 2
  1159 + . 17 16 18 16
  1160 +
  1161 + Now, the algorithm for 7, 9, and 14 is an iterative one. That is,
  1162 + a loop body is executed until the tentative quotient is 0. The
  1163 + number of times the loop body is executed varies depending on the
  1164 + dividend, but is never more than two times. If the dividend is
  1165 + less than the divisor, then the loop body is not executed at all.
  1166 + Each iteration adds 4 cycles to the timings.
  1167 +
  1168 + divisor positive negative unsigned
  1169 +
  1170 + . 7 19+4n 20+4n 20+4n n = number of iterations
  1171 + . 9 21+4n 22+4n 21+4n
  1172 + . 14 21+4n 22+4n 20+4n
  1173 +
  1174 + To give an idea of how the number of iterations varies, here is a
  1175 + table of dividend versus number of iterations when dividing by 7.
  1176 +
  1177 + smallest largest required
  1178 + dividend dividend iterations
  1179 +
  1180 + . 0 6 0
  1181 + . 7 0x6ffffff 1
  1182 + . 0x1000006 0xffffffff 2
  1183 +
  1184 + There is some overlap in the range of numbers requiring 1 and 2
  1185 + iterations. */
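
As a concrete check of the construction, take y = 3 from the table: z = 2**32,
a = 0x55555555, r = 1, so b = a + r - 1 = a and ax + b = a(x + 1); dividing by
z is just taking the high word of the 64-bit product. A small self-test
(hypothetical helper name):

    #include <stdint.h>
    #include <assert.h>

    static uint32_t div3_by_mul(uint32_t x)
    {
            uint64_t prod = (uint64_t)0x55555555u * ((uint64_t)x + 1);
            return (uint32_t)(prod >> 32);  /* divide by z = 2**32 */
    }

    int main(void)
    {
            const uint32_t t[] = {
                    0, 1, 2, 3, 5, 6, 0x7fffffff, 0xfffffffe, 0xffffffff
            };
            unsigned i;

            for (i = 0; i < sizeof(t) / sizeof(t[0]); i++)
                    assert(div3_by_mul(t[i]) == t[i] / 3);
            return 0;
    }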
  1186 +
  1187 +RDEFINE(t2,r1)
  1188 +RDEFINE(x2,arg0) /* r26 */
  1189 +RDEFINE(t1,arg1) /* r25 */
  1190 +RDEFINE(x1,ret1) /* r29 */
  1191 +
  1192 + SUBSPA_MILLI_DIV
  1193 + ATTR_MILLI
  1194 +
  1195 + .proc
  1196 + .callinfo millicode
  1197 + .entry
  1198 +/* NONE of these routines require a stack frame
  1199 + ALL of these routines are unwindable from millicode */
  1200 +
  1201 +GSYM($$divide_by_constant)
  1202 + .export $$divide_by_constant,millicode
  1203 +/* Provides a "nice" label for the code covered by the unwind descriptor
  1204 + for things like gprof. */
  1205 +
  1206 +/* DIVISION BY 2 (shift by 1) */
  1207 +GSYM($$divI_2)
  1208 + .export $$divI_2,millicode
  1209 + comclr,>= arg0,0,0
  1210 + addi 1,arg0,arg0
  1211 + MILLIRET
  1212 + extrs arg0,30,31,ret1
  1213 +
  1214 +
  1215 +/* DIVISION BY 4 (shift by 2) */
  1216 +GSYM($$divI_4)
  1217 + .export $$divI_4,millicode
  1218 + comclr,>= arg0,0,0
  1219 + addi 3,arg0,arg0
  1220 + MILLIRET
  1221 + extrs arg0,29,30,ret1
  1222 +
  1223 +
  1224 +/* DIVISION BY 8 (shift by 3) */
  1225 +GSYM($$divI_8)
  1226 + .export $$divI_8,millicode
  1227 + comclr,>= arg0,0,0
  1228 + addi 7,arg0,arg0
  1229 + MILLIRET
  1230 + extrs arg0,28,29,ret1
  1231 +
  1232 +/* DIVISION BY 16 (shift by 4) */
  1233 +GSYM($$divI_16)
  1234 + .export $$divI_16,millicode
  1235 + comclr,>= arg0,0,0
  1236 + addi 15,arg0,arg0
  1237 + MILLIRET
  1238 + extrs arg0,27,28,ret1
  1239 +
  1240 +/****************************************************************************
  1241 +*
  1242 +* DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
  1243 +*
  1244 +* includes 3,5,15,17 and also 6,10,12
  1245 +*
  1246 +****************************************************************************/
  1247 +
  1248 +/* DIVISION BY 3 (use z = 2**32; a = 55555555) */
  1249 +
  1250 +GSYM($$divI_3)
  1251 + .export $$divI_3,millicode
  1252 + comb,<,N x2,0,LREF(neg3)
  1253 +
  1254 + addi 1,x2,x2 /* this cannot overflow */
  1255 + extru x2,1,2,x1 /* multiply by 5 to get started */
  1256 + sh2add x2,x2,x2
  1257 + b LREF(pos)
  1258 + addc x1,0,x1
  1259 +
  1260 +LSYM(neg3)
  1261 + subi 1,x2,x2 /* this cannot overflow */
  1262 + extru x2,1,2,x1 /* multiply by 5 to get started */
  1263 + sh2add x2,x2,x2
  1264 + b LREF(neg)
  1265 + addc x1,0,x1
  1266 +
  1267 +GSYM($$divU_3)
  1268 + .export $$divU_3,millicode
  1269 + addi 1,x2,x2 /* this CAN overflow */
  1270 + addc 0,0,x1
  1271 + shd x1,x2,30,t1 /* multiply by 5 to get started */
  1272 + sh2add x2,x2,x2
  1273 + b LREF(pos)
  1274 + addc x1,t1,x1
  1275 +
  1276 +/* DIVISION BY 5 (use z = 2**32; a = 33333333) */
  1277 +
  1278 +GSYM($$divI_5)
  1279 + .export $$divI_5,millicode
  1280 + comb,<,N x2,0,LREF(neg5)
  1281 +
  1282 + addi 3,x2,t1 /* this cannot overflow */
  1283 + sh1add x2,t1,x2 /* multiply by 3 to get started */
  1284 + b LREF(pos)
  1285 + addc 0,0,x1
  1286 +
  1287 +LSYM(neg5)
  1288 + sub 0,x2,x2 /* negate x2 */
  1289 + addi 1,x2,x2 /* this cannot overflow */
  1290 + shd 0,x2,31,x1 /* get top bit (can be 1) */
  1291 + sh1add x2,x2,x2 /* multiply by 3 to get started */
  1292 + b LREF(neg)
  1293 + addc x1,0,x1
  1294 +
  1295 +GSYM($$divU_5)
  1296 + .export $$divU_5,millicode
  1297 + addi 1,x2,x2 /* this CAN overflow */
  1298 + addc 0,0,x1
  1299 + shd x1,x2,31,t1 /* multiply by 3 to get started */
  1300 + sh1add x2,x2,x2
  1301 + b LREF(pos)
  1302 + addc t1,x1,x1
  1303 +
  1304 +/* DIVISION BY 6 (shift to divide by 2 then divide by 3) */
  1305 +GSYM($$divI_6)
  1306 + .export $$divI_6,millicode
  1307 + comb,<,N x2,0,LREF(neg6)
  1308 + extru x2,30,31,x2 /* divide by 2 */
  1309 + addi 5,x2,t1 /* compute 5*(x2+1) = 5*x2+5 */
  1310 + sh2add x2,t1,x2 /* multiply by 5 to get started */
  1311 + b LREF(pos)
  1312 + addc 0,0,x1
  1313 +
  1314 +LSYM(neg6)
  1315 + subi 2,x2,x2 /* negate, divide by 2, and add 1 */
  1316 + /* negation and adding 1 are done */
  1317 + /* at the same time by the SUBI */
  1318 + extru x2,30,31,x2
  1319 + shd 0,x2,30,x1
  1320 + sh2add x2,x2,x2 /* multiply by 5 to get started */
  1321 + b LREF(neg)
  1322 + addc x1,0,x1
  1323 +
  1324 +GSYM($$divU_6)
  1325 + .export $$divU_6,millicode
  1326 + extru x2,30,31,x2 /* divide by 2 */
  1327 + addi 1,x2,x2 /* cannot carry */
  1328 + shd 0,x2,30,x1 /* multiply by 5 to get started */
  1329 + sh2add x2,x2,x2
  1330 + b LREF(pos)
  1331 + addc x1,0,x1
  1332 +
  1333 +/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
  1334 +GSYM($$divU_10)
  1335 + .export $$divU_10,millicode
  1336 + extru x2,30,31,x2 /* divide by 2 */
  1337 + addi 3,x2,t1 /* compute 3*(x2+1) = (3*x2)+3 */
  1338 + sh1add x2,t1,x2 /* multiply by 3 to get started */
  1339 + addc 0,0,x1
  1340 +LSYM(pos)
  1341 + shd x1,x2,28,t1 /* multiply by 0x11 */
  1342 + shd x2,0,28,t2
  1343 + add x2,t2,x2
  1344 + addc x1,t1,x1
  1345 +LSYM(pos_for_17)
  1346 + shd x1,x2,24,t1 /* multiply by 0x101 */
  1347 + shd x2,0,24,t2
  1348 + add x2,t2,x2
  1349 + addc x1,t1,x1
  1350 +
  1351 + shd x1,x2,16,t1 /* multiply by 0x10001 */
  1352 + shd x2,0,16,t2
  1353 + add x2,t2,x2
  1354 + MILLIRET
  1355 + addc x1,t1,x1
  1356 +
  1357 +GSYM($$divI_10)
  1358 + .export $$divI_10,millicode
  1359 + comb,< x2,0,LREF(neg10)
  1360 + copy 0,x1
  1361 + extru x2,30,31,x2 /* divide by 2 */
  1362 + addib,TR 1,x2,LREF(pos) /* add 1 (cannot overflow) */
  1363 + sh1add x2,x2,x2 /* multiply by 3 to get started */
  1364 +
  1365 +LSYM(neg10)
  1366 + subi 2,x2,x2 /* negate, divide by 2, and add 1 */
  1367 + /* negation and adding 1 are done */
  1368 + /* at the same time by the SUBI */
  1369 + extru x2,30,31,x2
  1370 + sh1add x2,x2,x2 /* multiply by 3 to get started */
  1371 +LSYM(neg)
  1372 + shd x1,x2,28,t1 /* multiply by 0x11 */
  1373 + shd x2,0,28,t2
  1374 + add x2,t2,x2
  1375 + addc x1,t1,x1
  1376 +LSYM(neg_for_17)
  1377 + shd x1,x2,24,t1 /* multiply by 0x101 */
  1378 + shd x2,0,24,t2
  1379 + add x2,t2,x2
  1380 + addc x1,t1,x1
  1381 +
  1382 + shd x1,x2,16,t1 /* multiply by 0x10001 */
  1383 + shd x2,0,16,t2
  1384 + add x2,t2,x2
  1385 + addc x1,t1,x1
  1386 + MILLIRET
  1387 + sub 0,x1,x1
  1388 +
  1389 +/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
  1390 +GSYM($$divI_12)
  1391 + .export $$divI_12,millicode
  1392 + comb,< x2,0,LREF(neg12)
  1393 + copy 0,x1
  1394 + extru x2,29,30,x2 /* divide by 4 */
  1395 + addib,tr 1,x2,LREF(pos) /* compute 5*(x2+1) = 5*x2+5 */
  1396 + sh2add x2,x2,x2 /* multiply by 5 to get started */
  1397 +
  1398 +LSYM(neg12)
  1399 + subi 4,x2,x2 /* negate, divide by 4, and add 1 */
  1400 + /* negation and adding 1 are done */
  1401 + /* at the same time by the SUBI */
  1402 + extru x2,29,30,x2
  1403 + b LREF(neg)
  1404 + sh2add x2,x2,x2 /* multiply by 5 to get started */
  1405 +
  1406 +GSYM($$divU_12)
  1407 + .export $$divU_12,millicode
  1408 + extru x2,29,30,x2 /* divide by 4 */
  1409 + addi 5,x2,t1 /* cannot carry */
  1410 + sh2add x2,t1,x2 /* multiply by 5 to get started */
  1411 + b LREF(pos)
  1412 + addc 0,0,x1
  1413 +
  1414 +/* DIVISION BY 15 (use z = 2**32; a = 11111111) */
  1415 +GSYM($$divI_15)
  1416 + .export $$divI_15,millicode
  1417 + comb,< x2,0,LREF(neg15)
  1418 + copy 0,x1
  1419 + addib,tr 1,x2,LREF(pos)+4
  1420 + shd x1,x2,28,t1
  1421 +
  1422 +LSYM(neg15)
  1423 + b LREF(neg)
  1424 + subi 1,x2,x2
  1425 +
  1426 +GSYM($$divU_15)
  1427 + .export $$divU_15,millicode
  1428 + addi 1,x2,x2 /* this CAN overflow */
  1429 + b LREF(pos)
  1430 + addc 0,0,x1
  1431 +
  1432 +/* DIVISION BY 17 (use z = 2**32; a = f0f0f0f) */
  1433 +GSYM($$divI_17)
  1434 + .export $$divI_17,millicode
  1435 + comb,<,n x2,0,LREF(neg17)
  1436 + addi 1,x2,x2 /* this cannot overflow */
  1437 + shd 0,x2,28,t1 /* multiply by 0xf to get started */
  1438 + shd x2,0,28,t2
  1439 + sub t2,x2,x2
  1440 + b LREF(pos_for_17)
  1441 + subb t1,0,x1
  1442 +
  1443 +LSYM(neg17)
  1444 + subi 1,x2,x2 /* this cannot overflow */
  1445 + shd 0,x2,28,t1 /* multiply by 0xf to get started */
  1446 + shd x2,0,28,t2
  1447 + sub t2,x2,x2
  1448 + b LREF(neg_for_17)
  1449 + subb t1,0,x1
  1450 +
  1451 +GSYM($$divU_17)
  1452 + .export $$divU_17,millicode
  1453 + addi 1,x2,x2 /* this CAN overflow */
  1454 + addc 0,0,x1
  1455 + shd x1,x2,28,t1 /* multiply by 0xf to get started */
  1456 +LSYM(u17)
  1457 + shd x2,0,28,t2
  1458 + sub t2,x2,x2
  1459 + b LREF(pos_for_17)
  1460 + subb t1,x1,x1
  1461 +
  1462 +
  1463 +/* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these
  1464 + includes 7,9 and also 14
  1465 +
  1466 +
  1467 + z = 2**24-1
  1468 + r = z mod x = 0
  1469 +
  1470 + so choose b = 0
  1471 +
  1472 + Also, in order to divide by z = 2**24-1, we approximate by dividing
  1473 + by (z+1) = 2**24 (which is easy), and then correcting.
  1474 +
  1475 + (ax) = (z+1)q' + r
  1476 + . = zq' + (q'+r)
  1477 +
  1478 + So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
  1479 + Then the true remainder of (ax)/z is (q'+r). Repeat the process
  1480 + with this new remainder, adding the tentative quotients together,
  1481 + until a tentative quotient is 0 (and then we are done). There is
  1482 + one last correction to be done. It is possible that (q'+r) = z.
  1483 + If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But,
  1484 + in fact, we need to add 1 more to the quotient. Now, it turns
  1485 + out that this happens if and only if the original value x is
  1486 + an exact multiple of y. So, to avoid a three instruction test at
  1487 + the end, instead use 1 instruction to add 1 to x at the beginning. */
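
Putting those pieces together for y = 7 (z = 2**24-1, r = 0, b = 0,
a = 0x249249), with the +1 added to x up front to absorb the exact-multiple
correction, gives this sketch of the unsigned case (helper name illustrative):

    #include <stdint.h>

    static uint32_t divU7_model(uint32_t x)
    {
            uint64_t ax = (uint64_t)0x249249 * ((uint64_t)x + 1);
            uint32_t q = 0;
            uint32_t t = (uint32_t)(ax >> 24);      /* tentative quotient */
            uint32_t r = (uint32_t)(ax & 0xffffff); /* remainder mod 2**24 */

            while (t) {             /* never more than two iterations */
                    q += t;         /* ax = z*t + (t + r) */
                    r += t;
                    t = r >> 24;    /* fold: next tentative quotient */
                    r &= 0xffffff;
            }
            return q;
    }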
  1488 +
  1489 +/* DIVISION BY 7 (use z = 2**24-1; a = 249249) */
  1490 +GSYM($$divI_7)
  1491 + .export $$divI_7,millicode
  1492 + comb,<,n x2,0,LREF(neg7)
  1493 +LSYM(7)
  1494 + addi 1,x2,x2 /* cannot overflow */
  1495 + shd 0,x2,29,x1
  1496 + sh3add x2,x2,x2
  1497 + addc x1,0,x1
  1498 +LSYM(pos7)
  1499 + shd x1,x2,26,t1
  1500 + shd x2,0,26,t2
  1501 + add x2,t2,x2
  1502 + addc x1,t1,x1
  1503 +
  1504 + shd x1,x2,20,t1
  1505 + shd x2,0,20,t2
  1506 + add x2,t2,x2
  1507 + addc x1,t1,t1
  1508 +
  1509 + /* computed <t1,x2>. Now divide it by (2**24 - 1) */
  1510 +
  1511 + copy 0,x1
  1512 + shd,= t1,x2,24,t1 /* tentative quotient */
  1513 +LSYM(1)
  1514 + addb,tr t1,x1,LREF(2) /* add to previous quotient */
  1515 + extru x2,31,24,x2 /* new remainder (unadjusted) */
  1516 +
  1517 + MILLIRETN
  1518 +
  1519 +LSYM(2)
  1520 + addb,tr t1,x2,LREF(1) /* adjust remainder */
  1521 + extru,= x2,7,8,t1 /* new quotient */
  1522 +
  1523 +LSYM(neg7)
  1524 + subi 1,x2,x2 /* negate x2 and add 1 */
  1525 +LSYM(8)
  1526 + shd 0,x2,29,x1
  1527 + sh3add x2,x2,x2
  1528 + addc x1,0,x1
  1529 +
  1530 +LSYM(neg7_shift)
  1531 + shd x1,x2,26,t1
  1532 + shd x2,0,26,t2
  1533 + add x2,t2,x2
  1534 + addc x1,t1,x1
  1535 +
  1536 + shd x1,x2,20,t1
  1537 + shd x2,0,20,t2
  1538 + add x2,t2,x2
  1539 + addc x1,t1,t1
  1540 +
  1541 + /* computed <t1,x2>. Now divide it by (2**24 - 1) */
  1542 +
  1543 + copy 0,x1
  1544 + shd,= t1,x2,24,t1 /* tentative quotient */
  1545 +LSYM(3)
  1546 + addb,tr t1,x1,LREF(4) /* add to previous quotient */
  1547 + extru x2,31,24,x2 /* new remainder (unadjusted) */
  1548 +
  1549 + MILLIRET
  1550 + sub 0,x1,x1 /* negate result */
  1551 +
  1552 +LSYM(4)
  1553 + addb,tr t1,x2,LREF(3) /* adjust remainder */
  1554 + extru,= x2,7,8,t1 /* new quotient */
  1555 +
  1556 +GSYM($$divU_7)
  1557 + .export $$divU_7,millicode
  1558 + addi 1,x2,x2 /* can carry */
  1559 + addc 0,0,x1
  1560 + shd x1,x2,29,t1
  1561 + sh3add x2,x2,x2
  1562 + b LREF(pos7)
  1563 + addc t1,x1,x1
  1564 +
  1565 +/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */
  1566 +GSYM($$divI_9)
  1567 + .export $$divI_9,millicode
  1568 + comb,<,n x2,0,LREF(neg9)
  1569 + addi 1,x2,x2 /* cannot overflow */
  1570 + shd 0,x2,29,t1
  1571 + shd x2,0,29,t2
  1572 + sub t2,x2,x2
  1573 + b LREF(pos7)
  1574 + subb t1,0,x1
  1575 +
  1576 +LSYM(neg9)
  1577 + subi 1,x2,x2 /* negate and add 1 */
  1578 + shd 0,x2,29,t1
  1579 + shd x2,0,29,t2
  1580 + sub t2,x2,x2
  1581 + b LREF(neg7_shift)
  1582 + subb t1,0,x1
  1583 +
  1584 +GSYM($$divU_9)
  1585 + .export $$divU_9,millicode
  1586 + addi 1,x2,x2 /* can carry */
  1587 + addc 0,0,x1
  1588 + shd x1,x2,29,t1
  1589 + shd x2,0,29,t2
  1590 + sub t2,x2,x2
  1591 + b LREF(pos7)
  1592 + subb t1,x1,x1
  1593 +
  1594 +/* DIVISION BY 14 (shift to divide by 2 then divide by 7) */
  1595 +GSYM($$divI_14)
  1596 + .export $$divI_14,millicode
  1597 + comb,<,n x2,0,LREF(neg14)
  1598 +GSYM($$divU_14)
  1599 + .export $$divU_14,millicode
  1600 + b LREF(7) /* go to 7 case */
  1601 + extru x2,30,31,x2 /* divide by 2 */
  1602 +
  1603 +LSYM(neg14)
  1604 + subi 2,x2,x2 /* negate (and add 2) */
  1605 + b LREF(8)
  1606 + extru x2,30,31,x2 /* divide by 2 */
  1607 + .exit
  1608 + .procend
  1609 + .end
  1610 +#endif
  1611 +
  1612 +#ifdef L_mulI
  1613 +/* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */
  1614 +/******************************************************************************
  1615 +This routine is used on PA2.0 processors when gcc -mno-fpregs is used
  1616 +
  1617 +ROUTINE: $$mulI
  1618 +
  1619 +
  1620 +DESCRIPTION:
  1621 +
  1622 + $$mulI multiplies two single word integers, giving a single
  1623 + word result.
  1624 +
  1625 +
  1626 +INPUT REGISTERS:
  1627 +
  1628 + arg0 = Operand 1
  1629 + arg1 = Operand 2
  1630 + r31 == return pc
  1631 + sr0 == return space when called externally
  1632 +
  1633 +
  1634 +OUTPUT REGISTERS:
  1635 +
  1636 + arg0 = undefined
  1637 + arg1 = undefined
  1638 + ret1 = result
  1639 +
  1640 +OTHER REGISTERS AFFECTED:
  1641 +
  1642 + r1 = undefined
  1643 +
  1644 +SIDE EFFECTS:
  1645 +
  1646 + Causes a trap under the following conditions: NONE
  1647 + Changes memory at the following places: NONE
  1648 +
  1649 +PERMISSIBLE CONTEXT:
  1650 +
  1651 + Unwindable
  1652 + Does not create a stack frame
  1653 + Is usable for internal or external millicode
  1654 +
  1655 +DISCUSSION:
  1656 +
  1657 + Calls other millicode routines via mrp: NONE
  1658 + Calls other millicode routines: NONE
  1659 +
  1660 +***************************************************************************/
  1661 +
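The table-driven dispatch is easier to follow against a scalar model. A minimal C sketch (an illustration, not code from this commit): the multiplier is consumed eight bits at a time, each byte selects one shift-and-add recipe from the 256-entry blr table below, and the multiplicand is shifted left by eight between bytes.

	#include <stdint.h>

	/* Model of the $$mulI strategy: one partial product per byte of
	 * the multiplier.  The millicode's blr branch table replaces the
	 * multiply here with pure shift-and-add sequences; the result is
	 * taken mod 2**32, matching the single-word millicode result. */
	static uint32_t muli_model(uint32_t a0, uint32_t a1)
	{
		uint32_t r = 0;

		while (a1 != 0) {
			r += a0 * (a1 & 0xff);	/* one table entry's work */
			a0 <<= 8;		/* a0 <<= 8 */
			a1 >>= 8;		/* a1 >>= 8 */
		}
		return r;
	}

The argument swap at entry keeps the smaller unsigned value in a1, so the loop runs for the fewest possible bytes.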
  1662 +
  1663 +#define a0 %arg0
  1664 +#define a1 %arg1
  1665 +#define t0 %r1
  1666 +#define r %ret1
  1667 +
  1668 +#define a0__128a0 zdep a0,24,25,a0
  1669 +#define a0__256a0 zdep a0,23,24,a0
  1670 +#define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0)
  1671 +#define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1)
  1672 +#define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2)
  1673 +#define b_n_ret_t0 b,n LREF(ret_t0)
  1674 +#define b_e_shift b LREF(e_shift)
  1675 +#define b_e_t0ma0 b LREF(e_t0ma0)
  1676 +#define b_e_t0 b LREF(e_t0)
  1677 +#define b_e_t0a0 b LREF(e_t0a0)
  1678 +#define b_e_t02a0 b LREF(e_t02a0)
  1679 +#define b_e_t04a0 b LREF(e_t04a0)
  1680 +#define b_e_2t0 b LREF(e_2t0)
  1681 +#define b_e_2t0a0 b LREF(e_2t0a0)
  1682 +#define b_e_2t04a0 b LREF(e2t04a0)
  1683 +#define b_e_3t0 b LREF(e_3t0)
  1684 +#define b_e_4t0 b LREF(e_4t0)
  1685 +#define b_e_4t0a0 b LREF(e_4t0a0)
  1686 +#define b_e_4t08a0 b LREF(e4t08a0)
  1687 +#define b_e_5t0 b LREF(e_5t0)
  1688 +#define b_e_8t0 b LREF(e_8t0)
  1689 +#define b_e_8t0a0 b LREF(e_8t0a0)
  1690 +#define r__r_a0 add r,a0,r
  1691 +#define r__r_2a0 sh1add a0,r,r
  1692 +#define r__r_4a0 sh2add a0,r,r
  1693 +#define r__r_8a0 sh3add a0,r,r
  1694 +#define r__r_t0 add r,t0,r
  1695 +#define r__r_2t0 sh1add t0,r,r
  1696 +#define r__r_4t0 sh2add t0,r,r
  1697 +#define r__r_8t0 sh3add t0,r,r
  1698 +#define t0__3a0 sh1add a0,a0,t0
  1699 +#define t0__4a0 sh2add a0,0,t0
  1700 +#define t0__5a0 sh2add a0,a0,t0
  1701 +#define t0__8a0 sh3add a0,0,t0
  1702 +#define t0__9a0 sh3add a0,a0,t0
  1703 +#define t0__16a0 zdep a0,27,28,t0
  1704 +#define t0__32a0 zdep a0,26,27,t0
  1705 +#define t0__64a0 zdep a0,25,26,t0
  1706 +#define t0__128a0 zdep a0,24,25,t0
  1707 +#define t0__t0ma0 sub t0,a0,t0
  1708 +#define t0__t0_a0 add t0,a0,t0
  1709 +#define t0__t0_2a0 sh1add a0,t0,t0
  1710 +#define t0__t0_4a0 sh2add a0,t0,t0
  1711 +#define t0__t0_8a0 sh3add a0,t0,t0
  1712 +#define t0__2t0_a0 sh1add t0,a0,t0
  1713 +#define t0__3t0 sh1add t0,t0,t0
  1714 +#define t0__4t0 sh2add t0,0,t0
  1715 +#define t0__4t0_a0 sh2add t0,a0,t0
  1716 +#define t0__5t0 sh2add t0,t0,t0
  1717 +#define t0__8t0 sh3add t0,0,t0
  1718 +#define t0__8t0_a0 sh3add t0,a0,t0
  1719 +#define t0__9t0 sh3add t0,t0,t0
  1720 +#define t0__16t0 zdep t0,27,28,t0
  1721 +#define t0__32t0 zdep t0,26,27,t0
  1722 +#define t0__256a0 zdep a0,23,24,t0
  1723 +
  1724 +
  1725 + SUBSPA_MILLI
  1726 + ATTR_MILLI
  1727 + .align 16
  1728 + .proc
  1729 + .callinfo millicode
  1730 + .export $$mulI,millicode
  1731 +GSYM($$mulI)
  1732 + combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */
  1733 + copy 0,r /* zero out the result */
  1734 + xor a0,a1,a0 /* swap a0 & a1 using the */
  1735 + xor a0,a1,a1 /* old xor trick */
  1736 + xor a0,a1,a0
  1737 +LSYM(l4)
  1738 + combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */
  1739 + zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
  1740 + sub,> 0,a1,t0 /* otherwise negate both and */
  1741 + combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */
  1742 + sub 0,a0,a1
  1743 + movb,tr,n t0,a0,LREF(l2) /* 10th inst. */
  1744 +
  1745 +LSYM(l0) r__r_t0 /* add in this partial product */
  1746 +LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */
  1747 +LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
  1748 +LSYM(l3) blr t0,0 /* case on these 8 bits ****** */
  1749 + extru a1,23,24,a1 /* a1 >>= 8 ****************** */
  1750 +
  1751 +/*16 insts before this. */
  1752 +/* a0 <<= 8 ************************** */
  1753 +LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop
  1754 +LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop
  1755 +LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop
  1756 +LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0
  1757 +LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop
  1758 +LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0
  1759 +LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
  1760 +LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0
  1761 +LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop
  1762 +LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0
  1763 +LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
  1764 +LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
  1765 +LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
  1766 +LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
  1767 +LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
  1768 +LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0
  1769 +LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
  1770 +LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0
  1771 +LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
  1772 +LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0
  1773 +LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
  1774 +LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
  1775 +LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
  1776 +LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
  1777 +LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
  1778 +LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
  1779 +LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
  1780 +LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0
  1781 +LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
  1782 +LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
  1783 +LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
  1784 +LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
  1785 +LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
  1786 +LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
  1787 +LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
  1788 +LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0
  1789 +LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
  1790 +LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
  1791 +LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
  1792 +LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
  1793 +LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
  1794 +LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
  1795 +LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
  1796 +LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
  1797 +LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
  1798 +LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
  1799 +LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0
  1800 +LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0
  1801 +LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0
  1802 +LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0
  1803 +LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
  1804 +LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0
  1805 +LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
  1806 +LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
  1807 +LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
  1808 +LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0
  1809 +LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
  1810 +LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0
  1811 +LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
  1812 +LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0
  1813 +LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
  1814 +LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
  1815 +LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
  1816 +LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
  1817 +LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
  1818 +LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
  1819 +LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
  1820 +LSYM(x67) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
  1821 +LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
  1822 +LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
  1823 +LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0
  1824 +LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0
  1825 +LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
  1826 +LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0
  1827 +LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
  1828 +LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
  1829 +LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
  1830 +LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
  1831 +LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0
  1832 +LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
  1833 +LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0
  1834 +LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0
  1835 +LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
  1836 +LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
  1837 +LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
  1838 +LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
  1839 +LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
  1840 +LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0
  1841 +LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
  1842 +LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
  1843 +LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
  1844 +LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0
  1845 +LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
  1846 +LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0
  1847 +LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0
  1848 +LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
  1849 +LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
  1850 +LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
  1851 +LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0
  1852 +LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
  1853 +LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
  1854 +LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
  1855 +LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
  1856 +LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0
  1857 +LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
  1858 +LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
  1859 +LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0
  1860 +LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0
  1861 +LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
  1862 +LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
  1863 +LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0
  1864 +LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
  1865 +LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0
  1866 +LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0
  1867 +LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0
  1868 +LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0
  1869 +LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0
  1870 +LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
  1871 +LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0
  1872 +LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0
  1873 +LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
  1874 +LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
  1875 +LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
  1876 +LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
  1877 +LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
  1878 +LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
  1879 +LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
  1880 +LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
  1881 +LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
  1882 +LSYM(x129) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0
  1883 +LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
  1884 +LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
  1885 +LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
  1886 +LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
  1887 +LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
  1888 +LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0
  1889 +LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
  1890 +LSYM(x137) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
  1891 +LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
  1892 +LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0
  1893 +LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0
  1894 +LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0
  1895 +LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0
  1896 +LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0
  1897 +LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0
  1898 +LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0
  1899 +LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
  1900 +LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
  1901 +LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
  1902 +LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
  1903 +LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
  1904 +LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
  1905 +LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
  1906 +LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
  1907 +LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
  1908 +LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0
  1909 +LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
  1910 +LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0
  1911 +LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0
  1912 +LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
  1913 +LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0
  1914 +LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
  1915 +LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0
  1916 +LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0
  1917 +LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0
  1918 +LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
  1919 +LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0
  1920 +LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
  1921 +LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
  1922 +LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0
  1923 +LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0
  1924 +LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__9t0
  1925 +LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0
  1926 +LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0
  1927 +LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0
  1928 +LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0
  1929 +LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0
  1930 +LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0
  1931 +LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0
  1932 +LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0
  1933 +LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
  1934 +LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
  1935 +LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0
  1936 +LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0
  1937 +LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0
  1938 +LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
  1939 +LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0
  1940 +LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0
  1941 +LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0
  1942 +LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
  1943 +LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0
  1944 +LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0
  1945 +LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
  1946 +LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
  1947 +LSYM(x194) t0__8a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
  1948 +LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
  1949 +LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0
  1950 +LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0
  1951 +LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
  1952 +LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
  1953 +LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0
  1954 +LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0
  1955 +LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0
  1956 +LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0
  1957 +LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
  1958 +LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0
  1959 +LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__3t0
  1960 +LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
  1961 +LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0
  1962 +LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0
  1963 +LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0
  1964 +LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0
  1965 +LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0
  1966 +LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0
  1967 +LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0
  1968 +LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0
  1969 +LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
  1970 +LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
  1971 +LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
  1972 +LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
  1973 +LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0
  1974 +LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0
  1975 +LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0
  1976 +LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
  1977 +LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0
  1978 +LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
  1979 +LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0
  1980 +LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0
  1981 +LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
  1982 +LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0
  1983 +LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0
  1984 +LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
  1985 +LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0
  1986 +LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0
  1987 +LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0
  1988 +LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0
  1989 +LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0
  1990 +LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0
  1991 +LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0
  1992 +LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0
  1993 +LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0
  1994 +LSYM(x241) t0__9a0 ! t0__t0_a0 ! b_e_8t0a0 ! t0__3t0
  1995 +LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0
  1996 +LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0
  1997 +LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0
  1998 +LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0
  1999 +LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0
  2000 +LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0
  2001 +LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0
  2002 +LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0
  2003 +LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0
  2004 +LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0
  2005 +LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
  2006 +LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0
  2007 +LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
  2008 +LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
  2009 +/*1040 insts before this. */
  2010 +LSYM(ret_t0) MILLIRET
  2011 +LSYM(e_t0) r__r_t0
  2012 +LSYM(e_shift) a1_ne_0_b_l2
  2013 + a0__256a0 /* a0 <<= 8 *********** */
  2014 + MILLIRETN
  2015 +LSYM(e_t0ma0) a1_ne_0_b_l0
  2016 + t0__t0ma0
  2017 + MILLIRET
  2018 + r__r_t0
  2019 +LSYM(e_t0a0) a1_ne_0_b_l0
  2020 + t0__t0_a0
  2021 + MILLIRET
  2022 + r__r_t0
  2023 +LSYM(e_t02a0) a1_ne_0_b_l0
  2024 + t0__t0_2a0
  2025 + MILLIRET
  2026 + r__r_t0
  2027 +LSYM(e_t04a0) a1_ne_0_b_l0
  2028 + t0__t0_4a0
  2029 + MILLIRET
  2030 + r__r_t0
  2031 +LSYM(e_2t0) a1_ne_0_b_l1
  2032 + r__r_2t0
  2033 + MILLIRETN
  2034 +LSYM(e_2t0a0) a1_ne_0_b_l0
  2035 + t0__2t0_a0
  2036 + MILLIRET
  2037 + r__r_t0
  2038 +LSYM(e2t04a0) t0__t0_2a0
  2039 + a1_ne_0_b_l1
  2040 + r__r_2t0
  2041 + MILLIRETN
  2042 +LSYM(e_3t0) a1_ne_0_b_l0
  2043 + t0__3t0
  2044 + MILLIRET
  2045 + r__r_t0
  2046 +LSYM(e_4t0) a1_ne_0_b_l1
  2047 + r__r_4t0
  2048 + MILLIRETN
  2049 +LSYM(e_4t0a0) a1_ne_0_b_l0
  2050 + t0__4t0_a0
  2051 + MILLIRET
  2052 + r__r_t0
  2053 +LSYM(e4t08a0) t0__t0_2a0
  2054 + a1_ne_0_b_l1
  2055 + r__r_4t0
  2056 + MILLIRETN
  2057 +LSYM(e_5t0) a1_ne_0_b_l0
  2058 + t0__5t0
  2059 + MILLIRET
  2060 + r__r_t0
  2061 +LSYM(e_8t0) a1_ne_0_b_l1
  2062 + r__r_8t0
  2063 + MILLIRETN
  2064 +LSYM(e_8t0a0) a1_ne_0_b_l0
  2065 + t0__8t0_a0
  2066 + MILLIRET
  2067 + r__r_t0
  2068 +
  2069 + .procend
  2070 + .end
  2071 +#endif
arch/parisc/lib/milli/milli.h
  1 +/* 32 and 64-bit millicode, original author Hewlett-Packard
  2 + adapted for gcc by Paul Bame <bame@debian.org>
  3 + and Alan Modra <alan@linuxcare.com.au>.
  4 +
  5 + Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
  6 +
   7 + This file is part of GCC and is released under the terms
   8 + of the GNU General Public License as published by the Free Software
  9 + Foundation; either version 2, or (at your option) any later version.
  10 + See the file COPYING in the top-level GCC source directory for a copy
  11 + of the license. */
  12 +
  13 +#ifndef _PA_MILLI_H_
  14 +#define _PA_MILLI_H_
  15 +
  16 +#define L_dyncall
  17 +#define L_divI
  18 +#define L_divU
  19 +#define L_remI
  20 +#define L_remU
  21 +#define L_div_const
  22 +#define L_mulI
  23 +
  24 +#ifdef CONFIG_64BIT
  25 + .level 2.0w
  26 +#endif
  27 +
  28 +/* Hardware General Registers. */
  29 +r0: .reg %r0
  30 +r1: .reg %r1
  31 +r2: .reg %r2
  32 +r3: .reg %r3
  33 +r4: .reg %r4
  34 +r5: .reg %r5
  35 +r6: .reg %r6
  36 +r7: .reg %r7
  37 +r8: .reg %r8
  38 +r9: .reg %r9
  39 +r10: .reg %r10
  40 +r11: .reg %r11
  41 +r12: .reg %r12
  42 +r13: .reg %r13
  43 +r14: .reg %r14
  44 +r15: .reg %r15
  45 +r16: .reg %r16
  46 +r17: .reg %r17
  47 +r18: .reg %r18
  48 +r19: .reg %r19
  49 +r20: .reg %r20
  50 +r21: .reg %r21
  51 +r22: .reg %r22
  52 +r23: .reg %r23
  53 +r24: .reg %r24
  54 +r25: .reg %r25
  55 +r26: .reg %r26
  56 +r27: .reg %r27
  57 +r28: .reg %r28
  58 +r29: .reg %r29
  59 +r30: .reg %r30
  60 +r31: .reg %r31
  61 +
  62 +/* Hardware Space Registers. */
  63 +sr0: .reg %sr0
  64 +sr1: .reg %sr1
  65 +sr2: .reg %sr2
  66 +sr3: .reg %sr3
  67 +sr4: .reg %sr4
  68 +sr5: .reg %sr5
  69 +sr6: .reg %sr6
  70 +sr7: .reg %sr7
  71 +
  72 +/* Hardware Floating Point Registers. */
  73 +fr0: .reg %fr0
  74 +fr1: .reg %fr1
  75 +fr2: .reg %fr2
  76 +fr3: .reg %fr3
  77 +fr4: .reg %fr4
  78 +fr5: .reg %fr5
  79 +fr6: .reg %fr6
  80 +fr7: .reg %fr7
  81 +fr8: .reg %fr8
  82 +fr9: .reg %fr9
  83 +fr10: .reg %fr10
  84 +fr11: .reg %fr11
  85 +fr12: .reg %fr12
  86 +fr13: .reg %fr13
  87 +fr14: .reg %fr14
  88 +fr15: .reg %fr15
  89 +
  90 +/* Hardware Control Registers. */
  91 +cr11: .reg %cr11
  92 +sar: .reg %cr11 /* Shift Amount Register */
  93 +
  94 +/* Software Architecture General Registers. */
  95 +rp: .reg r2 /* return pointer */
  96 +#ifdef CONFIG_64BIT
  97 +mrp: .reg r2 /* millicode return pointer */
  98 +#else
  99 +mrp: .reg r31 /* millicode return pointer */
  100 +#endif
  101 +ret0: .reg r28 /* return value */
  102 +ret1: .reg r29 /* return value (high part of double) */
  103 +sp: .reg r30 /* stack pointer */
  104 +dp: .reg r27 /* data pointer */
  105 +arg0: .reg r26 /* argument */
  106 +arg1: .reg r25 /* argument or high part of double argument */
  107 +arg2: .reg r24 /* argument */
  108 +arg3: .reg r23 /* argument or high part of double argument */
  109 +
  110 +/* Software Architecture Space Registers. */
  111 +/* sr0 ; return link from BLE */
  112 +sret: .reg sr1 /* return value */
  113 +sarg: .reg sr1 /* argument */
  114 +/* sr4 ; PC SPACE tracker */
  115 +/* sr5 ; process private data */
  116 +
  117 +/* Frame Offsets (millicode convention!) Used when calling other
  118 + millicode routines. Stack unwinding is dependent upon these
  119 + definitions. */
  120 +r31_slot: .equ -20 /* "current RP" slot */
  121 +sr0_slot: .equ -16 /* "static link" slot */
  122 +#if defined(CONFIG_64BIT)
  123 +mrp_slot: .equ -16 /* "current RP" slot */
  124 +psp_slot: .equ -8 /* "previous SP" slot */
  125 +#else
  126 +mrp_slot: .equ -20 /* "current RP" slot (replacing "r31_slot") */
  127 +#endif
  128 +
  129 +
  130 +#define DEFINE(name,value)name: .EQU value
  131 +#define RDEFINE(name,value)name: .REG value
  132 +#ifdef milliext
  133 +#define MILLI_BE(lbl) BE lbl(sr7,r0)
  134 +#define MILLI_BEN(lbl) BE,n lbl(sr7,r0)
  135 +#define MILLI_BLE(lbl) BLE lbl(sr7,r0)
  136 +#define MILLI_BLEN(lbl) BLE,n lbl(sr7,r0)
  137 +#define MILLIRETN BE,n 0(sr0,mrp)
  138 +#define MILLIRET BE 0(sr0,mrp)
  139 +#define MILLI_RETN BE,n 0(sr0,mrp)
  140 +#define MILLI_RET BE 0(sr0,mrp)
  141 +#else
  142 +#define MILLI_BE(lbl) B lbl
  143 +#define MILLI_BEN(lbl) B,n lbl
  144 +#define MILLI_BLE(lbl) BL lbl,mrp
  145 +#define MILLI_BLEN(lbl) BL,n lbl,mrp
  146 +#define MILLIRETN BV,n 0(mrp)
  147 +#define MILLIRET BV 0(mrp)
  148 +#define MILLI_RETN BV,n 0(mrp)
  149 +#define MILLI_RET BV 0(mrp)
  150 +#endif
  151 +
  152 +#define CAT(a,b) a##b
  153 +
  154 +#define SUBSPA_MILLI .section .text
  155 +#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16
  156 +#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16
  157 +#define ATTR_MILLI
  158 +#define SUBSPA_DATA .section .data
  159 +#define ATTR_DATA
  160 +#define GLOBAL $global$
  161 +#define GSYM(sym) !sym:
  162 +#define LSYM(sym) !CAT(.L,sym:)
  163 +#define LREF(sym) CAT(.L,sym)
  164 +
  165 +#endif /*_PA_MILLI_H_*/
arch/parisc/lib/milli/mulI.S
  1 +/* 32 and 64-bit millicode, original author Hewlett-Packard
  2 + adapted for gcc by Paul Bame <bame@debian.org>
  3 + and Alan Modra <alan@linuxcare.com.au>.
  4 +
  5 + Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
  6 +
   7 + This file is part of GCC and is released under the terms
   8 + of the GNU General Public License as published by the Free Software
  9 + Foundation; either version 2, or (at your option) any later version.
  10 + See the file COPYING in the top-level GCC source directory for a copy
  11 + of the license. */
  12 +
  13 +#include "milli.h"
  14 +
  15 +#ifdef L_mulI
  16 +/* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */
  17 +/******************************************************************************
  18 +This routine is used on PA2.0 processors when gcc -mno-fpregs is used
  19 +
  20 +ROUTINE: $$mulI
  21 +
  22 +
  23 +DESCRIPTION:
  24 +
  25 + $$mulI multiplies two single word integers, giving a single
  26 + word result.
  27 +
  28 +
  29 +INPUT REGISTERS:
  30 +
  31 + arg0 = Operand 1
  32 + arg1 = Operand 2
  33 + r31 == return pc
  34 + sr0 == return space when called externally
  35 +
  36 +
  37 +OUTPUT REGISTERS:
  38 +
  39 + arg0 = undefined
  40 + arg1 = undefined
  41 + ret1 = result
  42 +
  43 +OTHER REGISTERS AFFECTED:
  44 +
  45 + r1 = undefined
  46 +
  47 +SIDE EFFECTS:
  48 +
  49 + Causes a trap under the following conditions: NONE
  50 + Changes memory at the following places: NONE
  51 +
  52 +PERMISSIBLE CONTEXT:
  53 +
  54 + Unwindable
  55 + Does not create a stack frame
  56 + Is usable for internal or external microcode
  57 +
  58 +DISCUSSION:
  59 +
  60 + Calls other millicode routines via mrp: NONE
  61 + Calls other millicode routines: NONE
  62 +
  63 +***************************************************************************/
  64 +
  65 +
  66 +#define a0 %arg0
  67 +#define a1 %arg1
  68 +#define t0 %r1
  69 +#define r %ret1
  70 +
  71 +#define a0__128a0 zdep a0,24,25,a0
  72 +#define a0__256a0 zdep a0,23,24,a0
  73 +#define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0)
  74 +#define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1)
  75 +#define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2)
  76 +#define b_n_ret_t0 b,n LREF(ret_t0)
  77 +#define b_e_shift b LREF(e_shift)
  78 +#define b_e_t0ma0 b LREF(e_t0ma0)
  79 +#define b_e_t0 b LREF(e_t0)
  80 +#define b_e_t0a0 b LREF(e_t0a0)
  81 +#define b_e_t02a0 b LREF(e_t02a0)
  82 +#define b_e_t04a0 b LREF(e_t04a0)
  83 +#define b_e_2t0 b LREF(e_2t0)
  84 +#define b_e_2t0a0 b LREF(e_2t0a0)
  85 +#define b_e_2t04a0 b LREF(e2t04a0)
  86 +#define b_e_3t0 b LREF(e_3t0)
  87 +#define b_e_4t0 b LREF(e_4t0)
  88 +#define b_e_4t0a0 b LREF(e_4t0a0)
  89 +#define b_e_4t08a0 b LREF(e4t08a0)
  90 +#define b_e_5t0 b LREF(e_5t0)
  91 +#define b_e_8t0 b LREF(e_8t0)
  92 +#define b_e_8t0a0 b LREF(e_8t0a0)
  93 +#define r__r_a0 add r,a0,r
  94 +#define r__r_2a0 sh1add a0,r,r
  95 +#define r__r_4a0 sh2add a0,r,r
  96 +#define r__r_8a0 sh3add a0,r,r
  97 +#define r__r_t0 add r,t0,r
  98 +#define r__r_2t0 sh1add t0,r,r
  99 +#define r__r_4t0 sh2add t0,r,r
  100 +#define r__r_8t0 sh3add t0,r,r
  101 +#define t0__3a0 sh1add a0,a0,t0
  102 +#define t0__4a0 sh2add a0,0,t0
  103 +#define t0__5a0 sh2add a0,a0,t0
  104 +#define t0__8a0 sh3add a0,0,t0
  105 +#define t0__9a0 sh3add a0,a0,t0
  106 +#define t0__16a0 zdep a0,27,28,t0
  107 +#define t0__32a0 zdep a0,26,27,t0
  108 +#define t0__64a0 zdep a0,25,26,t0
  109 +#define t0__128a0 zdep a0,24,25,t0
  110 +#define t0__t0ma0 sub t0,a0,t0
  111 +#define t0__t0_a0 add t0,a0,t0
  112 +#define t0__t0_2a0 sh1add a0,t0,t0
  113 +#define t0__t0_4a0 sh2add a0,t0,t0
  114 +#define t0__t0_8a0 sh3add a0,t0,t0
  115 +#define t0__2t0_a0 sh1add t0,a0,t0
  116 +#define t0__3t0 sh1add t0,t0,t0
  117 +#define t0__4t0 sh2add t0,0,t0
  118 +#define t0__4t0_a0 sh2add t0,a0,t0
  119 +#define t0__5t0 sh2add t0,t0,t0
  120 +#define t0__8t0 sh3add t0,0,t0
  121 +#define t0__8t0_a0 sh3add t0,a0,t0
  122 +#define t0__9t0 sh3add t0,t0,t0
  123 +#define t0__16t0 zdep t0,27,28,t0
  124 +#define t0__32t0 zdep t0,26,27,t0
  125 +#define t0__256a0 zdep a0,23,24,t0
  126 +
  127 +
  128 + SUBSPA_MILLI
  129 + ATTR_MILLI
  130 + .align 16
  131 + .proc
  132 + .callinfo millicode
  133 + .export $$mulI,millicode
  134 +GSYM($$mulI)
  135 + combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */
  136 + copy 0,r /* zero out the result */
  137 + xor a0,a1,a0 /* swap a0 & a1 using the */
  138 + xor a0,a1,a1 /* old xor trick */
  139 + xor a0,a1,a0
  140 +LSYM(l4)
  141 + combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */
  142 + zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
  143 + sub,> 0,a1,t0 /* otherwise negate both and */
  144 + combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */
  145 + sub 0,a0,a1
  146 + movb,tr,n t0,a0,LREF(l2) /* 10th inst. */
  147 +
  148 +LSYM(l0) r__r_t0 /* add in this partial product */
  149 +LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */
  150 +LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
  151 +LSYM(l3) blr t0,0 /* case on these 8 bits ****** */
  152 + extru a1,23,24,a1 /* a1 >>= 8 ****************** */
  153 +
  154 +/*16 insts before this. */
  155 +/* a0 <<= 8 ************************** */
  156 +LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop
  157 +LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop
  158 +LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop
  159 +LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0
  160 +LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop
  161 +LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0
  162 +LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
  163 +LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0
  164 +LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop
  165 +LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0
  166 +LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
  167 +LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
  168 +LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
  169 +LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
  170 +LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
  171 +LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0
  172 +LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
  173 +LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0
  174 +LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
  175 +LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0
  176 +LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
  177 +LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
  178 +LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
  179 +LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
  180 +LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
  181 +LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
  182 +LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
  183 +LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0
  184 +LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
  185 +LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
  186 +LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
  187 +LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
  188 +LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
  189 +LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
  190 +LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
  191 +LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0
  192 +LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
  193 +LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
  194 +LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
  195 +LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
  196 +LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
  197 +LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
  198 +LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
  199 +LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
  200 +LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
  201 +LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
  202 +LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0
  203 +LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0
  204 +LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0
  205 +LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0
  206 +LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
  207 +LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0
  208 +LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
  209 +LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
  210 +LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
  211 +LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0
  212 +LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
  213 +LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0
  214 +LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
  215 +LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0
  216 +LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
  217 +LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
  218 +LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
  219 +LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
  220 +LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
  221 +LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
  222 +LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
  223 +LSYM(x67) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
  224 +LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
  225 +LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
  226 +LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0
  227 +LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0
  228 +LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
  229 +LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0
  230 +LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
  231 +LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
  232 +LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
  233 +LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
  234 +LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0
  235 +LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
  236 +LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0
  237 +LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0
  238 +LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
  239 +LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
  240 +LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
  241 +LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
  242 +LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
  243 +LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0
  244 +LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
  245 +LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
  246 +LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
  247 +LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0
  248 +LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
  249 +LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0
  250 +LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0
  251 +LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
  252 +LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
  253 +LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
  254 +LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0
  255 +LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
  256 +LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
  257 +LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
  258 +LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
  259 +LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0
  260 +LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
  261 +LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
  262 +LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0
  263 +LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0
  264 +LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
  265 +LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
  266 +LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0
  267 +LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
  268 +LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0
  269 +LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0
  270 +LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0
  271 +LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0
  272 +LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0
  273 +LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
  274 +LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0
  275 +LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0
  276 +LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
  277 +LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
  278 +LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
  279 +LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
  280 +LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
  281 +LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
  282 +LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
  283 +LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
  284 +LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
  285 +LSYM(x129) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0
  286 +LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
  287 +LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
  288 +LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
  289 +LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
  290 +LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
  291 +LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0
  292 +LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
  293 +LSYM(x137) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
  294 +LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
  295 +LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0
  296 +LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0
  297 +LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0
  298 +LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0
  299 +LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0
  300 +LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0
  301 +LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0
  302 +LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
  303 +LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
  304 +LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
  305 +LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
  306 +LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
  307 +LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
  308 +LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
  309 +LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
  310 +LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
  311 +LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0
  312 +LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
  313 +LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0
  314 +LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0
  315 +LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
  316 +LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0
  317 +LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
  318 +LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0
  319 +LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0
  320 +LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0
  321 +LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
  322 +LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0
  323 +LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
  324 +LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
  325 +LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0
  326 +LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0
  327 +LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__9t0
  328 +LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0
  329 +LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0
  330 +LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0
  331 +LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0
  332 +LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0
  333 +LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0
  334 +LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0
  335 +LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0
  336 +LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
  337 +LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
  338 +LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0
  339 +LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0
  340 +LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0
  341 +LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
  342 +LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0
  343 +LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0
  344 +LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0
  345 +LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
  346 +LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0
  347 +LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0
  348 +LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
  349 +LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
  350 +LSYM(x194) t0__8a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
  351 +LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
  352 +LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0
  353 +LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0
  354 +LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
  355 +LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
  356 +LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0
  357 +LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0
  358 +LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0
  359 +LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0
  360 +LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
  361 +LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0
  362 +LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__3t0
  363 +LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
  364 +LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0
  365 +LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0
  366 +LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0
  367 +LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0
  368 +LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0
  369 +LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0
  370 +LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0
  371 +LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0
  372 +LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
  373 +LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
  374 +LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
  375 +LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
  376 +LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0
  377 +LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0
  378 +LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0
  379 +LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
  380 +LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0
  381 +LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
  382 +LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0
  383 +LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0
  384 +LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
  385 +LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0
  386 +LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0
  387 +LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
  388 +LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0
  389 +LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0
  390 +LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0
  391 +LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0
  392 +LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0
  393 +LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0
  394 +LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0
  395 +LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0
  396 +LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0
  397 +LSYM(x241) t0__9a0 ! t0__t0_a0 ! b_e_8t0a0 ! t0__3t0
  398 +LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0
  399 +LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0
  400 +LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0
  401 +LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0
  402 +LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0
  403 +LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0
  404 +LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0
  405 +LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0
  406 +LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0
  407 +LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0
  408 +LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
  409 +LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0
  410 +LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
  411 +LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
  412 +/*1040 insts before this. */
  413 +LSYM(ret_t0) MILLIRET
  414 +LSYM(e_t0) r__r_t0
  415 +LSYM(e_shift) a1_ne_0_b_l2
  416 + a0__256a0 /* a0 <<= 8 *********** */
  417 + MILLIRETN
  418 +LSYM(e_t0ma0) a1_ne_0_b_l0
  419 + t0__t0ma0
  420 + MILLIRET
  421 + r__r_t0
  422 +LSYM(e_t0a0) a1_ne_0_b_l0
  423 + t0__t0_a0
  424 + MILLIRET
  425 + r__r_t0
  426 +LSYM(e_t02a0) a1_ne_0_b_l0
  427 + t0__t0_2a0
  428 + MILLIRET
  429 + r__r_t0
  430 +LSYM(e_t04a0) a1_ne_0_b_l0
  431 + t0__t0_4a0
  432 + MILLIRET
  433 + r__r_t0
  434 +LSYM(e_2t0) a1_ne_0_b_l1
  435 + r__r_2t0
  436 + MILLIRETN
  437 +LSYM(e_2t0a0) a1_ne_0_b_l0
  438 + t0__2t0_a0
  439 + MILLIRET
  440 + r__r_t0
  441 +LSYM(e2t04a0) t0__t0_2a0
  442 + a1_ne_0_b_l1
  443 + r__r_2t0
  444 + MILLIRETN
  445 +LSYM(e_3t0) a1_ne_0_b_l0
  446 + t0__3t0
  447 + MILLIRET
  448 + r__r_t0
  449 +LSYM(e_4t0) a1_ne_0_b_l1
  450 + r__r_4t0
  451 + MILLIRETN
  452 +LSYM(e_4t0a0) a1_ne_0_b_l0
  453 + t0__4t0_a0
  454 + MILLIRET
  455 + r__r_t0
  456 +LSYM(e4t08a0) t0__t0_2a0
  457 + a1_ne_0_b_l1
  458 + r__r_4t0
  459 + MILLIRETN
  460 +LSYM(e_5t0) a1_ne_0_b_l0
  461 + t0__5t0
  462 + MILLIRET
  463 + r__r_t0
  464 +LSYM(e_8t0) a1_ne_0_b_l1
  465 + r__r_8t0
  466 + MILLIRETN
  467 +LSYM(e_8t0a0) a1_ne_0_b_l0
  468 + t0__8t0_a0
  469 + MILLIRET
  470 + r__r_t0
  471 +
  472 + .procend
  473 + .end
  474 +#endif
arch/parisc/lib/milli/remI.S
  1 +/* 32 and 64-bit millicode, original author Hewlett-Packard
  2 + adapted for gcc by Paul Bame <bame@debian.org>
  3 + and Alan Modra <alan@linuxcare.com.au>.
  4 +
  5 + Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
  6 +
   7 + This file is part of GCC and is released under the terms
   8 + of the GNU General Public License as published by the Free Software
  9 + Foundation; either version 2, or (at your option) any later version.
  10 + See the file COPYING in the top-level GCC source directory for a copy
  11 + of the license. */
  12 +
  13 +#include "milli.h"
  14 +
  15 +#ifdef L_remI
  16 +/* ROUTINE: $$remI
  17 +
  18 + DESCRIPTION:
  19 + . $$remI returns the remainder of the division of two signed 32-bit
  20 + . integers. The sign of the remainder is the same as the sign of
  21 + . the dividend.
  22 +
  23 +
  24 + INPUT REGISTERS:
  25 + . arg0 == dividend
  26 + . arg1 == divisor
  27 + . mrp == return pc
  28 + . sr0 == return space when called externally
  29 +
  30 + OUTPUT REGISTERS:
  31 + . arg0 = destroyed
  32 + . arg1 = destroyed
  33 + . ret1 = remainder
  34 +
  35 + OTHER REGISTERS AFFECTED:
  36 + . r1 = undefined
  37 +
  38 + SIDE EFFECTS:
  39 + . Causes a trap under the following conditions: DIVIDE BY ZERO
  40 + . Changes memory at the following places: NONE
  41 +
  42 + PERMISSIBLE CONTEXT:
  43 + . Unwindable
  44 + . Does not create a stack frame
   45 + . Is usable for internal or external millicode
  46 +
  47 + DISCUSSION:
  48 + . Calls other millicode routines via mrp: NONE
  49 + . Calls other millicode routines: NONE */
  50 +
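The entry sequence below peels off power-of-two divisors before falling into the general 32-step ds division. A minimal C sketch of that fast path (an illustration, not code from this commit):

	#include <stdint.h>

	/* $$remI fast path for a divisor with a single bit set: take the
	 * remainder with a mask and give it the sign of the dividend, as
	 * LSYM(pow2)/LSYM(neg_num) do below. */
	static int32_t remI_pow2_model(int32_t num, int32_t den)
	{
		uint32_t mask = (uint32_t)den - 1;	/* den & (den - 1) == 0 */

		if (num >= 0)
			return (int32_t)((uint32_t)num & mask);
		/* negate, mask, negate back, so the sign follows num */
		return -(int32_t)((0u - (uint32_t)num) & mask);
	}

Divisors that are not powers of two fall through to regular_seq, the 32-step ds division.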
  51 +RDEFINE(tmp,r1)
  52 +RDEFINE(retreg,ret1)
  53 +
  54 + SUBSPA_MILLI
  55 + ATTR_MILLI
  56 + .proc
  57 + .callinfo millicode
  58 + .entry
  59 +GSYM($$remI)
  60 +GSYM($$remoI)
  61 + .export $$remI,MILLICODE
  62 + .export $$remoI,MILLICODE
  63 + ldo -1(arg1),tmp /* is there at most one bit set ? */
  64 + and,<> arg1,tmp,r0 /* if not, don't use power of 2 */
  65 + addi,> 0,arg1,r0 /* if denominator > 0, use power */
  66 + /* of 2 */
  67 + b,n LREF(neg_denom)
  68 +LSYM(pow2)
  69 + comb,>,n 0,arg0,LREF(neg_num) /* is numerator < 0 ? */
  70 + and arg0,tmp,retreg /* get the result */
  71 + MILLIRETN
  72 +LSYM(neg_num)
  73 + subi 0,arg0,arg0 /* negate numerator */
  74 + and arg0,tmp,retreg /* get the result */
  75 + subi 0,retreg,retreg /* negate result */
  76 + MILLIRETN
  77 +LSYM(neg_denom)
  78 + addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power */
  79 + /* of 2 */
  80 + b,n LREF(regular_seq)
  81 + sub r0,arg1,tmp /* make denominator positive */
  82 + comb,=,n arg1,tmp,LREF(regular_seq) /* test against 0x80000000 and 0 */
  83 + ldo -1(tmp),retreg /* is there at most one bit set ? */
  84 + and,= tmp,retreg,r0 /* if not, go to regular_seq */
  85 + b,n LREF(regular_seq)
  86 + comb,>,n 0,arg0,LREF(neg_num_2) /* if arg0 < 0, negate it */
  87 + and arg0,retreg,retreg
  88 + MILLIRETN
  89 +LSYM(neg_num_2)
  90 + subi 0,arg0,tmp /* test against 0x80000000 */
  91 + and tmp,retreg,retreg
  92 + subi 0,retreg,retreg
  93 + MILLIRETN
  94 +LSYM(regular_seq)
  95 + addit,= 0,arg1,0 /* trap if div by zero */
  96 + add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
  97 + sub 0,retreg,retreg /* make it positive */
  98 + sub 0,arg1, tmp /* clear carry, */
  99 + /* negate the divisor */
  100 + ds 0, tmp,0 /* set V-bit to the comple- */
  101 + /* ment of the divisor sign */
  102 + or 0,0, tmp /* clear tmp */
  103 + add retreg,retreg,retreg /* shift msb bit into carry */
  104 + ds tmp,arg1, tmp /* 1st divide step, if no carry */
  105 + /* out, msb of quotient = 0 */
  106 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  107 +LSYM(t1)
  108 + ds tmp,arg1, tmp /* 2nd divide step */
  109 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  110 + ds tmp,arg1, tmp /* 3rd divide step */
  111 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  112 + ds tmp,arg1, tmp /* 4th divide step */
  113 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  114 + ds tmp,arg1, tmp /* 5th divide step */
  115 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  116 + ds tmp,arg1, tmp /* 6th divide step */
  117 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  118 + ds tmp,arg1, tmp /* 7th divide step */
  119 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  120 + ds tmp,arg1, tmp /* 8th divide step */
  121 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  122 + ds tmp,arg1, tmp /* 9th divide step */
  123 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  124 + ds tmp,arg1, tmp /* 10th divide step */
  125 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  126 + ds tmp,arg1, tmp /* 11th divide step */
  127 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  128 + ds tmp,arg1, tmp /* 12th divide step */
  129 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  130 + ds tmp,arg1, tmp /* 13th divide step */
  131 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  132 + ds tmp,arg1, tmp /* 14th divide step */
  133 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  134 + ds tmp,arg1, tmp /* 15th divide step */
  135 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  136 + ds tmp,arg1, tmp /* 16th divide step */
  137 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  138 + ds tmp,arg1, tmp /* 17th divide step */
  139 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  140 + ds tmp,arg1, tmp /* 18th divide step */
  141 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  142 + ds tmp,arg1, tmp /* 19th divide step */
  143 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  144 + ds tmp,arg1, tmp /* 20th divide step */
  145 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  146 + ds tmp,arg1, tmp /* 21st divide step */
  147 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  148 + ds tmp,arg1, tmp /* 22nd divide step */
  149 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  150 + ds tmp,arg1, tmp /* 23rd divide step */
  151 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  152 + ds tmp,arg1, tmp /* 24th divide step */
  153 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  154 + ds tmp,arg1, tmp /* 25th divide step */
  155 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  156 + ds tmp,arg1, tmp /* 26th divide step */
  157 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  158 + ds tmp,arg1, tmp /* 27th divide step */
  159 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  160 + ds tmp,arg1, tmp /* 28th divide step */
  161 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  162 + ds tmp,arg1, tmp /* 29th divide step */
  163 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  164 + ds tmp,arg1, tmp /* 30th divide step */
  165 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  166 + ds tmp,arg1, tmp /* 31st divide step */
  167 + addc retreg,retreg,retreg /* shift retreg with/into carry */
  168 + ds tmp,arg1, tmp /* 32nd divide step, */
  169 + addc retreg,retreg,retreg /* shift last bit into retreg */
  170 + movb,>=,n tmp,retreg,LREF(finish) /* branch if pos. tmp */
  171 + add,< arg1,0,0 /* if arg1 > 0, add arg1 */
  172 + add,tr tmp,arg1,retreg /* for correcting remainder tmp */
  173 + sub tmp,arg1,retreg /* else add absolute value arg1 */
  174 +LSYM(finish)
  175 + add,>= arg0,0,0 /* set sign of remainder */
  176 + sub 0,retreg,retreg /* to sign of dividend */
  177 + MILLIRET
  178 + nop
  179 + .exit
  180 + .procend
  181 +#ifdef milliext
  182 + .origin 0x00000200
  183 +#endif
  184 + .end
  185 +#endif
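
For readers following the millicode, the signed-remainder routine above can be modelled in C. This is only a sketch under stated assumptions, not code from this commit: the helper name remI_model is hypothetical, a restoring shift-subtract loop stands in for the non-restoring ds/addc divide steps, and divide-by-zero (which the millicode traps) is not handled here.

	#include <stdint.h>

	/* Hypothetical model of $$remI: 32 divide steps on the magnitudes,
	 * then the remainder takes the sign of the dividend, mirroring the
	 * add,>=/sub fixup in the epilogue above. */
	static int32_t remI_model(int32_t dividend, int32_t divisor)
	{
		uint32_t n = dividend < 0 ? 0u - (uint32_t)dividend : (uint32_t)dividend;
		uint32_t d = divisor < 0 ? 0u - (uint32_t)divisor : (uint32_t)divisor;
		uint32_t rem = 0;
		int i;

		for (i = 31; i >= 0; i--) {
			/* shift the next dividend bit in, as each addc/ds pair does */
			rem = (rem << 1) | ((n >> i) & 1);
			if (rem >= d)	/* restoring step; ds is the non-restoring form */
				rem -= d;
		}
		return dividend < 0 ? -(int32_t)rem : (int32_t)rem;
	}

The non-restoring hardware sequence can leave the partial remainder off by one divisor, which is what the movb/add,tr/sub correction before LSYM(finish) repairs; the restoring loop above never needs that fixup.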
arch/parisc/lib/milli/remU.S
  1 +/* 32 and 64-bit millicode, original author Hewlett-Packard
  2 + adapted for gcc by Paul Bame <bame@debian.org>
  3 + and Alan Modra <alan@linuxcare.com.au>.
  4 +
  5 + Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
  6 +
  7 + This file is part of GCC and is released under the terms
  8 + of the GNU General Public License as published by the Free Software
  9 + Foundation; either version 2, or (at your option) any later version.
  10 + See the file COPYING in the top-level GCC source directory for a copy
  11 + of the license. */
  12 +
  13 +#include "milli.h"
  14 +
  15 +#ifdef L_remU
  16 +/* ROUTINE: $$remU
  17 + . Single precision divide for remainder with unsigned binary integers.
  18 + .
  19 + . The remainder must be dividend-(dividend/divisor)*divisor.
  20 + . Divide by zero is trapped.
  21 +
  22 + INPUT REGISTERS:
  23 + . arg0 == dividend
  24 + . arg1 == divisor
  25 + . mrp == return pc
  26 + . sr0 == return space when called externally
  27 +
  28 + OUTPUT REGISTERS:
  29 + . arg0 = undefined
  30 + . arg1 = undefined
  31 + . ret1 = remainder
  32 +
  33 + OTHER REGISTERS AFFECTED:
  34 + . r1 = undefined
  35 +
  36 + SIDE EFFECTS:
  37 + . Causes a trap under the following conditions: DIVIDE BY ZERO
  38 + . Changes memory at the following places: NONE
  39 +
  40 + PERMISSIBLE CONTEXT:
  41 + . Unwindable.
  42 + . Does not create a stack frame.
  43 + . Suitable for internal or external millicode.
  44 + . Assumes the special millicode register conventions.
  45 +
  46 + DISCUSSION:
  47 + . Calls other millicode routines using mrp: NONE
  48 + . Calls other millicode routines: NONE */
  49 +
  50 +
  51 +RDEFINE(temp,r1)
  52 +RDEFINE(rmndr,ret1) /* r29 */
  53 + SUBSPA_MILLI
  54 + ATTR_MILLI
  55 + .export $$remU,millicode
  56 + .proc
  57 + .callinfo millicode
  58 + .entry
  59 +GSYM($$remU)
  60 + ldo -1(arg1),temp /* is there at most one bit set? */
  61 + and,= arg1,temp,r0 /* if not, don't use power of 2 */
  62 + b LREF(regular_seq)
  63 + addit,= 0,arg1,r0 /* trap on div by zero */
  64 + and arg0,temp,rmndr /* get the result for power of 2 */
  65 + MILLIRETN
  66 +LSYM(regular_seq)
  67 + comib,>=,n 0,arg1,LREF(special_case)
  68 + subi 0,arg1,rmndr /* clear carry, negate the divisor */
  69 + ds r0,rmndr,r0 /* set V-bit to 1 */
  70 + add arg0,arg0,temp /* shift msb bit into carry */
  71 + ds r0,arg1,rmndr /* 1st divide step, if no carry out, msb of quotient = 0 */
  72 + addc temp,temp,temp /* shift temp with/into carry */
  73 + ds rmndr,arg1,rmndr /* 2nd divide step */
  74 + addc temp,temp,temp /* shift temp with/into carry */
  75 + ds rmndr,arg1,rmndr /* 3rd divide step */
  76 + addc temp,temp,temp /* shift temp with/into carry */
  77 + ds rmndr,arg1,rmndr /* 4th divide step */
  78 + addc temp,temp,temp /* shift temp with/into carry */
  79 + ds rmndr,arg1,rmndr /* 5th divide step */
  80 + addc temp,temp,temp /* shift temp with/into carry */
  81 + ds rmndr,arg1,rmndr /* 6th divide step */
  82 + addc temp,temp,temp /* shift temp with/into carry */
  83 + ds rmndr,arg1,rmndr /* 7th divide step */
  84 + addc temp,temp,temp /* shift temp with/into carry */
  85 + ds rmndr,arg1,rmndr /* 8th divide step */
  86 + addc temp,temp,temp /* shift temp with/into carry */
  87 + ds rmndr,arg1,rmndr /* 9th divide step */
  88 + addc temp,temp,temp /* shift temp with/into carry */
  89 + ds rmndr,arg1,rmndr /* 10th divide step */
  90 + addc temp,temp,temp /* shift temp with/into carry */
  91 + ds rmndr,arg1,rmndr /* 11th divide step */
  92 + addc temp,temp,temp /* shift temp with/into carry */
  93 + ds rmndr,arg1,rmndr /* 12th divide step */
  94 + addc temp,temp,temp /* shift temp with/into carry */
  95 + ds rmndr,arg1,rmndr /* 13th divide step */
  96 + addc temp,temp,temp /* shift temp with/into carry */
  97 + ds rmndr,arg1,rmndr /* 14th divide step */
  98 + addc temp,temp,temp /* shift temp with/into carry */
  99 + ds rmndr,arg1,rmndr /* 15th divide step */
  100 + addc temp,temp,temp /* shift temp with/into carry */
  101 + ds rmndr,arg1,rmndr /* 16th divide step */
  102 + addc temp,temp,temp /* shift temp with/into carry */
  103 + ds rmndr,arg1,rmndr /* 17th divide step */
  104 + addc temp,temp,temp /* shift temp with/into carry */
  105 + ds rmndr,arg1,rmndr /* 18th divide step */
  106 + addc temp,temp,temp /* shift temp with/into carry */
  107 + ds rmndr,arg1,rmndr /* 19th divide step */
  108 + addc temp,temp,temp /* shift temp with/into carry */
  109 + ds rmndr,arg1,rmndr /* 20th divide step */
  110 + addc temp,temp,temp /* shift temp with/into carry */
  111 + ds rmndr,arg1,rmndr /* 21st divide step */
  112 + addc temp,temp,temp /* shift temp with/into carry */
  113 + ds rmndr,arg1,rmndr /* 22nd divide step */
  114 + addc temp,temp,temp /* shift temp with/into carry */
  115 + ds rmndr,arg1,rmndr /* 23rd divide step */
  116 + addc temp,temp,temp /* shift temp with/into carry */
  117 + ds rmndr,arg1,rmndr /* 24th divide step */
  118 + addc temp,temp,temp /* shift temp with/into carry */
  119 + ds rmndr,arg1,rmndr /* 25th divide step */
  120 + addc temp,temp,temp /* shift temp with/into carry */
  121 + ds rmndr,arg1,rmndr /* 26th divide step */
  122 + addc temp,temp,temp /* shift temp with/into carry */
  123 + ds rmndr,arg1,rmndr /* 27th divide step */
  124 + addc temp,temp,temp /* shift temp with/into carry */
  125 + ds rmndr,arg1,rmndr /* 28th divide step */
  126 + addc temp,temp,temp /* shift temp with/into carry */
  127 + ds rmndr,arg1,rmndr /* 29th divide step */
  128 + addc temp,temp,temp /* shift temp with/into carry */
  129 + ds rmndr,arg1,rmndr /* 30th divide step */
  130 + addc temp,temp,temp /* shift temp with/into carry */
  131 + ds rmndr,arg1,rmndr /* 31st divide step */
  132 + addc temp,temp,temp /* shift temp with/into carry */
  133 + ds rmndr,arg1,rmndr /* 32nd divide step, */
  134 + comiclr,<= 0,rmndr,r0
  135 + add rmndr,arg1,rmndr /* correction */
  136 + MILLIRETN
  137 + nop
  138 +
  139 +/* Putting >= on the last DS and deleting COMICLR does not work! */
  140 +LSYM(special_case)
  141 + sub,>>= arg0,arg1,rmndr
  142 + copy arg0,rmndr
  143 + MILLIRETN
  144 + nop
  145 + .exit
  146 + .procend
  147 + .end
  148 +#endif
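
The unsigned routine has the same 32-step core plus the two shortcuts visible above: the power-of-two fast path (and arg0,temp,rmndr works because d & (d - 1) == 0 exactly when d has at most one bit set, which is why ldo -1(arg1),temp comes first) and the special_case path for divisors with the top bit set, where the quotient can only be 0 or 1 so a single conditional subtract suffices. A hedged C sketch of those semantics, with the hypothetical name remU_model, again using a restoring loop in place of the ds/addc sequence:

	#include <stdint.h>

	/* Hypothetical model of $$remU (sketch, not the kernel code). */
	static uint32_t remU_model(uint32_t n, uint32_t d)
	{
		uint32_t rem = 0;
		int i;

		/* power-of-two fast path: arg0 & (arg1 - 1); d == 0 traps
		 * in the millicode and is not modelled here */
		if ((d & (d - 1)) == 0)
			return n & (d - 1);

		/* divisor >= 2^31: quotient is 0 or 1, so one conditional
		 * subtract is enough (the sub,>>=/copy pair above) */
		if (d & 0x80000000u)
			return n >= d ? n - d : n;

		/* general case: 32 restoring divide steps */
		for (i = 31; i >= 0; i--) {
			rem = (rem << 1) | ((n >> i) & 1);
			if (rem >= d)
				rem -= d;
		}
		return rem;
	}

The final comiclr/add pair in the assembly plays the same correcting role as in $$remI: the non-restoring steps can finish with the remainder one divisor too low, and as the comment before special_case warns, that correction cannot simply be folded into the last ds.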