fp_util.S 35.5 KB
edit raw blame history



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

830

831

832

833

834

835

836

837

838

839

840

841

842

843

844

845

846

847

848

849

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

867

868

869

870

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908

909

910

911

912

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

934

935

936

937

938

939

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959

960

961

962

963

964

965

966

967

968

969

970

971

972

973

974

975

976

977

978

979

980

981

982

983

984

985

986

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

1001

1002

1003

1004

1005

1006

1007

1008

1009

1010

1011

1012

1013

1014

1015

1016

1017

1018

1019

1020

1021

1022

1023

1024

1025

1026

1027

1028

1029

1030

1031

1032

1033

1034

1035

1036

1037

1038

1039

1040

1041

1042

1043

1044

1045

1046

1047

1048

1049

1050

1051

1052

1053

1054

1055

1056

1057

1058

1059

1060

1061

1062

1063

1064

1065

1066

1067

1068

1069

1070

1071

1072

1073

1074

1075

1076

1077

1078

1079

1080

1081

1082

1083

1084

1085

1086

1087

1088

1089

1090

1091

1092

1093

1094

1095

1096

1097

1098

1099

1100

1101

1102

1103

1104

1105

1106

1107

1108

1109

1110

1111

1112

1113

1114

1115

1116

1117

1118

1119

1120

1121

1122

1123

1124

1125

1126

1127

1128

1129

1130

1131

1132

1133

1134

1135

1136

1137

1138

1139

1140

1141

1142

1143

1144

1145

1146

1147

1148

1149

1150

1151

1152

1153

1154

1155

1156

1157

1158

1159

1160

1161

1162

1163

1164

1165

1166

1167

1168

1169

1170

1171

1172

1173

1174

1175

1176

1177

1178

1179

1180

1181

1182

1183

1184

1185

1186

1187

1188

1189

1190

1191

1192

1193

1194

1195

1196

1197

1198

1199

1200

1201

1202

1203

1204

1205

1206

1207

1208

1209

1210

1211

1212

1213

1214

1215

1216

1217

1218

1219

1220

1221

1222

1223

1224

1225

1226

1227

1228

1229

1230

1231

1232

1233

1234

1235

1236

1237

1238

1239

1240

1241

1242

1243

1244

1245

1246

1247

1248

1249

1250

1251

1252

1253

1254

1255

1256

1257

1258

1259

1260

1261

1262

1263

1264

1265

1266

1267

1268

1269

1270

1271

1272

1273

1274

1275

1276

1277

1278

1279

1280

1281

1282

1283

1284

1285

1286

1287

1288

1289

1290

1291

1292

1293

1294

1295

1296

1297

1298

1299

1300

1301

1302

1303

1304

1305

1306

1307

1308

1309

1310

1311

1312

1313

1314

1315

1316

1317

1318

1319

1320

1321

1322

1323

1324

1325

1326

1327

1328

1329

1330

1331

1332

1333

1334

1335

1336

1337

1338

1339

1340

1341

1342

1343

1344

1345

1346

1347

1348

1349

1350

1351

1352

1353

1354

1355

1356

1357

1358

1359

1360

1361

1362

1363

1364

1365

1366

1367

1368

1369

1370

1371

1372

1373

1374

1375

1376

1377

1378

1379

1380

1381

1382

1383

1384

1385

1386

1387

1388

1389

1390

1391

1392

1393

1394

1395

1396

1397

1398

1399

1400

1401

1402

1403

1404

1405

1406

1407

1408

1409

1410

1411

1412

1413

1414

1415

1416

1417

1418

1419

1420

1421

1422

1423

1424

1425

1426

1427

1428

1429

1430

1431

1432

1433

1434

1435

1436

1437

1438

1439

1440

1441

1442

1443

1444

1445

1446

1447

1448

1449

1450

1451

1452

1453

1454


/*
 * fp_util.S
 *
 * Copyright Roman Zippel, 1997.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, and the entire permission notice in its entirety,
 *    including the disclaimer of warranties.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote
 *    products derived from this software without specific prior
 *    written permission.
 *
 * ALTERNATIVELY, this product may be distributed under the terms of
 * the GNU General Public License, in which case the provisions of the GPL are
 * required INSTEAD OF the above restrictions.  (This clause is
 * necessary due to a potential bad interaction between the GPL and
 * the restrictions contained in a BSD-style copyright.)
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "fp_emu.h"

/*
 * Here are lots of conversion and normalization functions, mainly
 * used by fp_scan.S.
 * Note that these functions are optimized for "normal" numbers:
 * these are handled first and exit as fast as possible. This is
 * especially important for fp_normalize_ext/fp_conv_ext2ext, as
 * they are called very often.
 * The register usage is optimized for fp_scan.S and for whichever
 * registers are unused there at the time of the call, so be careful
 * if you want to change something here. %d0 and %d1 are always
 * usable, sometimes %d2 (or only its lower half) as well. Most
 * functions have to return %a0 unmodified, so that the caller can
 * immediately reuse it.
 */

	.globl	fp_ill, fp_end

	| exits from fp_scan:
	| illegal instruction
fp_ill:
	| Exit used by fp_scan for an illegal instruction: emit the
	| "fp_illegal" message through the printf macro and return.
	| Unlike fp_end, this path does not load a return value into %d0.
	| NOTE(review): the first printf argument is left empty, presumably
	| selecting the default (unconditional) debug bit; confirm against
	| the macro definition in fp_emu.h.
	printf	,"fp_illegal\n"
	rts
	| completed instruction
	| Sanity-checks the three longwords at TASK_MM-8, TASK_MM-4 and
	| TASK_MM relative to %a2: if any of them has its sign bit set
	| the "oops" message is printed. Always returns 0 in %d0.
fp_end:
	tst.l	(TASK_MM-8,%a2)
	jmi	1f
	tst.l	(TASK_MM-4,%a2)
	jmi	1f
	tst.l	(TASK_MM,%a2)
	jmi	1f
2:	moveq	#0,%d0			| return value: 0
	rts
	| out-of-line reporting path, rejoins the common exit
1:	printf	,"oops:%p,%p,%p\n",3,%a2@(TASK_MM-8),%a2@(TASK_MM-4),%a2@(TASK_MM)
	jra	2b

	.globl	fp_conv_long2ext, fp_conv_single2ext
	.globl	fp_conv_double2ext, fp_conv_ext2ext
	.globl	fp_normalize_ext, fp_normalize_double
	.globl	fp_normalize_single, fp_normalize_single_fast
	.globl	fp_conv_ext2double, fp_conv_ext2single
	.globl	fp_conv_ext2long, fp_conv_ext2short
	.globl	fp_conv_ext2byte
	.globl	fp_finalrounding_single, fp_finalrounding_single_fast
	.globl	fp_finalrounding_double
	.globl	fp_finalrounding, fp_finaltest, fp_final

/*
 * First several conversion functions from a source operand
 * into the extended format. Note, that only fp_conv_ext2ext
 * normalizes the number and is always called after the other
 * conversion functions, which only move the information into
 * fp_ext structure.
 */

	| fp_conv_long2ext:
	| unpack a signed 32-bit integer into a struct fp_ext; the result
	| is not normalized here.
	|
	| args:	%d0 = source (32-bit long)
	|	%a0 = destination (ptr to struct fp_ext), left unmodified
	| on return %d0 holds the magnitude of the source and %d1 the
	| sign / exponent lword that was stored (both zero for a zero source)

fp_conv_long2ext:
	printf	PCONV,"l2e: %p -> %p(",2,%d0,%a0
	moveq	#0,%d1			| assume a positive sign
	tst.l	%d0
	jeq	fp_l2e_zero		| zero gets an all-zero fp_ext
	jpl	1f
	neg.l	%d0			| negative: keep the magnitude ...
	moveq	#1,%d1			| ... and remember the sign
1:	swap	%d1			| sign goes into the upper word
	move.w	#0x3fff+31,%d1		| exponent for an integer mantissa
	move.l	%d1,(%a0)		| sign / exp
	move.l	%d0,(4,%a0)		| high lword of the mantissa
2:	clr.l	(8,%a0)			| low lword of the mantissa is zero
	printx	PCONV,%a0@
	printf	PCONV,")\n"
	rts
	| source is zero: clear the first two lwords, the shared tail
	| above clears the third one
fp_l2e_zero:
	clr.l	(%a0)
	clr.l	(4,%a0)
	jra	2b

	| fp_conv_single2ext
	| unpack a single-precision value into a struct fp_ext; the
	| result is not normalized here.
	| args:	%d0 = source (single-precision fp value)
	|	%a0 = dest (struct fp_ext *), left unmodified
	| %d0 and %d1 are clobbered (mantissa and sign / exponent)

fp_conv_single2ext:
	printf	PCONV,"s2e: %p -> %p(",2,%d0,%a0
	move.l	%d0,%d1
	lsl.l	#8,%d0			| mantissa moves up below bit 31
	lsr.l	#7,%d1			| bring sign and exponent down by
	lsr.l	#8,%d1			| 15 bits in total, then
	lsr.w	#8,%d1			| isolate the exponent in the low word
	jeq	fp_s2e_small		| exponent 0: zero / denormal
	cmp.w	#0xff,%d1		| exponent all ones: NaN / Inf
	jeq	fp_s2e_large
	add.w	#0x3fff-0x7f,%d1	| re-bias the exponent
	bset	#31,%d0			| make the leading one explicit
9:	move.l	%d1,(%a0)		| fp_ext.sign, fp_ext.exp
	move.l	%d0,(4,%a0)		| high lword of fp_ext.mant
	clr.l	(8,%a0)			| low lword = 0
	printx	PCONV,%a0@
	printf	PCONV,")\n"
	rts
	| infinities and NaNs
fp_s2e_large:
	move.w	#0x7fff,%d1		| largest extended exponent
	bclr	#31,%d0			| no explicit bit
	jra	9b
	| zeros and denormalized numbers
fp_s2e_small:
	| the exponent is zero, so bit 31 of the mantissa is clear already
	tst.l	%d0
	jeq	9b			| zero: store it with exponent 0
	move.w	#0x4000-0x7f,%d1	| denormal: smallest single exponent
	jra	9b

/*
 * fp_conv_double2ext:
 * read a double through the getuser macro and unpack it into a
 * struct fp_ext; the result is not normalized here.
 * args:	%a1 = source pointer, advanced past the 8 bytes read
 *	%a0 = dest (struct fp_ext *), returned unmodified
 * %d0 and %d1 are clobbered. A failing getuser access leaves through
 * fp_err_ua2 (handler not visible here).
 */
fp_conv_double2ext:
#ifdef FPU_EMU_DEBUG
	getuser.l %a1@(0),%d0,fp_err_ua2,%a1
	getuser.l %a1@(4),%d1,fp_err_ua2,%a1
	printf	PCONV,"d2e: %p%p -> %p(",3,%d0,%d1,%a0
#endif
	getuser.l (%a1)+,%d0,fp_err_ua2,%a1	| sign, exp, upper 20 mantissa bits
	move.l	%d0,%d1
	lsl.l	#8,%d0			| shift high mantissa
	lsl.l	#3,%d0
	lsr.l	#8,%d1			| exponent / sign
	lsr.l	#7,%d1
	lsr.w	#5,%d1
	jeq	fp_d2e_small		| zero / denormal?
	cmp.w	#0x7ff,%d1		| NaN / Inf?
	jeq	fp_d2e_large
	bset	#31,%d0			| set explicit bit
	add.w	#0x3fff-0x3ff,%d1	| re-bias the exponent.
9:	move.l	%d1,(%a0)+		| fp_ext.sign, fp_ext.exp
	move.l	%d0,(%a0)+		| upper mantissa bits, completed below
	getuser.l (%a1)+,%d0,fp_err_ua2,%a1	| lower 32 mantissa bits
	move.l	%d0,%d1
	lsl.l	#8,%d0			| the low 21 bits, shifted up by 11,
	lsl.l	#3,%d0			| form the low lword of fp_ext.mant
	move.l	%d0,(%a0)
	moveq	#21,%d0
	lsr.l	%d0,%d1			| the top 11 bits are merged into
	or.l	%d1,-(%a0)		| the high lword of fp_ext.mant
	subq.l	#4,%a0			| restore %a0
	printx	PCONV,%a0@
	printf	PCONV,")\n"
	rts
	| zeros and denormalized
fp_d2e_small:
	| exponent is zero, so explicit bit is already zero too
	tst.l	%d0
	jne	fp_d2e_denormal
	| %d0 only carries the upper 20 mantissa bits. A denormal may have
	| all of its set bits in the low lword, so peek at that lword
	| (without advancing %a1) before calling the number a zero.
	getuser.l %a1@(0),%d0,fp_err_ua2,%a1
	tst.l	%d0
	jeq	9b			| every mantissa bit is clear: zero
	clr.l	%d0			| upper mantissa bits are still zero
fp_d2e_denormal:
	move.w	#0x4000-0x3ff,%d1	| smallest double exponent
	jra	9b
	| infinities and NAN
fp_d2e_large:
	bclr	#31,%d0			| clear explicit bit
	move.w	#0x7fff,%d1
	jra	9b

	| fp_conv_ext2ext:
	| originally used to get longdouble from userspace, now it is
	| called before arithmetic operations to make sure the number
	| is normalized [maybe rename it?].
	| args:	%a0 = dest (struct fp_ext *)
	| returns 0 in %d0 for a NaN, otherwise 1
	| %a0 is restored before returning; %d1 and, on the slow paths,
	| the lower word of %d2 are used as scratch.
	| struct fp_ext as accessed here: byte 0 = extra precision bits,
	| byte 1 = sign, word at 2 = exponent, lwords at 4 and 8 = mantissa.

fp_conv_ext2ext:
	printf	PCONV,"e2e: %p(",1,%a0
	printx	PCONV,%a0@
	printf	PCONV,"), "
	move.l	(%a0)+,%d0
	cmp.w	#0x7fff,%d0		| Inf / NaN?
	jeq	fp_e2e_large
	move.l	(%a0),%d0
	jpl	fp_e2e_small		| zero / denorm?
	| The high bit is set, so normalization is irrelevant.
	| Entered with %a0 pointing at the high mantissa lword.
fp_e2e_checkround:
	subq.l	#4,%a0
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	move.b	(%a0),%d0		| any extra precision bits left?
	jne	fp_e2e_round
#endif
	printf	PCONV,"%p(",1,%a0
	printx	PCONV,%a0@
	printf	PCONV,")\n"
	moveq	#1,%d0			| not a NaN
	rts
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	| Round the extra precision byte (in %d0) into the mantissa
	| according to the current rounding mode.
fp_e2e_round:
	fp_set_sr FPSR_EXC_INEX2
	clr.b	(%a0)
	move.w	(FPD_RND,FPDATA),%d2
	jne	fp_e2e_roundother	| %d2 == 0, round to nearest
	tst.b	%d0			| test guard bit
	jpl	9f			| zero is closer
	btst	#0,(11,%a0)		| test lsb bit
	jne	fp_e2e_doroundup	| round to infinity
	lsl.b	#1,%d0			| check low bits
	jeq	9f			| round to zero
fp_e2e_doroundup:
	addq.l	#1,(8,%a0)
	jcc	9f
	addq.l	#1,(4,%a0)
	jcc	9f
	| the mantissa overflowed: restart it at 1.0 and bump the exponent
	move.w	#0x8000,(4,%a0)
	addq.w	#1,(2,%a0)
9:	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	| %d0 was used as scratch for the guard bits above, so load the
	| documented return value explicitly.
	moveq	#1,%d0			| not a NaN
	rts
fp_e2e_roundother:
	subq.w	#2,%d2
	jcs	9b			| %d2 < 2, round to zero
	jhi	1f			| %d2 > 2, round to +infinity
	tst.b	(1,%a0)			| to -inf
	jne	fp_e2e_doroundup	| negative, round to infinity
	jra	9b			| positive, round to zero
1:	tst.b	(1,%a0)			| to +inf
	jeq	fp_e2e_doroundup	| positive, round to infinity
	jra	9b			| negative, round to zero
#endif
	| zeros and subnormals:
	| try to normalize these anyway.
	| Entered with the flags of the high mantissa lword (in %d0).
fp_e2e_small:
	jne	fp_e2e_small1		| high lword zero?
	move.l	(4,%a0),%d0
	jne	fp_e2e_small2
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	clr.l	%d0
	move.b	(-4,%a0),%d0
	jne	fp_e2e_small3
#endif
	| Genuine zero.
	clr.w	-(%a0)			| force the exponent to zero
	subq.l	#2,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	moveq	#1,%d0
	rts
	| definitely subnormal, need to shift all 64 bits
fp_e2e_small1:
	bfffo	%d0{#0,#32},%d1		| %d1 = number of leading zero bits
	move.w	-(%a0),%d2
	sub.w	%d1,%d2			| exponent after the shift
	jcc	1f
	| Pathologically small, denormalize.
	add.w	%d2,%d1			| shift only as far as the exponent allows
	clr.w	%d2
1:	move.w	%d2,(%a0)+
	move.w	%d1,%d2
	jeq	fp_e2e_checkround
	| fancy 64-bit double-shift begins here
	lsl.l	%d2,%d0
	move.l	%d0,(%a0)+
	move.l	(%a0),%d0
	move.l	%d0,%d1
	lsl.l	%d2,%d0
	move.l	%d0,(%a0)
	neg.w	%d2
	and.w	#0x1f,%d2		| 32 - shift
	lsr.l	%d2,%d1			| bits that cross into the high lword
	or.l	%d1,-(%a0)
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	| shift the extra precision byte along with the mantissa,
	| expects 32 - shift in %d2
fp_e2e_extra1:
	clr.l	%d0
	move.b	(-4,%a0),%d0
	neg.w	%d2
	add.w	#24,%d2
	jcc	1f
	clr.b	(-4,%a0)
	lsl.l	%d2,%d0
	or.l	%d0,(4,%a0)
	jra	fp_e2e_checkround
1:	addq.w	#8,%d2
	lsl.l	%d2,%d0
	move.b	%d0,(-4,%a0)
	lsr.l	#8,%d0
	or.l	%d0,(4,%a0)
#endif
	jra	fp_e2e_checkround
	| pathologically small subnormal
	| the high lword is zero, %d0 holds the (non-zero) low lword
fp_e2e_small2:
	bfffo	%d0{#0,#32},%d1
	add.w	#32,%d1			| %d1 = total shift, 32..63
	move.w	-(%a0),%d2
	sub.w	%d1,%d2
	jcc	1f
	| Beyond pathologically small, denormalize.
	add.w	%d2,%d1
	clr.w	%d2
1:	move.w	%d2,(%a0)+
	ext.l	%d1
	jeq	fp_e2e_checkround
	clr.l	(4,%a0)
	| Split on the shift count, not on the exponent: with a count
	| of 32 or more only shift - 32 is applied to the low lword
	| (a register shift count of 32..63 would just produce zero),
	| otherwise %d1 turns into the bit field offset used below.
	| This matches fp_ne_small2.
	sub.w	#32,%d1
	jcs	1f
	lsl.l	%d1,%d0			| lower lword needs only to be shifted
	move.l	%d0,(%a0)		| into the higher lword
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	clr.l	%d0
	move.b	(-4,%a0),%d0
	clr.b	(-4,%a0)
	neg.w	%d1
	add.w	#32,%d1
	bfins	%d0,(%a0){%d1,#8}
#endif
	jra	fp_e2e_checkround
1:	neg.w	%d1			| lower lword is split between
	bfins	%d0,(%a0){%d1,#32}	| higher and lower lword
#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
	jra	fp_e2e_checkround
#else
	move.w	%d1,%d2
	jra	fp_e2e_extra1
	| These are extremely small numbers, that will mostly end up as zero
	| anyway, so this is only important for correct rounding.
fp_e2e_small3:
	bfffo	%d0{#24,#8},%d1
	add.w	#40,%d1
	move.w	-(%a0),%d2
	sub.w	%d1,%d2
	jcc	1f
	| Pathologically small, denormalize.
	add.w	%d2,%d1
	clr.w	%d2
1:	move.w	%d2,(%a0)+
	ext.l	%d1
	jeq	fp_e2e_checkround
	cmp.w	#8,%d1
	jcs	2f
1:	clr.b	(-4,%a0)
	sub.w	#64,%d1
	jcs	1f
	add.w	#24,%d1
	lsl.l	%d1,%d0
	move.l	%d0,(%a0)
	jra	fp_e2e_checkround
1:	neg.w	%d1
	bfins	%d0,(%a0){%d1,#8}
	jra	fp_e2e_checkround
2:	lsl.l	%d1,%d0
	move.b	%d0,(-4,%a0)
	lsr.l	#8,%d0
	move.b	%d0,(7,%a0)
	jra	fp_e2e_checkround
#endif
	| Infinities and NaNs
	| An infinity has an all-zero mantissa once the highest bit is
	| ignored, anything else is a NaN.
fp_e2e_large:
	move.l	(%a0)+,%d0
	jne	3f
1:	tst.l	(%a0)
	jne	4f
	moveq	#1,%d0
2:	subq.l	#8,%a0
	printf	PCONV,"%p(",1,%a0
	printx	PCONV,%a0@
	printf	PCONV,")\n"
	rts
	| we have maybe a NaN, shift off the highest bit
3:	lsl.l	#1,%d0
	jeq	1b
	| we have a NaN, clear the return value
4:	clr.l	%d0
	jra	2b


/*
 * Normalization functions.  Call these on the output of general
 * FP operators, and before any conversion into the destination
 * formats. fp_normalize_ext has always to be called first, the
 * following conversion functions expect an already normalized
 * number.
 */

	| fp_normalize_ext:
	| normalize an extended in extended (unpacked) format, basically
	| it does the same as fp_conv_ext2ext, additionally it also does
	| the necessary postprocessing checks.
	| args:	%a0 (struct fp_ext *)
	| NOTE: it does _not_ modify %a0/%a1 and the upper word of %d2
	| %d0, %d1 and the lower word of %d2 are used as scratch, no
	| result is returned in a register.
	| struct fp_ext as accessed here: byte 0 = extra precision bits,
	| byte 1 = sign, word at 2 = exponent, lwords at 4 and 8 = mantissa.

fp_normalize_ext:
	printf	PNORM,"ne: %p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,"), "
	move.l	(%a0)+,%d0
	cmp.w	#0x7fff,%d0		| Inf / NaN?
	jeq	fp_ne_large
	move.l	(%a0),%d0
	jpl	fp_ne_small		| zero / denorm?
	| The high bit is set, so normalization is irrelevant.
	| Entered with %a0 pointing at the high mantissa lword.
fp_ne_checkround:
	subq.l	#4,%a0
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	move.b	(%a0),%d0		| any extra precision bits left?
	jne	fp_ne_round
#endif
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	| Round the extra precision byte (in %d0) into the mantissa
	| according to the current rounding mode.
fp_ne_round:
	fp_set_sr FPSR_EXC_INEX2
	clr.b	(%a0)
	move.w	(FPD_RND,FPDATA),%d2
	jne	fp_ne_roundother	| %d2 == 0, round to nearest
	tst.b	%d0			| test guard bit
	jpl	9f			| zero is closer
	btst	#0,(11,%a0)		| test lsb bit
	jne	fp_ne_doroundup		| round to infinity
	lsl.b	#1,%d0			| check low bits
	jeq	9f			| round to zero
	| add one to the 64-bit mantissa; on overflow bump the exponent
	| and restart the mantissa at 1.0
fp_ne_doroundup:
	addq.l	#1,(8,%a0)
	jcc	9f
	addq.l	#1,(4,%a0)
	jcc	9f
	addq.w	#1,(2,%a0)
	move.w	#0x8000,(4,%a0)
9:	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
fp_ne_roundother:
	subq.w	#2,%d2
	jcs	9b			| %d2 < 2, round to zero
	jhi	1f			| %d2 > 2, round to +infinity
	tst.b	(1,%a0)			| to -inf
	jne	fp_ne_doroundup		| negative, round to infinity
	jra	9b			| positive, round to zero
1:	tst.b	(1,%a0)			| to +inf
	jeq	fp_ne_doroundup		| positive, round to infinity
	jra	9b			| negative, round to zero
#endif
	| Zeros and subnormal numbers
	| These are probably merely subnormal, rather than "denormalized"
	|  numbers, so we will try to make them normal again.
	| Entered with the flags of the high mantissa lword (in %d0).
fp_ne_small:
	jne	fp_ne_small1		| high lword zero?
	move.l	(4,%a0),%d0
	jne	fp_ne_small2
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	clr.l	%d0
	move.b	(-4,%a0),%d0
	jne	fp_ne_small3
#endif
	| Genuine zero.
	clr.w	-(%a0)			| force the exponent to zero
	subq.l	#2,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
	| Subnormal.
fp_ne_small1:
	bfffo	%d0{#0,#32},%d1		| %d1 = number of leading zero bits
	move.w	-(%a0),%d2
	sub.w	%d1,%d2			| exponent after the shift
	jcc	1f
	| Pathologically small, denormalize.
	add.w	%d2,%d1			| shift only as far as the exponent allows
	clr.w	%d2
	fp_set_sr FPSR_EXC_UNFL
1:	move.w	%d2,(%a0)+
	move.w	%d1,%d2
	jeq	fp_ne_checkround
	| This is exactly the same 64-bit double shift as seen above.
	lsl.l	%d2,%d0
	move.l	%d0,(%a0)+
	move.l	(%a0),%d0
	move.l	%d0,%d1
	lsl.l	%d2,%d0
	move.l	%d0,(%a0)
	neg.w	%d2
	and.w	#0x1f,%d2		| 32 - shift
	lsr.l	%d2,%d1			| bits that cross into the high lword
	or.l	%d1,-(%a0)
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	| shift the extra precision byte along with the mantissa,
	| expects 32 - shift in %d2
fp_ne_extra1:
	clr.l	%d0
	move.b	(-4,%a0),%d0
	neg.w	%d2
	add.w	#24,%d2
	jcc	1f
	clr.b	(-4,%a0)
	lsl.l	%d2,%d0
	or.l	%d0,(4,%a0)
	jra	fp_ne_checkround
1:	addq.w	#8,%d2
	lsl.l	%d2,%d0
	move.b	%d0,(-4,%a0)
	lsr.l	#8,%d0
	or.l	%d0,(4,%a0)
#endif
	jra	fp_ne_checkround
	| May or may not be subnormal, if so, only 32 bits to shift.
	| The high lword is zero, %d0 holds the (non-zero) low lword.
fp_ne_small2:
	bfffo	%d0{#0,#32},%d1
	add.w	#32,%d1			| %d1 = total shift, 32..63
	move.w	-(%a0),%d2
	sub.w	%d1,%d2
	jcc	1f
	| Beyond pathologically small, denormalize.
	add.w	%d2,%d1
	clr.w	%d2
	fp_set_sr FPSR_EXC_UNFL
1:	move.w	%d2,(%a0)+
	ext.l	%d1
	jeq	fp_ne_checkround
	clr.l	(4,%a0)
	sub.w	#32,%d1			| shift count of at least 32?
	jcs	1f
	lsl.l	%d1,%d0			| lower lword needs only to be shifted
	move.l	%d0,(%a0)		| into the higher lword
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	clr.l	%d0
	move.b	(-4,%a0),%d0
	clr.b	(-4,%a0)
	neg.w	%d1
	add.w	#32,%d1
	bfins	%d0,(%a0){%d1,#8}
#endif
	jra	fp_ne_checkround
1:	neg.w	%d1			| lower lword is split between
	bfins	%d0,(%a0){%d1,#32}	| higher and lower lword
#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
	jra	fp_ne_checkround
#else
	move.w	%d1,%d2
	jra	fp_ne_extra1
	| These are extremely small numbers, that will mostly end up as zero
	| anyway, so this is only important for correct rounding.
fp_ne_small3:
	bfffo	%d0{#24,#8},%d1
	add.w	#40,%d1
	move.w	-(%a0),%d2
	sub.w	%d1,%d2
	jcc	1f
	| Pathologically small, denormalize.
	add.w	%d2,%d1
	clr.w	%d2
1:	move.w	%d2,(%a0)+
	ext.l	%d1
	jeq	fp_ne_checkround
	cmp.w	#8,%d1
	jcs	2f
1:	clr.b	(-4,%a0)
	sub.w	#64,%d1
	jcs	1f
	add.w	#24,%d1
	lsl.l	%d1,%d0
	move.l	%d0,(%a0)
	jra	fp_ne_checkround
1:	neg.w	%d1
	bfins	%d0,(%a0){%d1,#8}
	jra	fp_ne_checkround
2:	lsl.l	%d1,%d0
	move.b	%d0,(-4,%a0)
	lsr.l	#8,%d0
	move.b	%d0,(7,%a0)
	jra	fp_ne_checkround
#endif
	| Infinities and NaNs, again, same as above.
	| %d0 receives the high mantissa lword, %a0 is left pointing at
	| the low one.
fp_ne_large:
	move.l	(%a0)+,%d0
	jne	3f
1:	tst.l	(%a0)
	jne	4f
2:	subq.l	#8,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
	| we have maybe a NaN, shift off the highest bit
3:	move.l	%d0,%d1
	lsl.l	#1,%d1
	jne	4f
	| only the highest bit was set: clear the stored high lword and
	| let the low lword decide between infinity and NaN
	clr.l	(-4,%a0)
	jra	1b
	| we have a NaN, test if it is signaling
	| bit 30 already set: nothing to do. Otherwise flag SNAN and
	| store the high lword back with bit 30 set.
4:	bset	#30,%d0
	jne	2b
	fp_set_sr FPSR_EXC_SNAN
	move.l	%d0,(-4,%a0)
	jra	2b

	| The next two routines do rounding as per the IEEE standard.
	| The values for the rounding modes appear to be:
	| 0:	Round to nearest
	| 1:	Round to zero
	| 2:	Round to -Infinity
	| 3:	Round to +Infinity
	| Both functions expect that fp_normalize_ext was already
	| called (so the extended argument is already normalized
	| as far as possible).  They are used when a rounding
	| precision other than extended is selected, and before
	| converting into single/double.

	| fp_normalize_double:
	| round an extended value to double precision: the low 11 bits
	| of the 64-bit mantissa are rounded away and the exponent is
	| clamped to the double range.  The value stays in extended
	| format.
	| args:	 %a0 (struct fp_ext *), restored before every rts
	| modifies: %d0-%d2, and the INEX2/UNFL/OVFL status bits
	| As accessed here: sign byte at offset 1, exponent word at
	| offset 2, high/low mantissa lwords at offsets 4 and 8.

fp_normalize_double:
	printf	PNORM,"nd: %p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,"), "
	move.l	(%a0)+,%d2		| high word: sign, low word: exponent
	tst.w	%d2
	jeq	fp_nd_zero		| zero / denormalized
	cmp.w	#0x7fff,%d2
	jeq	fp_nd_huge		| NaN / infinity.
	sub.w	#0x4000-0x3ff,%d2	| will the exponent fit?
	jcs	fp_nd_small		| too small.
	cmp.w	#0x7fe,%d2
	jcc	fp_nd_large		| too big.
	addq.l	#4,%a0
	move.l	(%a0),%d0		| low lword of mantissa
	| now, round off the low 11 bits.
	| Entered with %a0 at the low mantissa lword and %d0 holding
	| that lword (possibly with a sticky bit or-ed into bit 0).
fp_nd_round:
	moveq	#21,%d1
	lsl.l	%d1,%d0			| keep 11 low bits.
	jne	fp_nd_checkround	| Are they non-zero?
	| nothing to do here
9:	subq.l	#8,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
	| Be careful with the X bit! It contains the lsb
	| from the shift above, it is needed for round to nearest.
	| None of the instructions up to the addx below may touch it.
fp_nd_checkround:
	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
	and.w	#0xf800,(2,%a0)		| clear bits 0-10
	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
	jne	2f			| %d2 == 0, round to nearest
	tst.l	%d0			| test guard bit
	jpl	9b			| zero is closer
	| here we test the X bit by adding it to %d2
	clr.w	%d2			| first set z bit, addx only clears it
	addx.w	%d2,%d2			| test lsb bit
	| Round to nearest, ties to even: the guard bit is set here,
	| so the discarded part is at least half an ulp.  If the lsb
	| (now in %d2) is set the value is odd and we always round
	| up.  Otherwise we round up only when further discarded
	| bits are set, i.e. when it is more than an exact tie.
	jne	fp_nd_doroundup		| lsb set, round up
	lsl.l	#1,%d0			| check low bits
	jeq	9b			| exact tie on even value, keep it
fp_nd_doroundup:
	| round (the mantissa, that is) towards infinity
	add.l	#0x800,(%a0)
	jcc	9b			| no overflow, good.
	addq.l	#1,-(%a0)		| extend to high lword
	jcc	1f			| no overflow, good.
	| Yow! we have managed to overflow the mantissa.  This only
	| happens when all kept mantissa bits were set, so both
	| lwords are now zero.  Set the high bit again, and
	| increment the exponent.
	move.w	#0x8000,(%a0)
	addq.w	#1,-(%a0)
	cmp.w	#0x43ff,(%a0)+		| exponent now overflown?
	jeq	fp_nd_large		| yes, so make it infinity.
	| exit with %a0 at the high mantissa lword
1:	subq.l	#4,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
	| check nondefault rounding modes
2:	subq.w	#2,%d2
	jcs	9b			| %d2 < 2, round to zero
	jhi	3f			| %d2 > 2, round to +infinity
	| Round to +Inf or -Inf.  High word of %d2 contains the
	| sign of the number, by the way.
	swap	%d2			| to -inf
	tst.b	%d2
	jne	fp_nd_doroundup		| negative, round to infinity
	jra	9b			| positive, round to zero
3:	swap	%d2			| to +inf
	tst.b	%d2
	jeq	fp_nd_doroundup		| positive, round to infinity
	jra	9b			| negative, round to zero
	| Exponent underflow.  Try to make a denormal, and set it to
	| the smallest possible fraction if this fails.
	| Entered with %a0 at the high mantissa lword and the low
	| word of %d2 holding the (negative) rebased exponent.
fp_nd_small:
	fp_set_sr FPSR_EXC_UNFL		| set UNFL bit
	move.w	#0x3c01,(-2,%a0)	| 2**-1022
	neg.w	%d2			| degree of underflow
	cmp.w	#32,%d2			| single or double shift?
	jcc	1f
	| Again, another 64-bit double shift.
	move.l	(%a0),%d0
	move.l	%d0,%d1
	lsr.l	%d2,%d0
	move.l	%d0,(%a0)+
	move.l	(%a0),%d0
	lsr.l	%d2,%d0
	neg.w	%d2
	add.w	#32,%d2			| %d2 = 32 - shift count
	lsl.l	%d2,%d1			| bits moving from high to low lword
	or.l	%d1,%d0
	move.l	(%a0),%d1
	move.l	%d0,(%a0)
	| Check to see if we shifted off any significant bits
	lsl.l	%d2,%d1
	jeq	fp_nd_round		| Nope, round.
	bset	#0,%d0			| Yes, so set the "sticky bit".
	jra	fp_nd_round		| Now, round.
	| Another 64-bit single shift and store
1:	sub.w	#32,%d2
	cmp.w	#32,%d2			| Do we really need to shift?
	jcc	2f			| No, the number is too small.
	move.l	(%a0),%d0
	clr.l	(%a0)+
	move.l	%d0,%d1
	lsr.l	%d2,%d0
	neg.w	%d2
	add.w	#32,%d2
	| Again, check to see if we shifted off any significant bits.
	| The old low lword is dropped completely.
	tst.l	(%a0)
	jeq	1f
	bset	#0,%d0			| Sticky bit.
1:	move.l	%d0,(%a0)
	lsl.l	%d2,%d1
	jeq	fp_nd_round
	bset	#0,%d0
	jra	fp_nd_round
	| Sorry, the number is just too small.
2:	clr.l	(%a0)+
	clr.l	(%a0)
	moveq	#1,%d0			| Smallest possible fraction,
	jra	fp_nd_round		| round as desired.
	| zero and denormalized
fp_nd_zero:
	tst.l	(%a0)+
	jne	1f
	tst.l	(%a0)
	jne	1f
	subq.l	#8,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts				| zero.  nothing to do.
	| These are not merely subnormal numbers, but true denormals,
	| i.e. pathologically small (exponent is 2**-16383) numbers.
	| It is clearly impossible for even a normal extended number
	| with that exponent to fit into double precision, so just
	| write these ones off as "too darn small".
	| The mantissa is zeroed and only a sticky bit in %d0 is
	| handed to the rounding code.
1:	fp_set_sr FPSR_EXC_UNFL		| Set UNFL bit
	clr.l	(%a0)
	clr.l	-(%a0)
	move.w	#0x3c01,-(%a0)		| i.e. 2**-1022
	addq.l	#6,%a0			| back to the low mantissa lword
	moveq	#1,%d0
	jra	fp_nd_round		| round.
	| Exponent overflow.  Just call it infinity.
	| Entered with %a0 at the high mantissa lword.  Depending on
	| rounding mode and sign the result is either infinity or
	| the largest finite double.
fp_nd_large:
	move.w	#0x7ff,%d0
	and.w	(6,%a0),%d0		| any of the low 11 mantissa bits set?
	jeq	1f
	fp_set_sr FPSR_EXC_INEX2
1:	fp_set_sr FPSR_EXC_OVFL
	move.w	(FPD_RND,FPDATA),%d2
	jne	3f			| %d2 = 0 round to nearest
	| store infinity: maximum exponent, zero mantissa
1:	move.w	#0x7fff,(-2,%a0)
	clr.l	(%a0)+
	clr.l	(%a0)
2:	subq.l	#8,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
3:	subq.w	#2,%d2
	jcs	5f			| %d2 < 2, round to zero
	jhi	4f			| %d2 > 2, round to +infinity
	tst.b	(-3,%a0)		| to -inf
	jne	1b
	jra	5f
4:	tst.b	(-3,%a0)		| to +inf
	jeq	1b
	| store the largest finite double: mantissa ffffffff fffff800
5:	move.w	#0x43fe,(-2,%a0)
	moveq	#-1,%d0
	move.l	%d0,(%a0)+
	move.w	#0xf800,%d0
	move.l	%d0,(%a0)
	jra	2b
	| Infinities or NaNs are returned unchanged
fp_nd_huge:
	subq.l	#4,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts

	| fp_normalize_single:
	| round an extended value to single precision: only the top
	| 24 bits of the mantissa are kept (the low byte of the high
	| lword and the whole low lword are rounded away) and the
	| exponent is clamped to the single range.  The value stays
	| in extended format.
	| args:	 %a0 (struct fp_ext *), restored before every rts
	| modifies: %d0-%d2, and the INEX2/UNFL/OVFL status bits
	| As accessed here: sign byte at offset 1, exponent word at
	| offset 2, high/low mantissa lwords at offsets 4 and 8.

fp_normalize_single:
	printf	PNORM,"ns: %p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,") "
	addq.l	#2,%a0
	move.w	(%a0)+,%d2		| exponent
	jeq	fp_ns_zero		| zero / denormalized
	cmp.w	#0x7fff,%d2
	jeq	fp_ns_huge		| NaN / infinity.
	sub.w	#0x4000-0x7f,%d2	| will the exponent fit?
	jcs	fp_ns_small		| too small.
	cmp.w	#0xfe,%d2
	jcc	fp_ns_large		| too big.
	move.l	(%a0)+,%d0		| get high lword of mantissa
	| Entered with %a0 at the low mantissa lword and %d0 holding
	| the high lword (possibly with a sticky bit in bit 0).
fp_ns_round:
	tst.l	(%a0)			| check the low lword
	jeq	1f
	| Set a sticky bit if it is non-zero.  This should only
	| affect the rounding in what would otherwise be equal-
	| distance situations, which is what we want it to do.
	bset	#0,%d0
1:	clr.l	(%a0)			| zap it from memory.
	| now, round off the low 8 bits of the hi lword.
	tst.b	%d0			| 8 low bits.
	jne	fp_ns_checkround	| Are they non-zero?
	| nothing to do here
	subq.l	#8,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
fp_ns_checkround:
	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
	clr.b	-(%a0)			| clear low byte of high lword
	subq.l	#3,%a0			| %a0 now at the high lword
	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
	jne	2f			| %d2 == 0, round to nearest
	tst.b	%d0			| test guard bit
	jpl	9f			| zero is closer
	btst	#8,%d0			| test lsb bit
	| Ties to even: with the guard bit set, an odd value always
	| rounds up, an even value only if more discarded bits are set.
	jne	fp_ns_doroundup		| lsb set, round up
	lsl.b	#1,%d0			| check low bits
	jeq	9f			| exact tie on even value, keep it
fp_ns_doroundup:
	| round (the mantissa, that is) towards infinity
	add.l	#0x100,(%a0)
	jcc	9f			| no overflow, good.
	| Overflow.  This means that the high lword was 0xffffff00,
	| so it is now zero.  We will set the mantissa to reflect
	| this, and increment the exponent (checking for overflow
	| there too)
	move.w	#0x8000,(%a0)
	addq.w	#1,-(%a0)
	cmp.w	#0x407f,(%a0)+		| exponent now overflown?
	jeq	fp_ns_large		| yes, so make it infinity.
	| exit with %a0 at the high mantissa lword
9:	subq.l	#4,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
	| check nondefault rounding modes
2:	subq.w	#2,%d2
	jcs	9b			| %d2 < 2, round to zero
	jhi	3f			| %d2 > 2, round to +infinity
	tst.b	(-3,%a0)		| to -inf
	jne	fp_ns_doroundup		| negative, round to infinity
	jra	9b			| positive, round to zero
3:	tst.b	(-3,%a0)		| to +inf
	jeq	fp_ns_doroundup		| positive, round to infinity
	jra	9b			| negative, round to zero
	| Exponent underflow.  Try to make a denormal, and set it to
	| the smallest possible fraction if this fails.
	| Entered with %a0 at the high mantissa lword and %d2
	| holding the (negative) rebased exponent.
fp_ns_small:
	fp_set_sr FPSR_EXC_UNFL		| set UNFL bit
	move.w	#0x3f81,(-2,%a0)	| 2**-126
	neg.w	%d2			| degree of underflow
	cmp.w	#32,%d2			| single or double shift?
	jcc	2f
	| a 32-bit shift.
	move.l	(%a0),%d0
	move.l	%d0,%d1
	lsr.l	%d2,%d0
	move.l	%d0,(%a0)+
	| Check to see if we shifted off any significant bits.
	neg.w	%d2
	add.w	#32,%d2
	lsl.l	%d2,%d1
	jeq	1f
	bset	#0,%d0			| Sticky bit.
	| Check the lower lword
1:	tst.l	(%a0)
	jeq	fp_ns_round
	| Discard the whole low lword.  This used to be an unsized
	| clr, which assembles as a word operation and left the
	| lower half for fp_ns_round to clean up.
	clr.l	(%a0)
	bset	#0,%d0			| Sticky bit.
	jra	fp_ns_round
	| Sorry, the number is just too small.
2:	clr.l	(%a0)+
	clr.l	(%a0)
	moveq	#1,%d0			| Smallest possible fraction,
	jra	fp_ns_round		| round as desired.
	| Exponent overflow.  Just call it infinity.
	| Entered with %a0 at the high mantissa lword.  Depending on
	| rounding mode and sign the result is either infinity or
	| the largest finite single.
fp_ns_large:
	tst.b	(3,%a0)			| low byte of high lword non-zero?
	jeq	1f
	fp_set_sr FPSR_EXC_INEX2
1:	fp_set_sr FPSR_EXC_OVFL
	move.w	(FPD_RND,FPDATA),%d2
	jne	3f			| %d2 = 0 round to nearest
	| store infinity: maximum exponent, zero mantissa
1:	move.w	#0x7fff,(-2,%a0)
	clr.l	(%a0)+
	clr.l	(%a0)
2:	subq.l	#8,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
3:	subq.w	#2,%d2
	jcs	5f			| %d2 < 2, round to zero
	jhi	4f			| %d2 > 2, round to +infinity
	tst.b	(-3,%a0)		| to -inf
	jne	1b
	jra	5f
4:	tst.b	(-3,%a0)		| to +inf
	jeq	1b
	| store the largest finite single: mantissa ffffff00 00000000
5:	move.w	#0x407e,(-2,%a0)
	move.l	#0xffffff00,(%a0)+
	clr.l	(%a0)
	jra	2b
	| zero and denormalized
fp_ns_zero:
	tst.l	(%a0)+
	jne	1f
	tst.l	(%a0)
	jne	1f
	subq.l	#8,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts				| zero.  nothing to do.
	| These are not merely subnormal numbers, but true denormals,
	| i.e. pathologically small (exponent is 2**-16383) numbers.
	| It is clearly impossible for even a normal extended number
	| with that exponent to fit into single precision, so just
	| write these ones off as "too darn small".
	| The mantissa is zeroed and only a sticky bit in %d0 is
	| handed to the rounding code.
1:	fp_set_sr FPSR_EXC_UNFL		| Set UNFL bit
	clr.l	(%a0)
	clr.l	-(%a0)
	move.w	#0x3f81,-(%a0)		| i.e. 2**-126
	addq.l	#6,%a0			| back to the low mantissa lword
	moveq	#1,%d0
	jra	fp_ns_round		| round.
	| Infinities or NaNs are returned unchanged
fp_ns_huge:
	subq.l	#4,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts

	| fp_normalize_single_fast:
	| round an extended value to single precision (top 24 bits
	| of the mantissa are kept).  This is only used by
	| fsgldiv/fsglmul, where the operand is not completely
	| normalized.
	| Unlike fp_normalize_single there is no check for zero or
	| for an exponent outside the single range on entry, only
	| the overflow check after rounding up.
	| args:	 %a0 (struct fp_ext *), restored before every rts
	| modifies: %d0, %d2, and the INEX2/OVFL status bits

fp_normalize_single_fast:
	printf	PNORM,"nsf: %p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,") "
	addq.l	#2,%a0
	move.w	(%a0)+,%d2		| exponent
	cmp.w	#0x7fff,%d2
	jeq	fp_nsf_huge		| NaN / infinity.
	move.l	(%a0)+,%d0		| get high lword of mantissa
fp_nsf_round:
	tst.l	(%a0)			| check the low lword
	jeq	1f
	| Set a sticky bit if it is non-zero.  This should only
	| affect the rounding in what would otherwise be equal-
	| distance situations, which is what we want it to do.
	bset	#0,%d0
1:	clr.l	(%a0)			| zap it from memory.
	| now, round off the low 8 bits of the hi lword.
	tst.b	%d0			| 8 low bits.
	jne	fp_nsf_checkround	| Are they non-zero?
	| nothing to do here
	subq.l	#8,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
fp_nsf_checkround:
	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
	clr.b	-(%a0)			| clear low byte of high lword
	subq.l	#3,%a0			| %a0 now at the high lword
	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
	jne	2f			| %d2 == 0, round to nearest
	tst.b	%d0			| test guard bit
	jpl	9f			| zero is closer
	btst	#8,%d0			| test lsb bit
	| Ties to even: with the guard bit set, an odd value always
	| rounds up, an even value only if more discarded bits are set.
	jne	fp_nsf_doroundup		| lsb set, round up
	lsl.b	#1,%d0			| check low bits
	jeq	9f			| exact tie on even value, keep it
fp_nsf_doroundup:
	| round (the mantissa, that is) towards infinity
	add.l	#0x100,(%a0)
	jcc	9f			| no overflow, good.
	| Overflow.  This means that the high lword was 0xffffff00,
	| so it is now zero.  We will set the mantissa to reflect
	| this, and increment the exponent (checking for overflow
	| there too)
	move.w	#0x8000,(%a0)
	addq.w	#1,-(%a0)
	cmp.w	#0x407f,(%a0)+		| exponent now overflown?
	jeq	fp_nsf_large		| yes, so make it infinity.
	| exit with %a0 at the high mantissa lword
9:	subq.l	#4,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
	| check nondefault rounding modes
2:	subq.w	#2,%d2
	jcs	9b			| %d2 < 2, round to zero
	jhi	3f			| %d2 > 2, round to +infinity
	tst.b	(-3,%a0)		| to -inf
	jne	fp_nsf_doroundup	| negative, round to infinity
	jra	9b			| positive, round to zero
3:	tst.b	(-3,%a0)		| to +inf
	jeq	fp_nsf_doroundup		| positive, round to infinity
	jra	9b			| negative, round to zero
	| Exponent overflow.  Just call it infinity.
	| Entered with %a0 at the high mantissa lword.  Depending on
	| rounding mode and sign the result is either infinity or
	| the largest finite single.
fp_nsf_large:
	tst.b	(3,%a0)			| low byte of high lword non-zero?
	jeq	1f
	fp_set_sr FPSR_EXC_INEX2
1:	fp_set_sr FPSR_EXC_OVFL
	move.w	(FPD_RND,FPDATA),%d2
	jne	3f			| %d2 = 0 round to nearest
	| store infinity: maximum exponent, zero mantissa
1:	move.w	#0x7fff,(-2,%a0)
	clr.l	(%a0)+
	clr.l	(%a0)
2:	subq.l	#8,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
3:	subq.w	#2,%d2
	jcs	5f			| %d2 < 2, round to zero
	jhi	4f			| %d2 > 2, round to +infinity
	tst.b	(-3,%a0)		| to -inf
	jne	1b
	jra	5f
4:	tst.b	(-3,%a0)		| to +inf
	jeq	1b
	| store the largest finite single: mantissa ffffff00 00000000
5:	move.w	#0x407e,(-2,%a0)
	move.l	#0xffffff00,(%a0)+
	clr.l	(%a0)
	jra	2b
	| Infinities or NaNs are returned unchanged
fp_nsf_huge:
	subq.l	#4,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts

	| conv_ext2int (macro):
	| Generates a subroutine that converts an extended value to an
	| integer of a given size, again, with the appropriate type of
	| rounding.

	| Macro arguments:
	| s:	size, as given in an assembly instruction.
	| b:	number of bits in that size.

	| Subroutine arguments:
	| %a0:	source (struct fp_ext *)

	| Returns the integer in %d0 (like it should)
	| Modifies %d1, %d2 and %a0 (left pointing into the
	| mantissa).  Sets INEX2 when fraction bits are discarded
	| and OPERR when the value does not fit or is a NaN.

.macro conv_ext2int s,b
	.set	inf,(1<<(\b-1))-1	| i.e. MAXINT
	printf	PCONV,"e2i%d: %p(",2,#\b,%a0
	printx	PCONV,%a0@
	printf	PCONV,") "
	addq.l	#2,%a0
	move.w	(%a0)+,%d2		| exponent
	jeq	fp_e2i_zero\b		| zero / denorm (== 0, here)
	cmp.w	#0x7fff,%d2
	jeq	fp_e2i_huge\b		| Inf / NaN
	sub.w	#0x3ffe,%d2		| %d2 = number of integer bits
	jcs	fp_e2i_small\b		| magnitude below 0.5
	cmp.w	#\b,%d2
	jhi	fp_e2i_large\b		| cannot fit in any case
	move.l	(%a0),%d0		| high lword of mantissa
	move.l	%d0,%d1
	lsl.l	%d2,%d1			| %d1 = fraction bits of high lword
	jne	fp_e2i_round\b
	tst.l	(4,%a0)
	jne	fp_e2i_round\b
	| exact integer, just shift it into place
	neg.w	%d2
	add.w	#32,%d2
	lsr.l	%d2,%d0
	| apply the sign and check the range of the result
9:	tst.w	(-4,%a0)
	jne	1f
	tst.\s	%d0
	jmi	fp_e2i_large\b		| positive value reached sign bit
	printf	PCONV,"-> %p\n",1,%d0
	rts
1:	neg.\s	%d0
	jeq	1f
	jpl	fp_e2i_large\b		| negated value is not negative
1:	printf	PCONV,"-> %p\n",1,%d0
	rts
fp_e2i_round\b:
	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
	neg.w	%d2
	add.w	#32,%d2
	.if	\b>16
	jeq	5f			| shift count zero, see below
	.endif
	lsr.l	%d2,%d0
	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
	jne	2f			| %d2 == 0, round to nearest
	tst.l	%d1			| test guard bit
	jpl	9b			| zero is closer
	btst	%d2,%d0			| test lsb bit (%d2 still 0)
	jne	fp_e2i_doroundup\b
	lsl.l	#1,%d1			| check low bits
	jne	fp_e2i_doroundup\b
	tst.l	(4,%a0)
	jeq	9b
fp_e2i_doroundup\b:
	addq.l	#1,%d0
	jra	9b
	| check nondefault rounding modes
2:	subq.w	#2,%d2
	jcs	9b			| %d2 < 2, round to zero
	jhi	3f			| %d2 > 2, round to +infinity
	tst.w	(-4,%a0)		| to -inf
	jne	fp_e2i_doroundup\b	| negative, round to infinity
	jra	9b			| positive, round to zero
3:	tst.w	(-4,%a0)		| to +inf
	jeq	fp_e2i_doroundup\b	| positive, round to infinity
	jra	9b	| negative, round to zero
	| The shift count is zero, so the whole high lword is the
	| integer part and the guard bit is in the lower lword.
	| This only matters for getting the most negative long
	| rounded correctly, everything else ends up as overflow
	| anyway.
	.if	\b>16
5:	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
	jne	2b			| %d2 == 0, round to nearest
	move.l	(4,%a0),%d1		| test guard bit
	jpl	9b			| zero is closer
	lsl.l	#1,%d1			| check low bits
	jne	fp_e2i_doroundup\b
	jra	9b
	.endif
	| zero exponent: a true zero returns 0, anything else is
	| handled like a too small number
fp_e2i_zero\b:
	clr.l	%d0
	tst.l	(%a0)+
	jne	1f
	tst.l	(%a0)
	jeq	3f
1:	subq.l	#4,%a0
	fp_clr_sr FPSR_EXC_UNFL		| fp_normalize_ext has set this bit
	| magnitude below 0.5: the result is 0, or one step away
	| from zero when rounding towards the sign of the value
fp_e2i_small\b:
	fp_set_sr FPSR_EXC_INEX2
	clr.l	%d0
	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
	subq.w	#2,%d2
	jcs	3f			| %d2 < 2, round to nearest/zero
	jhi	2f			| %d2 > 2, round to +infinity
	tst.w	(-4,%a0)		| to -inf
	jeq	3f
	subq.\s	#1,%d0
	jra	3f
2:	tst.w	(-4,%a0)		| to +inf
	jne	3f
	addq.\s	#1,%d0
3:	printf	PCONV,"-> %p\n",1,%d0
	rts
	| out of range: return MAXINT, or MAXINT+1 (which wraps to
	| the most negative value) for negative numbers
fp_e2i_large\b:
	fp_set_sr FPSR_EXC_OPERR
	move.\s	#inf,%d0
	tst.w	(-4,%a0)
	jeq	1f
	addq.\s	#1,%d0
1:	printf	PCONV,"-> %p\n",1,%d0
	rts
	| Inf / NaN: a zero mantissa means infinity, which is
	| treated as out of range.  Both mantissa lwords have to
	| be checked, the second test used to look at the high
	| lword again.
fp_e2i_huge\b:
	move.\s	(%a0),%d0
	tst.l	(%a0)
	jne	1f
	tst.l	(4,%a0)
	jeq	fp_e2i_large\b
	| fp_normalize_ext has set this bit already
	| and made the number nonsignaling
1:	fp_tst_sr FPSR_EXC_SNAN
	jne	1f
	fp_set_sr FPSR_EXC_OPERR
1:	printf	PCONV,"-> %p\n",1,%d0
	rts
.endm

	| extended -> 32-bit integer, %a0 = source, result in %d0
fp_conv_ext2long:
	conv_ext2int l,32

	| extended -> 16-bit integer, %a0 = source, result in %d0
fp_conv_ext2short:
	conv_ext2int w,16

	| extended -> 8-bit integer, %a0 = source, result in %d0
fp_conv_ext2byte:
	conv_ext2int b,8

fp_conv_ext2double:
	jsr	fp_normalize_double
	printf	PCONV,"e2d: %p(",1,%a0
	printx	PCONV,%a0@
	printf	PCONV,"), "
	move.l	(%a0)+,%d2
	cmp.w	#0x7fff,%d2
	jne	1f
	move.w	#0x7ff,%d2
	move.l	(%a0)+,%d0
	jra	2f
1:	sub.w	#0x3fff-0x3ff,%d2
	move.l	(%a0)+,%d0
	jmi	2f
	clr.w	%d2
2:	lsl.w	#5,%d2
	lsl.l	#7,%d2
	lsl.l	#8,%d2
	move.l	%d0,%d1
	lsl.l	#1,%d0
	lsr.l	#4,%d0
	lsr.l	#8,%d0
	or.l	%d2,%d0
	putuser.l %d0,(%a1)+,fp_err_ua2,%a1
	moveq	#21,%d0
	lsl.l	%d0,%d1
	move.l	(%a0),%d0
	lsr.l	#4,%d0
	lsr.l	#7,%d0
	or.l	%d1,%d0
	putuser.l %d0,(%a1),fp_err_ua2,%a1
#ifdef FPU_EMU_DEBUG
	getuser.l %a1@(-4),%d0,fp_err_ua2,%a1
	getuser.l %a1@(0),%d1,fp_err_ua2,%a1
	printf	PCONV,"%p(%08x%08x)\n",3,%a1,%d0,%d1
#endif
	rts

fp_conv_ext2single:
	jsr	fp_normalize_single
	printf	PCONV,"e2s: %p(",1,%a0
	printx	PCONV,%a0@
	printf	PCONV,"), "
	move.l	(%a0)+,%d1
	cmp.w	#0x7fff,%d1
	jne	1f
	move.w	#0xff,%d1
	move.l	(%a0)+,%d0
	jra	2f
1:	sub.w	#0x3fff-0x7f,%d1
	move.l	(%a0)+,%d0
	jmi	2f
	clr.w	%d1
2:	lsl.w	#8,%d1
	lsl.l	#7,%d1
	lsl.l	#8,%d1
	bclr	#31,%d0
	lsr.l	#8,%d0
	or.l	%d1,%d0
	printf	PCONV,"%08x\n",1,%d0
	rts

	| special return addresses for instr that
	| encode the rounding precision in the opcode
	| (e.g. fsmove,fdmove)

fp_finalrounding_single:
	addq.l	#8,%sp
	jsr	fp_normalize_ext
	jsr	fp_normalize_single
	jra	fp_finaltest

fp_finalrounding_single_fast:
	addq.l	#8,%sp
	jsr	fp_normalize_ext
	jsr	fp_normalize_single_fast
	jra	fp_finaltest

fp_finalrounding_double:
	addq.l	#8,%sp
	jsr	fp_normalize_ext
	jsr	fp_normalize_double
	jra	fp_finaltest

	| fp_finalrounding:
	| normalize the result at %a0 and round it to the precision
	| selected in FPD_PREC (0: leave as extended, 1: single,
	| otherwise double), then fall into fp_finaltest.
	| fp_finaltest:
	| set the emulated status register based on the outcome of an
	| emulated instruction.
	| fp_final:
	| derive the accrued exception byte from the exception status
	| byte, then leave through fp_end.

fp_finalrounding:
	addq.l	#8,%sp
|	printf	,"f: %p\n",1,%a0
	jsr	fp_normalize_ext
	move.w	(FPD_PREC,FPDATA),%d0
	subq.w	#1,%d0
	jcs	fp_finaltest		| precision 0, no extra rounding
	jne	1f
	jsr	fp_normalize_single	| precision 1
	jra	2f
1:	jsr	fp_normalize_double	| any other precision
2:|	printf	,"f: %p\n",1,%a0
fp_finaltest:
	| First, we do some of the obvious tests for the exception
	| status byte and condition code bytes of fp_sr here, so that
	| they do not have to be handled individually by every
	| emulated instruction.
	| %d0 collects the condition code byte, %a0 is walked
	| through the result and not restored.
	clr.l	%d0
	addq.l	#1,%a0
	tst.b	(%a0)+			| sign
	jeq	1f
	bset	#FPSR_CC_NEG-24,%d0	| N bit
1:	cmp.w	#0x7fff,(%a0)+		| exponent
	jeq	2f
	| test for zero
	moveq	#FPSR_CC_Z-24,%d1
	tst.l	(%a0)+
	jne	9f
	tst.l	(%a0)
	jne	9f
	jra	8f
	| infinity and NaN
2:	moveq	#FPSR_CC_NAN-24,%d1
	move.l	(%a0)+,%d2
	lsl.l	#1,%d2			| ignore high bit
	jne	8f
	tst.l	(%a0)
	jne	8f
	moveq	#FPSR_CC_INF-24,%d1
8:	bset	%d1,%d0
9:	move.b	%d0,(FPD_FPSR+0,FPDATA)	| set condition test result
	| move instructions enter here
	| Here, we test things in the exception status byte, and set
	| other things in the accrued exception byte accordingly.
	| Emulated instructions can set various things in the former,
	| as defined in fp_emu.h.
fp_final:
	move.l	(FPD_FPSR,FPDATA),%d0
#if 0
	btst	#FPSR_EXC_SNAN,%d0	| EXC_SNAN
	jne	1f
	btst	#FPSR_EXC_OPERR,%d0	| EXC_OPERR
	jeq	2f
1:	bset	#FPSR_AEXC_IOP,%d0	| set IOP bit
2:	btst	#FPSR_EXC_OVFL,%d0	| EXC_OVFL
	jeq	1f
	bset	#FPSR_AEXC_OVFL,%d0	| set OVFL bit
1:	btst	#FPSR_EXC_UNFL,%d0	| EXC_UNFL
	jeq	1f
	btst	#FPSR_EXC_INEX2,%d0	| EXC_INEX2
	jeq	1f
	bset	#FPSR_AEXC_UNFL,%d0	| set UNFL bit
1:	btst	#FPSR_EXC_DZ,%d0	| EXC_DZ
	jeq	1f
	bset	#FPSR_AEXC_DZ,%d0	| set DZ bit
1:	btst	#FPSR_EXC_OVFL,%d0	| EXC_OVFL
	jne	1f
	btst	#FPSR_EXC_INEX2,%d0	| EXC_INEX2
	jne	1f
	btst	#FPSR_EXC_INEX1,%d0	| EXC_INEX1
	jeq	2f
1:	bset	#FPSR_AEXC_INEX,%d0	| set INEX bit
2:	move.l	%d0,(FPD_FPSR,FPDATA)
#else
	| same as above, greatly optimized, but untested (yet)
	| NOTE(review): the shift counts and masks below hard-code
	| the bit positions of the FPSR_EXC_* and FPSR_AEXC_* flags,
	| confirm them against fp_emu.h before changing anything.
	| %d2 keeps the original status, %d0 builds the accrued byte.
	move.l	%d0,%d2
	lsr.l	#5,%d0
	move.l	%d0,%d1
	lsr.l	#4,%d1
	or.l	%d0,%d1
	and.b	#0x08,%d1		| bit 3 from status >> 5 or >> 9
	move.l	%d2,%d0
	lsr.l	#6,%d0			| status >> 6 gives the other bits
	or.l	%d1,%d0
	move.l	%d2,%d1
	lsr.l	#4,%d1
	or.b	#0xdf,%d1		| mask that can only clear bit 5
	and.b	%d1,%d0
	move.l	%d2,%d1
	lsr.l	#7,%d1
	and.b	#0x80,%d1		| bit 7 from status >> 7
	or.b	%d1,%d0
	and.b	#0xf8,%d0		| only bits 7-3 are merged
	or.b	%d0,%d2
	move.l	%d2,(FPD_FPSR,FPDATA)
#endif
	| any exception that is both raised and enabled?
	move.b	(FPD_FPSR+2,FPDATA),%d0
	and.b	(FPD_FPCR+2,FPDATA),%d0
	jeq	1f
	printf	,"send signal!!!\n"
1:	jra	fp_end