Commit e89fb56c8bcf5514cfe7abd7a3dda9e6007b7238

Authored by Markos Chandras
Committed by Ralf Baechle
1 parent 2ab82e6648

MIPS: lib: csum_partial: Add macro to build csum_partial symbols

In preparation for EVA support, we use a macro to build the
__csum_partial_copy_user main code so that it can be shared across
multiple implementations. EVA uses the same code but replaces the
load/store/prefetch instructions with the EVA-specific ones, so
using a macro avoids unnecessary code duplication.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
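
The conversion relies on two GNU assembler features: macro arguments tested
with .if/.endif, and the \@ pseudo-symbol, which expands to a counter that is
unique per macro invocation, so local labels such as .Ldone\@ do not collide
when the body is instantiated more than once. Note also that the EXC wrapper
in the diff below only emits __ex_table entries when the mode argument is
LEGACY_MODE. A minimal sketch of the idiom, using a hypothetical clamp_to_zero
macro rather than the real kernel code:

	#define LEGACY_MODE 1
	#define EVA_MODE    2

	/* Hypothetical example, not kernel code: \@ keeps macro-local
	 * labels unique, and .if lets one body serve several modes.
	 * Assumes a cpp-preprocessed .S file. */
	.macro	clamp_to_zero reg, mode
	.if \mode == LEGACY_MODE
	bgez	\reg, .Lskip\@		# .Lskip\@ expands to a per-
	 nop				# invocation unique local label
	move	\reg, zero
.Lskip\@:
	.endif
	.endm

	clamp_to_zero	t0, LEGACY_MODE	# emits the body, first .Lskip label
	clamp_to_zero	t1, LEGACY_MODE	# emits the body again, new label
	clamp_to_zero	t2, EVA_MODE	# emits nothing in this sketch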

Showing 1 changed file with 108 additions and 92 deletions

arch/mips/lib/csum_partial.S
... ... @@ -331,6 +331,10 @@
331 331 /* Instruction type */
332 332 #define LD_INSN 1
333 333 #define ST_INSN 2
  334 +#define LEGACY_MODE 1
  335 +#define EVA_MODE 2
  336 +#define USEROP 1
  337 +#define KERNELOP 2
334 338  
335 339 /*
336 340 * Wrapper to add an entry in the exception table
... ... @@ -343,10 +347,12 @@
343 347 * handler : Exception handler
344 348 */
345 349 #define EXC(insn, type, reg, addr, handler) \
346   -9: insn reg, addr; \
347   - .section __ex_table,"a"; \
348   - PTR 9b, handler; \
349   - .previous
  350 + .if \mode == LEGACY_MODE; \
  351 +9: insn reg, addr; \
  352 + .section __ex_table,"a"; \
  353 + PTR 9b, handler; \
  354 + .previous; \
  355 + .endif
350 356  
351 357 #undef LOAD
352 358  
... ... @@ -419,16 +425,20 @@
419 425 .set at=v1
420 426 #endif
421 427  
422   -LEAF(__csum_partial_copy_kernel)
423   -FEXPORT(__csum_partial_copy_to_user)
424   -FEXPORT(__csum_partial_copy_from_user)
  428 + .macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to, __nocheck
  429 +
425 430 PTR_ADDU AT, src, len /* See (1) above. */
  431 + /* initialize __nocheck if this is the first time we execute this
  432 + * macro
  433 + */
426 434 #ifdef CONFIG_64BIT
427 435 move errptr, a4
428 436 #else
429 437 lw errptr, 16(sp)
430 438 #endif
431   -FEXPORT(csum_partial_copy_nocheck)
  439 + .if \__nocheck == 1
  440 + FEXPORT(csum_partial_copy_nocheck)
  441 + .endif
432 442 move sum, zero
433 443 move odd, zero
434 444 /*
... ... @@ -444,48 +454,48 @@
444 454 */
445 455 sltu t2, len, NBYTES
446 456 and t1, dst, ADDRMASK
447   - bnez t2, .Lcopy_bytes_checklen
  457 + bnez t2, .Lcopy_bytes_checklen\@
448 458 and t0, src, ADDRMASK
449 459 andi odd, dst, 0x1 /* odd buffer? */
450   - bnez t1, .Ldst_unaligned
  460 + bnez t1, .Ldst_unaligned\@
451 461 nop
452   - bnez t0, .Lsrc_unaligned_dst_aligned
  462 + bnez t0, .Lsrc_unaligned_dst_aligned\@
453 463 /*
454 464 * use delay slot for fall-through
455 465 * src and dst are aligned; need to compute rem
456 466 */
457   -.Lboth_aligned:
  467 +.Lboth_aligned\@:
458 468 SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter
459   - beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES
  469 + beqz t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES
460 470 nop
461 471 SUB len, 8*NBYTES # subtract here for bgez loop
462 472 .align 4
463 473 1:
464   - LOAD(t0, UNIT(0)(src), .Ll_exc)
465   - LOAD(t1, UNIT(1)(src), .Ll_exc_copy)
466   - LOAD(t2, UNIT(2)(src), .Ll_exc_copy)
467   - LOAD(t3, UNIT(3)(src), .Ll_exc_copy)
468   - LOAD(t4, UNIT(4)(src), .Ll_exc_copy)
469   - LOAD(t5, UNIT(5)(src), .Ll_exc_copy)
470   - LOAD(t6, UNIT(6)(src), .Ll_exc_copy)
471   - LOAD(t7, UNIT(7)(src), .Ll_exc_copy)
  474 + LOAD(t0, UNIT(0)(src), .Ll_exc\@)
  475 + LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
  476 + LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
  477 + LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
  478 + LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@)
  479 + LOAD(t5, UNIT(5)(src), .Ll_exc_copy\@)
  480 + LOAD(t6, UNIT(6)(src), .Ll_exc_copy\@)
  481 + LOAD(t7, UNIT(7)(src), .Ll_exc_copy\@)
472 482 SUB len, len, 8*NBYTES
473 483 ADD src, src, 8*NBYTES
474   - STORE(t0, UNIT(0)(dst), .Ls_exc)
  484 + STORE(t0, UNIT(0)(dst), .Ls_exc\@)
475 485 ADDC(sum, t0)
476   - STORE(t1, UNIT(1)(dst), .Ls_exc)
  486 + STORE(t1, UNIT(1)(dst), .Ls_exc\@)
477 487 ADDC(sum, t1)
478   - STORE(t2, UNIT(2)(dst), .Ls_exc)
  488 + STORE(t2, UNIT(2)(dst), .Ls_exc\@)
479 489 ADDC(sum, t2)
480   - STORE(t3, UNIT(3)(dst), .Ls_exc)
  490 + STORE(t3, UNIT(3)(dst), .Ls_exc\@)
481 491 ADDC(sum, t3)
482   - STORE(t4, UNIT(4)(dst), .Ls_exc)
  492 + STORE(t4, UNIT(4)(dst), .Ls_exc\@)
483 493 ADDC(sum, t4)
484   - STORE(t5, UNIT(5)(dst), .Ls_exc)
  494 + STORE(t5, UNIT(5)(dst), .Ls_exc\@)
485 495 ADDC(sum, t5)
486   - STORE(t6, UNIT(6)(dst), .Ls_exc)
  496 + STORE(t6, UNIT(6)(dst), .Ls_exc\@)
487 497 ADDC(sum, t6)
488   - STORE(t7, UNIT(7)(dst), .Ls_exc)
  498 + STORE(t7, UNIT(7)(dst), .Ls_exc\@)
489 499 ADDC(sum, t7)
490 500 .set reorder /* DADDI_WAR */
491 501 ADD dst, dst, 8*NBYTES
... ... @@ -496,44 +506,44 @@
496 506 /*
497 507 * len == the number of bytes left to copy < 8*NBYTES
498 508 */
499   -.Lcleanup_both_aligned:
  509 +.Lcleanup_both_aligned\@:
500 510 #define rem t7
501   - beqz len, .Ldone
  511 + beqz len, .Ldone\@
502 512 sltu t0, len, 4*NBYTES
503   - bnez t0, .Lless_than_4units
  513 + bnez t0, .Lless_than_4units\@
504 514 and rem, len, (NBYTES-1) # rem = len % NBYTES
505 515 /*
506 516 * len >= 4*NBYTES
507 517 */
508   - LOAD(t0, UNIT(0)(src), .Ll_exc)
509   - LOAD(t1, UNIT(1)(src), .Ll_exc_copy)
510   - LOAD(t2, UNIT(2)(src), .Ll_exc_copy)
511   - LOAD(t3, UNIT(3)(src), .Ll_exc_copy)
  518 + LOAD(t0, UNIT(0)(src), .Ll_exc\@)
  519 + LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
  520 + LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
  521 + LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
512 522 SUB len, len, 4*NBYTES
513 523 ADD src, src, 4*NBYTES
514   - STORE(t0, UNIT(0)(dst), .Ls_exc)
  524 + STORE(t0, UNIT(0)(dst), .Ls_exc\@)
515 525 ADDC(sum, t0)
516   - STORE(t1, UNIT(1)(dst), .Ls_exc)
  526 + STORE(t1, UNIT(1)(dst), .Ls_exc\@)
517 527 ADDC(sum, t1)
518   - STORE(t2, UNIT(2)(dst), .Ls_exc)
  528 + STORE(t2, UNIT(2)(dst), .Ls_exc\@)
519 529 ADDC(sum, t2)
520   - STORE(t3, UNIT(3)(dst), .Ls_exc)
  530 + STORE(t3, UNIT(3)(dst), .Ls_exc\@)
521 531 ADDC(sum, t3)
522 532 .set reorder /* DADDI_WAR */
523 533 ADD dst, dst, 4*NBYTES
524   - beqz len, .Ldone
  534 + beqz len, .Ldone\@
525 535 .set noreorder
526   -.Lless_than_4units:
  536 +.Lless_than_4units\@:
527 537 /*
528 538 * rem = len % NBYTES
529 539 */
530   - beq rem, len, .Lcopy_bytes
  540 + beq rem, len, .Lcopy_bytes\@
531 541 nop
532 542 1:
533   - LOAD(t0, 0(src), .Ll_exc)
  543 + LOAD(t0, 0(src), .Ll_exc\@)
534 544 ADD src, src, NBYTES
535 545 SUB len, len, NBYTES
536   - STORE(t0, 0(dst), .Ls_exc)
  546 + STORE(t0, 0(dst), .Ls_exc\@)
537 547 ADDC(sum, t0)
538 548 .set reorder /* DADDI_WAR */
539 549 ADD dst, dst, NBYTES
... ... @@ -552,20 +562,20 @@
552 562 * more instruction-level parallelism.
553 563 */
554 564 #define bits t2
555   - beqz len, .Ldone
  565 + beqz len, .Ldone\@
556 566 ADD t1, dst, len # t1 is just past last byte of dst
557 567 li bits, 8*NBYTES
558 568 SLL rem, len, 3 # rem = number of bits to keep
559   - LOAD(t0, 0(src), .Ll_exc)
  569 + LOAD(t0, 0(src), .Ll_exc\@)
560 570 SUB bits, bits, rem # bits = number of bits to discard
561 571 SHIFT_DISCARD t0, t0, bits
562   - STREST(t0, -1(t1), .Ls_exc)
  572 + STREST(t0, -1(t1), .Ls_exc\@)
563 573 SHIFT_DISCARD_REVERT t0, t0, bits
564 574 .set reorder
565 575 ADDC(sum, t0)
566   - b .Ldone
  576 + b .Ldone\@
567 577 .set noreorder
568   -.Ldst_unaligned:
  578 +.Ldst_unaligned\@:
569 579 /*
570 580 * dst is unaligned
571 581 * t0 = src & ADDRMASK
... ... @@ -576,25 +586,25 @@
576 586 * Set match = (src and dst have same alignment)
577 587 */
578 588 #define match rem
579   - LDFIRST(t3, FIRST(0)(src), .Ll_exc)
  589 + LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)
580 590 ADD t2, zero, NBYTES
581   - LDREST(t3, REST(0)(src), .Ll_exc_copy)
  591 + LDREST(t3, REST(0)(src), .Ll_exc_copy\@)
582 592 SUB t2, t2, t1 # t2 = number of bytes copied
583 593 xor match, t0, t1
584   - STFIRST(t3, FIRST(0)(dst), .Ls_exc)
  594 + STFIRST(t3, FIRST(0)(dst), .Ls_exc\@)
585 595 SLL t4, t1, 3 # t4 = number of bits to discard
586 596 SHIFT_DISCARD t3, t3, t4
587 597 /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
588 598 ADDC(sum, t3)
589   - beq len, t2, .Ldone
  599 + beq len, t2, .Ldone\@
590 600 SUB len, len, t2
591 601 ADD dst, dst, t2
592   - beqz match, .Lboth_aligned
  602 + beqz match, .Lboth_aligned\@
593 603 ADD src, src, t2
594 604  
595   -.Lsrc_unaligned_dst_aligned:
  605 +.Lsrc_unaligned_dst_aligned\@:
596 606 SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter
597   - beqz t0, .Lcleanup_src_unaligned
  607 + beqz t0, .Lcleanup_src_unaligned\@
598 608 and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES
599 609 1:
600 610 /*
... ... @@ -603,53 +613,53 @@
603 613 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
604 614 * are to the same unit (unless src is aligned, but it's not).
605 615 */
606   - LDFIRST(t0, FIRST(0)(src), .Ll_exc)
607   - LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy)
  616 + LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
  617 + LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@)
608 618 SUB len, len, 4*NBYTES
609   - LDREST(t0, REST(0)(src), .Ll_exc_copy)
610   - LDREST(t1, REST(1)(src), .Ll_exc_copy)
611   - LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy)
612   - LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy)
613   - LDREST(t2, REST(2)(src), .Ll_exc_copy)
614   - LDREST(t3, REST(3)(src), .Ll_exc_copy)
  619 + LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
  620 + LDREST(t1, REST(1)(src), .Ll_exc_copy\@)
  621 + LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@)
  622 + LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@)
  623 + LDREST(t2, REST(2)(src), .Ll_exc_copy\@)
  624 + LDREST(t3, REST(3)(src), .Ll_exc_copy\@)
615 625 ADD src, src, 4*NBYTES
616 626 #ifdef CONFIG_CPU_SB1
617 627 nop # improves slotting
618 628 #endif
619   - STORE(t0, UNIT(0)(dst), .Ls_exc)
  629 + STORE(t0, UNIT(0)(dst), .Ls_exc\@)
620 630 ADDC(sum, t0)
621   - STORE(t1, UNIT(1)(dst), .Ls_exc)
  631 + STORE(t1, UNIT(1)(dst), .Ls_exc\@)
622 632 ADDC(sum, t1)
623   - STORE(t2, UNIT(2)(dst), .Ls_exc)
  633 + STORE(t2, UNIT(2)(dst), .Ls_exc\@)
624 634 ADDC(sum, t2)
625   - STORE(t3, UNIT(3)(dst), .Ls_exc)
  635 + STORE(t3, UNIT(3)(dst), .Ls_exc\@)
626 636 ADDC(sum, t3)
627 637 .set reorder /* DADDI_WAR */
628 638 ADD dst, dst, 4*NBYTES
629 639 bne len, rem, 1b
630 640 .set noreorder
631 641  
632   -.Lcleanup_src_unaligned:
633   - beqz len, .Ldone
  642 +.Lcleanup_src_unaligned\@:
  643 + beqz len, .Ldone\@
634 644 and rem, len, NBYTES-1 # rem = len % NBYTES
635   - beq rem, len, .Lcopy_bytes
  645 + beq rem, len, .Lcopy_bytes\@
636 646 nop
637 647 1:
638   - LDFIRST(t0, FIRST(0)(src), .Ll_exc)
639   - LDREST(t0, REST(0)(src), .Ll_exc_copy)
  648 + LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
  649 + LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
640 650 ADD src, src, NBYTES
641 651 SUB len, len, NBYTES
642   - STORE(t0, 0(dst), .Ls_exc)
  652 + STORE(t0, 0(dst), .Ls_exc\@)
643 653 ADDC(sum, t0)
644 654 .set reorder /* DADDI_WAR */
645 655 ADD dst, dst, NBYTES
646 656 bne len, rem, 1b
647 657 .set noreorder
648 658  
649   -.Lcopy_bytes_checklen:
650   - beqz len, .Ldone
  659 +.Lcopy_bytes_checklen\@:
  660 + beqz len, .Ldone\@
651 661 nop
652   -.Lcopy_bytes:
  662 +.Lcopy_bytes\@:
653 663 /* 0 < len < NBYTES */
654 664 #ifdef CONFIG_CPU_LITTLE_ENDIAN
655 665 #define SHIFT_START 0
... ... @@ -662,12 +672,12 @@
662 672 li t3, SHIFT_START # shift
663 673 /* use .Ll_exc_copy here to return correct sum on fault */
664 674 #define COPY_BYTE(N) \
665   - LOADBU(t0, N(src), .Ll_exc_copy); \
  675 + LOADBU(t0, N(src), .Ll_exc_copy\@); \
666 676 SUB len, len, 1; \
667   - STOREB(t0, N(dst), .Ls_exc); \
  677 + STOREB(t0, N(dst), .Ls_exc\@); \
668 678 SLLV t0, t0, t3; \
669 679 addu t3, SHIFT_INC; \
670   - beqz len, .Lcopy_bytes_done; \
  680 + beqz len, .Lcopy_bytes_done\@; \
671 681 or t2, t0
672 682  
673 683 COPY_BYTE(0)
... ... @@ -678,14 +688,14 @@
678 688 COPY_BYTE(4)
679 689 COPY_BYTE(5)
680 690 #endif
681   - LOADBU(t0, NBYTES-2(src), .Ll_exc_copy)
  691 + LOADBU(t0, NBYTES-2(src), .Ll_exc_copy\@)
682 692 SUB len, len, 1
683   - STOREB(t0, NBYTES-2(dst), .Ls_exc)
  693 + STOREB(t0, NBYTES-2(dst), .Ls_exc\@)
684 694 SLLV t0, t0, t3
685 695 or t2, t0
686   -.Lcopy_bytes_done:
  696 +.Lcopy_bytes_done\@:
687 697 ADDC(sum, t2)
688   -.Ldone:
  698 +.Ldone\@:
689 699 /* fold checksum */
690 700 #ifdef USE_DOUBLE
691 701 dsll32 v1, sum, 0
... ... @@ -714,7 +724,7 @@
714 724 jr ra
715 725 .set noreorder
716 726  
717   -.Ll_exc_copy:
  727 +.Ll_exc_copy\@:
718 728 /*
719 729 * Copy bytes from src until faulting load address (or until a
720 730 * lb faults)
... ... @@ -729,7 +739,7 @@
729 739 li t2, SHIFT_START
730 740 LOADK t0, THREAD_BUADDR(t0)
731 741 1:
732   - LOADBU(t1, 0(src), .Ll_exc)
  742 + LOADBU(t1, 0(src), .Ll_exc\@)
733 743 ADD src, src, 1
734 744 sb t1, 0(dst) # can't fault -- we're copy_from_user
735 745 SLLV t1, t1, t2
... ... @@ -739,7 +749,7 @@
739 749 ADD dst, dst, 1
740 750 bne src, t0, 1b
741 751 .set noreorder
742   -.Ll_exc:
  752 +.Ll_exc\@:
743 753 LOADK t0, TI_TASK($28)
744 754 nop
745 755 LOADK t0, THREAD_BUADDR(t0) # t0 is just past last good address
... ... @@ -758,7 +768,7 @@
758 768 */
759 769 .set reorder /* DADDI_WAR */
760 770 SUB src, len, 1
761   - beqz len, .Ldone
  771 + beqz len, .Ldone\@
762 772 .set noreorder
763 773 1: sb zero, 0(dst)
764 774 ADD dst, dst, 1
... ... @@ -773,14 +783,20 @@
773 783 SUB src, src, v1
774 784 #endif
775 785 li v1, -EFAULT
776   - b .Ldone
  786 + b .Ldone\@
777 787 sw v1, (errptr)
778 788  
779   -.Ls_exc:
  789 +.Ls_exc\@:
780 790 li v0, -1 /* invalid checksum */
781 791 li v1, -EFAULT
782 792 jr ra
783 793 sw v1, (errptr)
784 794 .set pop
785   - END(__csum_partial_copy_kernel)
  795 + .endm
  796 +
  797 +LEAF(__csum_partial_copy_kernel)
  798 +FEXPORT(__csum_partial_copy_to_user)
  799 +FEXPORT(__csum_partial_copy_from_user)
  800 +__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP 1
  801 +END(__csum_partial_copy_kernel)
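
With the body wrapped in __BUILD_CSUM_PARTIAL_COPY_USER, this commit only
instantiates the legacy symbols (the invocation above passes LEGACY_MODE,
both operands as USEROP, and __nocheck == 1). A follow-up EVA patch would
redefine the load/store/prefetch wrappers and expand the macro again; a
hedged sketch of what such an instantiation might look like — the symbol
names, operand values and the EVA handling in EXC are assumptions, not part
of this commit:

#ifdef CONFIG_EVA
/* Hypothetical follow-up: EVA user copies, assuming EVA load/store
 * wrappers are defined earlier and EXC is extended to cover EVA_MODE. */
LEAF(__csum_partial_copy_to_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE KERNELOP USEROP 0
END(__csum_partial_copy_to_user)

LEAF(__csum_partial_copy_from_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE USEROP KERNELOP 0
END(__csum_partial_copy_from_user)
#endif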