Commit 2272382879d93d37e7964554cea5b0583c94c247

Authored by Alexey Brodkin
Committed by Tom Rini
1 parent 2f16ac9df4

arc: add library functions

These are library functions used by the ARC700 architecture.

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>

Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Francois Bedard <fbedard@synopsys.com>
Cc: Wolfgang Denk <wd@denx.de>
Cc: Heiko Schocher <hs@denx.de>

Showing 11 changed files with 846 additions and 0 deletions

arch/arc/lib/Makefile
  1 +#
  2 +# Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
  3 +#
  4 +# SPDX-License-Identifier: GPL-2.0+
  5 +#
  6 +
  7 +obj-y += sections.o
  8 +obj-y += relocate.o
  9 +obj-y += strchr-700.o
  10 +obj-y += strcmp.o
  11 +obj-y += strcpy-700.o
  12 +obj-y += strlen.o
  13 +obj-y += memcmp.o
  14 +obj-y += memcpy-700.o
  15 +obj-y += memset.o
  16 +obj-$(CONFIG_CMD_BOOTM) += bootm.o
arch/arc/lib/bootm.c
  1 +/*
  2 + * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
  3 + *
  4 + * SPDX-License-Identifier: GPL-2.0+
  5 + */
  6 +
  7 +#include <common.h>
  8 +
  9 +DECLARE_GLOBAL_DATA_PTR;
  10 +
  11 +static ulong get_sp(void)
  12 +{
  13 + ulong ret;
  14 +
  15 + asm("mov %0, sp" : "=r"(ret) : );
  16 + return ret;
  17 +}
  18 +
  19 +void arch_lmb_reserve(struct lmb *lmb)
  20 +{
  21 + ulong sp;
  22 +
  23 + /*
  24 + * Booting a (Linux) kernel image
  25 + *
  26 + * Allocate space for command line and board info - the
  27 + * address should be as high as possible within the reach of
  28 + * the kernel (see CONFIG_SYS_BOOTMAPSZ settings), but in unused
  29 + * memory, which means far enough below the current stack
  30 + * pointer.
  31 + */
  32 + sp = get_sp();
  33 + debug("## Current stack ends at 0x%08lx ", sp);
  34 +
  35 + /* adjust sp by 4K to be safe */
  36 + sp -= 4096;
  37 + lmb_reserve(lmb, sp, (CONFIG_SYS_SDRAM_BASE + gd->ram_size - sp));
  38 +}
  39 +
  40 +static int cleanup_before_linux(void)
  41 +{
  42 + disable_interrupts();
  43 + flush_dcache_all();
  44 + invalidate_icache_all();
  45 +
  46 + return 0;
  47 +}
  48 +
  49 +/* Subcommand: PREP */
  50 +static void boot_prep_linux(bootm_headers_t *images)
  51 +{
  52 + if (image_setup_linux(images))
  53 + hang();
  54 +}
  55 +
  56 +/* Subcommand: GO */
  57 +static void boot_jump_linux(bootm_headers_t *images, int flag)
  58 +{
  59 + void (*kernel_entry)(int zero, int arch, uint params);
  60 + unsigned int r0, r2;
  61 + int fake = (flag & BOOTM_STATE_OS_FAKE_GO);
  62 +
  63 + kernel_entry = (void (*)(int, int, uint))images->ep;
  64 +
  65 + debug("## Transferring control to Linux (at address %08lx)...\n",
  66 + (ulong) kernel_entry);
  67 + bootstage_mark(BOOTSTAGE_ID_RUN_OS);
  68 +
  69 + printf("\nStarting kernel ...%s\n\n", fake ?
  70 + "(fake run for tracing)" : "");
  71 + bootstage_mark_name(BOOTSTAGE_ID_BOOTM_HANDOFF, "start_kernel");
  72 +
  73 + cleanup_before_linux();
  74 +
  75 + if (IMAGE_ENABLE_OF_LIBFDT && images->ft_len) {
  76 + r0 = 2;
  77 + r2 = (unsigned int)images->ft_addr;
  78 + } else {
  79 + r0 = 1;
  80 + r2 = (unsigned int)getenv("bootargs");
  81 + }
  82 +
  83 + if (!fake)
  84 + kernel_entry(r0, 0, r2);
  85 +}
  86 +
  87 +int do_bootm_linux(int flag, int argc, char *argv[], bootm_headers_t *images)
  88 +{
  89 + /* No need for those on ARC */
  90 + if ((flag & BOOTM_STATE_OS_BD_T) || (flag & BOOTM_STATE_OS_CMDLINE))
  91 + return -1;
  92 +
  93 + if (flag & BOOTM_STATE_OS_PREP) {
  94 + boot_prep_linux(images);
  95 + return 0;
  96 + }
  97 +
  98 + if (flag & (BOOTM_STATE_OS_GO | BOOTM_STATE_OS_FAKE_GO)) {
  99 + boot_jump_linux(images, flag);
  100 + return 0;
  101 + }
  102 +
  103 + boot_prep_linux(images);
  104 + boot_jump_linux(images, flag);
  105 + return 0;
  106 +}
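For reference, the handoff convention implemented by boot_jump_linux() above (r0 = 2 with the FDT address in r2, or r0 = 1 with a pointer to the "bootargs" string in r2, middle argument passed as 0) can be restated as a small stand-alone C program. The kernel_entry() stub, the address and the command line below are illustrative only and are not part of the commit:

#include <stdint.h>
#include <stdio.h>

/* Stub standing in for the real kernel entry point; illustration only. */
static void kernel_entry(int r0, int r1, unsigned int r2)
{
        (void)r1;               /* the middle argument is always 0 */
        if (r0 == 2)
                printf("FDT boot: blob at 0x%08x\n", r2);
        else
                printf("legacy boot: bootargs at 0x%08x\n", r2);
}

int main(void)
{
        const char *bootargs = "console=ttyARC0,115200n8";     /* made-up value */

        /* Device tree available: r0 = 2, r2 = FDT address (made-up here). */
        kernel_entry(2, 0, 0x81000000u);

        /* No device tree: r0 = 1, r2 = pointer to the "bootargs" string. */
        kernel_entry(1, 0, (unsigned int)(uintptr_t)bootargs);

        return 0;
}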
arch/arc/lib/memcmp.S
  1 +/*
  2 + * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
  3 + *
  4 + * SPDX-License-Identifier: GPL-2.0+
  5 + */
  6 +
  7 +#ifdef __LITTLE_ENDIAN__
  8 +#define WORD2 r2
  9 +#define SHIFT r3
  10 +#else /* __BIG_ENDIAN__ */
  11 +#define WORD2 r3
  12 +#define SHIFT r2
  13 +#endif /* _ENDIAN__ */
  14 +
  15 +.global memcmp
  16 +.align 4
  17 +memcmp:
  18 + or %r12, %r0, %r1
  19 + asl_s %r12, %r12, 30
  20 + sub %r3, %r2, 1
  21 + brls %r2, %r12, .Lbytewise
  22 + ld %r4, [%r0, 0]
  23 + ld %r5, [%r1, 0]
  24 + lsr.f %lp_count, %r3, 3
  25 + lpne .Loop_end
  26 + ld_s WORD2, [%r0, 4]
  27 + ld_s %r12, [%r1, 4]
  28 + brne %r4, %r5, .Leven
  29 + ld.a %r4, [%r0, 8]
  30 + ld.a %r5, [%r1, 8]
  31 + brne WORD2, %r12, .Lodd
  32 +.Loop_end:
  33 + asl_s SHIFT, SHIFT, 3
  34 + bhs_s .Last_cmp
  35 + brne %r4, %r5, .Leven
  36 + ld %r4, [%r0, 4]
  37 + ld %r5, [%r1, 4]
  38 +#ifdef __LITTLE_ENDIAN__
  39 + nop_s
  40 + /* one more load latency cycle */
  41 +.Last_cmp:
  42 + xor %r0, %r4, %r5
  43 + bset %r0, %r0, SHIFT
  44 + sub_s %r1, %r0, 1
  45 + bic_s %r1, %r1, %r0
  46 + norm %r1, %r1
  47 + b.d .Leven_cmp
  48 + and %r1, %r1, 24
  49 +.Leven:
  50 + xor %r0, %r4, %r5
  51 + sub_s %r1, %r0, 1
  52 + bic_s %r1, %r1, %r0
  53 + norm %r1, %r1
  54 + /* slow track insn */
  55 + and %r1, %r1, 24
  56 +.Leven_cmp:
  57 + asl %r2, %r4, %r1
  58 + asl %r12, %r5, %r1
  59 + lsr_s %r2, %r2, 1
  60 + lsr_s %r12, %r12, 1
  61 + j_s.d [%blink]
  62 + sub %r0, %r2, %r12
  63 + .balign 4
  64 +.Lodd:
  65 + xor %r0, WORD2, %r12
  66 + sub_s %r1, %r0, 1
  67 + bic_s %r1, %r1, %r0
  68 + norm %r1, %r1
  69 + /* slow track insn */
  70 + and %r1, %r1, 24
  71 + asl_s %r2, %r2, %r1
  72 + asl_s %r12, %r12, %r1
  73 + lsr_s %r2, %r2, 1
  74 + lsr_s %r12, %r12, 1
  75 + j_s.d [%blink]
  76 + sub %r0, %r2, %r12
  77 +#else /* __BIG_ENDIAN__ */
  78 +.Last_cmp:
  79 + neg_s SHIFT, SHIFT
  80 + lsr %r4, %r4, SHIFT
  81 + lsr %r5, %r5, SHIFT
  82 + /* slow track insn */
  83 +.Leven:
  84 + sub.f %r0, %r4, %r5
  85 + mov.ne %r0, 1
  86 + j_s.d [%blink]
  87 + bset.cs %r0, %r0, 31
  88 +.Lodd:
  89 + cmp_s WORD2, %r12
  90 +
  91 + mov_s %r0, 1
  92 + j_s.d [%blink]
  93 + bset.cs %r0, %r0, 31
  94 +#endif /* _ENDIAN__ */
  95 + .balign 4
  96 +.Lbytewise:
  97 + breq %r2, 0, .Lnil
  98 + ldb %r4, [%r0, 0]
  99 + ldb %r5, [%r1, 0]
  100 + lsr.f %lp_count, %r3
  101 + lpne .Lbyte_end
  102 + ldb_s %r3, [%r0, 1]
  103 + ldb %r12, [%r1, 1]
  104 + brne %r4, %r5, .Lbyte_even
  105 + ldb.a %r4, [%r0, 2]
  106 + ldb.a %r5, [%r1, 2]
  107 + brne %r3, %r12, .Lbyte_odd
  108 +.Lbyte_end:
  109 + bcc .Lbyte_even
  110 + brne %r4, %r5, .Lbyte_even
  111 + ldb_s %r3, [%r0, 1]
  112 + ldb_s %r12, [%r1, 1]
  113 +.Lbyte_odd:
  114 + j_s.d [%blink]
  115 + sub %r0, %r3, %r12
  116 +.Lbyte_even:
  117 + j_s.d [%blink]
  118 + sub %r0, %r4, %r5
  119 +.Lnil:
  120 + j_s.d [%blink]
  121 + mov %r0, 0
arch/arc/lib/memcpy-700.S
  1 +/*
  2 + * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
  3 + *
  4 + * SPDX-License-Identifier: GPL-2.0+
  5 + */
  6 +
  7 +.global memcpy
  8 +.align 4
  9 +memcpy:
  10 + or %r3, %r0, %r1
  11 + asl_s %r3, %r3, 30
  12 + mov_s %r5, %r0
  13 + brls.d %r2, %r3, .Lcopy_bytewise
  14 + sub.f %r3, %r2, 1
  15 + ld_s %r12, [%r1, 0]
  16 + asr.f %lp_count, %r3, 3
  17 + bbit0.d %r3, 2, .Lnox4
  18 + bmsk_s %r2, %r2, 1
  19 + st.ab %r12, [%r5, 4]
  20 + ld.a %r12, [%r1, 4]
  21 +.Lnox4:
  22 + lppnz .Lendloop
  23 + ld_s %r3, [%r1, 4]
  24 + st.ab %r12, [%r5, 4]
  25 + ld.a %r12, [%r1, 8]
  26 + st.ab %r3, [%r5, 4]
  27 +.Lendloop:
  28 + breq %r2, 0, .Last_store
  29 + ld %r3, [%r5, 0]
  30 +#ifdef __LITTLE_ENDIAN__
  31 + add3 %r2, -1, %r2
  32 + /* uses long immediate */
  33 + xor_s %r12, %r12, %r3
  34 + bmsk %r12, %r12, %r2
  35 + xor_s %r12, %r12, %r3
  36 +#else /* __BIG_ENDIAN__ */
  37 + sub3 %r2, 31, %r2
  38 + /* uses long immediate */
  39 + xor_s %r3, %r3, %r12
  40 + bmsk %r3, %r3, %r2
  41 + xor_s %r12, %r12, %r3
  42 +#endif /* _ENDIAN__ */
  43 +.Last_store:
  44 + j_s.d [%blink]
  45 + st %r12, [%r5, 0]
  46 +
  47 + .balign 4
  48 +.Lcopy_bytewise:
  49 + jcs [%blink]
  50 + ldb_s %r12, [%r1, 0]
  51 + lsr.f %lp_count, %r3
  52 + bhs_s .Lnox1
  53 + stb.ab %r12, [%r5, 1]
  54 + ldb.a %r12, [%r1, 1]
  55 +.Lnox1:
  56 + lppnz .Lendbloop
  57 + ldb_s %r3, [%r1, 1]
  58 + stb.ab %r12, [%r5, 1]
  59 + ldb.a %r12, [%r1, 2]
  60 + stb.ab %r3, [%r5, 1]
  61 +.Lendbloop:
  62 + j_s.d [%blink]
  63 + stb %r12, [%r5, 0]
arch/arc/lib/memset.S
  1 +/*
  2 + * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
  3 + *
  4 + * SPDX-License-Identifier: GPL-2.0+
  5 + */
  6 +
  7 +#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */
  8 +
  9 +.global memset
  10 +.align 4
  11 +memset:
  12 + mov_s %r4, %r0
  13 + or %r12, %r0, %r2
  14 + bmsk.f %r12, %r12, 1
  15 + extb_s %r1, %r1
  16 + asl %r3, %r1, 8
  17 + beq.d .Laligned
  18 + or_s %r1, %r1, %r3
  19 + brls %r2, SMALL, .Ltiny
  20 + add %r3, %r2, %r0
  21 + stb %r1, [%r3, -1]
  22 + bclr_s %r3, %r3, 0
  23 + stw %r1, [%r3, -2]
  24 + bmsk.f %r12, %r0, 1
  25 + add_s %r2, %r2, %r12
  26 + sub.ne %r2, %r2, 4
  27 + stb.ab %r1, [%r4, 1]
  28 + and %r4, %r4, -2
  29 + stw.ab %r1, [%r4, 2]
  30 + and %r4, %r4, -4
  31 +
  32 + .balign 4
  33 +.Laligned:
  34 + asl %r3, %r1, 16
  35 + lsr.f %lp_count, %r2, 2
  36 + or_s %r1, %r1, %r3
  37 + lpne .Loop_end
  38 + st.ab %r1, [%r4, 4]
  39 +.Loop_end:
  40 + j_s [%blink]
  41 +
  42 + .balign 4
  43 +.Ltiny:
  44 + mov.f %lp_count, %r2
  45 + lpne .Ltiny_end
  46 + stb.ab %r1, [%r4, 1]
  47 +.Ltiny_end:
  48 + j_s [%blink]
  49 +
  50 +/*
  51 + * memzero: @r0 = mem, @r1 = size_t
  52 + * memset: @r0 = mem, @r1 = char, @r2 = size_t
  53 + */
  54 +
  55 +.global memzero
  56 +.align 4
  57 +memzero:
  58 + /* adjust bzero args to memset args */
  59 + mov %r2, %r1
  60 + mov %r1, 0
  61 + /* tail call, so no need to tinker with blink */
  62 + b memset
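The word loop above depends on replicating the fill byte across a 32-bit register (extb_s, asl 8, or_s, then asl 16, or_s) so that one st.ab writes four bytes, while memzero simply remaps its (mem, length) arguments onto memset's (mem, 0, length) and branches. A rough host-side C sketch of that byte-replication idea, with far simpler head/tail handling than the assembly, purely for illustration:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Illustration only: replicate the fill byte the way memset.S does
 * (c | c << 8, then that value | itself << 16) and store whole 32-bit
 * words in the aligned middle part.
 */
static void *toy_memset(void *dst, int c, size_t n)
{
        uint8_t *p = dst;
        uint32_t v = (uint8_t)c;

        v |= v << 8;                            /* extb_s + asl 8 + or_s */
        v |= v << 16;                           /* asl 16 + or_s         */

        while (n && ((uintptr_t)p & 3)) {       /* align the destination */
                *p++ = (uint8_t)c;
                n--;
        }
        for (; n >= 4; n -= 4, p += 4)          /* word stores, like st.ab */
                memcpy(p, &v, 4);
        while (n) {                             /* trailing bytes */
                *p++ = (uint8_t)c;
                n--;
        }
        return dst;
}

int main(void)
{
        unsigned char buf[13];

        toy_memset(buf, 0xAB, sizeof(buf));     /* memzero would pass c = 0 */
        for (size_t i = 0; i < sizeof(buf); i++)
                printf("%02x ", buf[i]);
        putchar('\n');
        return 0;
}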
arch/arc/lib/relocate.c
  1 +/*
  2 + * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
  3 + *
  4 + * SPDX-License-Identifier: GPL-2.0+
  5 + */
  6 +
  7 +#include <common.h>
  8 +#include <elf.h>
  9 +#include <asm/sections.h>
  10 +
  11 +DECLARE_GLOBAL_DATA_PTR;
  12 +
  13 +/*
  14 + * Base functionality is taken from the x86 version, with ARC specifics added
  15 + */
  16 +int do_elf_reloc_fixups(void)
  17 +{
  18 + Elf32_Rela *re_src = (Elf32_Rela *)(&__rel_dyn_start);
  19 + Elf32_Rela *re_end = (Elf32_Rela *)(&__rel_dyn_end);
  20 +
  21 + Elf32_Addr *offset_ptr_rom, *last_offset = NULL;
  22 + Elf32_Addr *offset_ptr_ram;
  23 +
  24 + do {
  25 + /* Get the location from the relocation entry */
  26 + offset_ptr_rom = (Elf32_Addr *)re_src->r_offset;
  27 +
  28 + /* Check that the location of the relocation is in .text */
  29 + if (offset_ptr_rom >= (Elf32_Addr *)CONFIG_SYS_TEXT_BASE &&
  30 + offset_ptr_rom > last_offset) {
  31 + unsigned int val;
  32 + /* Switch to the in-RAM version */
  33 + offset_ptr_ram = (Elf32_Addr *)((ulong)offset_ptr_rom +
  34 + gd->reloc_off);
  35 +
  36 + /*
  37 + * Use "memcpy" because the target location might be only
  38 + * 16-bit aligned on ARC, so we may need to read byte-by-byte;
  39 + * an attempt by the CPU to read an entire word would throw
  40 + * an exception.
  41 + */
  42 + memcpy(&val, offset_ptr_ram, sizeof(int));
  43 +
  44 + /* If the location is in the ".text" section, swap the value */
  45 + if ((unsigned int)offset_ptr_rom <
  46 + (unsigned int)&__text_end)
  47 + val = (val << 16) | (val >> 16);
  48 +
  49 + /* Check that the target points into .text */
  50 + if (val >= CONFIG_SYS_TEXT_BASE && val <=
  51 + (unsigned int)&__bss_end) {
  52 + val += gd->reloc_off;
  53 + /* If the location is in the ".text" section, swap the value */
  54 + if ((unsigned int)offset_ptr_rom <
  55 + (unsigned int)&__text_end)
  56 + val = (val << 16) | (val >> 16);
  57 + memcpy(offset_ptr_ram, &val, sizeof(int));
  58 + } else {
  59 + debug(" %p: rom reloc %x, ram %p, value %x, limit %x\n",
  60 + re_src, re_src->r_offset, offset_ptr_ram,
  61 + val, (unsigned int)&__bss_end);
  62 + }
  63 + } else {
  64 + debug(" %p: rom reloc %x, last %p\n", re_src,
  65 + re_src->r_offset, last_offset);
  66 + }
  67 + last_offset = offset_ptr_rom;
  68 +
  69 + } while (++re_src < re_end);
  70 +
  71 + return 0;
  72 +}
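Two ARC-specific details shape the loop above: relocation targets inside code may be only 16-bit aligned, so values are moved with memcpy rather than plain 32-bit accesses, and 32-bit literals stored in .text keep their two halfwords swapped, so a value is un-swapped, offset, and re-swapped. The stand-alone C sketch below restates just those two steps; the buffer, offsets and values are made up for illustration:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * ARC stores 32-bit literals inside code with their two halfwords
 * swapped ("middle-endian"), so a value patched inside .text must be
 * un-swapped, adjusted, and re-swapped.
 */
static uint32_t swap_halfwords(uint32_t v)
{
        return (v << 16) | (v >> 16);
}

int main(void)
{
        /* A buffer standing in for .text; the target is only 2-byte aligned. */
        uint8_t text[8] = { 0 };
        uint8_t *target = text + 2;
        uint32_t reloc_off = 0x10000000u;       /* made-up relocation offset */
        uint32_t val;

        /* Pretend the compiler emitted the literal 0x81000000 at "target". */
        val = swap_halfwords(0x81000000u);
        memcpy(target, &val, sizeof(val));

        /* What do_elf_reloc_fixups() does for a .text location: */
        memcpy(&val, target, sizeof(val));      /* memcpy: may be unaligned */
        val = swap_halfwords(val);              /* decode middle-endian     */
        val += reloc_off;                       /* apply the relocation     */
        val = swap_halfwords(val);              /* re-encode                */
        memcpy(target, &val, sizeof(val));

        printf("patched literal: 0x%08x (stored as %02x %02x %02x %02x)\n",
               swap_halfwords(val), target[0], target[1], target[2], target[3]);
        return 0;
}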
arch/arc/lib/sections.c
  1 +/*
  2 + * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
  3 + *
  4 + * SPDX-License-Identifier: GPL-2.0+
  5 + */
  6 +
  7 +/*
  8 + * For some reason the linker sets linker-generated symbols to zero in PIE
  9 + * mode. A work-around is to substitute the linker-generated symbols with
  10 + * compiler-generated ones, which the linker handles properly in PIE mode.
  11 + */
  12 +
  13 +char __bss_start[0] __attribute__((section(".__bss_start")));
  14 +char __bss_end[0] __attribute__((section(".__bss_end")));
  15 +char __image_copy_start[0] __attribute__((section(".__image_copy_start")));
  16 +char __image_copy_end[0] __attribute__((section(".__image_copy_end")));
  17 +char __rel_dyn_start[0] __attribute__((section(".__rel_dyn_start")));
  18 +char __rel_dyn_end[0] __attribute__((section(".__rel_dyn_end")));
  19 +char __text_start[0] __attribute__((section(".__text_start")));
  20 +char __text_end[0] __attribute__((section(".__text_end")));
  21 +char __init_end[0] __attribute__((section(".__init_end")));
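These zero-length arrays exist only for their addresses: each one lands in its own output section so the linker script can place it, and C code such as relocate.c above then uses the symbols (e.g. &__rel_dyn_start and &__rel_dyn_end) purely as boundaries. The same trick compiles as ordinary GNU C on a host; the symbol name below is made up for illustration:

#include <stdio.h>

/*
 * Same trick as sections.c (zero-length arrays are a GNU extension):
 * the array contributes no data, but its address marks a location that
 * the linker script can position.
 */
char demo_section_start[0] __attribute__((section(".__demo_section_start")));

int main(void)
{
        printf("demo_section_start is at %p\n", (void *)demo_section_start);
        return 0;
}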
arch/arc/lib/strchr-700.S
  1 +/*
  2 + * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
  3 + *
  4 + * SPDX-License-Identifier: GPL-2.0+
  5 + */
  6 +
  7 +/*
  8 + * ARC700 has a relatively long pipeline and branch prediction, so we want
  9 + * to avoid branches that are hard to predict. On the other hand, the
  10 + * presence of the norm instruction makes it easier to operate on whole
  11 + * words branch-free.
  12 + */
  13 +
  14 +.global strchr
  15 +.align 4
  16 +strchr:
  17 + extb_s %r1, %r1
  18 + asl %r5, %r1, 8
  19 + bmsk %r2, %r0, 1
  20 + or %r5, %r5, %r1
  21 + mov_s %r3, 0x01010101
  22 + breq.d %r2, %r0, .Laligned
  23 + asl %r4, %r5, 16
  24 + sub_s %r0, %r0, %r2
  25 + asl %r7, %r2, 3
  26 + ld_s %r2, [%r0]
  27 +#ifdef __LITTLE_ENDIAN__
  28 + asl %r7, %r3, %r7
  29 +#else /* __BIG_ENDIAN__ */
  30 + lsr %r7, %r3, %r7
  31 +#endif /* _ENDIAN__ */
  32 + or %r5, %r5, %r4
  33 + ror %r4, %r3
  34 + sub %r12, %r2, %r7
  35 + bic_s %r12, %r12, %r2
  36 + and %r12, %r12, %r4
  37 + brne.d %r12, 0, .Lfound0_ua
  38 + xor %r6, %r2, %r5
  39 + ld.a %r2, [%r0, 4]
  40 + sub %r12, %r6, %r7
  41 + bic %r12, %r12, %r6
  42 +#ifdef __LITTLE_ENDIAN__
  43 + and %r7, %r12, %r4
  44 + /* For speed, we want this branch to be unaligned. */
  45 + breq %r7, 0, .Loop
  46 + /* Likewise this one */
  47 + b .Lfound_char
  48 +#else /* __BIG_ENDIAN__ */
  49 + and %r12, %r12, %r4
  50 + /* For speed, we want this branch to be unaligned. */
  51 + breq %r12, 0, .Loop
  52 + lsr_s %r12, %r12, 7
  53 + bic %r2, %r7, %r6
  54 + b.d .Lfound_char_b
  55 + and_s %r2, %r2, %r12
  56 +#endif /* _ENDIAN__ */
  57 + /* We require this code address to be unaligned for speed... */
  58 +.Laligned:
  59 + ld_s %r2, [%r0]
  60 + or %r5, %r5, %r4
  61 + ror %r4, %r3
  62 + /* ... so that this code address is aligned, for itself and ... */
  63 +.Loop:
  64 + sub %r12, %r2, %r3
  65 + bic_s %r12, %r12, %r2
  66 + and %r12, %r12, %r4
  67 + brne.d %r12, 0, .Lfound0
  68 + xor %r6, %r2, %r5
  69 + ld.a %r2, [%r0, 4]
  70 + sub %r12, %r6, %r3
  71 + bic %r12, %r12, %r6
  72 + and %r7, %r12, %r4
  73 + breq %r7, 0, .Loop
  74 + /*
  75 + *... so that this branch is unaligned.
  76 + * Found searched-for character.
  77 + * r0 has already advanced to next word.
  78 + */
  79 +#ifdef __LITTLE_ENDIAN__
  80 + /*
  81 + * We only need the information about the first matching byte
  82 + * (i.e. the least significant matching byte) to be exact,
  83 + * hence there is no problem with carry effects.
  84 + */
  85 +.Lfound_char:
  86 + sub %r3, %r7, 1
  87 + bic %r3, %r3, %r7
  88 + norm %r2, %r3
  89 + sub_s %r0, %r0, 1
  90 + asr_s %r2, %r2, 3
  91 + j.d [%blink]
  92 + sub_s %r0, %r0, %r2
  93 +
  94 + .balign 4
  95 +.Lfound0_ua:
  96 + mov %r3, %r7
  97 +.Lfound0:
  98 + sub %r3, %r6, %r3
  99 + bic %r3, %r3, %r6
  100 + and %r2, %r3, %r4
  101 + or_s %r12, %r12, %r2
  102 + sub_s %r3, %r12, 1
  103 + bic_s %r3, %r3, %r12
  104 + norm %r3, %r3
  105 + add_s %r0, %r0, 3
  106 + asr_s %r12, %r3, 3
  107 + asl.f 0, %r2, %r3
  108 + sub_s %r0, %r0, %r12
  109 + j_s.d [%blink]
  110 + mov.pl %r0, 0
  111 +#else /* __BIG_ENDIAN__ */
  112 +.Lfound_char:
  113 + lsr %r7, %r7, 7
  114 +
  115 + bic %r2, %r7, %r6
  116 +.Lfound_char_b:
  117 + norm %r2, %r2
  118 + sub_s %r0, %r0, 4
  119 + asr_s %r2, %r2, 3
  120 + j.d [%blink]
  121 + add_s %r0, %r0, %r2
  122 +
  123 +.Lfound0_ua:
  124 + mov_s %r3, %r7
  125 +.Lfound0:
  126 + asl_s %r2, %r2, 7
  127 + or %r7, %r6, %r4
  128 + bic_s %r12, %r12, %r2
  129 + sub %r2, %r7, %r3
  130 + or %r2, %r2, %r6
  131 + bic %r12, %r2, %r12
  132 + bic.f %r3, %r4, %r12
  133 + norm %r3, %r3
  134 +
  135 + add.pl %r3, %r3, 1
  136 + asr_s %r12, %r3, 3
  137 + asl.f 0, %r2, %r3
  138 + add_s %r0, %r0, %r12
  139 + j_s.d [%blink]
  140 + mov.mi %r0, 0
  141 +#endif /* _ENDIAN__ */
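The word loop above is built on a standard bit trick: with %r3 = 0x01010101 and %r4 = ror(%r3) = 0x80808080, the sub/bic/and sequence computes (x - 0x01010101) & ~x & 0x80808080, which is non-zero exactly when some byte of x is zero, and XOR-ing x with the broadcast character first turns the same test into "does x contain this character". A host-C restatement of just that test (the endian-specific byte-index recovery that the assembly does with norm is not reproduced here):

#include <stdint.h>
#include <stdio.h>

#define ONES  0x01010101u
#define HIGHS 0x80808080u       /* ONES rotated right by one, as "ror %r4, %r3" yields */

/*
 * Non-zero iff some byte of x is 0x00.  Bits above the first zero byte
 * may be set spuriously by borrow propagation (see the comment in
 * strcmp.S), but the zero/non-zero answer is always correct.
 */
static uint32_t has_zero_byte(uint32_t x)
{
        return (x - ONES) & ~x & HIGHS;
}

/* Non-zero iff some byte of x equals c. */
static uint32_t has_byte(uint32_t x, uint8_t c)
{
        return has_zero_byte(x ^ (c * ONES));   /* broadcast c, then zero-test */
}

int main(void)
{
        printf("%08x\n", has_zero_byte(0x41004242u));   /* non-zero: a zero byte  */
        printf("%08x\n", has_byte(0x41424344u, 0x43));  /* non-zero: 0x43 present */
        printf("%08x\n", has_byte(0x41424344u, 0x5a));  /* zero: 0x5a not present */
        return 0;
}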
arch/arc/lib/strcmp.S
  1 +/*
  2 + * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
  3 + *
  4 + * SPDX-License-Identifier: GPL-2.0+
  5 + */
  6 +
  7 +/*
  8 + * This is optimized primarily for the ARC700.
  9 + * It would be possible to speed up the loops by one cycle / word
  10 + * (one cycle / byte, respectively) by forcing double source 1 alignment, unrolling
  11 + * by a factor of two, and speculatively loading the second word / byte of
  12 + * source 1; however, that would increase the overhead for loop setup / finish,
  13 + * and strcmp might often terminate early.
  14 + */
  15 +
  16 +.global strcmp
  17 +.align 4
  18 +strcmp:
  19 + or %r2, %r0, %r1
  20 + bmsk_s %r2, %r2, 1
  21 + brne %r2, 0, .Lcharloop
  22 + mov_s %r12, 0x01010101
  23 + ror %r5, %r12
  24 +.Lwordloop:
  25 + ld.ab %r2, [%r0, 4]
  26 + ld.ab %r3, [%r1, 4]
  27 + nop_s
  28 + sub %r4, %r2, %r12
  29 + bic %r4, %r4, %r2
  30 + and %r4, %r4, %r5
  31 + brne %r4, 0, .Lfound0
  32 + breq %r2, %r3, .Lwordloop
  33 +#ifdef __LITTLE_ENDIAN__
  34 + xor %r0, %r2, %r3 /* mask for difference */
  35 + sub_s %r1, %r0, 1
  36 + bic_s %r0, %r0, %r1 /* mask for least significant difference bit */
  37 + sub %r1, %r5, %r0
  38 + xor %r0, %r5, %r1 /* mask for least significant difference byte */
  39 + and_s %r2, %r2, %r0
  40 + and_s %r3, %r3, %r0
  41 +#endif /* _ENDIAN__ */
  42 + cmp_s %r2, %r3
  43 + mov_s %r0, 1
  44 + j_s.d [%blink]
  45 + bset.lo %r0, %r0, 31
  46 +
  47 + .balign 4
  48 +#ifdef __LITTLE_ENDIAN__
  49 +.Lfound0:
  50 + xor %r0, %r2, %r3 /* mask for difference */
  51 + or %r0, %r0, %r4 /* or in zero indicator */
  52 + sub_s %r1, %r0, 1
  53 + bic_s %r0, %r0, %r1 /* mask for least significant difference bit */
  54 + sub %r1, %r5, %r0
  55 + xor %r0, %r5, %r1 /* mask for least significant difference byte */
  56 + and_s %r2, %r2, %r0
  57 + and_s %r3, %r3, %r0
  58 + sub.f %r0, %r2, %r3
  59 + mov.hi %r0, 1
  60 + j_s.d [%blink]
  61 + bset.lo %r0, %r0, 31
  62 +#else /* __BIG_ENDIAN__ */
  63 + /*
  64 + * The zero-detection above can mis-detect 0x01 bytes as zeroes
  65 + * because of carry propagation from a less significant zero byte.
  66 + * We can compensate for this by checking that bit0 is zero.
  67 + * This compensation is not necessary in the step where we
  68 + * get a low estimate for r2, because in any affected bytes
  69 + * we already have 0x00 or 0x01, which will remain unchanged
  70 + * when bit 7 is cleared.
  71 + */
  72 + .balign 4
  73 +.Lfound0:
  74 + lsr %r0, %r4, 8
  75 + lsr_s %r1, %r2
  76 + bic_s %r2, %r2, %r0 /* get low estimate for r2 and get ... */
  77 + bic_s %r0, %r0, %r1 /* <this is the adjusted mask for zeros> */
  78 + or_s %r3, %r3, %r0 /* ... high estimate r3 so that r2 > r3 will */
  79 + cmp_s %r3, %r2 /* ... be independent of trailing garbage */
  80 + or_s %r2, %r2, %r0 /* likewise for r3 > r2 */
  81 + bic_s %r3, %r3, %r0
  82 + rlc %r0, 0 /* r0 := r2 > r3 ? 1 : 0 */
  83 + cmp_s %r2, %r3
  84 + j_s.d [%blink]
  85 + bset.lo %r0, %r0, 31
  86 +#endif /* _ENDIAN__ */
  87 +
  88 + .balign 4
  89 +.Lcharloop:
  90 + ldb.ab %r2,[%r0,1]
  91 + ldb.ab %r3,[%r1,1]
  92 + nop_s
  93 + breq %r2, 0, .Lcmpend
  94 + breq %r2, %r3, .Lcharloop
  95 +.Lcmpend:
  96 + j_s.d [%blink]
  97 + sub %r0, %r2, %r3
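Structurally, the routine above falls back to the simple .Lcharloop when either pointer is unaligned; otherwise it compares a word per iteration and resolves the final word with bit tricks once a zero byte or a difference shows up, with the big-endian epilogue additionally compensating for the 0x01 mis-detection described in its comment. The scalar C reference below mirrors only that overall control flow, not the bit tricks, and pads its test buffers so the word reads stay in bounds; it is illustrative, not the commit's algorithm:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Reference-only restatement of the control flow of strcmp.S: compare a
 * word at a time while both pointers are 4-byte aligned and the word
 * contains neither a difference nor a zero byte, then finish bytewise.
 */
static int ref_strcmp(const char *a, const char *b)
{
        if ((((uintptr_t)a | (uintptr_t)b) & 3) == 0) {
                for (;;) {
                        uint32_t wa, wb;

                        memcpy(&wa, a, 4);
                        memcpy(&wb, b, 4);
                        if (wa != wb)
                                break;
                        if ((wa - 0x01010101u) & ~wa & 0x80808080u)
                                break;          /* a zero byte: strings end here */
                        a += 4;
                        b += 4;
                }
        }
        for (; *a && *a == *b; a++, b++)
                ;
        return (unsigned char)*a - (unsigned char)*b;
}

int main(void)
{
        /* Buffers padded to a multiple of 4 so the word reads stay in bounds. */
        char a[12] = "bootargs", b[12] = "bootargs", c[12] = "bootcmd";

        printf("%d\n", ref_strcmp(a, b));       /* 0                  */
        printf("%d\n", ref_strcmp(c, a) > 0);   /* 1, since 'c' > 'a' */
        return 0;
}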
arch/arc/lib/strcpy-700.S
  1 +/*
  2 + * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
  3 + *
  4 + * SPDX-License-Identifier: GPL-2.0+
  5 + */
  6 +
  7 +/*
  8 + * If dst and src are 4 byte aligned, copy 8 bytes at a time.
  9 + * If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
  10 + * it 8 byte aligned. Thus, we can do a little read-ahead, without
  11 + * dereferencing a cache line that we should not touch.
  12 + * Note that short and long instructions have been scheduled to avoid
  13 + * branch stalls.
  14 + * The beq_s to r3z could be made unaligned & long to avoid a stall
  15 + * there, but it is not likely to be taken often, and it would also be likely
  16 + * to cost an unaligned mispredict at the next call.
  17 + */
  18 +
  19 +.global strcpy
  20 +.align 4
  21 +strcpy:
  22 + or %r2, %r0, %r1
  23 + bmsk_s %r2, %r2, 1
  24 + brne.d %r2, 0, charloop
  25 + mov_s %r10, %r0
  26 + ld_s %r3, [%r1, 0]
  27 + mov %r8, 0x01010101
  28 + bbit0.d %r1, 2, loop_start
  29 + ror %r12, %r8
  30 + sub %r2, %r3, %r8
  31 + bic_s %r2, %r2, %r3
  32 + tst_s %r2,%r12
  33 + bne r3z
  34 + mov_s %r4,%r3
  35 + .balign 4
  36 +loop:
  37 + ld.a %r3, [%r1, 4]
  38 + st.ab %r4, [%r10, 4]
  39 +loop_start:
  40 + ld.a %r4, [%r1, 4]
  41 + sub %r2, %r3, %r8
  42 + bic_s %r2, %r2, %r3
  43 + tst_s %r2, %r12
  44 + bne_s r3z
  45 + st.ab %r3, [%r10, 4]
  46 + sub %r2, %r4, %r8
  47 + bic %r2, %r2, %r4
  48 + tst %r2, %r12
  49 + beq loop
  50 + mov_s %r3, %r4
  51 +#ifdef __LITTLE_ENDIAN__
  52 +r3z: bmsk.f %r1, %r3, 7
  53 + lsr_s %r3, %r3, 8
  54 +#else /* __BIG_ENDIAN__ */
  55 +r3z: lsr.f %r1, %r3, 24
  56 + asl_s %r3, %r3, 8
  57 +#endif /* _ENDIAN__ */
  58 + bne.d r3z
  59 + stb.ab %r1, [%r10, 1]
  60 + j_s [%blink]
  61 +
  62 + .balign 4
  63 +charloop:
  64 + ldb.ab %r3, [%r1, 1]
  65 + brne.d %r3, 0, charloop
  66 + stb.ab %r3, [%r10, 1]
  67 + j [%blink]
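The little-endian tail at label r3z above drains the word that is known to contain the terminating NUL: extract the low byte (bmsk.f), shift the word right by 8 (lsr_s), store the byte (stb.ab in the branch delay slot, so the NUL itself is still written), and stop once the stored byte was zero. A small host-C sketch of just that tail, with a made-up input word, for illustration:

#include <stdint.h>
#include <stdio.h>

/* Sketch of the little-endian "r3z" tail of strcpy-700.S. */
static char *store_last_word(char *dst, uint32_t w)
{
        uint8_t byte;

        do {
                byte = w & 0xff;        /* bmsk.f %r1, %r3, 7    */
                w >>= 8;                /* lsr_s  %r3, %r3, 8    */
                *dst++ = (char)byte;    /* stb.ab %r1, [%r10, 1] */
        } while (byte);                 /* bne.d  r3z            */
        return dst;
}

int main(void)
{
        char buf[8] = { 0 };
        /* Little-endian word holding 'o', 'k', '\0' and one junk byte. */
        uint32_t w = 0x58006b6fu;

        store_last_word(buf, w);
        printf("copied \"%s\"\n", buf);
        return 0;
}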
arch/arc/lib/strlen.S
  1 +/*
  2 + * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
  3 + *
  4 + * SPDX-License-Identifier: GPL-2.0+
  5 + */
  6 +
  7 +.global strlen
  8 +.align 4
  9 +strlen:
  10 + or %r3, %r0, 7
  11 + ld %r2, [%r3, -7]
  12 + ld.a %r6, [%r3, -3]
  13 + mov %r4, 0x01010101
  14 + /* uses long immediate */
  15 +#ifdef __LITTLE_ENDIAN__
  16 + asl_s %r1, %r0, 3
  17 + btst_s %r0, 2
  18 + asl %r7, %r4, %r1
  19 + ror %r5, %r4
  20 + sub %r1, %r2, %r7
  21 + bic_s %r1, %r1, %r2
  22 + mov.eq %r7, %r4
  23 + sub %r12, %r6, %r7
  24 + bic %r12, %r12, %r6
  25 + or.eq %r12, %r12, %r1
  26 + and %r12, %r12, %r5
  27 + brne %r12, 0, .Learly_end
  28 +#else /* __BIG_ENDIAN__ */
  29 + ror %r5, %r4
  30 + btst_s %r0, 2
  31 + mov_s %r1, 31
  32 + sub3 %r7, %r1, %r0
  33 + sub %r1, %r2, %r4
  34 + bic_s %r1, %r1, %r2
  35 + bmsk %r1, %r1, %r7
  36 + sub %r12, %r6, %r4
  37 + bic %r12, %r12, %r6
  38 + bmsk.ne %r12, %r12, %r7
  39 + or.eq %r12, %r12, %r1
  40 + and %r12, %r12, %r5
  41 + brne %r12, 0, .Learly_end
  42 +#endif /* _ENDIAN__ */
  43 +
  44 +.Loop:
  45 + ld_s %r2, [%r3, 4]
  46 + ld.a %r6, [%r3, 8]
  47 + /* stall for load result */
  48 + sub %r1, %r2, %r4
  49 + bic_s %r1, %r1, %r2
  50 + sub %r12, %r6, %r4
  51 + bic %r12, %r12, %r6
  52 + or %r12, %r12, %r1
  53 + and %r12, %r12, %r5
  54 + breq %r12, 0, .Loop
  55 +.Lend:
  56 + and.f %r1, %r1, %r5
  57 + sub.ne %r3, %r3, 4
  58 + mov.eq %r1, %r12
  59 +#ifdef __LITTLE_ENDIAN__
  60 + sub_s %r2, %r1, 1
  61 + bic_s %r2, %r2, %r1
  62 + norm %r1, %r2
  63 + sub_s %r0, %r0, 3
  64 + lsr_s %r1, %r1, 3
  65 + sub %r0, %r3, %r0
  66 + j_s.d [%blink]
  67 + sub %r0, %r0, %r1
  68 +#else /* __BIG_ENDIAN__ */
  69 + lsr_s %r1, %r1, 7
  70 + mov.eq %r2, %r6
  71 + bic_s %r1, %r1, %r2
  72 + norm %r1, %r1
  73 + sub %r0, %r3, %r0
  74 + lsr_s %r1, %r1, 3
  75 + j_s.d [%blink]
  76 + add %r0, %r0, %r1
  77 +#endif /* _ENDIAN */
  78 +.Learly_end:
  79 + b.d .Lend
  80 + sub_s.ne %r1, %r1, %r1