Commit 2272382879d93d37e7964554cea5b0583c94c247
Committed by: Tom Rini
Parent: 2f16ac9df4
Exists in: master and 49 other branches

arc: add library functions

These are library functions used by the ARC700 architecture.

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Francois Bedard <fbedard@synopsys.com>
Cc: Wolfgang Denk <wd@denx.de>
Cc: Heiko Schocher <hs@denx.de>

Showing 11 changed files with 846 additions and 0 deletions

arch/arc/lib/Makefile
#
# Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
#
# SPDX-License-Identifier: GPL-2.0+
#

obj-y += sections.o
obj-y += relocate.o
obj-y += strchr-700.o
obj-y += strcmp.o
obj-y += strcpy-700.o
obj-y += strlen.o
obj-y += memcmp.o
obj-y += memcpy-700.o
obj-y += memset.o
obj-$(CONFIG_CMD_BOOTM) += bootm.o
arch/arc/lib/bootm.c
/*
 * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

#include <common.h>

DECLARE_GLOBAL_DATA_PTR;

static ulong get_sp(void)
{
        ulong ret;

        asm("mov %0, sp" : "=r"(ret) : );
        return ret;
}

void arch_lmb_reserve(struct lmb *lmb)
{
        ulong sp;

        /*
         * Booting a (Linux) kernel image
         *
         * Allocate space for command line and board info - the
         * address should be as high as possible within the reach of
         * the kernel (see CONFIG_SYS_BOOTMAPSZ settings), but in unused
         * memory, which means far enough below the current stack
         * pointer.
         */
        sp = get_sp();
        debug("## Current stack ends at 0x%08lx ", sp);

        /* adjust sp by 4K to be safe */
        sp -= 4096;
        lmb_reserve(lmb, sp, (CONFIG_SYS_SDRAM_BASE + gd->ram_size - sp));
}

static int cleanup_before_linux(void)
{
        disable_interrupts();
        flush_dcache_all();
        invalidate_icache_all();

        return 0;
}

/* Subcommand: PREP */
static void boot_prep_linux(bootm_headers_t *images)
{
        if (image_setup_linux(images))
                hang();
}

/* Subcommand: GO */
static void boot_jump_linux(bootm_headers_t *images, int flag)
{
        void (*kernel_entry)(int zero, int arch, uint params);
        unsigned int r0, r2;
        int fake = (flag & BOOTM_STATE_OS_FAKE_GO);

        kernel_entry = (void (*)(int, int, uint))images->ep;

        debug("## Transferring control to Linux (at address %08lx)...\n",
              (ulong) kernel_entry);
        bootstage_mark(BOOTSTAGE_ID_RUN_OS);

        printf("\nStarting kernel ...%s\n\n", fake ?
               "(fake run for tracing)" : "");
        bootstage_mark_name(BOOTSTAGE_ID_BOOTM_HANDOFF, "start_kernel");

        cleanup_before_linux();

        if (IMAGE_ENABLE_OF_LIBFDT && images->ft_len) {
                r0 = 2;
                r2 = (unsigned int)images->ft_addr;
        } else {
                r0 = 1;
                r2 = (unsigned int)getenv("bootargs");
        }

        if (!fake)
                kernel_entry(r0, 0, r2);
}

int do_bootm_linux(int flag, int argc, char *argv[], bootm_headers_t *images)
{
        /* No need for those on ARC */
        if ((flag & BOOTM_STATE_OS_BD_T) || (flag & BOOTM_STATE_OS_CMDLINE))
                return -1;

        if (flag & BOOTM_STATE_OS_PREP) {
                boot_prep_linux(images);
                return 0;
        }

        if (flag & (BOOTM_STATE_OS_GO | BOOTM_STATE_OS_FAKE_GO)) {
                boot_jump_linux(images, flag);
                return 0;
        }

        boot_prep_linux(images);
        boot_jump_linux(images, flag);
        return 0;
}
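
The reservation arithmetic in arch_lmb_reserve() is easiest to see with concrete numbers. The following standalone C sketch is an illustration only, not U-Boot code; the base address, RAM size, and stack pointer are made-up stand-ins for CONFIG_SYS_SDRAM_BASE, gd->ram_size, and get_sp():

#include <stdio.h>

int main(void)
{
        unsigned long sdram_base = 0x80000000UL; /* stand-in for CONFIG_SYS_SDRAM_BASE */
        unsigned long ram_size   = 0x08000000UL; /* stand-in for gd->ram_size (128 MiB) */
        unsigned long sp         = 0x87ff0000UL; /* pretend current stack pointer */

        sp -= 4096; /* same 4 KiB safety margin as arch_lmb_reserve() */
        printf("reserve [0x%08lx, 0x%08lx): %lu bytes\n",
               sp, sdram_base + ram_size, sdram_base + ram_size - sp);
        return 0;
}

Everything from just below the stack up to the end of RAM is reserved, which keeps lmb allocations such as the command line and device tree clear of U-Boot's own stack.
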
arch/arc/lib/memcmp.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

#ifdef __LITTLE_ENDIAN__
#define WORD2 r2
#define SHIFT r3
#else /* __BIG_ENDIAN__ */
#define WORD2 r3
#define SHIFT r2
#endif /* _ENDIAN__ */

.global memcmp
.align 4
memcmp:
        or %r12, %r0, %r1
        asl_s %r12, %r12, 30
        sub %r3, %r2, 1
        brls %r2, %r12, .Lbytewise
        ld %r4, [%r0, 0]
        ld %r5, [%r1, 0]
        lsr.f %lp_count, %r3, 3
        lpne .Loop_end
        ld_s WORD2, [%r0, 4]
        ld_s %r12, [%r1, 4]
        brne %r4, %r5, .Leven
        ld.a %r4, [%r0, 8]
        ld.a %r5, [%r1, 8]
        brne WORD2, %r12, .Lodd
.Loop_end:
        asl_s SHIFT, SHIFT, 3
        bhs_s .Last_cmp
        brne %r4, %r5, .Leven
        ld %r4, [%r0, 4]
        ld %r5, [%r1, 4]
#ifdef __LITTLE_ENDIAN__
        nop_s
        /* one more load latency cycle */
.Last_cmp:
        xor %r0, %r4, %r5
        bset %r0, %r0, SHIFT
        sub_s %r1, %r0, 1
        bic_s %r1, %r1, %r0
        norm %r1, %r1
        b.d .Leven_cmp
        and %r1, %r1, 24
.Leven:
        xor %r0, %r4, %r5
        sub_s %r1, %r0, 1
        bic_s %r1, %r1, %r0
        norm %r1, %r1
        /* slow track insn */
        and %r1, %r1, 24
.Leven_cmp:
        asl %r2, %r4, %r1
        asl %r12, %r5, %r1
        lsr_s %r2, %r2, 1
        lsr_s %r12, %r12, 1
        j_s.d [%blink]
        sub %r0, %r2, %r12
        .balign 4
.Lodd:
        xor %r0, WORD2, %r12
        sub_s %r1, %r0, 1
        bic_s %r1, %r1, %r0
        norm %r1, %r1
        /* slow track insn */
        and %r1, %r1, 24
        asl_s %r2, %r2, %r1
        asl_s %r12, %r12, %r1
        lsr_s %r2, %r2, 1
        lsr_s %r12, %r12, 1
        j_s.d [%blink]
        sub %r0, %r2, %r12
#else /* __BIG_ENDIAN__ */
.Last_cmp:
        neg_s SHIFT, SHIFT
        lsr %r4, %r4, SHIFT
        lsr %r5, %r5, SHIFT
        /* slow track insn */
.Leven:
        sub.f %r0, %r4, %r5
        mov.ne %r0, 1
        j_s.d [%blink]
        bset.cs %r0, %r0, 31
.Lodd:
        cmp_s WORD2, %r12

        mov_s %r0, 1
        j_s.d [%blink]
        bset.cs %r0, %r0, 31
#endif /* _ENDIAN__ */
        .balign 4
.Lbytewise:
        breq %r2, 0, .Lnil
        ldb %r4, [%r0, 0]
        ldb %r5, [%r1, 0]
        lsr.f %lp_count, %r3
        lpne .Lbyte_end
        ldb_s %r3, [%r0, 1]
        ldb %r12, [%r1, 1]
        brne %r4, %r5, .Lbyte_even
        ldb.a %r4, [%r0, 2]
        ldb.a %r5, [%r1, 2]
        brne %r3, %r12, .Lbyte_odd
.Lbyte_end:
        bcc .Lbyte_even
        brne %r4, %r5, .Lbyte_even
        ldb_s %r3, [%r0, 1]
        ldb_s %r12, [%r1, 1]
.Lbyte_odd:
        j_s.d [%blink]
        sub %r0, %r3, %r12
.Lbyte_even:
        j_s.d [%blink]
        sub %r0, %r4, %r5
.Lnil:
        j_s.d [%blink]
        mov %r0, 0
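
For readers not fluent in ARC assembly: the contract implemented above is simply the bytewise comparison below. The assembly reaches the same answer faster by comparing 32-bit words and, on a mismatch, using norm to locate the first differing byte. This C version is a reference sketch only, not part of the commit:

#include <stddef.h>

int memcmp_ref(const void *s1, const void *s2, size_t n)
{
        const unsigned char *a = s1;
        const unsigned char *b = s2;

        for (; n; n--, a++, b++) {
                if (*a != *b)
                        return *a - *b; /* sign of first differing byte pair */
        }
        return 0;
}
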
arch/arc/lib/memcpy-700.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

.global memcpy
.align 4
memcpy:
        or %r3, %r0, %r1
        asl_s %r3, %r3, 30
        mov_s %r5, %r0
        brls.d %r2, %r3, .Lcopy_bytewise
        sub.f %r3, %r2, 1
        ld_s %r12, [%r1, 0]
        asr.f %lp_count, %r3, 3
        bbit0.d %r3, 2, .Lnox4
        bmsk_s %r2, %r2, 1
        st.ab %r12, [%r5, 4]
        ld.a %r12, [%r1, 4]
.Lnox4:
        lppnz .Lendloop
        ld_s %r3, [%r1, 4]
        st.ab %r12, [%r5, 4]
        ld.a %r12, [%r1, 8]
        st.ab %r3, [%r5, 4]
.Lendloop:
        breq %r2, 0, .Last_store
        ld %r3, [%r5, 0]
#ifdef __LITTLE_ENDIAN__
        add3 %r2, -1, %r2
        /* uses long immediate */
        xor_s %r12, %r12, %r3
        bmsk %r12, %r12, %r2
        xor_s %r12, %r12, %r3
#else /* __BIG_ENDIAN__ */
        sub3 %r2, 31, %r2
        /* uses long immediate */
        xor_s %r3, %r3, %r12
        bmsk %r3, %r3, %r2
        xor_s %r12, %r12, %r3
#endif /* _ENDIAN__ */
.Last_store:
        j_s.d [%blink]
        st %r12, [%r5, 0]

        .balign 4
.Lcopy_bytewise:
        jcs [%blink]
        ldb_s %r12, [%r1, 0]
        lsr.f %lp_count, %r3
        bhs_s .Lnox1
        stb.ab %r12, [%r5, 1]
        ldb.a %r12, [%r1, 1]
.Lnox1:
        lppnz .Lendbloop
        ldb_s %r3, [%r1, 1]
        stb.ab %r12, [%r5, 1]
        ldb.a %r12, [%r1, 2]
        stb.ab %r3, [%r5, 1]
.Lendbloop:
        j_s.d [%blink]
        stb %r12, [%r5, 0]
arch/arc/lib/memset.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */

.global memset
.align 4
memset:
        mov_s %r4, %r0
        or %r12, %r0, %r2
        bmsk.f %r12, %r12, 1
        extb_s %r1, %r1
        asl %r3, %r1, 8
        beq.d .Laligned
        or_s %r1, %r1, %r3
        brls %r2, SMALL, .Ltiny
        add %r3, %r2, %r0
        stb %r1, [%r3, -1]
        bclr_s %r3, %r3, 0
        stw %r1, [%r3, -2]
        bmsk.f %r12, %r0, 1
        add_s %r2, %r2, %r12
        sub.ne %r2, %r2, 4
        stb.ab %r1, [%r4, 1]
        and %r4, %r4, -2
        stw.ab %r1, [%r4, 2]
        and %r4, %r4, -4

        .balign 4
.Laligned:
        asl %r3, %r1, 16
        lsr.f %lp_count, %r2, 2
        or_s %r1, %r1, %r3
        lpne .Loop_end
        st.ab %r1, [%r4, 4]
.Loop_end:
        j_s [%blink]

        .balign 4
.Ltiny:
        mov.f %lp_count, %r2
        lpne .Ltiny_end
        stb.ab %r1, [%r4, 1]
.Ltiny_end:
        j_s [%blink]

/*
 * memzero: @r0 = mem, @r1 = size_t
 * memset: @r0 = mem, @r1 = char, @r2 = size_t
 */

.global memzero
.align 4
memzero:
        /* adjust bzero args to memset args */
        mov %r2, %r1
        mov %r1, 0
        /* tail call, so no need to tinker with blink */
        b memset
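
The memzero entry point is just an argument shuffle followed by a tail call. A hedged C equivalent (illustration only; the real routine stays in assembly so the registers line up without a stack frame):

#include <string.h>

void memzero_ref(void *mem, size_t size)
{
        memset(mem, 0, size); /* (mem, size) becomes (mem, 0, size) */
}
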
arch/arc/lib/relocate.c
/*
 * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

#include <common.h>
#include <elf.h>
#include <asm/sections.h>

DECLARE_GLOBAL_DATA_PTR;

/*
 * Base functionality is taken from the x86 version, with ARC-specifics added
 */
int do_elf_reloc_fixups(void)
{
        Elf32_Rela *re_src = (Elf32_Rela *)(&__rel_dyn_start);
        Elf32_Rela *re_end = (Elf32_Rela *)(&__rel_dyn_end);

        Elf32_Addr *offset_ptr_rom, *last_offset = NULL;
        Elf32_Addr *offset_ptr_ram;

        do {
                /* Get the location from the relocation entry */
                offset_ptr_rom = (Elf32_Addr *)re_src->r_offset;

                /* Check that the location of the relocation is in .text */
                if (offset_ptr_rom >= (Elf32_Addr *)CONFIG_SYS_TEXT_BASE &&
                    offset_ptr_rom > last_offset) {
                        unsigned int val;
                        /* Switch to the in-RAM version */
                        offset_ptr_ram = (Elf32_Addr *)((ulong)offset_ptr_rom +
                                                        gd->reloc_off);

                        /*
                         * Use memcpy() because the target location might be
                         * only 16-bit aligned on ARC, so we may need to copy
                         * byte-by-byte: an attempt to read an entire word at
                         * once would make the CPU throw an exception.
                         */
                        memcpy(&val, offset_ptr_ram, sizeof(int));

                        /* If the location is in .text, swap the halfwords */
                        if ((unsigned int)offset_ptr_rom <
                            (unsigned int)&__text_end)
                                val = (val << 16) | (val >> 16);

                        /* Check that the target points into .text */
                        if (val >= CONFIG_SYS_TEXT_BASE && val <=
                            (unsigned int)&__bss_end) {
                                val += gd->reloc_off;
                                /* If the location is in .text, swap back */
                                if ((unsigned int)offset_ptr_rom <
                                    (unsigned int)&__text_end)
                                        val = (val << 16) | (val >> 16);
                                memcpy(offset_ptr_ram, &val, sizeof(int));
                        } else {
                                debug(" %p: rom reloc %x, ram %p, value %x, limit %x\n",
                                      re_src, re_src->r_offset, offset_ptr_ram,
                                      val, (unsigned int)&__bss_end);
                        }
                } else {
                        debug(" %p: rom reloc %x, last %p\n", re_src,
                              re_src->r_offset, last_offset);
                }
                last_offset = offset_ptr_rom;

        } while (++re_src < re_end);

        return 0;
}
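
The double (val << 16) | (val >> 16) deserves a note: the swap implies that 32-bit values embedded in ARC's instruction stream are stored with their halfwords in the opposite order from plain data words, so a pointer patched into .text must be swapped to linear order, adjusted, and swapped back. A hypothetical standalone demonstration (all numbers made up):

#include <stdio.h>
#include <stdint.h>

/* Swap the two 16-bit halves of a word, as do_elf_reloc_fixups() does
 * for locations inside .text. */
static uint32_t swap_halves(uint32_t v)
{
        return (v << 16) | (v >> 16);
}

int main(void)
{
        uint32_t stored = 0x00048100;          /* value as read from .text */
        uint32_t linear = swap_halves(stored); /* 0x81000004, the real address */

        linear += 0x0e000000;                  /* apply a made-up reloc_off */
        printf("patched: 0x%08x\n", swap_halves(linear)); /* back in .text order */
        return 0;
}
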
arch/arc/lib/sections.c
/*
 * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

/*
 * For some reason the linker sets linker-generated symbols to zero in PIE
 * mode. As a work-around, we substitute compiler-generated symbols for the
 * linker-generated ones; these are handled properly by the linker in PIE
 * mode.
 */

char __bss_start[0] __attribute__((section(".__bss_start")));
char __bss_end[0] __attribute__((section(".__bss_end")));
char __image_copy_start[0] __attribute__((section(".__image_copy_start")));
char __image_copy_end[0] __attribute__((section(".__image_copy_end")));
char __rel_dyn_start[0] __attribute__((section(".__rel_dyn_start")));
char __rel_dyn_end[0] __attribute__((section(".__rel_dyn_end")));
char __text_start[0] __attribute__((section(".__text_start")));
char __text_end[0] __attribute__((section(".__text_end")));
char __init_end[0] __attribute__((section(".__init_end")));
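
These zero-length arrays act purely as address markers: taking such a symbol's address yields the boundary of the corresponding output section, which is exactly how relocate.c consumes __rel_dyn_start and __rel_dyn_end. A hedged usage sketch (the hypothetical helper below only links against an image that actually provides these markers):

extern char __bss_start[];
extern char __bss_end[];

/* Hypothetical helper showing how the markers are typically consumed */
static unsigned long bss_size(void)
{
        return (unsigned long)(__bss_end - __bss_start);
}
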
arch/arc/lib/strchr-700.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

/*
 * ARC700 has a relatively long pipeline and branch prediction, so we want
 * to avoid branches that are hard to predict. On the other hand, the
 * presence of the norm instruction makes it easier to operate on whole
 * words branch-free.
 */

.global strchr
.align 4
strchr:
        extb_s %r1, %r1
        asl %r5, %r1, 8
        bmsk %r2, %r0, 1
        or %r5, %r5, %r1
        mov_s %r3, 0x01010101
        breq.d %r2, %r0, .Laligned
        asl %r4, %r5, 16
        sub_s %r0, %r0, %r2
        asl %r7, %r2, 3
        ld_s %r2, [%r0]
#ifdef __LITTLE_ENDIAN__
        asl %r7, %r3, %r7
#else /* __BIG_ENDIAN__ */
        lsr %r7, %r3, %r7
#endif /* _ENDIAN__ */
        or %r5, %r5, %r4
        ror %r4, %r3
        sub %r12, %r2, %r7
        bic_s %r12, %r12, %r2
        and %r12, %r12, %r4
        brne.d %r12, 0, .Lfound0_ua
        xor %r6, %r2, %r5
        ld.a %r2, [%r0, 4]
        sub %r12, %r6, %r7
        bic %r12, %r12, %r6
#ifdef __LITTLE_ENDIAN__
        and %r7, %r12, %r4
        /* For speed, we want this branch to be unaligned. */
        breq %r7, 0, .Loop
        /* Likewise this one */
        b .Lfound_char
#else /* __BIG_ENDIAN__ */
        and %r12, %r12, %r4
        /* For speed, we want this branch to be unaligned. */
        breq %r12, 0, .Loop
        lsr_s %r12, %r12, 7
        bic %r2, %r7, %r6
        b.d .Lfound_char_b
        and_s %r2, %r2, %r12
#endif /* _ENDIAN__ */
        /* We require this code address to be unaligned for speed... */
.Laligned:
        ld_s %r2, [%r0]
        or %r5, %r5, %r4
        ror %r4, %r3
        /* ... so that this code address is aligned, for itself and ... */
.Loop:
        sub %r12, %r2, %r3
        bic_s %r12, %r12, %r2
        and %r12, %r12, %r4
        brne.d %r12, 0, .Lfound0
        xor %r6, %r2, %r5
        ld.a %r2, [%r0, 4]
        sub %r12, %r6, %r3
        bic %r12, %r12, %r6
        and %r7, %r12, %r4
        breq %r7, 0, .Loop
        /*
         * ... so that this branch is unaligned.
         * Found searched-for character.
         * r0 has already advanced to next word.
         */
#ifdef __LITTLE_ENDIAN__
        /*
         * We only need the information about the first matching byte
         * (i.e. the least significant matching byte) to be exact,
         * hence there is no problem with carry effects.
         */
.Lfound_char:
        sub %r3, %r7, 1
        bic %r3, %r3, %r7
        norm %r2, %r3
        sub_s %r0, %r0, 1
        asr_s %r2, %r2, 3
        j.d [%blink]
        sub_s %r0, %r0, %r2

        .balign 4
.Lfound0_ua:
        mov %r3, %r7
.Lfound0:
        sub %r3, %r6, %r3
        bic %r3, %r3, %r6
        and %r2, %r3, %r4
        or_s %r12, %r12, %r2
        sub_s %r3, %r12, 1
        bic_s %r3, %r3, %r12
        norm %r3, %r3
        add_s %r0, %r0, 3
        asr_s %r12, %r3, 3
        asl.f 0, %r2, %r3
        sub_s %r0, %r0, %r12
        j_s.d [%blink]
        mov.pl %r0, 0
#else /* __BIG_ENDIAN__ */
.Lfound_char:
        lsr %r7, %r7, 7

        bic %r2, %r7, %r6
.Lfound_char_b:
        norm %r2, %r2
        sub_s %r0, %r0, 4
        asr_s %r2, %r2, 3
        j.d [%blink]
        add_s %r0, %r0, %r2

.Lfound0_ua:
        mov_s %r3, %r7
.Lfound0:
        asl_s %r2, %r2, 7
        or %r7, %r6, %r4
        bic_s %r12, %r12, %r2
        sub %r2, %r7, %r3
        or %r2, %r2, %r6
        bic %r12, %r2, %r12
        bic.f %r3, %r4, %r12
        norm %r3, %r3

        add.pl %r3, %r3, 1
        asr_s %r12, %r3, 3
        asl.f 0, %r2, %r3
        add_s %r0, %r0, %r12
        j_s.d [%blink]
        mov.mi %r0, 0
#endif /* _ENDIAN__ */
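
The word-at-a-time search above rests on two classic tricks: replicate the target byte across a word (0x01010101 * c) and XOR, which turns matching bytes into 0x00, then apply the has-zero-byte test (w - 0x01010101) & ~w & 0x80808080. Below is a C sketch of just those two tricks, assuming 32-bit words; the assembly additionally handles big-endian layouts and misaligned starting addresses:

#include <stdint.h>

/* Non-zero iff some byte of w is 0x00 (the classic has-zero test) */
static uint32_t has_zero(uint32_t w)
{
        return (w - 0x01010101u) & ~w & 0x80808080u;
}

/* Non-zero iff some byte of w equals c: XOR maps matching bytes to zero */
static uint32_t has_byte(uint32_t w, unsigned char c)
{
        return has_zero(w ^ (0x01010101u * c));
}
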
arch/arc/lib/strcmp.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

/*
 * This is optimized primarily for the ARC700.
 * It would be possible to speed up the loops by one cycle per word
 * (one cycle per byte, respectively) by forcing double source 1 alignment,
 * unrolling by a factor of two, and speculatively loading the second word /
 * byte of source 1; however, that would increase the overhead for loop
 * setup / finish, and strcmp might often terminate early.
 */

.global strcmp
.align 4
strcmp:
        or %r2, %r0, %r1
        bmsk_s %r2, %r2, 1
        brne %r2, 0, .Lcharloop
        mov_s %r12, 0x01010101
        ror %r5, %r12
.Lwordloop:
        ld.ab %r2, [%r0, 4]
        ld.ab %r3, [%r1, 4]
        nop_s
        sub %r4, %r2, %r12
        bic %r4, %r4, %r2
        and %r4, %r4, %r5
        brne %r4, 0, .Lfound0
        breq %r2, %r3, .Lwordloop
#ifdef __LITTLE_ENDIAN__
        xor %r0, %r2, %r3 /* mask for difference */
        sub_s %r1, %r0, 1
        bic_s %r0, %r0, %r1 /* mask for least significant difference bit */
        sub %r1, %r5, %r0
        xor %r0, %r5, %r1 /* mask for least significant difference byte */
        and_s %r2, %r2, %r0
        and_s %r3, %r3, %r0
#endif /* _ENDIAN__ */
        cmp_s %r2, %r3
        mov_s %r0, 1
        j_s.d [%blink]
        bset.lo %r0, %r0, 31

        .balign 4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
        xor %r0, %r2, %r3 /* mask for difference */
        or %r0, %r0, %r4 /* or in zero indicator */
        sub_s %r1, %r0, 1
        bic_s %r0, %r0, %r1 /* mask for least significant difference bit */
        sub %r1, %r5, %r0
        xor %r0, %r5, %r1 /* mask for least significant difference byte */
        and_s %r2, %r2, %r0
        and_s %r3, %r3, %r0
        sub.f %r0, %r2, %r3
        mov.hi %r0, 1
        j_s.d [%blink]
        bset.lo %r0, %r0, 31
#else /* __BIG_ENDIAN__ */
        /*
         * The zero-detection above can mis-detect 0x01 bytes as zeroes
         * because of carry-propagation from a less significant zero byte.
         * We can compensate for this by checking that bit 0 is zero.
         * This compensation is not necessary in the step where we
         * get a low estimate for r2, because in any affected bytes
         * we already have 0x00 or 0x01, which will remain unchanged
         * when bit 7 is cleared.
         */
        .balign 4
.Lfound0:
        lsr %r0, %r4, 8
        lsr_s %r1, %r2
        bic_s %r2, %r2, %r0 /* get low estimate for r2 and get ... */
        bic_s %r0, %r0, %r1 /* <this is the adjusted mask for zeros> */
        or_s %r3, %r3, %r0 /* ... high estimate r3 so that r2 > r3 will */
        cmp_s %r3, %r2 /* ... be independent of trailing garbage */
        or_s %r2, %r2, %r0 /* likewise for r3 > r2 */
        bic_s %r3, %r3, %r0
        rlc %r0, 0 /* r0 := r2 > r3 ? 1 : 0 */
        cmp_s %r2, %r3
        j_s.d [%blink]
        bset.lo %r0, %r0, 31
#endif /* _ENDIAN__ */

        .balign 4
.Lcharloop:
        ldb.ab %r2, [%r0, 1]
        ldb.ab %r3, [%r1, 1]
        nop_s
        breq %r2, 0, .Lcmpend
        breq %r2, %r3, .Lcharloop
.Lcmpend:
        j_s.d [%blink]
        sub %r0, %r2, %r3
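
Once a word-level mismatch is found, the little-endian path isolates the least significant difference bit (the sub/bic pair computes diff & ~(diff - 1)) and reduces both words to their first differing byte. Expressed with a compiler builtin instead of ARC's norm instruction, the idea looks roughly like this (a sketch; assumes diff != 0 and little-endian byte order):

#include <stdint.h>

static int first_diff_byte_cmp(uint32_t a, uint32_t b)
{
        uint32_t diff = a ^ b;                          /* mask for difference */
        unsigned int shift = __builtin_ctz(diff) & ~7u; /* bit offset of first differing byte */

        return (int)((a >> shift) & 0xff) - (int)((b >> shift) & 0xff);
}
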
arch/arc/lib/strcpy-700.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

/*
 * If dst and src are 4 byte aligned, copy 8 bytes at a time.
 * If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
 * it 8 byte aligned. Thus, we can do a little read-ahead, without
 * dereferencing a cache line that we should not touch.
 * Note that short and long instructions have been scheduled to avoid
 * branch stalls.
 * The beq_s to r3z could be made unaligned & long to avoid a stall
 * there, but it is not likely to be taken often, and it would also be likely
 * to cost an unaligned mispredict at the next call.
 */

.global strcpy
.align 4
strcpy:
        or %r2, %r0, %r1
        bmsk_s %r2, %r2, 1
        brne.d %r2, 0, charloop
        mov_s %r10, %r0
        ld_s %r3, [%r1, 0]
        mov %r8, 0x01010101
        bbit0.d %r1, 2, loop_start
        ror %r12, %r8
        sub %r2, %r3, %r8
        bic_s %r2, %r2, %r3
        tst_s %r2, %r12
        bne r3z
        mov_s %r4, %r3
        .balign 4
loop:
        ld.a %r3, [%r1, 4]
        st.ab %r4, [%r10, 4]
loop_start:
        ld.a %r4, [%r1, 4]
        sub %r2, %r3, %r8
        bic_s %r2, %r2, %r3
        tst_s %r2, %r12
        bne_s r3z
        st.ab %r3, [%r10, 4]
        sub %r2, %r4, %r8
        bic %r2, %r2, %r4
        tst %r2, %r12
        beq loop
        mov_s %r3, %r4
#ifdef __LITTLE_ENDIAN__
r3z:    bmsk.f %r1, %r3, 7
        lsr_s %r3, %r3, 8
#else /* __BIG_ENDIAN__ */
r3z:    lsr.f %r1, %r3, 24
        asl_s %r3, %r3, 8
#endif /* _ENDIAN__ */
        bne.d r3z
        stb.ab %r1, [%r10, 1]
        j_s [%blink]

        .balign 4
charloop:
        ldb.ab %r3, [%r1, 1]
        brne.d %r3, 0, charloop
        stb.ab %r3, [%r10, 1]
        j [%blink]
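
The heart of the aligned path: load a whole word, check it for a zero byte before storing it, and finish the terminating word bytewise. A C rendition under the same assumption (both pointers 4-byte aligned; the real routine additionally pipelines two words per iteration):

#include <stdint.h>

static int word_has_zero(uint32_t w)
{
        return ((w - 0x01010101u) & ~w & 0x80808080u) != 0;
}

char *strcpy_aligned(char *dst, const char *src)
{
        uint32_t *d = (uint32_t *)dst;
        const uint32_t *s = (const uint32_t *)src;
        uint32_t w;

        while (!word_has_zero(w = *s++)) /* aligned loads cannot fault mid-word */
                *d++ = w;

        /* the last word contains the NUL: finish byte-by-byte */
        const char *sb = (const char *)(s - 1);
        char *db = (char *)d;

        while ((*db++ = *sb++) != '\0')
                ;
        return dst;
}
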
arch/arc/lib/strlen.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

.global strlen
.align 4
strlen:
        or %r3, %r0, 7
        ld %r2, [%r3, -7]
        ld.a %r6, [%r3, -3]
        mov %r4, 0x01010101
        /* uses long immediate */
#ifdef __LITTLE_ENDIAN__
        asl_s %r1, %r0, 3
        btst_s %r0, 2
        asl %r7, %r4, %r1
        ror %r5, %r4
        sub %r1, %r2, %r7
        bic_s %r1, %r1, %r2
        mov.eq %r7, %r4
        sub %r12, %r6, %r7
        bic %r12, %r12, %r6
        or.eq %r12, %r12, %r1
        and %r12, %r12, %r5
        brne %r12, 0, .Learly_end
#else /* __BIG_ENDIAN__ */
        ror %r5, %r4
        btst_s %r0, 2
        mov_s %r1, 31
        sub3 %r7, %r1, %r0
        sub %r1, %r2, %r4
        bic_s %r1, %r1, %r2
        bmsk %r1, %r1, %r7
        sub %r12, %r6, %r4
        bic %r12, %r12, %r6
        bmsk.ne %r12, %r12, %r7
        or.eq %r12, %r12, %r1
        and %r12, %r12, %r5
        brne %r12, 0, .Learly_end
#endif /* _ENDIAN__ */

.Loop:
        ld_s %r2, [%r3, 4]
        ld.a %r6, [%r3, 8]
        /* stall for load result */
        sub %r1, %r2, %r4
        bic_s %r1, %r1, %r2
        sub %r12, %r6, %r4
        bic %r12, %r12, %r6
        or %r12, %r12, %r1
        and %r12, %r12, %r5
        breq %r12, 0, .Loop
.Lend:
        and.f %r1, %r1, %r5
        sub.ne %r3, %r3, 4
        mov.eq %r1, %r12
#ifdef __LITTLE_ENDIAN__
        sub_s %r2, %r1, 1
        bic_s %r2, %r2, %r1
        norm %r1, %r2
        sub_s %r0, %r0, 3
        lsr_s %r1, %r1, 3
        sub %r0, %r3, %r0
        j_s.d [%blink]
        sub %r0, %r0, %r1
#else /* __BIG_ENDIAN__ */
        lsr_s %r1, %r1, 7
        mov.eq %r2, %r6
        bic_s %r1, %r1, %r2
        norm %r1, %r1
        sub %r0, %r3, %r0
        lsr_s %r1, %r1, 3
        j_s.d [%blink]
        add %r0, %r0, %r1
#endif /* _ENDIAN__ */
.Learly_end:
        b.d .Lend
        sub_s.ne %r1, %r1, %r1
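
Putting the same zero-byte test to work end to end, here is a word-at-a-time strlen() in C (a little-endian sketch for illustration; the assembly also unrolls two words per iteration and handles a misaligned head with masked loads instead of a byte loop):

#include <stddef.h>
#include <stdint.h>

size_t strlen_ref(const char *s)
{
        const char *p = s;

        /* walk bytes until the pointer is word-aligned */
        while ((uintptr_t)p & 3) {
                if (!*p)
                        return p - s;
                p++;
        }

        /* scan one word at a time until a zero byte is detected */
        const uint32_t *w = (const uint32_t *)p;

        while (!((*w - 0x01010101u) & ~*w & 0x80808080u))
                w++;

        /* pin down the exact zero byte inside the final word */
        p = (const char *)w;
        while (*p)
                p++;
        return p - s;
}
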