These are library functions used by the ARC700 architecture.

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Francois Bedard <fbedard@synopsys.com>
Cc: Wolfgang Denk <wd@denx.de>
Cc: Heiko Schocher <hs@denx.de>

parent 2f16ac9df4
commit 2272382879
@@ -0,0 +1,16 @@ Makefile
#
# Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
#
# SPDX-License-Identifier: GPL-2.0+
#

obj-y += sections.o
obj-y += relocate.o
obj-y += strchr-700.o
obj-y += strcmp.o
obj-y += strcpy-700.o
obj-y += strlen.o
obj-y += memcmp.o
obj-y += memcpy-700.o
obj-y += memset.o
obj-$(CONFIG_CMD_BOOTM) += bootm.o
@@ -0,0 +1,106 @@ bootm.c
/*
 * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

#include <common.h>

DECLARE_GLOBAL_DATA_PTR;

static ulong get_sp(void)
{
	ulong ret;

	asm("mov %0, sp" : "=r"(ret) : );
	return ret;
}

void arch_lmb_reserve(struct lmb *lmb)
{
	ulong sp;

	/*
	 * Booting a (Linux) kernel image
	 *
	 * Allocate space for command line and board info - the
	 * address should be as high as possible within the reach of
	 * the kernel (see CONFIG_SYS_BOOTMAPSZ settings), but in unused
	 * memory, which means far enough below the current stack
	 * pointer.
	 */
	sp = get_sp();
	debug("## Current stack ends at 0x%08lx ", sp);

	/* adjust sp by 4K to be safe */
	sp -= 4096;
	lmb_reserve(lmb, sp, (CONFIG_SYS_SDRAM_BASE + gd->ram_size - sp));
}

static int cleanup_before_linux(void)
{
	disable_interrupts();
	flush_dcache_all();
	invalidate_icache_all();

	return 0;
}

/* Subcommand: PREP */
static void boot_prep_linux(bootm_headers_t *images)
{
	if (image_setup_linux(images))
		hang();
}

/* Subcommand: GO */
static void boot_jump_linux(bootm_headers_t *images, int flag)
{
	void (*kernel_entry)(int zero, int arch, uint params);
	unsigned int r0, r2;
	int fake = (flag & BOOTM_STATE_OS_FAKE_GO);

	kernel_entry = (void (*)(int, int, uint))images->ep;

	debug("## Transferring control to Linux (at address %08lx)...\n",
	      (ulong) kernel_entry);
	bootstage_mark(BOOTSTAGE_ID_RUN_OS);

	printf("\nStarting kernel ...%s\n\n", fake ?
	       "(fake run for tracing)" : "");
	bootstage_mark_name(BOOTSTAGE_ID_BOOTM_HANDOFF, "start_kernel");

	cleanup_before_linux();

	if (IMAGE_ENABLE_OF_LIBFDT && images->ft_len) {
		r0 = 2;
		r2 = (unsigned int)images->ft_addr;
	} else {
		r0 = 1;
		r2 = (unsigned int)getenv("bootargs");
	}

	if (!fake)
		kernel_entry(r0, 0, r2);
}

int do_bootm_linux(int flag, int argc, char *argv[], bootm_headers_t *images)
{
	/* No need for those on ARC */
	if ((flag & BOOTM_STATE_OS_BD_T) || (flag & BOOTM_STATE_OS_CMDLINE))
		return -1;

	if (flag & BOOTM_STATE_OS_PREP) {
		boot_prep_linux(images);
		return 0;
	}

	if (flag & (BOOTM_STATE_OS_GO | BOOTM_STATE_OS_FAKE_GO)) {
		boot_jump_linux(images, flag);
		return 0;
	}

	boot_prep_linux(images);
	boot_jump_linux(images, flag);
	return 0;
}
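The reservation in arch_lmb_reserve() above boils down to simple arithmetic: everything from a little below the current stack pointer up to the end of SDRAM is marked off-limits, so bootm will not relocate the kernel, ramdisk or FDT on top of the live stack. A minimal standalone sketch of that arithmetic, with assumed example values (the SDRAM base, RAM size and stack pointer below are made up, not taken from the patch):

#include <stdio.h>

int main(void)
{
	unsigned long sdram_base = 0x80000000ul;  /* assumed CONFIG_SYS_SDRAM_BASE */
	unsigned long ram_size   = 256ul << 20;   /* assumed gd->ram_size: 256 MiB */
	unsigned long sp         = 0x8ff00000ul;  /* assumed current stack pointer */

	sp -= 4096;                               /* 4 KiB guard below the stack */
	unsigned long reserve_base = sp;
	unsigned long reserve_size = sdram_base + ram_size - sp;

	/*
	 * Everything from just below the stack to the end of RAM is reserved,
	 * so images are never placed on top of the stack that is still in use.
	 */
	printf("reserve [%#lx, %#lx)\n", reserve_base, reserve_base + reserve_size);
	return 0;
}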
@@ -0,0 +1,121 @@ memcmp.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

#ifdef __LITTLE_ENDIAN__
#define WORD2	r2
#define SHIFT	r3
#else /* __BIG_ENDIAN__ */
#define WORD2	r3
#define SHIFT	r2
#endif /* _ENDIAN__ */

.global memcmp
.align 4
memcmp:
	or	%r12, %r0, %r1
	asl_s	%r12, %r12, 30
	sub	%r3, %r2, 1
	brls	%r2, %r12, .Lbytewise
	ld	%r4, [%r0, 0]
	ld	%r5, [%r1, 0]
	lsr.f	%lp_count, %r3, 3
	lpne	.Loop_end
	ld_s	WORD2, [%r0, 4]
	ld_s	%r12, [%r1, 4]
	brne	%r4, %r5, .Leven
	ld.a	%r4, [%r0, 8]
	ld.a	%r5, [%r1, 8]
	brne	WORD2, %r12, .Lodd
.Loop_end:
	asl_s	SHIFT, SHIFT, 3
	bhs_s	.Last_cmp
	brne	%r4, %r5, .Leven
	ld	%r4, [%r0, 4]
	ld	%r5, [%r1, 4]
#ifdef __LITTLE_ENDIAN__
	nop_s
	/* one more load latency cycle */
.Last_cmp:
	xor	%r0, %r4, %r5
	bset	%r0, %r0, SHIFT
	sub_s	%r1, %r0, 1
	bic_s	%r1, %r1, %r0
	norm	%r1, %r1
	b.d	.Leven_cmp
	and	%r1, %r1, 24
.Leven:
	xor	%r0, %r4, %r5
	sub_s	%r1, %r0, 1
	bic_s	%r1, %r1, %r0
	norm	%r1, %r1
	/* slow track insn */
	and	%r1, %r1, 24
.Leven_cmp:
	asl	%r2, %r4, %r1
	asl	%r12, %r5, %r1
	lsr_s	%r2, %r2, 1
	lsr_s	%r12, %r12, 1
	j_s.d	[%blink]
	sub	%r0, %r2, %r12
	.balign	4
.Lodd:
	xor	%r0, WORD2, %r12
	sub_s	%r1, %r0, 1
	bic_s	%r1, %r1, %r0
	norm	%r1, %r1
	/* slow track insn */
	and	%r1, %r1, 24
	asl_s	%r2, %r2, %r1
	asl_s	%r12, %r12, %r1
	lsr_s	%r2, %r2, 1
	lsr_s	%r12, %r12, 1
	j_s.d	[%blink]
	sub	%r0, %r2, %r12
#else /* __BIG_ENDIAN__ */
.Last_cmp:
	neg_s	SHIFT, SHIFT
	lsr	%r4, %r4, SHIFT
	lsr	%r5, %r5, SHIFT
	/* slow track insn */
.Leven:
	sub.f	%r0, %r4, %r5
	mov.ne	%r0, 1
	j_s.d	[%blink]
	bset.cs	%r0, %r0, 31
.Lodd:
	cmp_s	WORD2, %r12

	mov_s	%r0, 1
	j_s.d	[%blink]
	bset.cs	%r0, %r0, 31
#endif /* _ENDIAN__ */
	.balign	4
.Lbytewise:
	breq	%r2, 0, .Lnil
	ldb	%r4, [%r0, 0]
	ldb	%r5, [%r1, 0]
	lsr.f	%lp_count, %r3
	lpne	.Lbyte_end
	ldb_s	%r3, [%r0, 1]
	ldb	%r12, [%r1, 1]
	brne	%r4, %r5, .Lbyte_even
	ldb.a	%r4, [%r0, 2]
	ldb.a	%r5, [%r1, 2]
	brne	%r3, %r12, .Lbyte_odd
.Lbyte_end:
	bcc	.Lbyte_even
	brne	%r4, %r5, .Lbyte_even
	ldb_s	%r3, [%r0, 1]
	ldb_s	%r12, [%r1, 1]
.Lbyte_odd:
	j_s.d	[%blink]
	sub	%r0, %r3, %r12
.Lbyte_even:
	j_s.d	[%blink]
	sub	%r0, %r4, %r5
.Lnil:
	j_s.d	[%blink]
	mov	%r0, 0
@@ -0,0 +1,63 @@ memcpy-700.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

.global memcpy
.align 4
memcpy:
	or	%r3, %r0, %r1
	asl_s	%r3, %r3, 30
	mov_s	%r5, %r0
	brls.d	%r2, %r3, .Lcopy_bytewise
	sub.f	%r3, %r2, 1
	ld_s	%r12, [%r1, 0]
	asr.f	%lp_count, %r3, 3
	bbit0.d	%r3, 2, .Lnox4
	bmsk_s	%r2, %r2, 1
	st.ab	%r12, [%r5, 4]
	ld.a	%r12, [%r1, 4]
.Lnox4:
	lppnz	.Lendloop
	ld_s	%r3, [%r1, 4]
	st.ab	%r12, [%r5, 4]
	ld.a	%r12, [%r1, 8]
	st.ab	%r3, [%r5, 4]
.Lendloop:
	breq	%r2, 0, .Last_store
	ld	%r3, [%r5, 0]
#ifdef __LITTLE_ENDIAN__
	add3	%r2, -1, %r2
	/* uses long immediate */
	xor_s	%r12, %r12, %r3
	bmsk	%r12, %r12, %r2
	xor_s	%r12, %r12, %r3
#else /* __BIG_ENDIAN__ */
	sub3	%r2, 31, %r2
	/* uses long immediate */
	xor_s	%r3, %r3, %r12
	bmsk	%r3, %r3, %r2
	xor_s	%r12, %r12, %r3
#endif /* _ENDIAN__ */
.Last_store:
	j_s.d	[%blink]
	st	%r12, [%r5, 0]

	.balign	4
.Lcopy_bytewise:
	jcs	[%blink]
	ldb_s	%r12, [%r1, 0]
	lsr.f	%lp_count, %r3
	bhs_s	.Lnox1
	stb.ab	%r12, [%r5, 1]
	ldb.a	%r12, [%r1, 1]
.Lnox1:
	lppnz	.Lendbloop
	ldb_s	%r3, [%r1, 1]
	stb.ab	%r12, [%r5, 1]
	ldb.a	%r12, [%r1, 2]
	stb.ab	%r3, [%r5, 1]
.Lendbloop:
	j_s.d	[%blink]
	stb	%r12, [%r5, 0]
@@ -0,0 +1,62 @@ memset.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

#define SMALL	7 /* Must be at least 6 to deal with alignment/loop issues. */

.global memset
.align 4
memset:
	mov_s	%r4, %r0
	or	%r12, %r0, %r2
	bmsk.f	%r12, %r12, 1
	extb_s	%r1, %r1
	asl	%r3, %r1, 8
	beq.d	.Laligned
	or_s	%r1, %r1, %r3
	brls	%r2, SMALL, .Ltiny
	add	%r3, %r2, %r0
	stb	%r1, [%r3, -1]
	bclr_s	%r3, %r3, 0
	stw	%r1, [%r3, -2]
	bmsk.f	%r12, %r0, 1
	add_s	%r2, %r2, %r12
	sub.ne	%r2, %r2, 4
	stb.ab	%r1, [%r4, 1]
	and	%r4, %r4, -2
	stw.ab	%r1, [%r4, 2]
	and	%r4, %r4, -4

	.balign	4
.Laligned:
	asl	%r3, %r1, 16
	lsr.f	%lp_count, %r2, 2
	or_s	%r1, %r1, %r3
	lpne	.Loop_end
	st.ab	%r1, [%r4, 4]
.Loop_end:
	j_s	[%blink]

	.balign	4
.Ltiny:
	mov.f	%lp_count, %r2
	lpne	.Ltiny_end
	stb.ab	%r1, [%r4, 1]
.Ltiny_end:
	j_s	[%blink]

/*
 * memzero: @r0 = mem, @r1 = size_t
 * memset:  @r0 = mem, @r1 = char, @r2 = size_t
 */

.global memzero
.align 4
memzero:
	/* adjust bzero args to memset args */
	mov	%r2, %r1
	mov	%r1, 0
	/* tail call, so no need to tinker with blink */
	b	memset
@@ -0,0 +1,72 @@ relocate.c
/*
 * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

#include <common.h>
#include <elf.h>
#include <asm/sections.h>

DECLARE_GLOBAL_DATA_PTR;

/*
 * Base functionality is taken from x86 version with added ARC-specifics
 */
int do_elf_reloc_fixups(void)
{
	Elf32_Rela *re_src = (Elf32_Rela *)(&__rel_dyn_start);
	Elf32_Rela *re_end = (Elf32_Rela *)(&__rel_dyn_end);

	Elf32_Addr *offset_ptr_rom, *last_offset = NULL;
	Elf32_Addr *offset_ptr_ram;

	do {
		/* Get the location from the relocation entry */
		offset_ptr_rom = (Elf32_Addr *)re_src->r_offset;

		/* Check that the location of the relocation is in .text */
		if (offset_ptr_rom >= (Elf32_Addr *)CONFIG_SYS_TEXT_BASE &&
		    offset_ptr_rom > last_offset) {
			unsigned int val;
			/* Switch to the in-RAM version */
			offset_ptr_ram = (Elf32_Addr *)((ulong)offset_ptr_rom +
							gd->reloc_off);

			/*
			 * Use "memcpy" because the target location might only
			 * be 16-bit aligned on ARC, so we may need to read it
			 * byte-by-byte; an attempt to read the entire word at
			 * once makes the CPU throw an exception.
			 */
			memcpy(&val, offset_ptr_ram, sizeof(int));

			/* If location in ".text" section swap value */
			if ((unsigned int)offset_ptr_rom <
			    (unsigned int)&__text_end)
				val = (val << 16) | (val >> 16);

			/* Check that the target points into .text */
			if (val >= CONFIG_SYS_TEXT_BASE && val <=
			    (unsigned int)&__bss_end) {
				val += gd->reloc_off;
				/* If location in ".text" section swap value */
				if ((unsigned int)offset_ptr_rom <
				    (unsigned int)&__text_end)
					val = (val << 16) | (val >> 16);
				memcpy(offset_ptr_ram, &val, sizeof(int));
			} else {
				debug(" %p: rom reloc %x, ram %p, value %x, limit %x\n",
				      re_src, re_src->r_offset, offset_ptr_ram,
				      val, (unsigned int)&__bss_end);
			}
		} else {
			debug(" %p: rom reloc %x, last %p\n", re_src,
			      re_src->r_offset, last_offset);
		}
		last_offset = offset_ptr_rom;

	} while (++re_src < re_end);

	return 0;
}
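The halfword swap in do_elf_reloc_fixups() reflects how ARC stores 32-bit values inside instruction words: the two 16-bit halves are exchanged ("middle-endian"), so a pointer read out of .text must be swapped before the relocation offset is applied and swapped back before being stored. A small standalone sketch of that round-trip, using made-up example values (the addresses and reloc_off below are illustrative only, not from the patch):

#include <assert.h>
#include <stdint.h>

/*
 * Swap the two 16-bit halves of a 32-bit word, mirroring the double swap
 * performed by do_elf_reloc_fixups() for locations inside .text.
 */
static uint32_t swap_halves(uint32_t v)
{
	return (v << 16) | (v >> 16);
}

int main(void)
{
	uint32_t stored    = 0x56781234u;          /* swapped form as found in .text (example) */
	uint32_t value     = swap_halves(stored);  /* 0x12345678: the decoded target address */
	uint32_t reloc_off = 0x00100000u;          /* assumed gd->reloc_off */

	value += reloc_off;                        /* apply the relocation offset */
	stored = swap_halves(value);               /* re-encode for the instruction word */

	assert(value  == 0x12445678u);
	assert(stored == 0x56781244u);
	return 0;
}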
@@ -0,0 +1,21 @@ sections.c
/*
 * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

/*
 * For some reason the linker sets linker-generated symbols to zero in PIE
 * mode. A work-around is to substitute the linker-generated symbols with
 * compiler-generated symbols, which the linker handles properly in PIE mode.
 */

char __bss_start[0] __attribute__((section(".__bss_start")));
char __bss_end[0] __attribute__((section(".__bss_end")));
char __image_copy_start[0] __attribute__((section(".__image_copy_start")));
char __image_copy_end[0] __attribute__((section(".__image_copy_end")));
char __rel_dyn_start[0] __attribute__((section(".__rel_dyn_start")));
char __rel_dyn_end[0] __attribute__((section(".__rel_dyn_end")));
char __text_start[0] __attribute__((section(".__text_start")));
char __text_end[0] __attribute__((section(".__text_end")));
char __init_end[0] __attribute__((section(".__init_end")));
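These zero-length arrays stand in for the usual linker-script symbols; consumers simply take their addresses, exactly as relocate.c above does with __rel_dyn_start/__rel_dyn_end. A minimal sketch of such a consumer (hypothetical, not part of the patch):

/* Hypothetical consumer -- references the section markers the same way relocate.c does. */
extern char __rel_dyn_start[];
extern char __rel_dyn_end[];

static unsigned long rel_dyn_size(void)
{
	/*
	 * The arrays themselves are zero-length; only their placement by the
	 * linker matters, so the address difference is the section size.
	 */
	return (unsigned long)(__rel_dyn_end - __rel_dyn_start);
}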
@@ -0,0 +1,141 @@ strchr-700.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

/*
 * ARC700 has a relatively long pipeline and branch prediction, so we want
 * to avoid branches that are hard to predict. On the other hand, the
 * presence of the norm instruction makes it easier to operate on whole
 * words branch-free.
 */

.global strchr
.align 4
strchr:
	extb_s	%r1, %r1
	asl	%r5, %r1, 8
	bmsk	%r2, %r0, 1
	or	%r5, %r5, %r1
	mov_s	%r3, 0x01010101
	breq.d	%r2, %r0, .Laligned
	asl	%r4, %r5, 16
	sub_s	%r0, %r0, %r2
	asl	%r7, %r2, 3
	ld_s	%r2, [%r0]
#ifdef __LITTLE_ENDIAN__
	asl	%r7, %r3, %r7
#else /* __BIG_ENDIAN__ */
	lsr	%r7, %r3, %r7
#endif /* _ENDIAN__ */
	or	%r5, %r5, %r4
	ror	%r4, %r3
	sub	%r12, %r2, %r7
	bic_s	%r12, %r12, %r2
	and	%r12, %r12, %r4
	brne.d	%r12, 0, .Lfound0_ua
	xor	%r6, %r2, %r5
	ld.a	%r2, [%r0, 4]
	sub	%r12, %r6, %r7
	bic	%r12, %r12, %r6
#ifdef __LITTLE_ENDIAN__
	and	%r7, %r12, %r4
	/* For speed, we want this branch to be unaligned. */
	breq	%r7, 0, .Loop
	/* Likewise this one */
	b	.Lfound_char
#else /* __BIG_ENDIAN__ */
	and	%r12, %r12, %r4
	/* For speed, we want this branch to be unaligned. */
	breq	%r12, 0, .Loop
	lsr_s	%r12, %r12, 7
	bic	%r2, %r7, %r6
	b.d	.Lfound_char_b
	and_s	%r2, %r2, %r12
#endif /* _ENDIAN__ */
/* We require this code address to be unaligned for speed... */
.Laligned:
	ld_s	%r2, [%r0]
	or	%r5, %r5, %r4
	ror	%r4, %r3
/* ... so that this code address is aligned, for itself and ... */
.Loop:
	sub	%r12, %r2, %r3
	bic_s	%r12, %r12, %r2
	and	%r12, %r12, %r4
	brne.d	%r12, 0, .Lfound0
	xor	%r6, %r2, %r5
	ld.a	%r2, [%r0, 4]
	sub	%r12, %r6, %r3
	bic	%r12, %r12, %r6
	and	%r7, %r12, %r4
	breq	%r7, 0, .Loop
/*
 * ... so that this branch is unaligned.
 * Found searched-for character.
 * r0 has already advanced to next word.
 */
#ifdef __LITTLE_ENDIAN__
/*
 * We only need the information about the first matching byte
 * (i.e. the least significant matching byte) to be exact,
 * hence there is no problem with carry effects.
 */
.Lfound_char:
	sub	%r3, %r7, 1
	bic	%r3, %r3, %r7
	norm	%r2, %r3
	sub_s	%r0, %r0, 1
	asr_s	%r2, %r2, 3
	j.d	[%blink]
	sub_s	%r0, %r0, %r2

	.balign	4
.Lfound0_ua:
	mov	%r3, %r7
.Lfound0:
	sub	%r3, %r6, %r3
	bic	%r3, %r3, %r6
	and	%r2, %r3, %r4
	or_s	%r12, %r12, %r2
	sub_s	%r3, %r12, 1
	bic_s	%r3, %r3, %r12
	norm	%r3, %r3
	add_s	%r0, %r0, 3
	asr_s	%r12, %r3, 3
	asl.f	0, %r2, %r3
	sub_s	%r0, %r0, %r12
	j_s.d	[%blink]
	mov.pl	%r0, 0
#else /* __BIG_ENDIAN__ */
.Lfound_char:
	lsr	%r7, %r7, 7

	bic	%r2, %r7, %r6
.Lfound_char_b:
	norm	%r2, %r2
	sub_s	%r0, %r0, 4
	asr_s	%r2, %r2, 3
	j.d	[%blink]
	add_s	%r0, %r0, %r2

.Lfound0_ua:
	mov_s	%r3, %r7
.Lfound0:
	asl_s	%r2, %r2, 7
	or	%r7, %r6, %r4
	bic_s	%r12, %r12, %r2
	sub	%r2, %r7, %r3
	or	%r2, %r2, %r6
	bic	%r12, %r2, %r12
	bic.f	%r3, %r4, %r12
	norm	%r3, %r3

	add.pl	%r3, %r3, 1
	asr_s	%r12, %r3, 3
	asl.f	0, %r2, %r3
	add_s	%r0, %r0, %r12
	j_s.d	[%blink]
	mov.mi	%r0, 0
#endif /* _ENDIAN__ */
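The sub/bic/and sequences in strchr-700.S (and in memcmp, strcmp and strlen here) are the classic word-at-a-time byte test: subtracting 0x01010101 and masking with ~word & 0x80808080 yields a non-zero result exactly when some byte of the word is zero, and XOR-ing the word with the searched-for character replicated into every byte reduces "find this character" to the same zero test. A C rendering of the predicate, for reference only (not part of the patch):

#include <assert.h>
#include <stdint.h>

/*
 * Nonzero iff some byte of w is zero -- this is what the sub/bic/and
 * against 0x01010101 / 0x80808080 compute in the assembly above.
 */
static uint32_t has_zero_byte(uint32_t w)
{
	return (w - 0x01010101u) & ~w & 0x80808080u;
}

/* Nonzero iff some byte of w equals c. */
static uint32_t has_byte(uint32_t w, uint8_t c)
{
	uint32_t pattern = 0x01010101u * c;   /* c replicated into every byte */
	return has_zero_byte(w ^ pattern);
}

int main(void)
{
	assert(has_zero_byte(0x61620063u));   /* 'a' 'b' '\0' 'c' -> zero byte found */
	assert(!has_zero_byte(0x61626364u));  /* "abcd" -> no zero byte */
	assert(has_byte(0x61626364u, 'c'));   /* 'c' is one of the four bytes */
	return 0;
}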
@@ -0,0 +1,97 @@ strcmp.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

/*
 * This is optimized primarily for the ARC700.
 * It would be possible to speed up the loops by one cycle / word
 * respective one cycle / byte by forcing double source 1 alignment, unrolling
 * by a factor of two, and speculatively loading the second word / byte of
 * source 1; however, that would increase the overhead for loop setup / finish,
 * and strcmp might often terminate early.
 */

.global strcmp
.align 4
strcmp:
	or	%r2, %r0, %r1
	bmsk_s	%r2, %r2, 1
	brne	%r2, 0, .Lcharloop
	mov_s	%r12, 0x01010101
	ror	%r5, %r12
.Lwordloop:
	ld.ab	%r2, [%r0, 4]
	ld.ab	%r3, [%r1, 4]
	nop_s
	sub	%r4, %r2, %r12
	bic	%r4, %r4, %r2
	and	%r4, %r4, %r5
	brne	%r4, 0, .Lfound0
	breq	%r2, %r3, .Lwordloop
#ifdef __LITTLE_ENDIAN__
	xor	%r0, %r2, %r3	/* mask for difference */
	sub_s	%r1, %r0, 1
	bic_s	%r0, %r0, %r1	/* mask for least significant difference bit */
	sub	%r1, %r5, %r0
	xor	%r0, %r5, %r1	/* mask for least significant difference byte */
	and_s	%r2, %r2, %r0
	and_s	%r3, %r3, %r0
#endif /* _ENDIAN__ */
	cmp_s	%r2, %r3
	mov_s	%r0, 1
	j_s.d	[%blink]
	bset.lo	%r0, %r0, 31

	.balign	4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
	xor	%r0, %r2, %r3	/* mask for difference */
	or	%r0, %r0, %r4	/* or in zero indicator */
	sub_s	%r1, %r0, 1
	bic_s	%r0, %r0, %r1	/* mask for least significant difference bit */
	sub	%r1, %r5, %r0
	xor	%r0, %r5, %r1	/* mask for least significant difference byte */
	and_s	%r2, %r2, %r0
	and_s	%r3, %r3, %r0
	sub.f	%r0, %r2, %r3
	mov.hi	%r0, 1
	j_s.d	[%blink]
	bset.lo	%r0, %r0, 31
#else /* __BIG_ENDIAN__ */
/*
 * The zero-detection above can mis-detect 0x01 bytes as zeroes
 * because of carry-propagation from a less significant zero byte.
 * We can compensate for this by checking that bit 0 is zero.
 * This compensation is not necessary in the step where we
 * get a low estimate for r2, because in any affected bytes
 * we already have 0x00 or 0x01, which will remain unchanged
 * when bit 7 is cleared.
 */
	.balign	4
.Lfound0:
	lsr	%r0, %r4, 8
	lsr_s	%r1, %r2
	bic_s	%r2, %r2, %r0	/* get low estimate for r2 and get ... */
	bic_s	%r0, %r0, %r1	/* <this is the adjusted mask for zeros> */
	or_s	%r3, %r3, %r0	/* ... high estimate r3 so that r2 > r3 will */
	cmp_s	%r3, %r2	/* ... be independent of trailing garbage */
	or_s	%r2, %r2, %r0	/* likewise for r3 > r2 */
	bic_s	%r3, %r3, %r0
	rlc	%r0, 0		/* r0 := r2 > r3 ? 1 : 0 */
	cmp_s	%r2, %r3
	j_s.d	[%blink]
	bset.lo	%r0, %r0, 31
#endif /* _ENDIAN__ */

	.balign	4
.Lcharloop:
	ldb.ab	%r2, [%r0, 1]
	ldb.ab	%r3, [%r1, 1]
	nop_s
	breq	%r2, 0, .Lcmpend
	breq	%r2, %r3, .Lcharloop
.Lcmpend:
	j_s.d	[%blink]
	sub	%r0, %r2, %r3
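The big-endian comment above points out a subtlety of this zero test: the borrow coming out of a genuine zero byte can also set the marker bit of a 0x01 byte directly above it, so the per-byte marker mask is only trustworthy at and below the first real zero. A small worked example of that spurious bit (standalone, illustrative only):

#include <assert.h>
#include <stdint.h>

static uint32_t zero_mask(uint32_t w)
{
	return (w - 0x01010101u) & ~w & 0x80808080u;
}

int main(void)
{
	/*
	 * Bytes are 0x41 0x01 0x00 0x42: only the third byte is zero, but the
	 * borrow out of it also sets the marker bit of the 0x01 byte above it.
	 */
	uint32_t w = 0x41010042u;

	assert(zero_mask(w) == 0x00808000u);  /* two bits set, one is spurious */
	/*
	 * The assembly therefore only trusts the least significant set bit
	 * (little-endian path) or, as the comment above says, additionally
	 * checks bit 0 of the suspect byte (big-endian path).
	 */
	return 0;
}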
@@ -0,0 +1,67 @@ strcpy-700.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

/*
 * If dst and src are 4 byte aligned, copy 8 bytes at a time.
 * If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
 * it 8 byte aligned. Thus, we can do a little read-ahead, without
 * dereferencing a cache line that we should not touch.
 * Note that short and long instructions have been scheduled to avoid
 * branch stalls.
 * The beq_s to r3z could be made unaligned & long to avoid a stall
 * there, but it is not likely to be taken often, and it would also be likely
 * to cost an unaligned mispredict at the next call.
 */

.global strcpy
.align 4
strcpy:
	or	%r2, %r0, %r1
	bmsk_s	%r2, %r2, 1
	brne.d	%r2, 0, charloop
	mov_s	%r10, %r0
	ld_s	%r3, [%r1, 0]
	mov	%r8, 0x01010101
	bbit0.d	%r1, 2, loop_start
	ror	%r12, %r8
	sub	%r2, %r3, %r8
	bic_s	%r2, %r2, %r3
	tst_s	%r2, %r12
	bne	r3z
	mov_s	%r4, %r3
	.balign	4
loop:
	ld.a	%r3, [%r1, 4]
	st.ab	%r4, [%r10, 4]
loop_start:
	ld.a	%r4, [%r1, 4]
	sub	%r2, %r3, %r8
	bic_s	%r2, %r2, %r3
	tst_s	%r2, %r12
	bne_s	r3z
	st.ab	%r3, [%r10, 4]
	sub	%r2, %r4, %r8
	bic	%r2, %r2, %r4
	tst	%r2, %r12
	beq	loop
	mov_s	%r3, %r4
#ifdef __LITTLE_ENDIAN__
r3z:	bmsk.f	%r1, %r3, 7
	lsr_s	%r3, %r3, 8
#else /* __BIG_ENDIAN__ */
r3z:	lsr.f	%r1, %r3, 24
	asl_s	%r3, %r3, 8
#endif /* _ENDIAN__ */
	bne.d	r3z
	stb.ab	%r1, [%r10, 1]
	j_s	[%blink]

	.balign	4
charloop:
	ldb.ab	%r3, [%r1, 1]
	brne.d	%r3, 0, charloop
	stb.ab	%r3, [%r10, 1]
	j	[%blink]
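strcpy's fast path is the same zero-byte test applied to copying: words are loaded and stored four bytes at a time until a word is found to contain the terminating NUL, and only that last word is finished bytewise (the r3z tail above). A plain C sketch of that structure, assuming word-aligned src and dst as the fast path does (illustrative only, not the patch's implementation):

#include <stdint.h>
#include <string.h>

/*
 * Word-at-a-time strcpy sketch. Assumes 4-byte aligned src and dst, as the
 * assembly's fast path does after its alignment check.
 */
static char *strcpy_words(char *dst, const char *src)
{
	char *ret = dst;
	uint32_t w;

	for (;;) {
		memcpy(&w, src, 4);                        /* load one word */
		if ((w - 0x01010101u) & ~w & 0x80808080u)  /* NUL inside this word? */
			break;                             /* finish bytewise below */
		memcpy(dst, &w, 4);                        /* store the full word */
		src += 4;
		dst += 4;
	}
	while ((*dst++ = *src++) != '\0')                  /* copy the terminating tail */
		;
	return ret;
}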
@@ -0,0 +1,80 @@ strlen.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

.global strlen
.align 4
strlen:
	or	%r3, %r0, 7
	ld	%r2, [%r3, -7]
	ld.a	%r6, [%r3, -3]
	mov	%r4, 0x01010101
	/* uses long immediate */
#ifdef __LITTLE_ENDIAN__
	asl_s	%r1, %r0, 3
	btst_s	%r0, 2
	asl	%r7, %r4, %r1
	ror	%r5, %r4
	sub	%r1, %r2, %r7
	bic_s	%r1, %r1, %r2
	mov.eq	%r7, %r4
	sub	%r12, %r6, %r7
	bic	%r12, %r12, %r6
	or.eq	%r12, %r12, %r1
	and	%r12, %r12, %r5
	brne	%r12, 0, .Learly_end
#else /* __BIG_ENDIAN__ */
	ror	%r5, %r4
	btst_s	%r0, 2
	mov_s	%r1, 31
	sub3	%r7, %r1, %r0
	sub	%r1, %r2, %r4
	bic_s	%r1, %r1, %r2
	bmsk	%r1, %r1, %r7
	sub	%r12, %r6, %r4
	bic	%r12, %r12, %r6
	bmsk.ne	%r12, %r12, %r7
	or.eq	%r12, %r12, %r1
	and	%r12, %r12, %r5
	brne	%r12, 0, .Learly_end
#endif /* _ENDIAN__ */

.Loop:
	ld_s	%r2, [%r3, 4]
	ld.a	%r6, [%r3, 8]
	/* stall for load result */
	sub	%r1, %r2, %r4
	bic_s	%r1, %r1, %r2
	sub	%r12, %r6, %r4
	bic	%r12, %r12, %r6
	or	%r12, %r12, %r1
	and	%r12, %r12, %r5
	breq	%r12, 0, .Loop
.Lend:
	and.f	%r1, %r1, %r5
	sub.ne	%r3, %r3, 4
	mov.eq	%r1, %r12
#ifdef __LITTLE_ENDIAN__
	sub_s	%r2, %r1, 1
	bic_s	%r2, %r2, %r1
	norm	%r1, %r2
	sub_s	%r0, %r0, 3
	lsr_s	%r1, %r1, 3
	sub	%r0, %r3, %r0
	j_s.d	[%blink]
	sub	%r0, %r0, %r1
#else /* __BIG_ENDIAN__ */
	lsr_s	%r1, %r1, 7
	mov.eq	%r2, %r6
	bic_s	%r1, %r1, %r2
	norm	%r1, %r1
	sub	%r0, %r3, %r0
	lsr_s	%r1, %r1, 3
	j_s.d	[%blink]
	add	%r0, %r0, %r1
#endif /* _ENDIAN__ */
.Learly_end:
	b.d	.Lend
	sub_s.ne %r1, %r1, %r1