arc: add library functions

These are library functions used by the ARC700 architecture.

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>

Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Francois Bedard <fbedard@synopsys.com>
Cc: Wolfgang Denk <wd@denx.de>
Cc: Heiko Schocher <hs@denx.de>
commit 2272382879 (parent 2f16ac9df4)
Authored by Alexey Brodkin, committed by Tom Rini
---
 arch/arc/lib/Makefile     |  16 +
 arch/arc/lib/bootm.c      | 106 +
 arch/arc/lib/memcmp.S     | 121 +
 arch/arc/lib/memcpy-700.S |  63 +
 arch/arc/lib/memset.S     |  62 +
 arch/arc/lib/relocate.c   |  72 +
 arch/arc/lib/sections.c   |  21 +
 arch/arc/lib/strchr-700.S | 141 +
 arch/arc/lib/strcmp.S     |  97 +
 arch/arc/lib/strcpy-700.S |  67 +
 arch/arc/lib/strlen.S     |  80 +
 11 files changed, 846 insertions(+)

--- /dev/null
+++ b/arch/arc/lib/Makefile
@@ -0,0 +1,16 @@
#
# Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
#
# SPDX-License-Identifier: GPL-2.0+
#
obj-y += sections.o
obj-y += relocate.o
obj-y += strchr-700.o
obj-y += strcmp.o
obj-y += strcpy-700.o
obj-y += strlen.o
obj-y += memcmp.o
obj-y += memcpy-700.o
obj-y += memset.o
obj-$(CONFIG_CMD_BOOTM) += bootm.o

--- /dev/null
+++ b/arch/arc/lib/bootm.c
@@ -0,0 +1,106 @@
/*
* Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#include <common.h>

DECLARE_GLOBAL_DATA_PTR;

static ulong get_sp(void)
{
	ulong ret;

	asm("mov %0, sp" : "=r"(ret) : );
	return ret;
}

void arch_lmb_reserve(struct lmb *lmb)
{
	ulong sp;

	/*
	 * Booting a (Linux) kernel image
	 *
	 * Allocate space for command line and board info - the
	 * address should be as high as possible within the reach of
	 * the kernel (see CONFIG_SYS_BOOTMAPSZ settings), but in unused
	 * memory, which means far enough below the current stack
	 * pointer.
	 */
	sp = get_sp();
	debug("## Current stack ends at 0x%08lx ", sp);

	/* adjust sp by 4K to be safe */
	sp -= 4096;
	lmb_reserve(lmb, sp, (CONFIG_SYS_SDRAM_BASE + gd->ram_size - sp));
}

static int cleanup_before_linux(void)
{
	disable_interrupts();
	flush_dcache_all();
	invalidate_icache_all();

	return 0;
}

/* Subcommand: PREP */
static void boot_prep_linux(bootm_headers_t *images)
{
	if (image_setup_linux(images))
		hang();
}

/* Subcommand: GO */
static void boot_jump_linux(bootm_headers_t *images, int flag)
{
	void (*kernel_entry)(int zero, int arch, uint params);
	unsigned int r0, r2;
	int fake = (flag & BOOTM_STATE_OS_FAKE_GO);

	kernel_entry = (void (*)(int, int, uint))images->ep;

	debug("## Transferring control to Linux (at address %08lx)...\n",
	      (ulong) kernel_entry);
	bootstage_mark(BOOTSTAGE_ID_RUN_OS);

	printf("\nStarting kernel ...%s\n\n", fake ?
	       "(fake run for tracing)" : "");
	bootstage_mark_name(BOOTSTAGE_ID_BOOTM_HANDOFF, "start_kernel");

	cleanup_before_linux();

	if (IMAGE_ENABLE_OF_LIBFDT && images->ft_len) {
		r0 = 2;
		r2 = (unsigned int)images->ft_addr;
	} else {
		r0 = 1;
		r2 = (unsigned int)getenv("bootargs");
	}

	if (!fake)
		kernel_entry(r0, 0, r2);
}

int do_bootm_linux(int flag, int argc, char *argv[], bootm_headers_t *images)
{
	/* No need for those on ARC */
	if ((flag & BOOTM_STATE_OS_BD_T) || (flag & BOOTM_STATE_OS_CMDLINE))
		return -1;

	if (flag & BOOTM_STATE_OS_PREP) {
		boot_prep_linux(images);
		return 0;
	}

	if (flag & (BOOTM_STATE_OS_GO | BOOTM_STATE_OS_FAKE_GO)) {
		boot_jump_linux(images, flag);
		return 0;
	}

	boot_prep_linux(images);
	boot_jump_linux(images, flag);

	return 0;
}
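
Note: boot_jump_linux() above encodes the hand-off convention in registers: r0 is 2 and r2 holds the FDT address when a device tree is attached, otherwise r0 is 1 and r2 points to the "bootargs" string; r1 is always passed as 0. A minimal sketch of how a consumer of that convention might decode it follows (illustrative only; arc_handle_boot_params(), use_fdt() and use_cmdline() are hypothetical names, not part of this patch):

/* Sketch only: decoding the r0/r1/r2 convention set up by boot_jump_linux() */
static void arc_handle_boot_params(int r0, int r1, unsigned int r2)
{
	if (r0 == 2)
		use_fdt((void *)r2);		/* r2 = address of the FDT blob */
	else if (r0 == 1)
		use_cmdline((const char *)r2);	/* r2 = "bootargs" string */
	/* r1 is always 0 in this scheme */
}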

--- /dev/null
+++ b/arch/arc/lib/memcmp.S
@@ -0,0 +1,121 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#ifdef __LITTLE_ENDIAN__
#define WORD2 r2
#define SHIFT r3
#else /* __BIG_ENDIAN__ */
#define WORD2 r3
#define SHIFT r2
#endif /* _ENDIAN__ */
.global memcmp
.align 4
memcmp:
or %r12, %r0, %r1
asl_s %r12, %r12, 30
sub %r3, %r2, 1
brls %r2, %r12, .Lbytewise
ld %r4, [%r0, 0]
ld %r5, [%r1, 0]
lsr.f %lp_count, %r3, 3
lpne .Loop_end
ld_s WORD2, [%r0, 4]
ld_s %r12, [%r1, 4]
brne %r4, %r5, .Leven
ld.a %r4, [%r0, 8]
ld.a %r5, [%r1, 8]
brne WORD2, %r12, .Lodd
.Loop_end:
asl_s SHIFT, SHIFT, 3
bhs_s .Last_cmp
brne %r4, %r5, .Leven
ld %r4, [%r0, 4]
ld %r5, [%r1, 4]
#ifdef __LITTLE_ENDIAN__
nop_s
/* one more load latency cycle */
.Last_cmp:
xor %r0, %r4, %r5
bset %r0, %r0, SHIFT
sub_s %r1, %r0, 1
bic_s %r1, %r1, %r0
norm %r1, %r1
b.d .Leven_cmp
and %r1, %r1, 24
.Leven:
xor %r0, %r4, %r5
sub_s %r1, %r0, 1
bic_s %r1, %r1, %r0
norm %r1, %r1
/* slow track insn */
and %r1, %r1, 24
.Leven_cmp:
asl %r2, %r4, %r1
asl %r12, %r5, %r1
lsr_s %r2, %r2, 1
lsr_s %r12, %r12, 1
j_s.d [%blink]
sub %r0, %r2, %r12
.balign 4
.Lodd:
xor %r0, WORD2, %r12
sub_s %r1, %r0, 1
bic_s %r1, %r1, %r0
norm %r1, %r1
/* slow track insn */
and %r1, %r1, 24
asl_s %r2, %r2, %r1
asl_s %r12, %r12, %r1
lsr_s %r2, %r2, 1
lsr_s %r12, %r12, 1
j_s.d [%blink]
sub %r0, %r2, %r12
#else /* __BIG_ENDIAN__ */
.Last_cmp:
neg_s SHIFT, SHIFT
lsr %r4, %r4, SHIFT
lsr %r5, %r5, SHIFT
/* slow track insn */
.Leven:
sub.f %r0, %r4, %r5
mov.ne %r0, 1
j_s.d [%blink]
bset.cs %r0, %r0, 31
.Lodd:
cmp_s WORD2, %r12
mov_s %r0, 1
j_s.d [%blink]
bset.cs %r0, %r0, 31
#endif /* _ENDIAN__ */
.balign 4
.Lbytewise:
breq %r2, 0, .Lnil
ldb %r4, [%r0, 0]
ldb %r5, [%r1, 0]
lsr.f %lp_count, %r3
lpne .Lbyte_end
ldb_s %r3, [%r0, 1]
ldb %r12, [%r1, 1]
brne %r4, %r5, .Lbyte_even
ldb.a %r4, [%r0, 2]
ldb.a %r5, [%r1, 2]
brne %r3, %r12, .Lbyte_odd
.Lbyte_end:
bcc .Lbyte_even
brne %r4, %r5, .Lbyte_even
ldb_s %r3, [%r0, 1]
ldb_s %r12, [%r1, 1]
.Lbyte_odd:
j_s.d [%blink]
sub %r0, %r3, %r12
.Lbyte_even:
j_s.d [%blink]
sub %r0, %r4, %r5
.Lnil:
j_s.d [%blink]
mov %r0, 0

--- /dev/null
+++ b/arch/arc/lib/memcpy-700.S
@@ -0,0 +1,63 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
.global memcpy
.align 4
memcpy:
or %r3, %r0, %r1
asl_s %r3, %r3, 30
mov_s %r5, %r0
brls.d %r2, %r3, .Lcopy_bytewise
sub.f %r3, %r2, 1
ld_s %r12, [%r1, 0]
asr.f %lp_count, %r3, 3
bbit0.d %r3, 2, .Lnox4
bmsk_s %r2, %r2, 1
st.ab %r12, [%r5, 4]
ld.a %r12, [%r1, 4]
.Lnox4:
lppnz .Lendloop
ld_s %r3, [%r1, 4]
st.ab %r12, [%r5, 4]
ld.a %r12, [%r1, 8]
st.ab %r3, [%r5, 4]
.Lendloop:
breq %r2, 0, .Last_store
ld %r3, [%r5, 0]
#ifdef __LITTLE_ENDIAN__
add3 %r2, -1, %r2
/* uses long immediate */
xor_s %r12, %r12, %r3
bmsk %r12, %r12, %r2
xor_s %r12, %r12, %r3
#else /* __BIG_ENDIAN__ */
sub3 %r2, 31, %r2
/* uses long immediate */
xor_s %r3, %r3, %r12
bmsk %r3, %r3, %r2
xor_s %r12, %r12, %r3
#endif /* _ENDIAN__ */
.Last_store:
j_s.d [%blink]
st %r12, [%r5, 0]
.balign 4
.Lcopy_bytewise:
jcs [%blink]
ldb_s %r12, [%r1, 0]
lsr.f %lp_count, %r3
bhs_s .Lnox1
stb.ab %r12, [%r5, 1]
ldb.a %r12, [%r1, 1]
.Lnox1:
lppnz .Lendbloop
ldb_s %r3, [%r1, 1]
stb.ab %r12, [%r5, 1]
ldb.a %r12, [%r1, 2]
stb.ab %r3, [%r5, 1]
.Lendbloop:
j_s.d [%blink]
stb %r12, [%r5, 0]

--- /dev/null
+++ b/arch/arc/lib/memset.S
@@ -0,0 +1,62 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */
.global memset
.align 4
memset:
mov_s %r4, %r0
or %r12, %r0, %r2
bmsk.f %r12, %r12, 1
extb_s %r1, %r1
asl %r3, %r1, 8
beq.d .Laligned
or_s %r1, %r1, %r3
brls %r2, SMALL, .Ltiny
add %r3, %r2, %r0
stb %r1, [%r3, -1]
bclr_s %r3, %r3, 0
stw %r1, [%r3, -2]
bmsk.f %r12, %r0, 1
add_s %r2, %r2, %r12
sub.ne %r2, %r2, 4
stb.ab %r1, [%r4, 1]
and %r4, %r4, -2
stw.ab %r1, [%r4, 2]
and %r4, %r4, -4
.balign 4
.Laligned:
asl %r3, %r1, 16
lsr.f %lp_count, %r2, 2
or_s %r1, %r1, %r3
lpne .Loop_end
st.ab %r1, [%r4, 4]
.Loop_end:
j_s [%blink]
.balign 4
.Ltiny:
mov.f %lp_count, %r2
lpne .Ltiny_end
stb.ab %r1, [%r4, 1]
.Ltiny_end:
j_s [%blink]
/*
* memzero: @r0 = mem, @r1 = size_t
* memset: @r0 = mem, @r1 = char, @r2 = size_t
*/
.global memzero
.align 4
memzero:
/* adjust bzero args to memset args */
mov %r2, %r1
mov %r1, 0
/* tail call, so no need to tinker with blink */
b memset
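
Note: memzero above only remaps bzero-style arguments onto memset and tail-calls it; in C terms it is equivalent to the following sketch (not part of the patch):

/* C equivalent of the memzero wrapper above (sketch only) */
void memzero(void *mem, unsigned int size)
{
	/* size arrives in r1; it is moved to r2 and 0 is passed as the fill byte */
	memset(mem, 0, size);
}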

--- /dev/null
+++ b/arch/arc/lib/relocate.c
@@ -0,0 +1,72 @@
/*
* Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#include <common.h>
#include <elf.h>
#include <asm/sections.h>

DECLARE_GLOBAL_DATA_PTR;

/*
 * Base functionality is taken from the x86 version with added ARC-specifics
 */
int do_elf_reloc_fixups(void)
{
	Elf32_Rela *re_src = (Elf32_Rela *)(&__rel_dyn_start);
	Elf32_Rela *re_end = (Elf32_Rela *)(&__rel_dyn_end);
	Elf32_Addr *offset_ptr_rom, *last_offset = NULL;
	Elf32_Addr *offset_ptr_ram;

	do {
		/* Get the location from the relocation entry */
		offset_ptr_rom = (Elf32_Addr *)re_src->r_offset;

		/* Check that the location of the relocation is in .text */
		if (offset_ptr_rom >= (Elf32_Addr *)CONFIG_SYS_TEXT_BASE &&
		    offset_ptr_rom > last_offset) {
			unsigned int val;

			/* Switch to the in-RAM version */
			offset_ptr_ram = (Elf32_Addr *)((ulong)offset_ptr_rom +
							gd->reloc_off);

			/*
			 * Use memcpy() because the target location might be
			 * only 16-bit aligned on ARC, so we may need to access
			 * it byte-by-byte: an attempt to read a whole
			 * misaligned word makes the CPU throw an exception.
			 */
			memcpy(&val, offset_ptr_ram, sizeof(int));

			/* If the location is in .text, un-swap the halfwords */
			if ((unsigned int)offset_ptr_rom <
			    (unsigned int)&__text_end)
				val = (val << 16) | (val >> 16);

			/* Check that the target lies within the image */
			if (val >= CONFIG_SYS_TEXT_BASE && val <=
			    (unsigned int)&__bss_end) {
				val += gd->reloc_off;

				/* If the location is in .text, swap back */
				if ((unsigned int)offset_ptr_rom <
				    (unsigned int)&__text_end)
					val = (val << 16) | (val >> 16);

				memcpy(offset_ptr_ram, &val, sizeof(int));
			} else {
				debug(" %p: rom reloc %x, ram %p, value %x, limit %x\n",
				      re_src, re_src->r_offset, offset_ptr_ram,
				      val, (unsigned int)&__bss_end);
			}
		} else {
			debug(" %p: rom reloc %x, last %p\n", re_src,
			      re_src->r_offset, last_offset);
		}
		last_offset = offset_ptr_rom;
	} while (++re_src < re_end);

	return 0;
}
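
Note: the halfword swap in do_elf_reloc_fixups() is needed because ARC stores 32-bit literals embedded in the instruction stream with their 16-bit halves in "middle-endian" order, so a relocation target that lives inside .text has to be swapped to its logical value before patching and swapped back afterwards. A minimal helper expressing that swap, assuming the same semantics as the inline expression above (sketch only, not part of this patch):

/* Sketch only: the halfword swap applied to in-.text relocation targets */
static unsigned int arc_swap_halfwords(unsigned int val)
{
	return (val << 16) | (val >> 16);
}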

--- /dev/null
+++ b/arch/arc/lib/sections.c
@@ -0,0 +1,21 @@
/*
* Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
/*
 * For some reason the linker sets linker-generated symbols to zero in PIE
 * mode. A work-around is to substitute linker-generated symbols with
 * compiler-generated symbols, which the linker handles properly in PIE mode.
 */
char __bss_start[0] __attribute__((section(".__bss_start")));
char __bss_end[0] __attribute__((section(".__bss_end")));
char __image_copy_start[0] __attribute__((section(".__image_copy_start")));
char __image_copy_end[0] __attribute__((section(".__image_copy_end")));
char __rel_dyn_start[0] __attribute__((section(".__rel_dyn_start")));
char __rel_dyn_end[0] __attribute__((section(".__rel_dyn_end")));
char __text_start[0] __attribute__((section(".__text_start")));
char __text_end[0] __attribute__((section(".__text_end")));
char __init_end[0] __attribute__((section(".__init_end")));
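
Note: the zero-length arrays above behave like ordinary section-boundary symbols when referenced from C, so generic code can take their addresses to delimit regions. For instance, clearing BSS could look like the sketch below (illustrative only, not part of this patch):

/* Sketch only: using the marker symbols above to clear BSS */
extern char __bss_start[], __bss_end[];

static void clear_bss(void)
{
	char *p;

	for (p = __bss_start; p < __bss_end; p++)
		*p = 0;
}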

--- /dev/null
+++ b/arch/arc/lib/strchr-700.S
@@ -0,0 +1,141 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
/*
* ARC700 has a relatively long pipeline and branch prediction, so we want
* to avoid branches that are hard to predict. On the other hand, the
* presence of the norm instruction makes it easier to operate on whole
* words branch-free.
*/
.global strchr
.align 4
strchr:
extb_s %r1, %r1
asl %r5, %r1, 8
bmsk %r2, %r0, 1
or %r5, %r5, %r1
mov_s %r3, 0x01010101
breq.d %r2, %r0, .Laligned
asl %r4, %r5, 16
sub_s %r0, %r0, %r2
asl %r7, %r2, 3
ld_s %r2, [%r0]
#ifdef __LITTLE_ENDIAN__
asl %r7, %r3, %r7
#else /* __BIG_ENDIAN__ */
lsr %r7, %r3, %r7
#endif /* _ENDIAN__ */
or %r5, %r5, %r4
ror %r4, %r3
sub %r12, %r2, %r7
bic_s %r12, %r12, %r2
and %r12, %r12, %r4
brne.d %r12, 0, .Lfound0_ua
xor %r6, %r2, %r5
ld.a %r2, [%r0, 4]
sub %r12, %r6, %r7
bic %r12, %r12, %r6
#ifdef __LITTLE_ENDIAN__
and %r7, %r12, %r4
/* For speed, we want this branch to be unaligned. */
breq %r7, 0, .Loop
/* Likewise this one */
b .Lfound_char
#else /* __BIG_ENDIAN__ */
and %r12, %r12, %r4
/* For speed, we want this branch to be unaligned. */
breq %r12, 0, .Loop
lsr_s %r12, %r12, 7
bic %r2, %r7, %r6
b.d .Lfound_char_b
and_s %r2, %r2, %r12
#endif /* _ENDIAN__ */
/* We require this code address to be unaligned for speed... */
.Laligned:
ld_s %r2, [%r0]
or %r5, %r5, %r4
ror %r4, %r3
/* ... so that this code address is aligned, for itself and ... */
.Loop:
sub %r12, %r2, %r3
bic_s %r12, %r12, %r2
and %r12, %r12, %r4
brne.d %r12, 0, .Lfound0
xor %r6, %r2, %r5
ld.a %r2, [%r0, 4]
sub %r12, %r6, %r3
bic %r12, %r12, %r6
and %r7, %r12, %r4
breq %r7, 0, .Loop
/*
*... so that this branch is unaligned.
* Found searched-for character.
* r0 has already advanced to next word.
*/
#ifdef __LITTLE_ENDIAN__
/*
* We only need the information about the first matching byte
* (i.e. the least significant matching byte) to be exact,
* hence there is no problem with carry effects.
*/
.Lfound_char:
sub %r3, %r7, 1
bic %r3, %r3, %r7
norm %r2, %r3
sub_s %r0, %r0, 1
asr_s %r2, %r2, 3
j.d [%blink]
sub_s %r0, %r0, %r2
.balign 4
.Lfound0_ua:
mov %r3, %r7
.Lfound0:
sub %r3, %r6, %r3
bic %r3, %r3, %r6
and %r2, %r3, %r4
or_s %r12, %r12, %r2
sub_s %r3, %r12, 1
bic_s %r3, %r3, %r12
norm %r3, %r3
add_s %r0, %r0, 3
asr_s %r12, %r3, 3
asl.f 0, %r2, %r3
sub_s %r0, %r0, %r12
j_s.d [%blink]
mov.pl %r0, 0
#else /* __BIG_ENDIAN__ */
.Lfound_char:
lsr %r7, %r7, 7
bic %r2, %r7, %r6
.Lfound_char_b:
norm %r2, %r2
sub_s %r0, %r0, 4
asr_s %r2, %r2, 3
j.d [%blink]
add_s %r0, %r0, %r2
.Lfound0_ua:
mov_s %r3, %r7
.Lfound0:
asl_s %r2, %r2, 7
or %r7, %r6, %r4
bic_s %r12, %r12, %r2
sub %r2, %r7, %r3
or %r2, %r2, %r6
bic %r12, %r2, %r12
bic.f %r3, %r4, %r12
norm %r3, %r3
add.pl %r3, %r3, 1
asr_s %r12, %r3, 3
asl.f 0, %r2, %r3
add_s %r0, %r0, %r12
j_s.d [%blink]
mov.mi %r0, 0
#endif /* _ENDIAN__ */
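
Note: the word-wise scan above is built on the classic zero-byte test: for a 32-bit word x, (x - 0x01010101) & ~x & 0x80808080 is non-zero exactly when x contains a zero byte, and applying the same test to x XOR-ed with the searched-for character replicated into every byte detects that character. A C rendering of the test (sketch only, not part of this patch):

/* Sketch only: the zero-byte / target-byte test behind the word loop above */
static int word_has_zero_byte(unsigned int x)
{
	return ((x - 0x01010101) & ~x & 0x80808080) != 0;
}

static int word_has_byte(unsigned int x, unsigned char c)
{
	unsigned int pattern = c * 0x01010101u;	/* replicate c into all 4 bytes */

	return word_has_zero_byte(x ^ pattern);
}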

--- /dev/null
+++ b/arch/arc/lib/strcmp.S
@@ -0,0 +1,97 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
/*
 * This is optimized primarily for the ARC700.
 * It would be possible to speed up the loops by one cycle / word
 * (respectively one cycle / byte) by forcing double source 1 alignment,
 * unrolling by a factor of two, and speculatively loading the second word /
 * byte of source 1; however, that would increase the overhead for loop
 * setup / finish, and strcmp might often terminate early.
 */
.global strcmp
.align 4
strcmp:
or %r2, %r0, %r1
bmsk_s %r2, %r2, 1
brne %r2, 0, .Lcharloop
mov_s %r12, 0x01010101
ror %r5, %r12
.Lwordloop:
ld.ab %r2, [%r0, 4]
ld.ab %r3, [%r1, 4]
nop_s
sub %r4, %r2, %r12
bic %r4, %r4, %r2
and %r4, %r4, %r5
brne %r4, 0, .Lfound0
breq %r2, %r3, .Lwordloop
#ifdef __LITTLE_ENDIAN__
xor %r0, %r2, %r3 /* mask for difference */
sub_s %r1, %r0, 1
bic_s %r0, %r0, %r1 /* mask for least significant difference bit */
sub %r1, %r5, %r0
xor %r0, %r5, %r1 /* mask for least significant difference byte */
and_s %r2, %r2, %r0
and_s %r3, %r3, %r0
#endif /* _ENDIAN__ */
cmp_s %r2, %r3
mov_s %r0, 1
j_s.d [%blink]
bset.lo %r0, %r0, 31
.balign 4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
xor %r0, %r2, %r3 /* mask for difference */
or %r0, %r0, %r4 /* or in zero indicator */
sub_s %r1, %r0, 1
bic_s %r0, %r0, %r1 /* mask for least significant difference bit */
sub %r1, %r5, %r0
xor %r0, %r5, %r1 /* mask for least significant difference byte */
and_s %r2, %r2, %r0
and_s %r3, %r3, %r0
sub.f %r0, %r2, %r3
mov.hi %r0, 1
j_s.d [%blink]
bset.lo %r0, %r0, 31
#else /* __BIG_ENDIAN__ */
/*
 * The zero-detection above can mis-detect 0x01 bytes as zeroes
 * because of carry-propagation from a less significant zero byte.
 * We can compensate for this by checking that bit 0 is zero.
 * This compensation is not necessary in the step where we
 * get a low estimate for r2, because in any affected bytes
 * we already have 0x00 or 0x01, which will remain unchanged
 * when bit 7 is cleared.
 */
.balign 4
.Lfound0:
lsr %r0, %r4, 8
lsr_s %r1, %r2
bic_s %r2, %r2, %r0 /* get low estimate for r2 and get ... */
bic_s %r0, %r0, %r1 /* <this is the adjusted mask for zeros> */
or_s %r3, %r3, %r0 /* ... high estimate r3 so that r2 > r3 will */
cmp_s %r3, %r2 /* ... be independent of trailing garbage */
or_s %r2, %r2, %r0 /* likewise for r3 > r2 */
bic_s %r3, %r3, %r0
rlc %r0, 0 /* r0 := r2 > r3 ? 1 : 0 */
cmp_s %r2, %r3
j_s.d [%blink]
bset.lo %r0, %r0, 31
#endif /* _ENDIAN__ */
.balign 4
.Lcharloop:
ldb.ab %r2,[%r0,1]
ldb.ab %r3,[%r1,1]
nop_s
breq %r2, 0, .Lcmpend
breq %r2, %r3, .Lcharloop
.Lcmpend:
j_s.d [%blink]
sub %r0, %r2, %r3
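
Note: the word loop above tests four bytes per iteration with the same zero-byte mask used in strchr-700.S and then derives the result from the first differing or NUL byte; on the word paths only the sign of the return value is meaningful (they return 1, or 1 with bit 31 set for "less than", rather than an exact byte difference). Functionally the routine matches the byte-wise reference below (sketch only, not part of this patch):

/* Sketch only: byte-wise reference behaviour of the strcmp above */
static int strcmp_ref(const char *s1, const char *s2)
{
	while (*s1 != '\0' && *s1 == *s2) {
		s1++;
		s2++;
	}
	return (unsigned char)*s1 - (unsigned char)*s2;
}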

--- /dev/null
+++ b/arch/arc/lib/strcpy-700.S
@@ -0,0 +1,67 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
/*
* If dst and src are 4 byte aligned, copy 8 bytes at a time.
* If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
* it 8 byte aligned. Thus, we can do a little read-ahead, without
* dereferencing a cache line that we should not touch.
* Note that short and long instructions have been scheduled to avoid
* branch stalls.
* The beq_s to r3z could be made unaligned & long to avoid a stall
* there, but it is not likely to be taken often, and it would also be likely
* to cost an unaligned mispredict at the next call.
*/
.global strcpy
.align 4
strcpy:
or %r2, %r0, %r1
bmsk_s %r2, %r2, 1
brne.d %r2, 0, charloop
mov_s %r10, %r0
ld_s %r3, [%r1, 0]
mov %r8, 0x01010101
bbit0.d %r1, 2, loop_start
ror %r12, %r8
sub %r2, %r3, %r8
bic_s %r2, %r2, %r3
tst_s %r2,%r12
bne r3z
mov_s %r4,%r3
.balign 4
loop:
ld.a %r3, [%r1, 4]
st.ab %r4, [%r10, 4]
loop_start:
ld.a %r4, [%r1, 4]
sub %r2, %r3, %r8
bic_s %r2, %r2, %r3
tst_s %r2, %r12
bne_s r3z
st.ab %r3, [%r10, 4]
sub %r2, %r4, %r8
bic %r2, %r2, %r4
tst %r2, %r12
beq loop
mov_s %r3, %r4
#ifdef __LITTLE_ENDIAN__
r3z: bmsk.f %r1, %r3, 7
lsr_s %r3, %r3, 8
#else /* __BIG_ENDIAN__ */
r3z: lsr.f %r1, %r3, 24
asl_s %r3, %r3, 8
#endif /* _ENDIAN__ */
bne.d r3z
stb.ab %r1, [%r10, 1]
j_s [%blink]
.balign 4
charloop:
ldb.ab %r3, [%r1, 1]
brne.d %r3, 0, charloop
stb.ab %r3, [%r10, 1]
j [%blink]
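
Note: the aligned fast path above copies whole words until a word containing the NUL byte has been loaded, then the r3z tail stores that final word one byte at a time until the NUL is written. A rough, little-endian-only C outline of the same strategy (sketch only, not part of this patch; both pointers are assumed word-aligned, as on the fast path):

/* Sketch only: rough outline of the aligned fast path of strcpy above */
static char *strcpy_words(char *dst, const char *src)
{
	unsigned int *wd = (unsigned int *)dst;
	const unsigned int *ws = (const unsigned int *)src;
	unsigned int w;
	char *d;

	for (;;) {
		w = *ws++;
		if ((w - 0x01010101) & ~w & 0x80808080)
			break;			/* this word contains the NUL */
		*wd++ = w;
	}

	/* Store the final word byte by byte, stopping after the NUL */
	d = (char *)wd;
	do {
		*d++ = (char)w;
		w >>= 8;			/* little-endian byte order */
	} while (d[-1] != '\0');

	return dst;
}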

--- /dev/null
+++ b/arch/arc/lib/strlen.S
@@ -0,0 +1,80 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
.global strlen
.align 4
strlen:
or %r3, %r0, 7
ld %r2, [%r3, -7]
ld.a %r6, [%r3, -3]
mov %r4, 0x01010101
/* uses long immediate */
#ifdef __LITTLE_ENDIAN__
asl_s %r1, %r0, 3
btst_s %r0, 2
asl %r7, %r4, %r1
ror %r5, %r4
sub %r1, %r2, %r7
bic_s %r1, %r1, %r2
mov.eq %r7, %r4
sub %r12, %r6, %r7
bic %r12, %r12, %r6
or.eq %r12, %r12, %r1
and %r12, %r12, %r5
brne %r12, 0, .Learly_end
#else /* __BIG_ENDIAN__ */
ror %r5, %r4
btst_s %r0, 2
mov_s %r1, 31
sub3 %r7, %r1, %r0
sub %r1, %r2, %r4
bic_s %r1, %r1, %r2
bmsk %r1, %r1, %r7
sub %r12, %r6, %r4
bic %r12, %r12, %r6
bmsk.ne %r12, %r12, %r7
or.eq %r12, %r12, %r1
and %r12, %r12, %r5
brne %r12, 0, .Learly_end
#endif /* _ENDIAN__ */
.Loop:
ld_s %r2, [%r3, 4]
ld.a %r6, [%r3, 8]
/* stall for load result */
sub %r1, %r2, %r4
bic_s %r1, %r1, %r2
sub %r12, %r6, %r4
bic %r12, %r12, %r6
or %r12, %r12, %r1
and %r12, %r12, %r5
breq %r12, 0, .Loop
.Lend:
and.f %r1, %r1, %r5
sub.ne %r3, %r3, 4
mov.eq %r1, %r12
#ifdef __LITTLE_ENDIAN__
sub_s %r2, %r1, 1
bic_s %r2, %r2, %r1
norm %r1, %r2
sub_s %r0, %r0, 3
lsr_s %r1, %r1, 3
sub %r0, %r3, %r0
j_s.d [%blink]
sub %r0, %r0, %r1
#else /* __BIG_ENDIAN__ */
lsr_s %r1, %r1, 7
mov.eq %r2, %r6
bic_s %r1, %r1, %r2
norm %r1, %r1
sub %r0, %r3, %r0
lsr_s %r1, %r1, 3
j_s.d [%blink]
add %r0, %r0, %r1
#endif /* _ENDIAN__ */
.Learly_end:
b.d .Lend
sub_s.ne %r1, %r1, %r1