Merge git://git.denx.de/u-boot-arc

Alexey:
 1. Significantly rework cache-related functionality.
    In particular, this fixes coherency problems in some corner cases,
    allows caches to be enabled and disabled at run time while keeping
    the system running properly, and finally supports execution from
    real flash (previously we ran from DDR from the very beginning).

 2. Remove string routines implemented in assembly.
    This allows us to build and run U-Boot on a wide range of ARC cores
    with different configurations: whatever tuning is used on GCC's
    command line, we get code for the desired flavor of ARC.
    Otherwise, for each and every corner case we would need to add
    ifdefs to the assembly code to accommodate missing instructions etc.

 3. Use the toolchain's section garbage collection (-ffunction-sections,
    -fdata-sections and --gc-sections), which slims down the resulting
    image quite a bit.

 4. U-Boot self-relocation may now be disabled for ARC if needed, either
    by the platform or for debugging purposes.
branch: master
committer: Tom Rini
commit 423effc04a
 arch/arc/Kconfig                    |  18
 arch/arc/config.mk                  |   3
 arch/arc/include/asm/arc-bcr.h      |  77
 arch/arc/include/asm/arcregs.h      |  11
 arch/arc/include/asm/cache.h        |   7
 arch/arc/include/asm/global_data.h  |   6
 arch/arc/include/asm/io.h           |   8
 arch/arc/include/asm/string.h       |  26
 arch/arc/lib/Makefile               |   7
 arch/arc/lib/bootm.c                |  55
 arch/arc/lib/cache.c                | 677
 arch/arc/lib/init_helpers.c         |   6
 arch/arc/lib/memcmp.S               | 123
 arch/arc/lib/memcpy-700.S           |  63
 arch/arc/lib/memset.S               |  62
 arch/arc/lib/relocate.c             |   6
 arch/arc/lib/start.S                |  28
 arch/arc/lib/strchr-700.S           | 141
 arch/arc/lib/strcmp.S               |  97
 arch/arc/lib/strcpy-700.S           |  67
 arch/arc/lib/strlen.S               |  80
 board/synopsys/axs10x/axs10x.c      |  12
 board/synopsys/hsdk/hsdk.c          |  11
 common/board_f.c                    |   3

@ -116,6 +116,24 @@ config SYS_DCACHE_OFF
bool "Do not use Data Cache"
default n
menuconfig ARC_DBG
bool "ARC debugging"
default n
if ARC_DBG
config ARC_DBG_IOC_ENABLE
bool "Enable IO coherency unit"
depends on CPU_ARCHS38
default n
help
Enable IO coherency unit to debug problems with caches and
DMA peripherals.
NOTE: as of today linux will not work properly if this option
is enabled in u-boot!
endif
choice
prompt "Target select"
default TARGET_AXS103

@ -51,9 +51,10 @@ PLATFORM_CPPFLAGS += -mcpu=archs
endif
PLATFORM_CPPFLAGS += -ffixed-r25 -D__ARC__ -gdwarf-2 -mno-sdata
PLATFORM_RELFLAGS += -ffunction-sections -fdata-sections
# Needed for relocation
LDFLAGS_FINAL += -pie
LDFLAGS_FINAL += -pie --gc-sections
# Load address for standalone apps
CONFIG_STANDALONE_LOAD_ADDR ?= 0x82000000

@ -0,0 +1,77 @@
/*
* ARC Build Configuration Registers, with encoded hardware config
*
* Copyright (C) 2018 Synopsys
* Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
*
* This file is licensed under the terms of the GNU General Public
* License version 2. This program is licensed "as is" without any
* warranty of any kind, whether express or implied.
*/
#ifndef __ARC_BCR_H
#define __ARC_BCR_H
#ifndef __ASSEMBLY__
#include <config.h>
union bcr_di_cache {
struct {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
#else
unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
#endif
} fields;
unsigned int word;
};
union bcr_slc_cfg {
struct {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int pad:24, way:2, lsz:2, sz:4;
#else
unsigned int sz:4, lsz:2, way:2, pad:24;
#endif
} fields;
unsigned int word;
};
union bcr_generic {
struct {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int pad:24, ver:8;
#else
unsigned int ver:8, pad:24;
#endif
} fields;
unsigned int word;
};
union bcr_clust_cfg {
struct {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
#else
unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
#endif
} fields;
unsigned int word;
};
union bcr_mmu_4 {
struct {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
#else
/* DTLB ITLB JES JE JA */
unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
#endif
} fields;
unsigned int word;
};
#endif /* __ASSEMBLY__ */
#endif /* __ARC_BCR_H */
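
These unions are consumed by reading the relevant build configuration register
into '.word' and then decoding the bit-fields. A minimal sketch of the pattern,
mirroring the '8 << line_len' decoding used in the cache.c hunk further down
(illustrative helper, not part of the patch):

#include <asm/arc-bcr.h>
#include <asm/arcregs.h>

static int icache_line_len_bytes(void)
{
	union bcr_di_cache ibcr;

	ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD);	/* raw 32-bit BCR value */
	if (!ibcr.fields.ver)				/* ver == 0: no I$ present */
		return 0;

	return 8 << ibcr.fields.line_len;		/* line length in bytes */
}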

@ -8,6 +8,7 @@
#define _ASM_ARC_ARCREGS_H
#include <asm/cache.h>
#include <config.h>
/*
* ARC architecture has additional address space - auxiliary registers.
@ -88,6 +89,16 @@
/* ARCNUM [15:8] - field to identify each core in a multi-core system */
#define CPU_ID_GET() ((read_aux_reg(ARC_AUX_IDENTITY) & 0xFF00) >> 8)
static const inline int is_isa_arcv2(void)
{
return IS_ENABLED(CONFIG_ISA_ARCV2);
}
static const inline int is_isa_arcompact(void)
{
return IS_ENABLED(CONFIG_ISA_ARCOMPACT);
}
#endif /* __ASSEMBLY__ */
#endif /* _ASM_ARC_ARCREGS_H */
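
These helpers rely on IS_ENABLED() evaluating to a compile-time 0 or 1, so
ISA-specific code can be written as an ordinary C condition instead of an
#ifdef: the dead branch is discarded by the compiler yet still parsed and
type-checked in every configuration. A rough sketch of the effect, simplified
from the cache_init() usage in the cache.c hunk below (excerpt-style, not a
standalone file):

	/*
	 * Old style, completely hidden from the compiler when
	 * CONFIG_ISA_ARCV2 is not set:
	 *
	 *	#ifdef CONFIG_ISA_ARCV2
	 *		read_decode_cache_bcr_arcv2();
	 *	#endif
	 *
	 * New style: is_isa_arcv2() is a compile-time constant, so on
	 * ARCompact builds the branch compiles to nothing, but the code
	 * stays visible to the compiler in all configurations.
	 */
	if (is_isa_arcv2())
		read_decode_cache_bcr_arcv2();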

@ -30,6 +30,13 @@
#ifndef __ASSEMBLY__
void cache_init(void);
void flush_n_invalidate_dcache_all(void);
void sync_n_cleanup_cache_all(void);
static const inline int is_ioc_enabled(void)
{
return IS_ENABLED(CONFIG_ARC_DBG_IOC_ENABLE);
}
#endif /* __ASSEMBLY__ */

@ -7,9 +7,15 @@
#ifndef __ASM_ARC_GLOBAL_DATA_H
#define __ASM_ARC_GLOBAL_DATA_H
#include <config.h>
#ifndef __ASSEMBLY__
/* Architecture-specific global data */
struct arch_global_data {
int l1_line_sz;
#if defined(CONFIG_ISA_ARCV2)
int slc_line_sz;
#endif
};
#endif /* __ASSEMBLY__ */

@ -10,7 +10,7 @@
#include <linux/types.h>
#include <asm/byteorder.h>
#ifdef CONFIG_ISA_ARCV2
#ifdef __ARCHS__
/*
* ARCv2 based HS38 cores are in-order issue, but still weakly ordered
@ -42,12 +42,12 @@
#define mb() asm volatile("sync\n" : : : "memory")
#endif
#ifdef CONFIG_ISA_ARCV2
#ifdef __ARCHS__
#define __iormb() rmb()
#define __iowmb() wmb()
#else
#define __iormb() do { } while (0)
#define __iowmb() do { } while (0)
#define __iormb() asm volatile("" : : : "memory")
#define __iowmb() asm volatile("" : : : "memory")
#endif
static inline void sync(void)
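
For cores without hardware reordering the new __iormb()/__iowmb() definitions
are pure compiler barriers: an empty asm with a "memory" clobber, unlike the
old empty do/while statement, stops the compiler from moving ordinary memory
accesses across the MMIO accessors. A minimal sketch of the difference
(hypothetical device register and flag, not taken from the patch):

static int tx_started;			/* ordinary, non-volatile variable */

static void start_tx(void *reg)
{
	/* MMIO write: a volatile access, performed exactly once */
	*(volatile unsigned int *)reg = 1;

	/*
	 * With the old '#define __iowmb() do { } while (0)' the compiler
	 * may legally move the non-volatile store below above the MMIO
	 * access. The empty asm with a "memory" clobber forbids such
	 * reordering and forces memory-resident values to be re-read.
	 */
	asm volatile("" : : : "memory");

	tx_started = 1;
}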

@ -1,27 +1 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#ifndef __ASM_ARC_STRING_H
#define __ASM_ARC_STRING_H
#define __HAVE_ARCH_MEMSET
#define __HAVE_ARCH_MEMCPY
#define __HAVE_ARCH_MEMCMP
#define __HAVE_ARCH_STRCHR
#define __HAVE_ARCH_STRCPY
#define __HAVE_ARCH_STRCMP
#define __HAVE_ARCH_STRLEN
extern void *memset(void *ptr, int, __kernel_size_t);
extern void *memcpy(void *, const void *, __kernel_size_t);
extern void memzero(void *ptr, __kernel_size_t n);
extern int memcmp(const void *, const void *, __kernel_size_t);
extern char *strchr(const char *s, int c);
extern char *strcpy(char *dest, const char *src);
extern int strcmp(const char *cs, const char *ct);
extern __kernel_size_t strlen(const char *);
#endif /* __ASM_ARC_STRING_H */
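
With the __HAVE_ARCH_* defines and the assembly routines gone, the generic C
string functions take over: U-Boot's lib/string.c, following the Linux kernel
convention, only builds each fallback when the corresponding define is absent.
Roughly (simplified sketch of the generic strlen()):

#include <linux/types.h>

#ifndef __HAVE_ARCH_STRLEN
size_t strlen(const char *s)
{
	const char *sc;

	/* Plain C walk over the string: compiled for whatever ARC flavor is
	 * selected on the GCC command line, with no per-core ifdefs. */
	for (sc = s; *sc != '\0'; ++sc)
		/* nothing */;
	return sc - s;
}
#endif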

@ -10,13 +10,6 @@ obj-y += cache.o
obj-y += cpu.o
obj-y += interrupts.o
obj-y += relocate.o
obj-y += strchr-700.o
obj-y += strcmp.o
obj-y += strcpy-700.o
obj-y += strlen.o
obj-y += memcmp.o
obj-y += memcpy-700.o
obj-y += memset.o
obj-y += reset.o
obj-y += ints_low.o
obj-y += init_helpers.o

@ -4,6 +4,7 @@
* SPDX-License-Identifier: GPL-2.0+
*/
#include <asm/cache.h>
#include <common.h>
DECLARE_GLOBAL_DATA_PTR;
@ -40,41 +41,52 @@ void arch_lmb_reserve(struct lmb *lmb)
static int cleanup_before_linux(void)
{
disable_interrupts();
flush_dcache_all();
invalidate_icache_all();
sync_n_cleanup_cache_all();
return 0;
}
__weak int board_prep_linux(bootm_headers_t *images) { return 0; }
/* Subcommand: PREP */
static void boot_prep_linux(bootm_headers_t *images)
static int boot_prep_linux(bootm_headers_t *images)
{
if (image_setup_linux(images))
hang();
int ret;
ret = image_setup_linux(images);
if (ret)
return ret;
return board_prep_linux(images);
}
__weak void smp_set_core_boot_addr(unsigned long addr, int corenr) {}
__weak void smp_kick_all_cpus(void) {}
/* Generic implementation for single core CPU */
__weak void board_jump_and_run(ulong entry, int zero, int arch, uint params)
{
void (*kernel_entry)(int zero, int arch, uint params);
kernel_entry = (void (*)(int, int, uint))entry;
kernel_entry(zero, arch, params);
}
/* Subcommand: GO */
static void boot_jump_linux(bootm_headers_t *images, int flag)
{
void (*kernel_entry)(int zero, int arch, uint params);
ulong kernel_entry;
unsigned int r0, r2;
int fake = (flag & BOOTM_STATE_OS_FAKE_GO);
kernel_entry = (void (*)(int, int, uint))images->ep;
kernel_entry = images->ep;
debug("## Transferring control to Linux (at address %08lx)...\n",
(ulong) kernel_entry);
kernel_entry);
bootstage_mark(BOOTSTAGE_ID_RUN_OS);
printf("\nStarting kernel ...%s\n\n", fake ?
"(fake run for tracing)" : "");
bootstage_mark_name(BOOTSTAGE_ID_BOOTM_HANDOFF, "start_kernel");
cleanup_before_linux();
if (IMAGE_ENABLE_OF_LIBFDT && images->ft_len) {
r0 = 2;
r2 = (unsigned int)images->ft_addr;
@ -83,11 +95,10 @@ static void boot_jump_linux(bootm_headers_t *images, int flag)
r2 = (unsigned int)env_get("bootargs");
}
if (!fake) {
smp_set_core_boot_addr((unsigned long)kernel_entry, -1);
smp_kick_all_cpus();
kernel_entry(r0, 0, r2);
}
cleanup_before_linux();
if (!fake)
board_jump_and_run(kernel_entry, r0, 0, r2);
}
int do_bootm_linux(int flag, int argc, char *argv[], bootm_headers_t *images)
@ -96,17 +107,13 @@ int do_bootm_linux(int flag, int argc, char *argv[], bootm_headers_t *images)
if ((flag & BOOTM_STATE_OS_BD_T) || (flag & BOOTM_STATE_OS_CMDLINE))
return -1;
if (flag & BOOTM_STATE_OS_PREP) {
boot_prep_linux(images);
return 0;
}
if (flag & BOOTM_STATE_OS_PREP)
return boot_prep_linux(images);
if (flag & (BOOTM_STATE_OS_GO | BOOTM_STATE_OS_FAKE_GO)) {
boot_jump_linux(images, flag);
return 0;
}
boot_prep_linux(images);
boot_jump_linux(images, flag);
return 0;
return -1;
}

@ -10,8 +10,145 @@
#include <linux/kernel.h>
#include <linux/log2.h>
#include <asm/arcregs.h>
#include <asm/arc-bcr.h>
#include <asm/cache.h>
/*
* [ NOTE 1 ]:
* An entire-invalidate operation on the data cache (L1 D$ or SL$), or a data
* cache disable operation, may result in unexpected behavior and data loss
* even if we flush the data cache right before invalidation. That may happen
* if we store any context on the stack (as we do with the BLINK register
* before a function call).
* BLINK is the register where the return address is automatically saved when
* we make a function call with instructions like 'bl'.
*
* Here is a real example:
* We may hang in the following code because we store the BLINK register on the
* stack in the invalidate_dcache_all() function.
*
* void flush_dcache_all() {
* __dc_entire_op(OP_FLUSH);
* // Other code //
* }
*
* void invalidate_dcache_all() {
* __dc_entire_op(OP_INV);
* // Other code //
* }
*
* void foo(void) {
* flush_dcache_all();
* invalidate_dcache_all();
* }
*
* Now let's see what really happens during that code execution:
*
* foo()
* |->> call flush_dcache_all
* [return address is saved to BLINK register]
* [push BLINK] (save to stack) ![point 1]
* |->> call __dc_entire_op(OP_FLUSH)
* [return address is saved to BLINK register]
* [flush L1 D$]
* return [jump to BLINK]
* <<------
* [other flush_dcache_all code]
* [pop BLINK] (get from stack)
* return [jump to BLINK]
* <<------
* |->> call invalidate_dcache_all
* [return address is saved to BLINK register]
* [push BLINK] (save to stack) ![point 2]
* |->> call __dc_entire_op(OP_INV)
* [return address is saved to BLINK register]
* [invalidate L1 D$] ![point 3]
* // Oops!!!
* // We lose return address from invalidate_dcache_all function:
* // we save it to stack and invalidate L1 D$ after that!
* return [jump to BLINK]
* <<------
* [other invalidate_dcache_all code]
* [pop BLINK] (get from stack)
* // we don't have this data in the L1 dcache as we invalidated it at [point 3],
* // so we get it from the next memory level (for example DDR memory),
* // but in memory we have the value saved at [point 1], which is the
* // return address of the flush_dcache_all function (instead of the
* // address of the current invalidate_dcache_all function, which we
* // saved at [point 2]!)
* return [jump to BLINK]
* <<------
* // As BLINK points to invalidate_dcache_all, we call it again and
* // loop forever.
*
* Fortunately we may fix that by doing flush & invalidation of D$ with a
* single instruction (instead of a flush/invalidate instruction pair) and by
* forcing function inlining with the '__attribute__((always_inline))' GCC
* attribute, so that no function call (and hence no BLINK store) happens
* between the cache flush and the disable.
*
*
* [ NOTE 2 ]:
* As of today we only support the following cache configurations on ARC.
* Other configurations may exist in HW (for example, since version 3.0 HS
* supports SL$ (L2 system level cache) disable) but we don't support it in SW.
* Configuration 1:
*          ______________________
*         |                      |
*         |       ARC CPU        |
*         |______________________|
*          ___|___        ___|___
*         |       |      |       |
*         | L1 I$ |      | L1 D$ |
*         |_______|      |_______|
*          on/off          on/off
*          ___|______________|___
*         |                      |
*         |     main memory      |
*         |______________________|
*
* Configuration 2:
*          ______________________
*         |                      |
*         |       ARC CPU        |
*         |______________________|
*          ___|___        ___|___
*         |       |      |       |
*         | L1 I$ |      | L1 D$ |
*         |_______|      |_______|
*          on/off          on/off
*          ___|______________|___
*         |                      |
*         |       L2 (SL$)       |
*         |______________________|
*            always must be on
*          ___|______________|___
*         |                      |
*         |     main memory      |
*         |______________________|
*
* Configuration 3:
*          ______________________
*         |                      |
*         |       ARC CPU        |
*         |______________________|
*          ___|___        ___|___
*         |       |      |       |
*         | L1 I$ |      | L1 D$ |
*         |_______|      |_______|
*          on/off       must be on
*          ___|______________|___      _______
*         |                      |    |       |
*         |       L2 (SL$)       |----|  IOC  |
*         |______________________|    |_______|
*            always must be on          on/off
*          ___|______________|___
*         |                      |
*         |     main memory      |
*         |______________________|
*/
DECLARE_GLOBAL_DATA_PTR;
/* Bit values in IC_CTRL */
#define IC_CTRL_CACHE_DISABLE BIT(0)
@ -19,11 +156,10 @@
#define DC_CTRL_CACHE_DISABLE BIT(0)
#define DC_CTRL_INV_MODE_FLUSH BIT(6)
#define DC_CTRL_FLUSH_STATUS BIT(8)
#define CACHE_VER_NUM_MASK 0xF
#define OP_INV 0x1
#define OP_FLUSH 0x2
#define OP_INV_IC 0x3
#define OP_INV BIT(0)
#define OP_FLUSH BIT(1)
#define OP_FLUSH_N_INV (OP_FLUSH | OP_INV)
/* Bit val in SLC_CONTROL */
#define SLC_CTRL_DIS 0x001
@ -31,55 +167,117 @@
#define SLC_CTRL_BUSY 0x100
#define SLC_CTRL_RGN_OP_INV 0x200
#define CACHE_LINE_MASK (~(gd->arch.l1_line_sz - 1))
/*
* By default that variable will fall into .bss section.
* But .bss section is not relocated and so it will be initilized before
* relocation but will be used after being zeroed.
* We don't want to use '__always_inline' macro here as it can be redefined
* to simple 'inline' in some cases which breaks stuff. See [ NOTE 1 ] for more
* details about the reasons we need to use always_inline functions.
*/
int l1_line_sz __section(".data");
bool dcache_exists __section(".data") = false;
bool icache_exists __section(".data") = false;
#define CACHE_LINE_MASK (~(l1_line_sz - 1))
#ifdef CONFIG_ISA_ARCV2
int slc_line_sz __section(".data");
bool slc_exists __section(".data") = false;
bool ioc_exists __section(".data") = false;
bool pae_exists __section(".data") = false;
#define inlined_cachefunc inline __attribute__((always_inline))
/* To force enable IOC set ioc_enable to 'true' */
bool ioc_enable __section(".data") = false;
static inlined_cachefunc void __ic_entire_invalidate(void);
static inlined_cachefunc void __dc_entire_op(const int cacheop);
void read_decode_mmu_bcr(void)
static inline bool pae_exists(void)
{
/* TODO: should we compare mmu version from BCR and from CONFIG? */
#if (CONFIG_ARC_MMU_VER >= 4)
u32 tmp;
union bcr_mmu_4 mmu4;
tmp = read_aux_reg(ARC_AUX_MMU_BCR);
mmu4.word = read_aux_reg(ARC_AUX_MMU_BCR);
struct bcr_mmu_4 {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
#else
/* DTLB ITLB JES JE JA */
unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
#endif /* CONFIG_CPU_BIG_ENDIAN */
} *mmu4;
if (mmu4.fields.pae)
return true;
#endif /* (CONFIG_ARC_MMU_VER >= 4) */
mmu4 = (struct bcr_mmu_4 *)&tmp;
return false;
}
pae_exists = !!mmu4->pae;
#endif /* (CONFIG_ARC_MMU_VER >= 4) */
static inlined_cachefunc bool icache_exists(void)
{
union bcr_di_cache ibcr;
ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD);
return !!ibcr.fields.ver;
}
static void __slc_entire_op(const int op)
static inlined_cachefunc bool icache_enabled(void)
{
if (!icache_exists())
return false;
return !(read_aux_reg(ARC_AUX_IC_CTRL) & IC_CTRL_CACHE_DISABLE);
}
static inlined_cachefunc bool dcache_exists(void)
{
union bcr_di_cache dbcr;
dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD);
return !!dbcr.fields.ver;
}
static inlined_cachefunc bool dcache_enabled(void)
{
if (!dcache_exists())
return false;
return !(read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_CACHE_DISABLE);
}
static inlined_cachefunc bool slc_exists(void)
{
if (is_isa_arcv2()) {
union bcr_generic sbcr;
sbcr.word = read_aux_reg(ARC_BCR_SLC);
return !!sbcr.fields.ver;
}
return false;
}
static inlined_cachefunc bool slc_data_bypass(void)
{
/*
* If L1 data cache is disabled SL$ is bypassed and all load/store
* requests are sent directly to main memory.
*/
return !dcache_enabled();
}
static inline bool ioc_exists(void)
{
if (is_isa_arcv2()) {
union bcr_clust_cfg cbcr;
cbcr.word = read_aux_reg(ARC_BCR_CLUSTER);
return cbcr.fields.c;
}
return false;
}
static inline bool ioc_enabled(void)
{
/*
* We check only CONFIG option instead of IOC HW state check as IOC
* must be disabled by default.
*/
if (is_ioc_enabled())
return ioc_exists();
return false;
}
static inlined_cachefunc void __slc_entire_op(const int op)
{
unsigned int ctrl;
if (!slc_exists())
return;
ctrl = read_aux_reg(ARC_AUX_SLC_CTRL);
if (!(op & OP_FLUSH)) /* i.e. OP_INV */
@ -104,6 +302,14 @@ static void __slc_entire_op(const int op)
static void slc_upper_region_init(void)
{
/*
* The ARC_AUX_SLC_RGN_START1 and ARC_AUX_SLC_RGN_END1 registers exist
* only if PAE exists in the current HW. So we have to check pae_exists()
* before using them.
*/
if (!pae_exists())
return;
/*
* ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1 are always == 0
* as we don't use PAE40.
*/
@ -113,9 +319,14 @@ static void slc_upper_region_init(void)
static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op)
{
#ifdef CONFIG_ISA_ARCV2
unsigned int ctrl;
unsigned long end;
if (!slc_exists())
return;
/*
* The Region Flush operation is specified by CTRL.RGN_OP[11..9]
* - b'000 (default) is Flush,
@ -142,7 +353,7 @@ static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op)
* END needs to be setup before START (latter triggers the operation)
* END can't be same as START, so add (l2_line_sz - 1) to sz
*/
end = paddr + sz + slc_line_sz - 1;
end = paddr + sz + gd->arch.slc_line_sz - 1;
/*
* Upper addresses (ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1)
@ -156,85 +367,82 @@ static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op)
read_aux_reg(ARC_AUX_SLC_CTRL);
while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY);
}
#endif /* CONFIG_ISA_ARCV2 */
}
static void arc_ioc_setup(void)
{
/* IOC Aperture start is equal to DDR start */
unsigned int ap_base = CONFIG_SYS_SDRAM_BASE;
/* IOC Aperture size is equal to DDR size */
long ap_size = CONFIG_SYS_SDRAM_SIZE;
/* Unsupported configuration. See [ NOTE 2 ] for more details. */
if (!slc_exists())
panic("Try to enable IOC but SLC is not present");
/* Unsupported configuration. See [ NOTE 2 ] for more details. */
if (!dcache_enabled())
panic("Try to enable IOC but L1 D$ is disabled");
if (!is_power_of_2(ap_size) || ap_size < 4096)
panic("IOC Aperture size must be power of 2 and bigger 4Kib");
/* IOC Aperture start must be aligned to the size of the aperture */
if (ap_base % ap_size != 0)
panic("IOC Aperture start must be aligned to the size of the aperture");
flush_n_invalidate_dcache_all();
/*
* IOC Aperture size decoded as 2 ^ (SIZE + 2) KB,
* so setting 0x11 implies 512M, 0x12 implies 1G...
*/
write_aux_reg(ARC_AUX_IO_COH_AP0_SIZE,
order_base_2(ap_size / 1024) - 2);
write_aux_reg(ARC_AUX_IO_COH_AP0_BASE, ap_base >> 12);
write_aux_reg(ARC_AUX_IO_COH_PARTIAL, 1);
write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1);
}
#ifdef CONFIG_ISA_ARCV2
static void read_decode_cache_bcr_arcv2(void)
{
union {
struct {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int pad:24, way:2, lsz:2, sz:4;
#else
unsigned int sz:4, lsz:2, way:2, pad:24;
#endif
} fields;
unsigned int word;
} slc_cfg;
union {
struct {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int pad:24, ver:8;
#else
unsigned int ver:8, pad:24;
#endif
} fields;
unsigned int word;
} sbcr;
#ifdef CONFIG_ISA_ARCV2
sbcr.word = read_aux_reg(ARC_BCR_SLC);
if (sbcr.fields.ver) {
union bcr_slc_cfg slc_cfg;
if (slc_exists()) {
slc_cfg.word = read_aux_reg(ARC_AUX_SLC_CONFIG);
slc_exists = true;
slc_line_sz = (slc_cfg.fields.lsz == 0) ? 128 : 64;
}
gd->arch.slc_line_sz = (slc_cfg.fields.lsz == 0) ? 128 : 64;
union {
struct bcr_clust_cfg {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
#else
unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
#endif
} fields;
unsigned int word;
} cbcr;
/*
* We don't support configuration where L1 I$ or L1 D$ is
* absent but SL$ exists. See [ NOTE 2 ] for more details.
*/
if (!icache_exists() || !dcache_exists())
panic("Unsupported cache configuration: SLC exists but one of L1 caches is absent");
}
cbcr.word = read_aux_reg(ARC_BCR_CLUSTER);
if (cbcr.fields.c && ioc_enable)
ioc_exists = true;
#endif /* CONFIG_ISA_ARCV2 */
}
#endif
void read_decode_cache_bcr(void)
{
int dc_line_sz = 0, ic_line_sz = 0;
union {
struct {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
#else
unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
#endif
} fields;
unsigned int word;
} ibcr, dbcr;
union bcr_di_cache ibcr, dbcr;
ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD);
if (ibcr.fields.ver) {
icache_exists = true;
l1_line_sz = ic_line_sz = 8 << ibcr.fields.line_len;
gd->arch.l1_line_sz = ic_line_sz = 8 << ibcr.fields.line_len;
if (!ic_line_sz)
panic("Instruction exists but line length is 0\n");
}
dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD);
if (dbcr.fields.ver) {
dcache_exists = true;
l1_line_sz = dc_line_sz = 16 << dbcr.fields.line_len;
gd->arch.l1_line_sz = dc_line_sz = 16 << dbcr.fields.line_len;
if (!dc_line_sz)
panic("Data cache exists but line length is 0\n");
}
@ -247,109 +455,79 @@ void cache_init(void)
{
read_decode_cache_bcr();
#ifdef CONFIG_ISA_ARCV2
read_decode_cache_bcr_arcv2();
if (ioc_exists) {
/* IOC Aperture start is equal to DDR start */
unsigned int ap_base = CONFIG_SYS_SDRAM_BASE;
/* IOC Aperture size is equal to DDR size */
long ap_size = CONFIG_SYS_SDRAM_SIZE;
flush_dcache_all();
invalidate_dcache_all();
if (is_isa_arcv2())
read_decode_cache_bcr_arcv2();
if (!is_power_of_2(ap_size) || ap_size < 4096)
panic("IOC Aperture size must be power of 2 and bigger 4Kib");
/*
* IOC Aperture size decoded as 2 ^ (SIZE + 2) KB,
* so setting 0x11 implies 512M, 0x12 implies 1G...
*/
write_aux_reg(ARC_AUX_IO_COH_AP0_SIZE,
order_base_2(ap_size / 1024) - 2);
/* IOC Aperture start must be aligned to the size of the aperture */
if (ap_base % ap_size != 0)
panic("IOC Aperture start must be aligned to the size of the aperture");
write_aux_reg(ARC_AUX_IO_COH_AP0_BASE, ap_base >> 12);
write_aux_reg(ARC_AUX_IO_COH_PARTIAL, 1);
write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1);
}
if (is_isa_arcv2() && ioc_enabled())
arc_ioc_setup();
read_decode_mmu_bcr();
/*
* ARC_AUX_SLC_RGN_START1 and ARC_AUX_SLC_RGN_END1 register exist
* only if PAE exists in current HW. So we had to check pae_exist
* before using them.
*/
if (slc_exists && pae_exists)
if (is_isa_arcv2() && slc_exists())
slc_upper_region_init();
#endif /* CONFIG_ISA_ARCV2 */
}
int icache_status(void)
{
if (!icache_exists)
return 0;
if (read_aux_reg(ARC_AUX_IC_CTRL) & IC_CTRL_CACHE_DISABLE)
return 0;
else
return 1;
return icache_enabled();
}
void icache_enable(void)
{
if (icache_exists)
if (icache_exists())
write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) &
~IC_CTRL_CACHE_DISABLE);
}
void icache_disable(void)
{
if (icache_exists)
write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) |
IC_CTRL_CACHE_DISABLE);
if (!icache_exists())
return;
__ic_entire_invalidate();
write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) |
IC_CTRL_CACHE_DISABLE);
}
void invalidate_icache_all(void)
/* IC supports only invalidation */
static inlined_cachefunc void __ic_entire_invalidate(void)
{
if (!icache_enabled())
return;
/* Any write to IC_IVIC register triggers invalidation of entire I$ */
if (icache_status()) {
write_aux_reg(ARC_AUX_IC_IVIC, 1);
/*
* As per ARC HS databook (see chapter 5.3.3.2)
* it is required to add 3 NOPs after each write to IC_IVIC.
*/
__builtin_arc_nop();
__builtin_arc_nop();
__builtin_arc_nop();
read_aux_reg(ARC_AUX_IC_CTRL); /* blocks */
}
write_aux_reg(ARC_AUX_IC_IVIC, 1);
/*
* As per ARC HS databook (see chapter 5.3.3.2)
* it is required to add 3 NOPs after each write to IC_IVIC.
*/
__builtin_arc_nop();
__builtin_arc_nop();
__builtin_arc_nop();
read_aux_reg(ARC_AUX_IC_CTRL); /* blocks */
}
#ifdef CONFIG_ISA_ARCV2
if (slc_exists)
void invalidate_icache_all(void)
{
__ic_entire_invalidate();
/*
* If SL$ is bypassed for data it is used only for instructions,
* so we need to invalidate it too.
* TODO: HS 3.0 supports SLC disable so we need to check slc
* enable/disable status here.
*/
if (is_isa_arcv2() && slc_data_bypass())
__slc_entire_op(OP_INV);
#endif
}
int dcache_status(void)
{
if (!dcache_exists)
return 0;
if (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_CACHE_DISABLE)
return 0;
else
return 1;
return dcache_enabled();
}
void dcache_enable(void)
{
if (!dcache_exists)
if (!dcache_exists())
return;
write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) &
@ -358,83 +536,77 @@ void dcache_enable(void)
void dcache_disable(void)
{
if (!dcache_exists)
if (!dcache_exists())
return;
__dc_entire_op(OP_FLUSH_N_INV);
/*
* As SLC will be bypassed for data after L1 D$ disable we need to
* flush it first before L1 D$ disable. Also we invalidate SLC to
* avoid any inconsistent data problems after enabling L1 D$ again with
* dcache_enable function.
*/
if (is_isa_arcv2())
__slc_entire_op(OP_FLUSH_N_INV);
write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) |
DC_CTRL_CACHE_DISABLE);
}
#ifndef CONFIG_SYS_DCACHE_OFF
/*
* Common Helper for Line Operations on {I,D}-Cache
*/
static inline void __cache_line_loop(unsigned long paddr, unsigned long sz,
const int cacheop)
/* Common Helper for Line Operations on D-cache */
static inline void __dcache_line_loop(unsigned long paddr, unsigned long sz,
const int cacheop)
{
unsigned int aux_cmd;
#if (CONFIG_ARC_MMU_VER == 3)
unsigned int aux_tag;
#endif
int num_lines;
if (cacheop == OP_INV_IC) {
aux_cmd = ARC_AUX_IC_IVIL;
#if (CONFIG_ARC_MMU_VER == 3)
aux_tag = ARC_AUX_IC_PTAG;
#endif
} else {
/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
aux_cmd = cacheop & OP_INV ? ARC_AUX_DC_IVDL : ARC_AUX_DC_FLDL;
#if (CONFIG_ARC_MMU_VER == 3)
aux_tag = ARC_AUX_DC_PTAG;
#endif
}
/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
aux_cmd = cacheop & OP_INV ? ARC_AUX_DC_IVDL : ARC_AUX_DC_FLDL;
sz += paddr & ~CACHE_LINE_MASK;
paddr &= CACHE_LINE_MASK;
num_lines = DIV_ROUND_UP(sz, l1_line_sz);
num_lines = DIV_ROUND_UP(sz, gd->arch.l1_line_sz);
while (num_lines-- > 0) {
#if (CONFIG_ARC_MMU_VER == 3)
write_aux_reg(aux_tag, paddr);
write_aux_reg(ARC_AUX_DC_PTAG, paddr);
#endif
write_aux_reg(aux_cmd, paddr);
paddr += l1_line_sz;
paddr += gd->arch.l1_line_sz;
}
}
static unsigned int __before_dc_op(const int op)
static inlined_cachefunc void __before_dc_op(const int op)
{
unsigned int reg;
unsigned int ctrl;
if (op == OP_INV) {
/*
* IM is set by default and implies Flush-n-inv
* Clear it here for vanilla inv
*/
reg = read_aux_reg(ARC_AUX_DC_CTRL);
write_aux_reg(ARC_AUX_DC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH);
}
ctrl = read_aux_reg(ARC_AUX_DC_CTRL);
return reg;
/* IM bit implies flush-n-inv, instead of vanilla inv */
if (op == OP_INV)
ctrl &= ~DC_CTRL_INV_MODE_FLUSH;
else
ctrl |= DC_CTRL_INV_MODE_FLUSH;
write_aux_reg(ARC_AUX_DC_CTRL, ctrl);
}
static void __after_dc_op(const int op, unsigned int reg)
static inlined_cachefunc void __after_dc_op(const int op)
{
if (op & OP_FLUSH) /* flush / flush-n-inv both wait */
while (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_FLUSH_STATUS);
/* Switch back to default Invalidate mode */
if (op == OP_INV)
write_aux_reg(ARC_AUX_DC_CTRL, reg | DC_CTRL_INV_MODE_FLUSH);
}
static inline void __dc_entire_op(const int cacheop)
static inlined_cachefunc void __dc_entire_op(const int cacheop)
{
int aux;
unsigned int ctrl_reg = __before_dc_op(cacheop);
if (!dcache_enabled())
return;
__before_dc_op(cacheop);
if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */
aux = ARC_AUX_DC_IVDC;
@ -443,36 +615,36 @@ static inline void __dc_entire_op(const int cacheop)
write_aux_reg(aux, 0x1);
__after_dc_op(cacheop, ctrl_reg);
__after_dc_op(cacheop);
}
static inline void __dc_line_op(unsigned long paddr, unsigned long sz,
const int cacheop)
{
unsigned int ctrl_reg = __before_dc_op(cacheop);
if (!dcache_enabled())
return;
__cache_line_loop(paddr, sz, cacheop);
__after_dc_op(cacheop, ctrl_reg);
__before_dc_op(cacheop);
__dcache_line_loop(paddr, sz, cacheop);
__after_dc_op(cacheop);
}
#else
#define __dc_entire_op(cacheop)
#define __dc_line_op(paddr, sz, cacheop)
#endif /* !CONFIG_SYS_DCACHE_OFF */
void invalidate_dcache_range(unsigned long start, unsigned long end)
{
if (start >= end)
return;
#ifdef CONFIG_ISA_ARCV2
if (!ioc_exists)
#endif
/*
* ARCv1 -> call __dc_line_op
* ARCv2 && L1 D$ disabled -> nothing
* ARCv2 && L1 D$ enabled && IOC enabled -> nothing
* ARCv2 && L1 D$ enabled && no IOC -> call __dc_line_op; call __slc_rgn_op
*/
if (!is_isa_arcv2() || !ioc_enabled())
__dc_line_op(start, end - start, OP_INV);
#ifdef CONFIG_ISA_ARCV2
if (slc_exists && !ioc_exists)
if (is_isa_arcv2() && !ioc_enabled() && !slc_data_bypass())
__slc_rgn_op(start, end - start, OP_INV);
#endif
}
void flush_dcache_range(unsigned long start, unsigned long end)
@ -480,15 +652,17 @@ void flush_dcache_range(unsigned long start, unsigned long end)
if (start >= end)
return;
#ifdef CONFIG_ISA_ARCV2
if (!ioc_exists)
#endif
/*
* ARCv1 -> call __dc_line_op
* ARCv2 && L1 D$ disabled -> nothing
* ARCv2 && L1 D$ enabled && IOC enabled -> nothing
* ARCv2 && L1 D$ enabled && no IOC -> call __dc_line_op; call __slc_rgn_op
*/
if (!is_isa_arcv2() || !ioc_enabled())
__dc_line_op(start, end - start, OP_FLUSH);
#ifdef CONFIG_ISA_ARCV2
if (slc_exists && !ioc_exists)
if (is_isa_arcv2() && !ioc_enabled() && !slc_data_bypass())
__slc_rgn_op(start, end - start, OP_FLUSH);
#endif
}
void flush_cache(unsigned long start, unsigned long size)
@ -496,22 +670,47 @@ void flush_cache(unsigned long start, unsigned long size)
flush_dcache_range(start, start + size);
}
void invalidate_dcache_all(void)
/*
* As invalidate_dcache_all() is not used in generic U-Boot code, and we don't
* need a pure invalidate (without flush) in arch/arc code either, we implement
* flush_n_invalidate_dcache_all() (flush and invalidate in one operation)
* instead, because it is much safer. See [ NOTE 1 ] for more details.
*/
void flush_n_invalidate_dcache_all(void)
{
__dc_entire_op(OP_INV);
__dc_entire_op(OP_FLUSH_N_INV);
#ifdef CONFIG_ISA_ARCV2
if (slc_exists)
__slc_entire_op(OP_INV);
#endif
if (is_isa_arcv2() && !slc_data_bypass())
__slc_entire_op(OP_FLUSH_N_INV);
}
void flush_dcache_all(void)
{
__dc_entire_op(OP_FLUSH);
#ifdef CONFIG_ISA_ARCV2
if (slc_exists)
if (is_isa_arcv2() && !slc_data_bypass())
__slc_entire_op(OP_FLUSH);
#endif
}
/*
* This function cleans up all caches (and therefore syncs the I/D caches); it
* can be used for cleanup before Linux launch or to sync caches during
* relocation.
*/
void sync_n_cleanup_cache_all(void)
{
__dc_entire_op(OP_FLUSH_N_INV);
/*
* If SL$ is bypassed for data it is used only for instructions,
* and we shouldn't flush it. So invalidate it instead of flush_n_inv.
*/
if (is_isa_arcv2()) {
if (slc_data_bypass())
__slc_entire_op(OP_INV);
else
__slc_entire_op(OP_FLUSH_N_INV);
}
__ic_entire_invalidate();
}
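
As a worked example of the aperture encoding in arc_ioc_setup() above
(illustrative values only, assuming 512 MiB of DDR at 0x80000000; the real
values come from CONFIG_SYS_SDRAM_BASE and CONFIG_SYS_SDRAM_SIZE):

	long ap_size = 512 * 1024 * 1024;	/* stand-in for CONFIG_SYS_SDRAM_SIZE */
	unsigned int ap_base = 0x80000000;	/* stand-in for CONFIG_SYS_SDRAM_BASE */

	/*
	 * The SIZE field encodes 2 ^ (SIZE + 2) KiB:
	 * ap_size / 1024 = 524288 KiB = 2 ^ 19, order_base_2() -> 19,
	 * 19 - 2 = 17 = 0x11, and indeed 2 ^ (0x11 + 2) KiB = 512 MiB.
	 */
	write_aux_reg(ARC_AUX_IO_COH_AP0_SIZE, order_base_2(ap_size / 1024) - 2);

	/* The base is programmed in 4 KiB units: 0x80000000 >> 12 = 0x80000 */
	write_aux_reg(ARC_AUX_IO_COH_AP0_BASE, ap_base >> 12);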

@ -4,14 +4,14 @@
* SPDX-License-Identifier: GPL-2.0+
*/
#include <asm/cache.h>
#include <common.h>
DECLARE_GLOBAL_DATA_PTR;
int init_cache_f_r(void)
{
#ifndef CONFIG_SYS_DCACHE_OFF
flush_dcache_all();
#endif
sync_n_cleanup_cache_all();
return 0;
}

@ -1,123 +0,0 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#ifdef __LITTLE_ENDIAN__
#define WORD2 r2
#define SHIFT r3
#else /* __BIG_ENDIAN__ */
#define WORD2 r3
#define SHIFT r2
#endif /* _ENDIAN__ */
.global memcmp
.align 4
memcmp:
or %r12, %r0, %r1
asl_s %r12, %r12, 30
sub %r3, %r2, 1
brls %r2, %r12, .Lbytewise
ld %r4, [%r0, 0]
ld %r5, [%r1, 0]
lsr.f %lp_count, %r3, 3
lpne .Loop_end
ld_s WORD2, [%r0, 4]
ld_s %r12, [%r1, 4]
brne %r4, %r5, .Leven
ld.a %r4, [%r0, 8]
ld.a %r5, [%r1, 8]
brne WORD2, %r12, .Lodd
nop
.Loop_end:
asl_s SHIFT, SHIFT, 3
bhs_s .Last_cmp
brne %r4, %r5, .Leven
ld %r4, [%r0, 4]
ld %r5, [%r1, 4]
#ifdef __LITTLE_ENDIAN__
nop_s
/* one more load latency cycle */
.Last_cmp:
xor %r0, %r4, %r5
bset %r0, %r0, SHIFT
sub_s %r1, %r0, 1
bic_s %r1, %r1, %r0
norm %r1, %r1
b.d .Leven_cmp
and %r1, %r1, 24
.Leven:
xor %r0, %r4, %r5
sub_s %r1, %r0, 1
bic_s %r1, %r1, %r0
norm %r1, %r1
/* slow track insn */
and %r1, %r1, 24
.Leven_cmp:
asl %r2, %r4, %r1
asl %r12, %r5, %r1
lsr_s %r2, %r2, 1
lsr_s %r12, %r12, 1
j_s.d [%blink]
sub %r0, %r2, %r12
.balign 4
.Lodd:
xor %r0, WORD2, %r12
sub_s %r1, %r0, 1
bic_s %r1, %r1, %r0
norm %r1, %r1
/* slow track insn */
and %r1, %r1, 24
asl_s %r2, %r2, %r1
asl_s %r12, %r12, %r1
lsr_s %r2, %r2, 1
lsr_s %r12, %r12, 1
j_s.d [%blink]
sub %r0, %r2, %r12
#else /* __BIG_ENDIAN__ */
.Last_cmp:
neg_s SHIFT, SHIFT
lsr %r4, %r4, SHIFT
lsr %r5, %r5, SHIFT
/* slow track insn */
.Leven:
sub.f %r0, %r4, %r5
mov.ne %r0, 1
j_s.d [%blink]
bset.cs %r0, %r0, 31
.Lodd:
cmp_s WORD2, %r12
mov_s %r0, 1
j_s.d [%blink]
bset.cs %r0, %r0, 31
#endif /* _ENDIAN__ */
.balign 4
.Lbytewise:
breq %r2, 0, .Lnil
ldb %r4, [%r0, 0]
ldb %r5, [%r1, 0]
lsr.f %lp_count, %r3
lpne .Lbyte_end
ldb_s %r3, [%r0, 1]
ldb %r12, [%r1, 1]
brne %r4, %r5, .Lbyte_even
ldb.a %r4, [%r0, 2]
ldb.a %r5, [%r1, 2]
brne %r3, %r12, .Lbyte_odd
nop
.Lbyte_end:
bcc .Lbyte_even
brne %r4, %r5, .Lbyte_even
ldb_s %r3, [%r0, 1]
ldb_s %r12, [%r1, 1]
.Lbyte_odd:
j_s.d [%blink]
sub %r0, %r3, %r12
.Lbyte_even:
j_s.d [%blink]
sub %r0, %r4, %r5
.Lnil:
j_s.d [%blink]
mov %r0, 0

@ -1,63 +0,0 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
.global memcpy
.align 4
memcpy:
or %r3, %r0, %r1
asl_s %r3, %r3, 30
mov_s %r5, %r0
brls.d %r2, %r3, .Lcopy_bytewise
sub.f %r3, %r2, 1
ld_s %r12, [%r1, 0]
asr.f %lp_count, %r3, 3
bbit0.d %r3, 2, .Lnox4
bmsk_s %r2, %r2, 1
st.ab %r12, [%r5, 4]
ld.a %r12, [%r1, 4]
.Lnox4:
lppnz .Lendloop
ld_s %r3, [%r1, 4]
st.ab %r12, [%r5, 4]
ld.a %r12, [%r1, 8]
st.ab %r3, [%r5, 4]
.Lendloop:
breq %r2, 0, .Last_store
ld %r3, [%r5, 0]
#ifdef __LITTLE_ENDIAN__
add3 %r2, -1, %r2
/* uses long immediate */
xor_s %r12, %r12, %r3
bmsk %r12, %r12, %r2
xor_s %r12, %r12, %r3
#else /* __BIG_ENDIAN__ */
sub3 %r2, 31, %r2
/* uses long immediate */
xor_s %r3, %r3, %r12
bmsk %r3, %r3, %r2
xor_s %r12, %r12, %r3
#endif /* _ENDIAN__ */
.Last_store:
j_s.d [%blink]
st %r12, [%r5, 0]
.balign 4
.Lcopy_bytewise:
jcs [%blink]
ldb_s %r12, [%r1, 0]
lsr.f %lp_count, %r3
bhs_s .Lnox1
stb.ab %r12, [%r5, 1]
ldb.a %r12, [%r1, 1]
.Lnox1:
lppnz .Lendbloop
ldb_s %r3, [%r1, 1]
stb.ab %r12, [%r5, 1]
ldb.a %r12, [%r1, 2]
stb.ab %r3, [%r5, 1]
.Lendbloop:
j_s.d [%blink]
stb %r12, [%r5, 0]

@ -1,62 +0,0 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */
.global memset
.align 4
memset:
mov_s %r4, %r0
or %r12, %r0, %r2
bmsk.f %r12, %r12, 1
extb_s %r1, %r1
asl %r3, %r1, 8
beq.d .Laligned
or_s %r1, %r1, %r3
brls %r2, SMALL, .Ltiny
add %r3, %r2, %r0
stb %r1, [%r3, -1]
bclr_s %r3, %r3, 0
stw %r1, [%r3, -2]
bmsk.f %r12, %r0, 1
add_s %r2, %r2, %r12
sub.ne %r2, %r2, 4
stb.ab %r1, [%r4, 1]
and %r4, %r4, -2
stw.ab %r1, [%r4, 2]
and %r4, %r4, -4
.balign 4
.Laligned:
asl %r3, %r1, 16
lsr.f %lp_count, %r2, 2
or_s %r1, %r1, %r3
lpne .Loop_end
st.ab %r1, [%r4, 4]
.Loop_end:
j_s [%blink]
.balign 4
.Ltiny:
mov.f %lp_count, %r2
lpne .Ltiny_end
stb.ab %r1, [%r4, 1]
.Ltiny_end:
j_s [%blink]
/*
* memzero: @r0 = mem, @r1 = size_t
* memset: @r0 = mem, @r1 = char, @r2 = size_t
*/
.global memzero
.align 4
memzero:
/* adjust bzero args to memset args */
mov %r2, %r1
mov %r1, 0
/* tail call so need to tinker with blink */
b memset

@ -17,6 +17,9 @@ int copy_uboot_to_ram(void)
{
size_t len = (size_t)&__image_copy_end - (size_t)&__image_copy_start;
if (gd->flags & GD_FLG_SKIP_RELOC)
return 0;
memcpy((void *)gd->relocaddr, (void *)&__image_copy_start, len);
return 0;
@ -40,6 +43,9 @@ int do_elf_reloc_fixups(void)
Elf32_Rela *re_src = (Elf32_Rela *)(&__rel_dyn_start);
Elf32_Rela *re_end = (Elf32_Rela *)(&__rel_dyn_end);
if (gd->flags & GD_FLG_SKIP_RELOC)
return 0;
debug("Section .rela.dyn is located at %08x-%08x\n",
(unsigned int)re_src, (unsigned int)re_end);

@ -10,26 +10,6 @@
#include <asm/arcregs.h>
ENTRY(_start)
; ARCompact devices are not supposed to be SMP so master/slave check
; makes no sense.
#ifdef CONFIG_ISA_ARCV2
; Non-masters will be halted immediately, they might be kicked later
; by platform code right before passing control to the Linux kernel
; in bootm.c:boot_jump_linux().
lr r5, [identity]
lsr r5, r5, 8
bmsk r5, r5, 7
cmp r5, 0
mov.nz r0, r5
bz .Lmaster_proceed
flag 1
nop
nop
nop
.Lmaster_proceed:
#endif
/* Setup interrupt vector base that matches "__text_start" */
sr __ivt_start, [ARC_AUX_INTR_VEC_BASE]
@ -98,7 +78,13 @@ ENTRY(_start)
/* Zero the one and only argument of "board_init_f" */
mov_s %r0, 0
j board_init_f
bl board_init_f
/* We only get here if relocation is disabled by GD_FLG_SKIP_RELOC */
/* Pass the current GD (kept in r25) as board_init_r's new GD so it is not lost */
mov %r0, %r25
mov %r1, 0
bl board_init_r
ENDPROC(_start)
/*

@ -1,141 +0,0 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
/*
* ARC700 has a relatively long pipeline and branch prediction, so we want
* to avoid branches that are hard to predict. On the other hand, the
* presence of the norm instruction makes it easier to operate on whole
* words branch-free.
*/
.global strchr
.align 4
strchr:
extb_s %r1, %r1
asl %r5, %r1, 8
bmsk %r2, %r0, 1
or %r5, %r5, %r1
mov_s %r3, 0x01010101
breq.d %r2, %r0, .Laligned
asl %r4, %r5, 16
sub_s %r0, %r0, %r2
asl %r7, %r2, 3
ld_s %r2, [%r0]
#ifdef __LITTLE_ENDIAN__
asl %r7, %r3, %r7
#else /* __BIG_ENDIAN__ */
lsr %r7, %r3, %r7
#endif /* _ENDIAN__ */
or %r5, %r5, %r4
ror %r4, %r3
sub %r12, %r2, %r7
bic_s %r12, %r12, %r2
and %r12, %r12, %r4
brne.d %r12, 0, .Lfound0_ua
xor %r6, %r2, %r5
ld.a %r2, [%r0, 4]
sub %r12, %r6, %r7
bic %r12, %r12, %r6
#ifdef __LITTLE_ENDIAN__
and %r7, %r12, %r4
/* For speed, we want this branch to be unaligned. */
breq %r7, 0, .Loop
/* Likewise this one */
b .Lfound_char
#else /* __BIG_ENDIAN__ */
and %r12, %r12, %r4
/* For speed, we want this branch to be unaligned. */
breq %r12, 0, .Loop
lsr_s %r12, %r12, 7
bic %r2, %r7, %r6
b.d .Lfound_char_b
and_s %r2, %r2, %r12
#endif /* _ENDIAN__ */
/* We require this code address to be unaligned for speed... */
.Laligned:
ld_s %r2, [%r0]
or %r5, %r5, %r4
ror %r4, %r3
/* ... so that this code address is aligned, for itself and ... */
.Loop:
sub %r12, %r2, %r3
bic_s %r12, %r12, %r2
and %r12, %r12, %r4
brne.d %r12, 0, .Lfound0
xor %r6, %r2, %r5
ld.a %r2, [%r0, 4]
sub %r12, %r6, %r3
bic %r12, %r12, %r6
and %r7, %r12, %r4
breq %r7, 0, .Loop
/*
*... so that this branch is unaligned.
* Found searched-for character.
* r0 has already advanced to next word.
*/
#ifdef __LITTLE_ENDIAN__
/*
* We only need the information about the first matching byte
* (i.e. the least significant matching byte) to be exact,
* hence there is no problem with carry effects.
*/
.Lfound_char:
sub %r3, %r7, 1
bic %r3, %r3, %r7
norm %r2, %r3
sub_s %r0, %r0, 1
asr_s %r2, %r2, 3
j.d [%blink]
sub_s %r0, %r0, %r2
.balign 4
.Lfound0_ua:
mov %r3, %r7
.Lfound0:
sub %r3, %r6, %r3
bic %r3, %r3, %r6
and %r2, %r3, %r4
or_s %r12, %r12, %r2
sub_s %r3, %r12, 1
bic_s %r3, %r3, %r12
norm %r3, %r3
add_s %r0, %r0, 3
asr_s %r12, %r3, 3
asl.f 0, %r2, %r3
sub_s %r0, %r0, %r12
j_s.d [%blink]
mov.pl %r0, 0
#else /* __BIG_ENDIAN__ */
.Lfound_char:
lsr %r7, %r7, 7
bic %r2, %r7, %r6
.Lfound_char_b:
norm %r2, %r2
sub_s %r0, %r0, 4
asr_s %r2, %r2, 3
j.d [%blink]
add_s %r0, %r0, %r2
.Lfound0_ua:
mov_s %r3, %r7
.Lfound0:
asl_s %r2, %r2, 7
or %r7, %r6, %r4
bic_s %r12, %r12, %r2
sub %r2, %r7, %r3
or %r2, %r2, %r6
bic %r12, %r2, %r12
bic.f %r3, %r4, %r12
norm %r3, %r3
add.pl %r3, %r3, 1
asr_s %r12, %r3, 3
asl.f 0, %r2, %r3
add_s %r0, %r0, %r12
j_s.d [%blink]
mov.mi %r0, 0
#endif /* _ENDIAN__ */

@ -1,97 +0,0 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
/*
* This is optimized primarily for the ARC700.
* It would be possible to speed up the loops by one cycle / word
* respective one cycle / byte by forcing double source 1 alignment, unrolling
* by a factor of two, and speculatively loading the second word / byte of
* source 1; however, that would increase the overhead for loop setup / finish,
* and strcmp might often terminate early.
*/
.global strcmp
.align 4
strcmp:
or %r2, %r0, %r1
bmsk_s %r2, %r2, 1
brne %r2, 0, .Lcharloop
mov_s %r12, 0x01010101
ror %r5, %r12
.Lwordloop:
ld.ab %r2, [%r0, 4]
ld.ab %r3, [%r1, 4]
nop_s
sub %r4, %r2, %r12
bic %r4, %r4, %r2
and %r4, %r4, %r5
brne %r4, 0, .Lfound0
breq %r2 ,%r3, .Lwordloop
#ifdef __LITTLE_ENDIAN__
xor %r0, %r2, %r3 /* mask for difference */
sub_s %r1, %r0, 1
bic_s %r0, %r0, %r1 /* mask for least significant difference bit */
sub %r1, %r5, %r0
xor %r0, %r5, %r1 /* mask for least significant difference byte */
and_s %r2, %r2, %r0
and_s %r3, %r3, %r0
#endif /* _ENDIAN__ */
cmp_s %r2, %r3
mov_s %r0, 1
j_s.d [%blink]
bset.lo %r0, %r0, 31
.balign 4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
xor %r0, %r2, %r3 /* mask for difference */
or %r0, %r0, %r4 /* or in zero indicator */
sub_s %r1, %r0, 1
bic_s %r0, %r0, %r1 /* mask for least significant difference bit */
sub %r1, %r5, %r0
xor %r0, %r5, %r1 /* mask for least significant difference byte */
and_s %r2, %r2, %r0
and_s %r3, %r3, %r0
sub.f %r0, %r2, %r3
mov.hi %r0, 1
j_s.d [%blink]
bset.lo %r0, %r0, 31
#else /* __BIG_ENDIAN__ */
/*
* The zero-detection above can mis-detect 0x01 bytes as zeroes
* because of carry-propagateion from a lower significant zero byte.
* We can compensate for this by checking that bit0 is zero.
* This compensation is not necessary in the step where we
* get a low estimate for r2, because in any affected bytes
* we already have 0x00 or 0x01, which will remain unchanged
* when bit 7 is cleared.
*/
.balign 4
.Lfound0:
lsr %r0, %r4, 8
lsr_s %r1, %r2
bic_s %r2, %r2, %r0 /* get low estimate for r2 and get ... */
bic_s %r0, %r0, %r1 /* <this is the adjusted mask for zeros> */
or_s %r3, %r3, %r0 /* ... high estimate r3 so that r2 > r3 will */
cmp_s %r3, %r2 /* ... be independent of trailing garbage */
or_s %r2, %r2, %r0 /* likewise for r3 > r2 */
bic_s %r3, %r3, %r0
rlc %r0, 0 /* r0 := r2 > r3 ? 1 : 0 */
cmp_s %r2, %r3
j_s.d [%blink]
bset.lo %r0, %r0, 31
#endif /* _ENDIAN__ */
.balign 4
.Lcharloop:
ldb.ab %r2,[%r0,1]
ldb.ab %r3,[%r1,1]
nop_s
breq %r2, 0, .Lcmpend
breq %r2, %r3, .Lcharloop
.Lcmpend:
j_s.d [%blink]
sub %r0, %r2, %r3

@ -1,67 +0,0 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
/*
* If dst and src are 4 byte aligned, copy 8 bytes at a time.
* If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
* it 8 byte aligned. Thus, we can do a little read-ahead, without
* dereferencing a cache line that we should not touch.
* Note that short and long instructions have been scheduled to avoid
* branch stalls.
* The beq_s to r3z could be made unaligned & long to avoid a stall
* there, but it is not likely to be taken often, and it would also be likely
* to cost an unaligned mispredict at the next call.
*/
.global strcpy
.align 4
strcpy:
or %r2, %r0, %r1
bmsk_s %r2, %r2, 1
brne.d %r2, 0, charloop
mov_s %r10, %r0
ld_s %r3, [%r1, 0]
mov %r8, 0x01010101
bbit0.d %r1, 2, loop_start
ror %r12, %r8
sub %r2, %r3, %r8
bic_s %r2, %r2, %r3
tst_s %r2,%r12
bne r3z
mov_s %r4,%r3
.balign 4
loop:
ld.a %r3, [%r1, 4]
st.ab %r4, [%r10, 4]
loop_start:
ld.a %r4, [%r1, 4]
sub %r2, %r3, %r8
bic_s %r2, %r2, %r3
tst_s %r2, %r12
bne_s r3z
st.ab %r3, [%r10, 4]
sub %r2, %r4, %r8
bic %r2, %r2, %r4
tst %r2, %r12
beq loop
mov_s %r3, %r4
#ifdef __LITTLE_ENDIAN__
r3z: bmsk.f %r1, %r3, 7
lsr_s %r3, %r3, 8
#else /* __BIG_ENDIAN__ */
r3z: lsr.f %r1, %r3, 24
asl_s %r3, %r3, 8
#endif /* _ENDIAN__ */
bne.d r3z
stb.ab %r1, [%r10, 1]
j_s [%blink]
.balign 4
charloop:
ldb.ab %r3, [%r1, 1]
brne.d %r3, 0, charloop
stb.ab %r3, [%r10, 1]
j [%blink]

@ -1,80 +0,0 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
.global strlen
.align 4
strlen:
or %r3, %r0, 7
ld %r2, [%r3, -7]
ld.a %r6, [%r3, -3]
mov %r4, 0x01010101
/* uses long immediate */
#ifdef __LITTLE_ENDIAN__
asl_s %r1, %r0, 3
btst_s %r0, 2
asl %r7, %r4, %r1
ror %r5, %r4
sub %r1, %r2, %r7
bic_s %r1, %r1, %r2
mov.eq %r7, %r4
sub %r12, %r6, %r7
bic %r12, %r12, %r6
or.eq %r12, %r12, %r1
and %r12, %r12, %r5
brne %r12, 0, .Learly_end
#else /* __BIG_ENDIAN__ */
ror %r5, %r4
btst_s %r0, 2
mov_s %r1, 31
sub3 %r7, %r1, %r0
sub %r1, %r2, %r4
bic_s %r1, %r1, %r2
bmsk %r1, %r1, %r7
sub %r12, %r6, %r4
bic %r12, %r12, %r6
bmsk.ne %r12, %r12, %r7
or.eq %r12, %r12, %r1
and %r12, %r12, %r5
brne %r12, 0, .Learly_end
#endif /* _ENDIAN__ */
.Loop:
ld_s %r2, [%r3, 4]
ld.a %r6, [%r3, 8]
/* stall for load result */
sub %r1, %r2, %r4
bic_s %r1, %r1, %r2
sub %r12, %r6, %r4
bic %r12, %r12, %r6
or %r12, %r12, %r1
and %r12, %r12, %r5
breq %r12, 0, .Loop
.Lend:
and.f %r1, %r1, %r5
sub.ne %r3, %r3, 4
mov.eq %r1, %r12
#ifdef __LITTLE_ENDIAN__
sub_s %r2, %r1, 1
bic_s %r2, %r2, %r1
norm %r1, %r2
sub_s %r0, %r0, 3
lsr_s %r1, %r1, 3
sub %r0, %r3, %r0
j_s.d [%blink]
sub %r0, %r0, %r1
#else /* __BIG_ENDIAN__ */
lsr_s %r1, %r1, 7
mov.eq %r2, %r6
bic_s %r1, %r1, %r2
norm %r1, %r1
sub %r0, %r3, %r0
lsr_s %r1, %r1, 3
j_s.d [%blink]
add %r0, %r0, %r1
#endif /* _ENDIAN */
.Learly_end:
b.d .Lend
sub_s.ne %r1, %r1, %r1

@ -47,6 +47,18 @@ int board_early_init_f(void)
}
#ifdef CONFIG_ISA_ARCV2
void board_jump_and_run(ulong entry, int zero, int arch, uint params)
{
void (*kernel_entry)(int zero, int arch, uint params);
kernel_entry = (void (*)(int, int, uint))entry;
smp_set_core_boot_addr(entry, -1);
smp_kick_all_cpus();
kernel_entry(zero, arch, params);
}
#define RESET_VECTOR_ADDR 0x0
void smp_set_core_boot_addr(unsigned long addr, int corenr)

@ -58,6 +58,17 @@ int board_mmc_init(bd_t *bis)
return 0;
}
void board_jump_and_run(ulong entry, int zero, int arch, uint params)
{
void (*kernel_entry)(int zero, int arch, uint params);
kernel_entry = (void (*)(int, int, uint))entry;
smp_set_core_boot_addr(entry, -1);
smp_kick_all_cpus();
kernel_entry(zero, arch, params);
}
#define RESET_VECTOR_ADDR 0x0
void smp_set_core_boot_addr(unsigned long addr, int corenr)

@ -900,7 +900,8 @@ void board_init_f(ulong boot_flags)
hang();
#if !defined(CONFIG_ARM) && !defined(CONFIG_SANDBOX) && \
!defined(CONFIG_EFI_APP) && !CONFIG_IS_ENABLED(X86_64)
!defined(CONFIG_EFI_APP) && !CONFIG_IS_ENABLED(X86_64) && \
!defined(CONFIG_ARC)
/* NOTREACHED - jump_to_copy() does not return */
hang();
#endif
