Patrick Georgi (pgeorgi@google.com) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/10413
-gerrit
commit f4c3441869d60157d04ad1534aa32eda870f53be Author: Patrick Georgi patrick@georgi-clan.de Date: Thu Jun 4 00:44:59 2015 +0200
libpayload: get rid of GPL'd code
Replace optimized memory/string handling that came from Linux under GPLv2 with optimized implementations from OpenBSD to simplify libpayload's licensing situation.
x86: The new memcpy and memset are slightly faster, too. arm: Testing required.
Change-Id: I7c5e070e842bd4a32f1b0821d7ed2d1932ecd6ca Signed-off-by: Patrick Georgi patrick@georgi-clan.de --- payloads/libpayload/LICENSES | 13 +- payloads/libpayload/arch/arm/Makefile.inc | 3 +- payloads/libpayload/arch/arm/_memcpy.S | 462 ++++++++++++++++++++++++++++++ payloads/libpayload/arch/arm/asmlib.h | 72 ----- payloads/libpayload/arch/arm/memcpy.S | 265 +++-------------- payloads/libpayload/arch/arm/memmove.S | 221 ++------------ payloads/libpayload/arch/arm/memset.S | 220 +++++++------- payloads/libpayload/arch/x86/Makefile.inc | 5 +- payloads/libpayload/arch/x86/memmove.S | 106 +++++++ payloads/libpayload/arch/x86/memset.S | 54 ++++ payloads/libpayload/arch/x86/string.c | 102 ------- 11 files changed, 810 insertions(+), 713 deletions(-)
diff --git a/payloads/libpayload/LICENSES b/payloads/libpayload/LICENSES index ab8daa4..be72b37 100644 --- a/payloads/libpayload/LICENSES +++ b/payloads/libpayload/LICENSES @@ -122,15 +122,10 @@ holders, and the exact license terms that apply. Original files: src/lib/libc/hash/sha1.c Current version we use: CVS revision 1.20 2005/08/08
-* arch/arm/mem*.S: GPLv2 - Source: Linux, http://www.kernel.org - Original files: arch/arm/lib/mem*.S - Current version we use: 3.9 (418df63adac56841ef6b0f1fcf435bc64d4ed177) - -* arch/x86/string.c: LGPLv2.1, modified to GPLv2 under the terms of section 3 - Source: GNU C Library (glibc), http://www.gnu.org/software/libc/libc.html - Original files: sysdeps/i386/memset.c - Current version we use: 2.14 +* arch/{arm,x86}/*mem*.S: 2-clause BSD license + Source: OpenBSD + http://www.openbsd.org/cgi-bin/cvsweb/src/lib/libc/arch/* + Current version we use: stated in the $OpenBSD$ header of each file
* liblz4/lz4.c: 2-clause BSD Source: LZ4 library, https://github.com/Cyan4973/lz4 diff --git a/payloads/libpayload/arch/arm/Makefile.inc b/payloads/libpayload/arch/arm/Makefile.inc index 6c8667a..e82c49f 100644 --- a/payloads/libpayload/arch/arm/Makefile.inc +++ b/payloads/libpayload/arch/arm/Makefile.inc @@ -39,8 +39,7 @@ libc-y += exception_asm.S exception.c libc-y += cache.c cpu.S libc-y += selfboot.c
-# Will fall back to default_memXXX() in libc/memory.c if GPL not allowed. -libc-$(CONFIG_LP_GPL) += memcpy.S memset.S memmove.S +libc-y += memcpy.S memset.S memmove.S _memcpy.S
libgdb-y += gdb.c
diff --git a/payloads/libpayload/arch/arm/_memcpy.S b/payloads/libpayload/arch/arm/_memcpy.S new file mode 100644 index 0000000..3e6acc1 --- /dev/null +++ b/payloads/libpayload/arch/arm/_memcpy.S @@ -0,0 +1,462 @@ +/* $OpenBSD: _memcpy.S,v 1.3 2008/06/26 05:42:04 ray Exp $ */ +/* $NetBSD: _memcpy.S,v 1.4 2003/04/05 23:08:52 bjh21 Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson and Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <arch/asm.h> + +/* + * This is one fun bit of code ... + * Some easy listening music is suggested while trying to understand this + * code e.g. Iron Maiden + * + * For anyone attempting to understand it : + * + * The core code is implemented here with simple stubs for memcpy() + * memmove() and bcopy(). + * + * All local labels are prefixed with Lmemcpy_ + * Following the prefix a label starting f is used in the forward copy code + * while a label using b is used in the backwards copy code + * The source and destination addresses determine whether a forward or + * backward copy is performed. + * Separate bits of code are used to deal with the following situations + * for both the forward and backwards copy. + * unaligned source address + * unaligned destination address + * Separate copy routines are used to produce an optimised result for each + * of these cases. + * The copy code will use LDM/STM instructions to copy up to 32 bytes at + * a time where possible. + * + * Note: r12 (aka ip) can be trashed during the function along with + * r0-r3 although r0-r2 have defined uses i.e. src, dest, len through out. + * Additional registers are preserved prior to use i.e. r4, r5 & lr + * + * Apologies for the state of the comments ;-) + */ + +ENTRY(_memcpy) + /* Determine copy direction */ + cmp r1, r0 + bcc .Lmemcpy_backwards + + moveq r0, #0 /* Quick abort for len=0 */ + moveq pc, lr + + stmdb sp!, {r0, lr} /* memcpy() returns dest addr */ + subs r2, r2, #4 + blt .Lmemcpy_fl4 /* less than 4 bytes */ + ands r12, r0, #3 + bne .Lmemcpy_fdestul /* oh unaligned destination addr */ + ands r12, r1, #3 + bne .Lmemcpy_fsrcul /* oh unaligned source addr */ + +.Lmemcpy_ft8: + /* We have aligned source and destination */ + subs r2, r2, #8 + blt .Lmemcpy_fl12 /* less than 12 bytes (4 from above) */ + subs r2, r2, #0x14 + blt .Lmemcpy_fl32 /* less than 32 bytes (12 from above) */ + stmdb sp!, {r4} /* borrow r4 */ + + /* blat 32 bytes at a time */ + /* XXX for really big copies perhaps we should use more registers */ +.Lmemcpy_floop32: + ldmia r1!, {r3, r4, r12, lr} + stmia r0!, {r3, r4, r12, lr} + ldmia r1!, {r3, r4, r12, lr} + stmia r0!, {r3, r4, r12, lr} + subs r2, r2, #0x20 + bge .Lmemcpy_floop32 + + cmn r2, #0x10 + ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ + stmgeia r0!, {r3, r4, r12, lr} + subge r2, r2, #0x10 + ldmia sp!, {r4} /* return r4 */ + +.Lmemcpy_fl32: + adds r2, r2, #0x14 + + /* blat 12 bytes at a time */ +.Lmemcpy_floop12: + ldmgeia r1!, {r3, r12, lr} + stmgeia r0!, {r3, r12, lr} + subges r2, r2, #0x0c + bge .Lmemcpy_floop12 + +.Lmemcpy_fl12: + adds r2, r2, #8 + blt .Lmemcpy_fl4 + + subs r2, r2, #4 + ldrlt r3, [r1], #4 + strlt r3, [r0], #4 + ldmgeia r1!, {r3, r12} + stmgeia r0!, {r3, r12} + subge r2, r2, #4 + +.Lmemcpy_fl4: + /* less than 4 bytes to go */ + adds r2, r2, #4 + ldmeqia sp!, {r0, pc} /* done */ + + /* copy the crud byte at a time */ + cmp r2, #2 + ldrb r3, [r1], #1 + strb r3, [r0], #1 + ldrgeb r3, [r1], #1 + strgeb r3, [r0], #1 + ldrgtb r3, [r1], #1 + strgtb r3, [r0], #1 + ldmia sp!, {r0, pc} + + /* erg - unaligned destination */ +.Lmemcpy_fdestul: + rsb r12, r12, #4 + cmp r12, #2 + + /* align destination with byte copies */ + ldrb r3, [r1], #1 + strb r3, [r0], #1 + ldrgeb r3, [r1], #1 + strgeb r3, [r0], #1 + ldrgtb r3, [r1], #1 + strgtb r3, [r0], #1 + subs r2, r2, r12 + blt .Lmemcpy_fl4 /* less the 4 bytes */ + + ands r12, r1, #3 + beq .Lmemcpy_ft8 /* we have an aligned source */ + + /* erg - unaligned source */ + /* This is where it gets nasty ... */ +.Lmemcpy_fsrcul: + bic r1, r1, #3 + ldr lr, [r1], #4 + cmp r12, #2 + bgt .Lmemcpy_fsrcul3 + beq .Lmemcpy_fsrcul2 + cmp r2, #0x0c + blt .Lmemcpy_fsrcul1loop4 + sub r2, r2, #0x0c + stmdb sp!, {r4, r5} + +.Lmemcpy_fsrcul1loop16: + mov r3, lr, lsr #8 + ldmia r1!, {r4, r5, r12, lr} + orr r3, r3, r4, lsl #24 + mov r4, r4, lsr #8 + orr r4, r4, r5, lsl #24 + mov r5, r5, lsr #8 + orr r5, r5, r12, lsl #24 + mov r12, r12, lsr #8 + orr r12, r12, lr, lsl #24 + stmia r0!, {r3-r5, r12} + subs r2, r2, #0x10 + bge .Lmemcpy_fsrcul1loop16 + ldmia sp!, {r4, r5} + adds r2, r2, #0x0c + blt .Lmemcpy_fsrcul1l4 + +.Lmemcpy_fsrcul1loop4: + mov r12, lr, lsr #8 + ldr lr, [r1], #4 + orr r12, r12, lr, lsl #24 + str r12, [r0], #4 + subs r2, r2, #4 + bge .Lmemcpy_fsrcul1loop4 + +.Lmemcpy_fsrcul1l4: + sub r1, r1, #3 + b .Lmemcpy_fl4 + +.Lmemcpy_fsrcul2: + cmp r2, #0x0c + blt .Lmemcpy_fsrcul2loop4 + sub r2, r2, #0x0c + stmdb sp!, {r4, r5} + +.Lmemcpy_fsrcul2loop16: + mov r3, lr, lsr #16 + ldmia r1!, {r4, r5, r12, lr} + orr r3, r3, r4, lsl #16 + mov r4, r4, lsr #16 + orr r4, r4, r5, lsl #16 + mov r5, r5, lsr #16 + orr r5, r5, r12, lsl #16 + mov r12, r12, lsr #16 + orr r12, r12, lr, lsl #16 + stmia r0!, {r3-r5, r12} + subs r2, r2, #0x10 + bge .Lmemcpy_fsrcul2loop16 + ldmia sp!, {r4, r5} + adds r2, r2, #0x0c + blt .Lmemcpy_fsrcul2l4 + +.Lmemcpy_fsrcul2loop4: + mov r12, lr, lsr #16 + ldr lr, [r1], #4 + orr r12, r12, lr, lsl #16 + str r12, [r0], #4 + subs r2, r2, #4 + bge .Lmemcpy_fsrcul2loop4 + +.Lmemcpy_fsrcul2l4: + sub r1, r1, #2 + b .Lmemcpy_fl4 + +.Lmemcpy_fsrcul3: + cmp r2, #0x0c + blt .Lmemcpy_fsrcul3loop4 + sub r2, r2, #0x0c + stmdb sp!, {r4, r5} + +.Lmemcpy_fsrcul3loop16: + mov r3, lr, lsr #24 + ldmia r1!, {r4, r5, r12, lr} + orr r3, r3, r4, lsl #8 + mov r4, r4, lsr #24 + orr r4, r4, r5, lsl #8 + mov r5, r5, lsr #24 + orr r5, r5, r12, lsl #8 + mov r12, r12, lsr #24 + orr r12, r12, lr, lsl #8 + stmia r0!, {r3-r5, r12} + subs r2, r2, #0x10 + bge .Lmemcpy_fsrcul3loop16 + ldmia sp!, {r4, r5} + adds r2, r2, #0x0c + blt .Lmemcpy_fsrcul3l4 + +.Lmemcpy_fsrcul3loop4: + mov r12, lr, lsr #24 + ldr lr, [r1], #4 + orr r12, r12, lr, lsl #8 + str r12, [r0], #4 + subs r2, r2, #4 + bge .Lmemcpy_fsrcul3loop4 + +.Lmemcpy_fsrcul3l4: + sub r1, r1, #1 + b .Lmemcpy_fl4 + +.Lmemcpy_backwards: + add r1, r1, r2 + add r0, r0, r2 + subs r2, r2, #4 + blt .Lmemcpy_bl4 /* less than 4 bytes */ + ands r12, r0, #3 + bne .Lmemcpy_bdestul /* oh unaligned destination addr */ + ands r12, r1, #3 + bne .Lmemcpy_bsrcul /* oh unaligned source addr */ + +.Lmemcpy_bt8: + /* We have aligned source and destination */ + subs r2, r2, #8 + blt .Lmemcpy_bl12 /* less than 12 bytes (4 from above) */ + stmdb sp!, {r4, lr} + subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */ + blt .Lmemcpy_bl32 + + /* blat 32 bytes at a time */ + /* XXX for really big copies perhaps we should use more registers */ +.Lmemcpy_bloop32: + ldmdb r1!, {r3, r4, r12, lr} + stmdb r0!, {r3, r4, r12, lr} + ldmdb r1!, {r3, r4, r12, lr} + stmdb r0!, {r3, r4, r12, lr} + subs r2, r2, #0x20 + bge .Lmemcpy_bloop32 + +.Lmemcpy_bl32: + cmn r2, #0x10 + ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ + stmgedb r0!, {r3, r4, r12, lr} + subge r2, r2, #0x10 + adds r2, r2, #0x14 + ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */ + stmgedb r0!, {r3, r12, lr} + subge r2, r2, #0x0c + ldmia sp!, {r4, lr} + +.Lmemcpy_bl12: + adds r2, r2, #8 + blt .Lmemcpy_bl4 + subs r2, r2, #4 + ldrlt r3, [r1, #-4]! + strlt r3, [r0, #-4]! + ldmgedb r1!, {r3, r12} + stmgedb r0!, {r3, r12} + subge r2, r2, #4 + +.Lmemcpy_bl4: + /* less than 4 bytes to go */ + adds r2, r2, #4 + moveq pc, lr /* done */ + + /* copy the crud byte at a time */ + cmp r2, #2 + ldrb r3, [r1, #-1]! + strb r3, [r0, #-1]! + ldrgeb r3, [r1, #-1]! + strgeb r3, [r0, #-1]! + ldrgtb r3, [r1, #-1]! + strgtb r3, [r0, #-1]! + mov pc, lr + + /* erg - unaligned destination */ +.Lmemcpy_bdestul: + cmp r12, #2 + + /* align destination with byte copies */ + ldrb r3, [r1, #-1]! + strb r3, [r0, #-1]! + ldrgeb r3, [r1, #-1]! + strgeb r3, [r0, #-1]! + ldrgtb r3, [r1, #-1]! + strgtb r3, [r0, #-1]! + subs r2, r2, r12 + blt .Lmemcpy_bl4 /* less than 4 bytes to go */ + ands r12, r1, #3 + beq .Lmemcpy_bt8 /* we have an aligned source */ + + /* erg - unaligned source */ + /* This is where it gets nasty ... */ +.Lmemcpy_bsrcul: + bic r1, r1, #3 + ldr r3, [r1, #0] + cmp r12, #2 + blt .Lmemcpy_bsrcul1 + beq .Lmemcpy_bsrcul2 + cmp r2, #0x0c + blt .Lmemcpy_bsrcul3loop4 + sub r2, r2, #0x0c + stmdb sp!, {r4, r5, lr} + +.Lmemcpy_bsrcul3loop16: + mov lr, r3, lsl #8 + ldmdb r1!, {r3-r5, r12} + orr lr, lr, r12, lsr #24 + mov r12, r12, lsl #8 + orr r12, r12, r5, lsr #24 + mov r5, r5, lsl #8 + orr r5, r5, r4, lsr #24 + mov r4, r4, lsl #8 + orr r4, r4, r3, lsr #24 + stmdb r0!, {r4, r5, r12, lr} + subs r2, r2, #0x10 + bge .Lmemcpy_bsrcul3loop16 + ldmia sp!, {r4, r5, lr} + adds r2, r2, #0x0c + blt .Lmemcpy_bsrcul3l4 + +.Lmemcpy_bsrcul3loop4: + mov r12, r3, lsl #8 + ldr r3, [r1, #-4]! + orr r12, r12, r3, lsr #24 + str r12, [r0, #-4]! + subs r2, r2, #4 + bge .Lmemcpy_bsrcul3loop4 + +.Lmemcpy_bsrcul3l4: + add r1, r1, #3 + b .Lmemcpy_bl4 + +.Lmemcpy_bsrcul2: + cmp r2, #0x0c + blt .Lmemcpy_bsrcul2loop4 + sub r2, r2, #0x0c + stmdb sp!, {r4, r5, lr} + +.Lmemcpy_bsrcul2loop16: + mov lr, r3, lsl #16 + ldmdb r1!, {r3-r5, r12} + orr lr, lr, r12, lsr #16 + mov r12, r12, lsl #16 + orr r12, r12, r5, lsr #16 + mov r5, r5, lsl #16 + orr r5, r5, r4, lsr #16 + mov r4, r4, lsl #16 + orr r4, r4, r3, lsr #16 + stmdb r0!, {r4, r5, r12, lr} + subs r2, r2, #0x10 + bge .Lmemcpy_bsrcul2loop16 + ldmia sp!, {r4, r5, lr} + adds r2, r2, #0x0c + blt .Lmemcpy_bsrcul2l4 + +.Lmemcpy_bsrcul2loop4: + mov r12, r3, lsl #16 + ldr r3, [r1, #-4]! + orr r12, r12, r3, lsr #16 + str r12, [r0, #-4]! + subs r2, r2, #4 + bge .Lmemcpy_bsrcul2loop4 + +.Lmemcpy_bsrcul2l4: + add r1, r1, #2 + b .Lmemcpy_bl4 + +.Lmemcpy_bsrcul1: + cmp r2, #0x0c + blt .Lmemcpy_bsrcul1loop4 + sub r2, r2, #0x0c + stmdb sp!, {r4, r5, lr} + +.Lmemcpy_bsrcul1loop32: + mov lr, r3, lsl #24 + ldmdb r1!, {r3-r5, r12} + orr lr, lr, r12, lsr #8 + mov r12, r12, lsl #24 + orr r12, r12, r5, lsr #8 + mov r5, r5, lsl #24 + orr r5, r5, r4, lsr #8 + mov r4, r4, lsl #24 + orr r4, r4, r3, lsr #8 + stmdb r0!, {r4, r5, r12, lr} + subs r2, r2, #0x10 + bge .Lmemcpy_bsrcul1loop32 + ldmia sp!, {r4, r5, lr} + adds r2, r2, #0x0c + blt .Lmemcpy_bsrcul1l4 + +.Lmemcpy_bsrcul1loop4: + mov r12, r3, lsl #24 + ldr r3, [r1, #-4]! + orr r12, r12, r3, lsr #8 + str r12, [r0, #-4]! + subs r2, r2, #4 + bge .Lmemcpy_bsrcul1loop4 + +.Lmemcpy_bsrcul1l4: + add r1, r1, #1 + b .Lmemcpy_bl4 diff --git a/payloads/libpayload/arch/arm/asmlib.h b/payloads/libpayload/arch/arm/asmlib.h deleted file mode 100644 index 8b3fa22..0000000 --- a/payloads/libpayload/arch/arm/asmlib.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * arch/arm/asmlib.h - * - * Adapted from Linux arch/arm/include/assembler.h - * - * Copyright (C) 1996-2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This file contains arm architecture specific defines - * for the different processors. - * - * Do not include any C declarations in this file - it is included by - * assembler source. - */ - -/* - * WARNING: This file is *only* meant for memcpy.S and friends which were copied - * from Linux and require some weird macros. It does unspeakable things like - * redefining "push", so do *not* try to turn it into a general assembly macro - * file, and keep it out of global include directories. - */ - -#ifndef __ARM_ASMLIB_H__ -#define __ARM_ASMLIB_H__ - -/* - * Endian independent macros for shifting bytes within registers. - */ -#ifndef __ARMEB__ -#define pull lsr -#define push lsl -#define get_byte_0 lsl #0 -#define get_byte_1 lsr #8 -#define get_byte_2 lsr #16 -#define get_byte_3 lsr #24 -#define put_byte_0 lsl #0 -#define put_byte_1 lsl #8 -#define put_byte_2 lsl #16 -#define put_byte_3 lsl #24 -#else -#define pull lsl -#define push lsr -#define get_byte_0 lsr #24 -#define get_byte_1 lsr #16 -#define get_byte_2 lsr #8 -#define get_byte_3 lsl #0 -#define put_byte_0 lsl #24 -#define put_byte_1 lsl #16 -#define put_byte_2 lsl #8 -#define put_byte_3 lsl #0 -#endif - -/* - * Data preload for architectures that support it - */ -#if 1 /* TODO: differentiate once libpayload supports more ARM versions */ -#define PLD(code...) code -#else -#define PLD(code...) -#endif - -/* - * This can be used to enable code to cacheline align the destination - * pointer when bulk writing to memory. Linux doesn't enable this except - * for the "Feroceon" processor, so we better just leave it out. - */ -#define CALGN(code...) - -#endif /* __ARM_ASMLIB_H */ diff --git a/payloads/libpayload/arch/arm/memcpy.S b/payloads/libpayload/arch/arm/memcpy.S index 1388d05..6d68639 100644 --- a/payloads/libpayload/arch/arm/memcpy.S +++ b/payloads/libpayload/arch/arm/memcpy.S @@ -1,237 +1,44 @@ -/* - * linux/arch/arm/lib/memcpy.S +/* $OpenBSD: memcpy.S,v 1.4 2014/11/30 19:43:56 deraadt Exp $ */ +/* $NetBSD: memcpy.S,v 1.3 2003/04/05 23:08:52 bjh21 Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson and Mark Brinicombe * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: MontaVista Software, Inc. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. */
#include <arch/asm.h> -#include "asmlib.h" - -#define LDR1W_SHIFT 0 -#define STR1W_SHIFT 0 - - .macro ldr1w ptr reg abort - W(ldr) \reg, [\ptr], #4 - .endm - - .macro ldr4w ptr reg1 reg2 reg3 reg4 abort - ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} - .endm - - .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} - .endm - - .macro ldr1b ptr reg cond=al abort - ldr\cond()b \reg, [\ptr], #1 - .endm - - .macro str1w ptr reg abort - W(str) \reg, [\ptr], #4 - .endm - - .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} - .endm - - .macro str1b ptr reg cond=al abort - str\cond()b \reg, [\ptr], #1 - .endm - - .macro enter reg1 reg2 - stmdb sp!, {r0, \reg1, \reg2} - .endm
- .macro exit reg1 reg2 - ldmfd sp!, {r0, \reg1, \reg2} - .endm - -/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ +/* + * XXX + * the _memcpy function which this calls is actually a _memmove + * variant which handles overlaps... That should be fixed. + */
ENTRY(memcpy) - - enter r4, lr - - subs r2, r2, #4 - blt 8f - ands ip, r0, #3 - PLD( pld [r1, #0] ) - bne 9f - ands ip, r1, #3 - bne 10f - -1: subs r2, r2, #(28) - stmfd sp!, {r5 - r8} - blt 5f - - CALGN( ands ip, r0, #31 ) - CALGN( rsb r3, ip, #32 ) - CALGN( sbcnes r4, r3, r2 ) @ C is always set here - CALGN( bcs 2f ) - CALGN( adr r4, 6f ) - CALGN( subs r2, r2, r3 ) @ C gets set - CALGN( add pc, r4, ip ) - - PLD( pld [r1, #0] ) -2: PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) - PLD( blt 4f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) - -3: PLD( pld [r1, #124] ) -4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f - subs r2, r2, #32 - str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) - -5: ands ip, r2, #28 - rsb ip, ip, #32 -#if LDR1W_SHIFT > 0 - lsl ip, ip, #LDR1W_SHIFT -#endif - addne pc, pc, ip @ C is always clear here - b 7f -6: - .rept (1 << LDR1W_SHIFT) - W(nop) - .endr - ldr1w r1, r3, abort=20f - ldr1w r1, r4, abort=20f - ldr1w r1, r5, abort=20f - ldr1w r1, r6, abort=20f - ldr1w r1, r7, abort=20f - ldr1w r1, r8, abort=20f - ldr1w r1, lr, abort=20f - -#if LDR1W_SHIFT < STR1W_SHIFT - lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT -#elif LDR1W_SHIFT > STR1W_SHIFT - lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT -#endif - add pc, pc, ip - nop - .rept (1 << STR1W_SHIFT) - W(nop) - .endr - str1w r0, r3, abort=20f - str1w r0, r4, abort=20f - str1w r0, r5, abort=20f - str1w r0, r6, abort=20f - str1w r0, r7, abort=20f - str1w r0, r8, abort=20f - str1w r0, lr, abort=20f - - CALGN( bcs 2b ) - -7: ldmfd sp!, {r5 - r8} - -8: movs r2, r2, lsl #31 - ldr1b r1, r3, ne, abort=21f - ldr1b r1, r4, cs, abort=21f - ldr1b r1, ip, cs, abort=21f - str1b r0, r3, ne, abort=21f - str1b r0, r4, cs, abort=21f - str1b r0, ip, cs, abort=21f - - exit r4, pc - -9: rsb ip, ip, #4 - cmp ip, #2 - ldr1b r1, r3, gt, abort=21f - ldr1b r1, r4, ge, abort=21f - ldr1b r1, lr, abort=21f - str1b r0, r3, gt, abort=21f - str1b r0, r4, ge, abort=21f - subs r2, r2, ip - str1b r0, lr, abort=21f - blt 8b - ands ip, r1, #3 - beq 1b - -10: bic r1, r1, #3 - cmp ip, #2 - ldr1w r1, lr, abort=21f - beq 17f - bgt 18f - - - .macro forward_copy_shift pull push - - subs r2, r2, #28 - blt 14f - - CALGN( ands ip, r0, #31 ) - CALGN( rsb ip, ip, #32 ) - CALGN( sbcnes r4, ip, r2 ) @ C is always set here - CALGN( subcc r2, r2, ip ) - CALGN( bcc 15f ) - -11: stmfd sp!, {r5 - r9} - - PLD( pld [r1, #0] ) - PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) - PLD( blt 13f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) - -12: PLD( pld [r1, #124] ) -13: ldr4w r1, r4, r5, r6, r7, abort=19f - mov r3, lr, pull #\pull - subs r2, r2, #32 - ldr4w r1, r8, r9, ip, lr, abort=19f - orr r3, r3, r4, push #\push - mov r4, r4, pull #\pull - orr r4, r4, r5, push #\push - mov r5, r5, pull #\pull - orr r5, r5, r6, push #\push - mov r6, r6, pull #\pull - orr r6, r6, r7, push #\push - mov r7, r7, pull #\pull - orr r7, r7, r8, push #\push - mov r8, r8, pull #\pull - orr r8, r8, r9, push #\push - mov r9, r9, pull #\pull - orr r9, r9, ip, push #\push - mov ip, ip, pull #\pull - orr ip, ip, lr, push #\push - str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) - - ldmfd sp!, {r5 - r9} - -14: ands ip, r2, #28 - beq 16f - -15: mov r3, lr, pull #\pull - ldr1w r1, lr, abort=21f - subs ip, ip, #4 - orr r3, r3, lr, push #\push - str1w r0, r3, abort=21f - bgt 15b - CALGN( cmp r2, #0 ) - CALGN( bge 11b ) - -16: sub r1, r1, #(\push / 8) - b 8b - - .endm - - - forward_copy_shift pull=8 push=24 - -17: forward_copy_shift pull=16 push=16 - -18: forward_copy_shift pull=24 push=8 -ENDPROC(memcpy) + stmfd sp!, {r0, lr} + bl _memcpy + ldmfd sp!, {r0, pc} diff --git a/payloads/libpayload/arch/arm/memmove.S b/payloads/libpayload/arch/arm/memmove.S index bd5f8f1..f071068 100644 --- a/payloads/libpayload/arch/arm/memmove.S +++ b/payloads/libpayload/arch/arm/memmove.S @@ -1,197 +1,38 @@ -/* - * linux/arch/arm/lib/memmove.S +/* $OpenBSD: memmove.S,v 1.3 2008/06/26 05:42:04 ray Exp $ */ +/* $NetBSD: memmove.S,v 1.3 2003/04/05 23:08:52 bjh21 Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson and Mark Brinicombe * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: (C) MontaVista Software Inc. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. */
#include <arch/asm.h> -#include "asmlib.h" - -/* - * Prototype: void *memmove(void *dest, const void *src, size_t n); - * - * Note: - * - * If the memory regions don't overlap, we simply branch to memcpy which is - * normally a bit faster. Otherwise the copy is done going downwards. This - * is a transposition of the code from copy_template.S but with the copy - * occurring in the opposite direction. - */
ENTRY(memmove) - - subs ip, r0, r1 - cmphi r2, ip - bls memcpy - - stmfd sp!, {r0, r4, lr} - add r1, r1, r2 - add r0, r0, r2 - subs r2, r2, #4 - blt 8f - ands ip, r0, #3 - PLD( pld [r1, #-4] ) - bne 9f - ands ip, r1, #3 - bne 10f - -1: subs r2, r2, #(28) - stmfd sp!, {r5 - r8} - blt 5f - - CALGN( ands ip, r0, #31 ) - CALGN( sbcnes r4, ip, r2 ) @ C is always set here - CALGN( bcs 2f ) - CALGN( adr r4, 6f ) - CALGN( subs r2, r2, ip ) @ C is set here - CALGN( rsb ip, ip, #32 ) - CALGN( add pc, r4, ip ) - - PLD( pld [r1, #-4] ) -2: PLD( subs r2, r2, #96 ) - PLD( pld [r1, #-32] ) - PLD( blt 4f ) - PLD( pld [r1, #-64] ) - PLD( pld [r1, #-96] ) - -3: PLD( pld [r1, #-128] ) -4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} - subs r2, r2, #32 - stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) - -5: ands ip, r2, #28 - rsb ip, ip, #32 - addne pc, pc, ip @ C is always clear here - b 7f -6: W(nop) - W(ldr) r3, [r1, #-4]! - W(ldr) r4, [r1, #-4]! - W(ldr) r5, [r1, #-4]! - W(ldr) r6, [r1, #-4]! - W(ldr) r7, [r1, #-4]! - W(ldr) r8, [r1, #-4]! - W(ldr) lr, [r1, #-4]! - - add pc, pc, ip - nop - W(nop) - W(str) r3, [r0, #-4]! - W(str) r4, [r0, #-4]! - W(str) r5, [r0, #-4]! - W(str) r6, [r0, #-4]! - W(str) r7, [r0, #-4]! - W(str) r8, [r0, #-4]! - W(str) lr, [r0, #-4]! - - CALGN( bcs 2b ) - -7: ldmfd sp!, {r5 - r8} - -8: movs r2, r2, lsl #31 - ldrneb r3, [r1, #-1]! - ldrcsb r4, [r1, #-1]! - ldrcsb ip, [r1, #-1] - strneb r3, [r0, #-1]! - strcsb r4, [r0, #-1]! - strcsb ip, [r0, #-1] - ldmfd sp!, {r0, r4, pc} - -9: cmp ip, #2 - ldrgtb r3, [r1, #-1]! - ldrgeb r4, [r1, #-1]! - ldrb lr, [r1, #-1]! - strgtb r3, [r0, #-1]! - strgeb r4, [r0, #-1]! - subs r2, r2, ip - strb lr, [r0, #-1]! - blt 8b - ands ip, r1, #3 - beq 1b - -10: bic r1, r1, #3 - cmp ip, #2 - ldr r3, [r1, #0] - beq 17f - blt 18f - - - .macro backward_copy_shift push pull - - subs r2, r2, #28 - blt 14f - - CALGN( ands ip, r0, #31 ) - CALGN( sbcnes r4, ip, r2 ) @ C is always set here - CALGN( subcc r2, r2, ip ) - CALGN( bcc 15f ) - -11: stmfd sp!, {r5 - r9} - - PLD( pld [r1, #-4] ) - PLD( subs r2, r2, #96 ) - PLD( pld [r1, #-32] ) - PLD( blt 13f ) - PLD( pld [r1, #-64] ) - PLD( pld [r1, #-96] ) - -12: PLD( pld [r1, #-128] ) -13: ldmdb r1!, {r7, r8, r9, ip} - mov lr, r3, push #\push - subs r2, r2, #32 - ldmdb r1!, {r3, r4, r5, r6} - orr lr, lr, ip, pull #\pull - mov ip, ip, push #\push - orr ip, ip, r9, pull #\pull - mov r9, r9, push #\push - orr r9, r9, r8, pull #\pull - mov r8, r8, push #\push - orr r8, r8, r7, pull #\pull - mov r7, r7, push #\push - orr r7, r7, r6, pull #\pull - mov r6, r6, push #\push - orr r6, r6, r5, pull #\pull - mov r5, r5, push #\push - orr r5, r5, r4, pull #\pull - mov r4, r4, push #\push - orr r4, r4, r3, pull #\pull - stmdb r0!, {r4 - r9, ip, lr} - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) - - ldmfd sp!, {r5 - r9} - -14: ands ip, r2, #28 - beq 16f - -15: mov lr, r3, push #\push - ldr r3, [r1, #-4]! - subs ip, ip, #4 - orr lr, lr, r3, pull #\pull - str lr, [r0, #-4]! - bgt 15b - CALGN( cmp r2, #0 ) - CALGN( bge 11b ) - -16: add r1, r1, #(\pull / 8) - b 8b - - .endm - - - backward_copy_shift push=8 pull=24 - -17: backward_copy_shift push=16 pull=16 - -18: backward_copy_shift push=24 pull=8 - -ENDPROC(memmove) + stmfd sp!, {r0, lr} + bl _memcpy + ldmfd sp!, {r0, pc} diff --git a/payloads/libpayload/arch/arm/memset.S b/payloads/libpayload/arch/arm/memset.S index 0c1102d..b0a2bc9 100644 --- a/payloads/libpayload/arch/arm/memset.S +++ b/payloads/libpayload/arch/arm/memset.S @@ -1,121 +1,127 @@ +/* $OpenBSD: memset.S,v 1.2 2004/02/01 05:40:52 drahn Exp $ */ +/* $NetBSD: memset.S,v 1.3 2003/04/05 23:08:52 bjh21 Exp $ */ + /* - * linux/arch/arm/lib/memset.S - * - * Copyright (C) 1995-2000 Russell King + * Copyright (c) 1995 Mark Brinicombe. + * All rights reserved. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Mark Brinicombe. + * 4. The name of the company nor the name of the author may be used to + * endorse or promote products derived from this software without specific + * prior written permission. * - * ASM optimised string functions + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */
#include <arch/asm.h> -#include "asmlib.h"
-ENTRY(memset) - ands r3, r0, #3 @ 1 unaligned? - mov ip, r0 @ preserve r0 as return value - bne 6f @ 1 /* - * we know that the pointer in ip is aligned to a word boundary. + * Sets a block of memory to the specified value + * + * On entry: + * r0 - dest address + * r1 - byte to write + * r2 - number of bytes to write + * + * On exit: + * r0 - dest address */ -1: orr r1, r1, r1, lsl #8 - orr r1, r1, r1, lsl #16 - mov r3, r1 - cmp r2, #16 - blt 4f
-#if ! CALGN(1)+0 +ENTRY(memset) + stmfd sp!, {r0} /* Remember address for return value */ + and r1, r1, #0x000000ff /* We write bytes */
-/* - * We need 2 extra registers for this loop - use r8 and the LR - */ - stmfd sp!, {r8, lr} - mov r8, r1 - mov lr, r1 - -2: subs r2, r2, #64 - stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time. - stmgeia ip!, {r1, r3, r8, lr} - stmgeia ip!, {r1, r3, r8, lr} - stmgeia ip!, {r1, r3, r8, lr} - bgt 2b - ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go. -/* - * No need to correct the count; we're only testing bits from now on - */ - tst r2, #32 - stmneia ip!, {r1, r3, r8, lr} - stmneia ip!, {r1, r3, r8, lr} - tst r2, #16 - stmneia ip!, {r1, r3, r8, lr} - ldmfd sp!, {r8, lr} + cmp r2, #0x00000004 /* Do we have less than 4 bytes */ + blt .Lmemset_lessthanfour
-#else + /* Ok first we will word align the address */
-/* - * This version aligns the destination pointer in order to write - * whole cache lines at once. - */ + ands r3, r0, #0x00000003 /* Get the bottom two bits */ + beq .Lmemset_addraligned /* The address is word aligned */
- stmfd sp!, {r4-r8, lr} - mov r4, r1 - mov r5, r1 - mov r6, r1 - mov r7, r1 - mov r8, r1 - mov lr, r1 - - cmp r2, #96 - tstgt ip, #31 - ble 3f - - and r8, ip, #31 - rsb r8, r8, #32 - sub r2, r2, r8 - movs r8, r8, lsl #(32 - 4) - stmcsia ip!, {r4, r5, r6, r7} - stmmiia ip!, {r4, r5} - tst r8, #(1 << 30) - mov r8, r1 - strne r1, [ip], #4 - -3: subs r2, r2, #64 - stmgeia ip!, {r1, r3-r8, lr} - stmgeia ip!, {r1, r3-r8, lr} - bgt 3b - ldmeqfd sp!, {r4-r8, pc} - - tst r2, #32 - stmneia ip!, {r1, r3-r8, lr} - tst r2, #16 - stmneia ip!, {r4-r7} - ldmfd sp!, {r4-r8, lr} - -#endif - -4: tst r2, #8 - stmneia ip!, {r1, r3} - tst r2, #4 - strne r1, [ip], #4 -/* - * When we get here, we've got less than 4 bytes to zero. We - * may have an unaligned pointer as well. - */ -5: tst r2, #2 - strneb r1, [ip], #1 - strneb r1, [ip], #1 - tst r2, #1 - strneb r1, [ip], #1 - mov pc, lr - -6: subs r2, r2, #4 @ 1 do we have enough - blt 5b @ 1 bytes to align with? - cmp r3, #2 @ 1 - strltb r1, [ip], #1 @ 1 - strleb r1, [ip], #1 @ 1 - strb r1, [ip], #1 @ 1 - add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) - b 1b -ENDPROC(memset) + rsb r3, r3, #0x00000004 + sub r2, r2, r3 + cmp r3, #0x00000002 + strb r1, [r0], #0x0001 /* Set 1 byte */ + strgeb r1, [r0], #0x0001 /* Set another byte */ + strgtb r1, [r0], #0x0001 /* and a third */ + + cmp r2, #0x00000004 + blt .Lmemset_lessthanfour + + /* Now we must be word aligned */ + +.Lmemset_addraligned: + + orr r3, r1, r1, lsl #8 /* Repeat the byte into a word */ + orr r3, r3, r3, lsl #16 + + /* We know we have at least 4 bytes ... */ + + cmp r2, #0x00000020 /* If less than 32 then use words */ + blt .Lmemset_lessthan32 + + /* We have at least 32 so lets use quad words */ + + stmfd sp!, {r4-r6} /* Store registers */ + mov r4, r3 /* Duplicate data */ + mov r5, r3 + mov r6, r3 + +.Lmemset_loop16: + stmia r0!, {r3-r6} /* Store 16 bytes */ + sub r2, r2, #0x00000010 /* Adjust count */ + cmp r2, #0x00000010 /* Still got at least 16 bytes ? */ + bgt .Lmemset_loop16 + + ldmfd sp!, {r4-r6} /* Restore registers */ + + /* Do we need to set some words as well ? */ + + cmp r2, #0x00000004 + blt .Lmemset_lessthanfour + + /* Have either less than 16 or less than 32 depending on route taken */ + +.Lmemset_lessthan32: + + /* We have at least 4 bytes so copy as words */ + +.Lmemset_loop4: + str r3, [r0], #0x0004 + sub r2, r2, #0x0004 + cmp r2, #0x00000004 + bge .Lmemset_loop4 + +.Lmemset_lessthanfour: + cmp r2, #0x00000000 + ldmeqfd sp!, {r0} + moveq pc, lr /* Zero length so exit */ + + cmp r2, #0x00000002 + strb r1, [r0], #0x0001 /* Set 1 byte */ + strgeb r1, [r0], #0x0001 /* Set another byte */ + strgtb r1, [r0], #0x0001 /* and a third */ + + ldmfd sp!, {r0} + mov pc, lr /* Exit */ diff --git a/payloads/libpayload/arch/x86/Makefile.inc b/payloads/libpayload/arch/x86/Makefile.inc index 6517bf1..d32bcca 100644 --- a/payloads/libpayload/arch/x86/Makefile.inc +++ b/payloads/libpayload/arch/x86/Makefile.inc @@ -36,8 +36,9 @@ libc-y += exec.S virtual.c libc-y += selfboot.c libc-y += exception_asm.S exception.c
-# Will fall back to default_memXXX() in libc/memory.c if GPL not allowed. -libc-$(CONFIG_LP_GPL) += string.c +libc-y += memset.S +# also contains memcpy +libc-y += memmove.S
libgdb-y += gdb.c
diff --git a/payloads/libpayload/arch/x86/memmove.S b/payloads/libpayload/arch/x86/memmove.S new file mode 100644 index 0000000..3e9e57f --- /dev/null +++ b/payloads/libpayload/arch/x86/memmove.S @@ -0,0 +1,106 @@ +/* $OpenBSD: memmove.S,v 1.5 2014/12/02 03:07:13 tedu Exp $ */ + +/*- + * Copyright (c) 1993, 1994, 1995 Charles M. Hannum. All rights reserved. + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Emulate bcopy() by swapping the first two arguments, and jumping + * into memmove(), which handles overlapping regions. + */ +//ENTRY(bcopy) + pushl %esi + pushl %edi + movl 12(%esp),%esi + movl 16(%esp),%edi + jmp docopy + +/* + * memmove(caddr_t dst, caddr_t src, size_t len); + * Copy len bytes, coping with overlapping space. + */ + .globl memmove +memmove: + pushl %esi + pushl %edi + movl 12(%esp),%edi + movl 16(%esp),%esi +docopy: + movl 20(%esp),%ecx + movl %edi,%eax + subl %esi,%eax + cmpl %ecx,%eax # overlapping? + jb 1f + jmp docopyf # nope +/* + * memcpy() doesn't worry about overlap and always copies forward + */ + .globl memcpy +memcpy: + pushl %esi + pushl %edi + movl 12(%esp),%edi + movl 16(%esp),%esi + movl 20(%esp),%ecx +docopyf: + movl %edi,%eax # setup return value for memcpy/memmove + shrl $2,%ecx # copy by 32-bit words + rep + movsl + movl 20(%esp),%ecx + andl $3,%ecx # any bytes left? + rep + movsb + popl %edi + popl %esi + ret + +1: movl %edi,%eax # setup return value for memmove + addl %ecx,%edi # copy backward + addl %ecx,%esi + std + andl $3,%ecx # any fractional bytes? + decl %edi + decl %esi + rep + movsb + movl 20(%esp),%ecx # copy remainder by 32-bit words + shrl $2,%ecx + subl $3,%esi + subl $3,%edi + rep + movsl + popl %edi + popl %esi + cld + ret + diff --git a/payloads/libpayload/arch/x86/memset.S b/payloads/libpayload/arch/x86/memset.S new file mode 100644 index 0000000..1a8cbd9 --- /dev/null +++ b/payloads/libpayload/arch/x86/memset.S @@ -0,0 +1,54 @@ +/* $OpenBSD: memset.S,v 1.4 2007/05/25 20:32:29 krw Exp $ */ +/* + * Written by J.T. Conklin jtc@netbsd.org. + * Public domain. + */ + + .globl memset +memset: + pushl %edi + pushl %ebx + movl 12(%esp),%edi + movzbl 16(%esp),%eax /* unsigned char, zero extend */ + movl 20(%esp),%ecx + pushl %edi /* push address of buffer */ + + cld /* set fill direction forward */ + + /* + * if the string is too short, it's really not worth the overhead + * of aligning to word boundaries, etc. So we jump to a plain + * unaligned set. + */ + cmpl $0x0f,%ecx + jle L1 + + movb %al,%ah /* copy char to all bytes in word */ + movl %eax,%edx + sall $16,%eax + orl %edx,%eax + + movl %edi,%edx /* compute misalignment */ + negl %edx + andl $3,%edx + movl %ecx,%ebx + subl %edx,%ebx + + movl %edx,%ecx /* set until word aligned */ + rep + stosb + + movl %ebx,%ecx + shrl $2,%ecx /* set by words */ + rep + stosl + + movl %ebx,%ecx /* set remainder by bytes */ + andl $3,%ecx +L1: rep + stosb + + popl %eax /* pop address of buffer */ + popl %ebx + popl %edi + ret diff --git a/payloads/libpayload/arch/x86/string.c b/payloads/libpayload/arch/x86/string.c deleted file mode 100644 index 60de812..0000000 --- a/payloads/libpayload/arch/x86/string.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (C) 1991,1992,1993,1997,1998,2003, 2005 Free Software Foundation, Inc. - * This file is part of the GNU C Library. - * Copyright (c) 2011 The Chromium OS Authors. - * - * See file CREDITS for list of people who contributed to this - * project. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc. - */ - -/* From glibc-2.14, sysdeps/i386/memset.c */ - -#include <stdint.h> - -#include "string.h" - -typedef uint32_t op_t; - -void *memset(void *dstpp, int c, size_t len) -{ - int d0; - unsigned long int dstp = (unsigned long int) dstpp; - - /* This explicit register allocation improves code very much indeed. */ - register op_t x asm("ax"); - - x = (unsigned char) c; - - /* Clear the direction flag, so filling will move forward. */ - asm volatile("cld"); - - /* This threshold value is optimal. */ - if (len >= 12) { - /* Fill X with four copies of the char we want to fill with. */ - x |= (x << 8); - x |= (x << 16); - - /* Adjust LEN for the bytes handled in the first loop. */ - len -= (-dstp) % sizeof(op_t); - - /* - * There are at least some bytes to set. No need to test for - * LEN == 0 in this alignment loop. - */ - - /* Fill bytes until DSTP is aligned on a longword boundary. */ - asm volatile( - "rep\n" - "stosb" /* %0, %2, %3 */ : - "=D" (dstp), "=c" (d0) : - "0" (dstp), "1" ((-dstp) % sizeof(op_t)), "a" (x) : - "memory"); - - /* Fill longwords. */ - asm volatile( - "rep\n" - "stosl" /* %0, %2, %3 */ : - "=D" (dstp), "=c" (d0) : - "0" (dstp), "1" (len / sizeof(op_t)), "a" (x) : - "memory"); - len %= sizeof(op_t); - } - - /* Write the last few bytes. */ - asm volatile( - "rep\n" - "stosb" /* %0, %2, %3 */ : - "=D" (dstp), "=c" (d0) : - "0" (dstp), "1" (len), "a" (x) : - "memory"); - - return dstpp; -} - -void *memcpy(void *dest, const void *src, size_t n) -{ - unsigned long d0, d1, d2; - - asm volatile( - "rep ; movsl\n\t" - "movl %4,%%ecx\n\t" - "rep ; movsb\n\t" - : "=&c" (d0), "=&D" (d1), "=&S" (d2) - : "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src) - : "memory" - ); - - return dest; -}