[coreboot-gerrit] Patch set updated for coreboot: b6e43c0 libpayload: get rid of GPL'd code

Patrick Georgi (pgeorgi@google.com) gerrit at coreboot.org
Thu Jun 4 10:42:13 CEST 2015


Patrick Georgi (pgeorgi at google.com) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/10413

-gerrit

commit b6e43c00ef45b93756bae42045afb21f3e6640a7
Author: Patrick Georgi <patrick at georgi-clan.de>
Date:   Thu Jun 4 00:44:59 2015 +0200

    libpayload: get rid of GPL'd code
    
    Replace optimized memory/string handling that came from Linux under GPLv2 with
    implementations from OpenBSD to simplify libpayload's licensing situation.
    
    Since these were the only users of the GPL flag in the configuration,
    remove it.
    
    (Still untested; need to check whether the calling convention is the same.)
    
    Change-Id: I7c5e070e842bd4a32f1b0821d7ed2d1932ecd6ca
    Signed-off-by: Patrick Georgi <patrick at georgi-clan.de>
---
 payloads/libpayload/Config.in             |   9 -
 payloads/libpayload/LICENSES              |  13 +-
 payloads/libpayload/arch/arm/Makefile.inc |   3 +-
 payloads/libpayload/arch/arm/_memcpy.S    | 462 ++++++++++++++++++++++++++++++
 payloads/libpayload/arch/arm/asmlib.h     |  72 -----
 payloads/libpayload/arch/arm/memcpy.S     | 265 +++--------------
 payloads/libpayload/arch/arm/memmove.S    | 221 ++------------
 payloads/libpayload/arch/arm/memset.S     | 220 +++++++-------
 payloads/libpayload/arch/x86/Makefile.inc |   5 +-
 payloads/libpayload/arch/x86/memmove.S    | 106 +++++++
 payloads/libpayload/arch/x86/memset.S     |  54 ++++
 payloads/libpayload/arch/x86/string.c     | 102 -------
 12 files changed, 810 insertions(+), 722 deletions(-)

diff --git a/payloads/libpayload/Config.in b/payloads/libpayload/Config.in
index 92c89ab..785a7c1 100644
--- a/payloads/libpayload/Config.in
+++ b/payloads/libpayload/Config.in
@@ -32,15 +32,6 @@ mainmenu "Libpayload Configuration"
 
 menu "Generic Options"
 
-config GPL
-	bool "GPLv2-licensed Options"
-	default n
-	help
-	  Prompt for options that will build code licensed under the GNU General
-	  Public License (version 2). This will subject the whole payload to the
-	  terms of this license (including its provision to release all sources,
-	  please see the LICENSE_GPL file for details).
-
 config EXPERIMENTAL
 	bool "Experimental Options"
 	default n
diff --git a/payloads/libpayload/LICENSES b/payloads/libpayload/LICENSES
index f340ead..621c428 100644
--- a/payloads/libpayload/LICENSES
+++ b/payloads/libpayload/LICENSES
@@ -122,12 +122,7 @@ holders, and the exact license terms that apply.
   Original files: src/lib/libc/hash/sha1.c
   Current version we use: CVS revision 1.20 2005/08/08
 
-* arch/arm/mem*.S: GPLv2
-  Source: Linux, http://www.kernel.org
-  Original files: arch/arm/lib/mem*.S
-  Current version we use: 3.9 (418df63adac56841ef6b0f1fcf435bc64d4ed177)
-
-* arch/x86/string.c: LGPLv2.1, modified to GPLv2 under the terms of section 3
-  Source: GNU C Library (glibc), http://www.gnu.org/software/libc/libc.html
-  Original files: sysdeps/i386/memset.c
-  Current version we use: 2.14
+* arch/{arm,x86}/*mem*.S: 2-clause BSD license
+  Source: OpenBSD
+          http://www.openbsd.org/cgi-bin/cvsweb/src/lib/libc/arch/*
+  Current version we use: stated in the $OpenBSD$ header of each file
diff --git a/payloads/libpayload/arch/arm/Makefile.inc b/payloads/libpayload/arch/arm/Makefile.inc
index 6c8667a..e82c49f 100644
--- a/payloads/libpayload/arch/arm/Makefile.inc
+++ b/payloads/libpayload/arch/arm/Makefile.inc
@@ -39,8 +39,7 @@ libc-y += exception_asm.S exception.c
 libc-y += cache.c cpu.S
 libc-y += selfboot.c
 
-# Will fall back to default_memXXX() in libc/memory.c if GPL not allowed.
-libc-$(CONFIG_LP_GPL) += memcpy.S memset.S memmove.S
+libc-y += memcpy.S memset.S memmove.S _memcpy.S
 
 libgdb-y += gdb.c
 
diff --git a/payloads/libpayload/arch/arm/_memcpy.S b/payloads/libpayload/arch/arm/_memcpy.S
new file mode 100644
index 0000000..3e6acc1
--- /dev/null
+++ b/payloads/libpayload/arch/arm/_memcpy.S
@@ -0,0 +1,462 @@
+/*	$OpenBSD: _memcpy.S,v 1.3 2008/06/26 05:42:04 ray Exp $	*/
+/*	$NetBSD: _memcpy.S,v 1.4 2003/04/05 23:08:52 bjh21 Exp $	*/
+
+/*-
+ * Copyright (c) 1997 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Neil A. Carson and Mark Brinicombe
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arch/asm.h>
+
+/*
+ * This is one fun bit of code ...
+ * Some easy listening music is suggested while trying to understand this
+ * code e.g. Iron Maiden
+ *
+ * For anyone attempting to understand it :
+ *
+ * The core code is implemented here with simple stubs for memcpy()
+ * memmove() and bcopy().
+ *
+ * All local labels are prefixed with Lmemcpy_
+ * Following the prefix a label starting f is used in the forward copy code
+ * while a label using b is used in the backwards copy code
+ * The source and destination addresses determine whether a forward or
+ * backward copy is performed.
+ * Separate bits of code are used to deal with the following situations
+ * for both the forward and backwards copy.
+ * unaligned source address
+ * unaligned destination address
+ * Separate copy routines are used to produce an optimised result for each
+ * of these cases.
+ * The copy code will use LDM/STM instructions to copy up to 32 bytes at
+ * a time where possible.
+ *
+ * Note: r12 (aka ip) can be trashed during the function along with
+ * r0-r3 although r0-r2 have defined uses i.e. src, dest, len through out.
+ * Additional registers are preserved prior to use i.e. r4, r5 & lr
+ *
+ * Apologies for the state of the comments ;-)
+ */
+
+ENTRY(_memcpy)
+	/* Determine copy direction */
+	cmp	r1, r0
+	bcc	.Lmemcpy_backwards
+
+	moveq	r0, #0			/* Quick abort for len=0 */
+	moveq	pc, lr
+
+	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */
+	subs	r2, r2, #4
+	blt	.Lmemcpy_fl4		/* less than 4 bytes */
+	ands	r12, r0, #3
+	bne	.Lmemcpy_fdestul	/* oh unaligned destination addr */
+	ands	r12, r1, #3
+	bne	.Lmemcpy_fsrcul		/* oh unaligned source addr */
+
+.Lmemcpy_ft8:
+	/* We have aligned source and destination */
+	subs	r2, r2, #8
+	blt	.Lmemcpy_fl12		/* less than 12 bytes (4 from above) */
+	subs	r2, r2, #0x14
+	blt	.Lmemcpy_fl32		/* less than 32 bytes (12 from above) */
+	stmdb	sp!, {r4}		/* borrow r4 */
+
+	/* blat 32 bytes at a time */
+	/* XXX for really big copies perhaps we should use more registers */
+.Lmemcpy_floop32:
+	ldmia	r1!, {r3, r4, r12, lr}
+	stmia	r0!, {r3, r4, r12, lr}
+	ldmia	r1!, {r3, r4, r12, lr}
+	stmia	r0!, {r3, r4, r12, lr}
+	subs	r2, r2, #0x20
+	bge	.Lmemcpy_floop32
+
+	cmn	r2, #0x10
+	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
+	stmgeia	r0!, {r3, r4, r12, lr}
+	subge	r2, r2, #0x10
+	ldmia	sp!, {r4}		/* return r4 */
+
+.Lmemcpy_fl32:
+	adds	r2, r2, #0x14
+
+	/* blat 12 bytes at a time */
+.Lmemcpy_floop12:
+	ldmgeia	r1!, {r3, r12, lr}
+	stmgeia	r0!, {r3, r12, lr}
+	subges	r2, r2, #0x0c
+	bge	.Lmemcpy_floop12
+
+.Lmemcpy_fl12:
+	adds	r2, r2, #8
+	blt	.Lmemcpy_fl4
+
+	subs	r2, r2, #4
+	ldrlt	r3, [r1], #4
+	strlt	r3, [r0], #4
+	ldmgeia	r1!, {r3, r12}
+	stmgeia	r0!, {r3, r12}
+	subge	r2, r2, #4
+
+.Lmemcpy_fl4:
+	/* less than 4 bytes to go */
+	adds	r2, r2, #4
+	ldmeqia	sp!, {r0, pc}		/* done */
+
+	/* copy the crud byte at a time */
+	cmp	r2, #2
+	ldrb	r3, [r1], #1
+	strb	r3, [r0], #1
+	ldrgeb	r3, [r1], #1
+	strgeb	r3, [r0], #1
+	ldrgtb	r3, [r1], #1
+	strgtb	r3, [r0], #1
+	ldmia	sp!, {r0, pc}
+
+	/* erg - unaligned destination */
+.Lmemcpy_fdestul:
+	rsb	r12, r12, #4
+	cmp	r12, #2
+
+	/* align destination with byte copies */
+	ldrb	r3, [r1], #1
+	strb	r3, [r0], #1
+	ldrgeb	r3, [r1], #1
+	strgeb	r3, [r0], #1
+	ldrgtb	r3, [r1], #1
+	strgtb	r3, [r0], #1
+	subs	r2, r2, r12
+	blt	.Lmemcpy_fl4		/* less the 4 bytes */
+
+	ands	r12, r1, #3
+	beq	.Lmemcpy_ft8		/* we have an aligned source */
+
+	/* erg - unaligned source */
+	/* This is where it gets nasty ... */
+.Lmemcpy_fsrcul:
+	bic	r1, r1, #3
+	ldr	lr, [r1], #4
+	cmp	r12, #2
+	bgt	.Lmemcpy_fsrcul3
+	beq	.Lmemcpy_fsrcul2
+	cmp	r2, #0x0c
+	blt	.Lmemcpy_fsrcul1loop4
+	sub	r2, r2, #0x0c
+	stmdb	sp!, {r4, r5}
+
+.Lmemcpy_fsrcul1loop16:
+	mov	r3, lr, lsr #8
+	ldmia	r1!, {r4, r5, r12, lr}
+	orr	r3, r3, r4, lsl #24
+	mov	r4, r4, lsr #8
+	orr	r4, r4, r5, lsl #24
+	mov	r5, r5, lsr #8
+	orr	r5, r5, r12, lsl #24
+	mov	r12, r12, lsr #8
+	orr	r12, r12, lr, lsl #24
+	stmia	r0!, {r3-r5, r12}
+	subs	r2, r2, #0x10
+	bge	.Lmemcpy_fsrcul1loop16
+	ldmia	sp!, {r4, r5}
+	adds	r2, r2, #0x0c
+	blt	.Lmemcpy_fsrcul1l4
+
+.Lmemcpy_fsrcul1loop4:
+	mov	r12, lr, lsr #8
+	ldr	lr, [r1], #4
+	orr	r12, r12, lr, lsl #24
+	str	r12, [r0], #4
+	subs	r2, r2, #4
+	bge	.Lmemcpy_fsrcul1loop4
+
+.Lmemcpy_fsrcul1l4:
+	sub	r1, r1, #3
+	b	.Lmemcpy_fl4
+
+.Lmemcpy_fsrcul2:
+	cmp	r2, #0x0c
+	blt	.Lmemcpy_fsrcul2loop4
+	sub	r2, r2, #0x0c
+	stmdb	sp!, {r4, r5}
+
+.Lmemcpy_fsrcul2loop16:
+	mov	r3, lr, lsr #16
+	ldmia	r1!, {r4, r5, r12, lr}
+	orr	r3, r3, r4, lsl #16
+	mov	r4, r4, lsr #16
+	orr	r4, r4, r5, lsl #16
+	mov	r5, r5, lsr #16
+	orr	r5, r5, r12, lsl #16
+	mov	r12, r12, lsr #16
+	orr	r12, r12, lr, lsl #16
+	stmia	r0!, {r3-r5, r12}
+	subs	r2, r2, #0x10
+	bge	.Lmemcpy_fsrcul2loop16
+	ldmia	sp!, {r4, r5}
+	adds	r2, r2, #0x0c
+	blt	.Lmemcpy_fsrcul2l4
+
+.Lmemcpy_fsrcul2loop4:
+	mov	r12, lr, lsr #16
+	ldr	lr, [r1], #4
+	orr	r12, r12, lr, lsl #16
+	str	r12, [r0], #4
+	subs	r2, r2, #4
+	bge	.Lmemcpy_fsrcul2loop4
+
+.Lmemcpy_fsrcul2l4:
+	sub	r1, r1, #2
+	b	.Lmemcpy_fl4
+
+.Lmemcpy_fsrcul3:
+	cmp	r2, #0x0c
+	blt	.Lmemcpy_fsrcul3loop4
+	sub	r2, r2, #0x0c
+	stmdb	sp!, {r4, r5}
+
+.Lmemcpy_fsrcul3loop16:
+	mov	r3, lr, lsr #24
+	ldmia	r1!, {r4, r5, r12, lr}
+	orr	r3, r3, r4, lsl #8
+	mov	r4, r4, lsr #24
+	orr	r4, r4, r5, lsl #8
+	mov	r5, r5, lsr #24
+	orr	r5, r5, r12, lsl #8
+	mov	r12, r12, lsr #24
+	orr	r12, r12, lr, lsl #8
+	stmia	r0!, {r3-r5, r12}
+	subs	r2, r2, #0x10
+	bge	.Lmemcpy_fsrcul3loop16
+	ldmia	sp!, {r4, r5}
+	adds	r2, r2, #0x0c
+	blt	.Lmemcpy_fsrcul3l4
+
+.Lmemcpy_fsrcul3loop4:
+	mov	r12, lr, lsr #24
+	ldr	lr, [r1], #4
+	orr	r12, r12, lr, lsl #8
+	str	r12, [r0], #4
+	subs	r2, r2, #4
+	bge	.Lmemcpy_fsrcul3loop4
+
+.Lmemcpy_fsrcul3l4:
+	sub	r1, r1, #1
+	b	.Lmemcpy_fl4
+
+.Lmemcpy_backwards:
+	add	r1, r1, r2
+	add	r0, r0, r2
+	subs	r2, r2, #4
+	blt	.Lmemcpy_bl4		/* less than 4 bytes */
+	ands	r12, r0, #3
+	bne	.Lmemcpy_bdestul	/* oh unaligned destination addr */
+	ands	r12, r1, #3
+	bne	.Lmemcpy_bsrcul		/* oh unaligned source addr */
+
+.Lmemcpy_bt8:
+	/* We have aligned source and destination */
+	subs	r2, r2, #8
+	blt	.Lmemcpy_bl12		/* less than 12 bytes (4 from above) */
+	stmdb	sp!, {r4, lr}
+	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
+	blt	.Lmemcpy_bl32
+
+	/* blat 32 bytes at a time */
+	/* XXX for really big copies perhaps we should use more registers */
+.Lmemcpy_bloop32:
+	ldmdb	r1!, {r3, r4, r12, lr}
+	stmdb	r0!, {r3, r4, r12, lr}
+	ldmdb	r1!, {r3, r4, r12, lr}
+	stmdb	r0!, {r3, r4, r12, lr}
+	subs	r2, r2, #0x20
+	bge	.Lmemcpy_bloop32
+
+.Lmemcpy_bl32:
+	cmn	r2, #0x10
+	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
+	stmgedb	r0!, {r3, r4, r12, lr}
+	subge	r2, r2, #0x10
+	adds	r2, r2, #0x14
+	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
+	stmgedb	r0!, {r3, r12, lr}
+	subge	r2, r2, #0x0c
+	ldmia	sp!, {r4, lr}
+
+.Lmemcpy_bl12:
+	adds	r2, r2, #8
+	blt	.Lmemcpy_bl4
+	subs	r2, r2, #4
+	ldrlt	r3, [r1, #-4]!
+	strlt	r3, [r0, #-4]!
+	ldmgedb	r1!, {r3, r12}
+	stmgedb	r0!, {r3, r12}
+	subge	r2, r2, #4
+
+.Lmemcpy_bl4:
+	/* less than 4 bytes to go */
+	adds	r2, r2, #4
+	moveq	pc, lr			/* done */
+
+	/* copy the crud byte at a time */
+	cmp	r2, #2
+	ldrb	r3, [r1, #-1]!
+	strb	r3, [r0, #-1]!
+	ldrgeb	r3, [r1, #-1]!
+	strgeb	r3, [r0, #-1]!
+	ldrgtb	r3, [r1, #-1]!
+	strgtb	r3, [r0, #-1]!
+	mov	pc, lr
+
+	/* erg - unaligned destination */
+.Lmemcpy_bdestul:
+	cmp	r12, #2
+
+	/* align destination with byte copies */
+	ldrb	r3, [r1, #-1]!
+	strb	r3, [r0, #-1]!
+	ldrgeb	r3, [r1, #-1]!
+	strgeb	r3, [r0, #-1]!
+	ldrgtb	r3, [r1, #-1]!
+	strgtb	r3, [r0, #-1]!
+	subs	r2, r2, r12
+	blt	.Lmemcpy_bl4		/* less than 4 bytes to go */
+	ands	r12, r1, #3
+	beq	.Lmemcpy_bt8		/* we have an aligned source */
+
+	/* erg - unaligned source */
+	/* This is where it gets nasty ... */
+.Lmemcpy_bsrcul:
+	bic	r1, r1, #3
+	ldr	r3, [r1, #0]
+	cmp	r12, #2
+	blt	.Lmemcpy_bsrcul1
+	beq	.Lmemcpy_bsrcul2
+	cmp	r2, #0x0c
+	blt	.Lmemcpy_bsrcul3loop4
+	sub	r2, r2, #0x0c
+	stmdb	sp!, {r4, r5, lr}
+
+.Lmemcpy_bsrcul3loop16:
+	mov	lr, r3, lsl #8
+	ldmdb	r1!, {r3-r5, r12}
+	orr	lr, lr, r12, lsr #24
+	mov	r12, r12, lsl #8
+	orr	r12, r12, r5, lsr #24
+	mov	r5, r5, lsl #8
+	orr	r5, r5, r4, lsr #24
+	mov	r4, r4, lsl #8
+	orr	r4, r4, r3, lsr #24
+	stmdb	r0!, {r4, r5, r12, lr}
+	subs	r2, r2, #0x10
+	bge	.Lmemcpy_bsrcul3loop16
+	ldmia	sp!, {r4, r5, lr}
+	adds	r2, r2, #0x0c
+	blt	.Lmemcpy_bsrcul3l4
+
+.Lmemcpy_bsrcul3loop4:
+	mov	r12, r3, lsl #8
+	ldr	r3, [r1, #-4]!
+	orr	r12, r12, r3, lsr #24
+	str	r12, [r0, #-4]!
+	subs	r2, r2, #4
+	bge	.Lmemcpy_bsrcul3loop4
+
+.Lmemcpy_bsrcul3l4:
+	add	r1, r1, #3
+	b	.Lmemcpy_bl4
+
+.Lmemcpy_bsrcul2:
+	cmp	r2, #0x0c
+	blt	.Lmemcpy_bsrcul2loop4
+	sub	r2, r2, #0x0c
+	stmdb	sp!, {r4, r5, lr}
+
+.Lmemcpy_bsrcul2loop16:
+	mov	lr, r3, lsl #16
+	ldmdb	r1!, {r3-r5, r12}
+	orr	lr, lr, r12, lsr #16
+	mov	r12, r12, lsl #16
+	orr	r12, r12, r5, lsr #16
+	mov	r5, r5, lsl #16
+	orr	r5, r5, r4, lsr #16
+	mov	r4, r4, lsl #16
+	orr	r4, r4, r3, lsr #16
+	stmdb	r0!, {r4, r5, r12, lr}
+	subs	r2, r2, #0x10
+	bge	.Lmemcpy_bsrcul2loop16
+	ldmia	sp!, {r4, r5, lr}
+	adds	r2, r2, #0x0c
+	blt	.Lmemcpy_bsrcul2l4
+
+.Lmemcpy_bsrcul2loop4:
+	mov	r12, r3, lsl #16
+	ldr	r3, [r1, #-4]!
+	orr	r12, r12, r3, lsr #16
+	str	r12, [r0, #-4]!
+	subs	r2, r2, #4
+	bge	.Lmemcpy_bsrcul2loop4
+
+.Lmemcpy_bsrcul2l4:
+	add	r1, r1, #2
+	b	.Lmemcpy_bl4
+
+.Lmemcpy_bsrcul1:
+	cmp	r2, #0x0c
+	blt	.Lmemcpy_bsrcul1loop4
+	sub	r2, r2, #0x0c
+	stmdb	sp!, {r4, r5, lr}
+
+.Lmemcpy_bsrcul1loop32:
+	mov	lr, r3, lsl #24
+	ldmdb	r1!, {r3-r5, r12}
+	orr	lr, lr, r12, lsr #8
+	mov	r12, r12, lsl #24
+	orr	r12, r12, r5, lsr #8
+	mov	r5, r5, lsl #24
+	orr	r5, r5, r4, lsr #8
+	mov	r4, r4, lsl #24
+	orr	r4, r4, r3, lsr #8
+	stmdb	r0!, {r4, r5, r12, lr}
+	subs	r2, r2, #0x10
+	bge	.Lmemcpy_bsrcul1loop32
+	ldmia	sp!, {r4, r5, lr}
+	adds	r2, r2, #0x0c
+	blt	.Lmemcpy_bsrcul1l4
+
+.Lmemcpy_bsrcul1loop4:
+	mov	r12, r3, lsl #24
+	ldr	r3, [r1, #-4]!
+	orr	r12, r12, r3, lsr #8
+	str	r12, [r0, #-4]!
+	subs	r2, r2, #4
+	bge	.Lmemcpy_bsrcul1loop4
+
+.Lmemcpy_bsrcul1l4:
+	add	r1, r1, #1
+	b	.Lmemcpy_bl4
diff --git a/payloads/libpayload/arch/arm/asmlib.h b/payloads/libpayload/arch/arm/asmlib.h
deleted file mode 100644
index 8b3fa22..0000000
--- a/payloads/libpayload/arch/arm/asmlib.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- *  arch/arm/asmlib.h
- *
- *  Adapted from Linux arch/arm/include/assembler.h
- *
- *  Copyright (C) 1996-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  This file contains arm architecture specific defines
- *  for the different processors.
- *
- *  Do not include any C declarations in this file - it is included by
- *  assembler source.
- */
-
-/*
- * WARNING: This file is *only* meant for memcpy.S and friends which were copied
- * from Linux and require some weird macros. It does unspeakable things like
- * redefining "push", so do *not* try to turn it into a general assembly macro
- * file, and keep it out of global include directories.
- */
-
-#ifndef __ARM_ASMLIB_H__
-#define __ARM_ASMLIB_H__
-
-/*
- * Endian independent macros for shifting bytes within registers.
- */
-#ifndef __ARMEB__
-#define pull		lsr
-#define push		lsl
-#define get_byte_0	lsl #0
-#define get_byte_1	lsr #8
-#define get_byte_2	lsr #16
-#define get_byte_3	lsr #24
-#define put_byte_0	lsl #0
-#define put_byte_1	lsl #8
-#define put_byte_2	lsl #16
-#define put_byte_3	lsl #24
-#else
-#define pull		lsl
-#define push		lsr
-#define get_byte_0	lsr #24
-#define get_byte_1	lsr #16
-#define get_byte_2	lsr #8
-#define get_byte_3      lsl #0
-#define put_byte_0	lsl #24
-#define put_byte_1	lsl #16
-#define put_byte_2	lsl #8
-#define put_byte_3      lsl #0
-#endif
-
-/*
- * Data preload for architectures that support it
- */
-#if 1	/* TODO: differentiate once libpayload supports more ARM versions */
-#define PLD(code...)	code
-#else
-#define PLD(code...)
-#endif
-
-/*
- * This can be used to enable code to cacheline align the destination
- * pointer when bulk writing to memory. Linux doesn't enable this except
- * for the "Feroceon" processor, so we better just leave it out.
- */
-#define CALGN(code...)
-
-#endif	/* __ARM_ASMLIB_H */
diff --git a/payloads/libpayload/arch/arm/memcpy.S b/payloads/libpayload/arch/arm/memcpy.S
index 1388d05..6d68639 100644
--- a/payloads/libpayload/arch/arm/memcpy.S
+++ b/payloads/libpayload/arch/arm/memcpy.S
@@ -1,237 +1,44 @@
-/*
- *  linux/arch/arm/lib/memcpy.S
+/*	$OpenBSD: memcpy.S,v 1.4 2014/11/30 19:43:56 deraadt Exp $	*/
+/*	$NetBSD: memcpy.S,v 1.3 2003/04/05 23:08:52 bjh21 Exp $	*/
+
+/*-
+ * Copyright (c) 1997 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Neil A. Carson and Mark Brinicombe
  *
- *  Author:	Nicolas Pitre
- *  Created:	Sep 28, 2005
- *  Copyright:	MontaVista Software, Inc.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
  *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <arch/asm.h>
-#include "asmlib.h"
-
-#define LDR1W_SHIFT	0
-#define STR1W_SHIFT	0
-
-	.macro ldr1w ptr reg abort
-	W(ldr) \reg, [\ptr], #4
-	.endm
-
-	.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
-	ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
-	.endm
-
-	.macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
-	ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
-	.endm
-
-	.macro ldr1b ptr reg cond=al abort
-	ldr\cond\()b \reg, [\ptr], #1
-	.endm
-
-	.macro str1w ptr reg abort
-	W(str) \reg, [\ptr], #4
-	.endm
-
-	.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
-	stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
-	.endm
-
-	.macro str1b ptr reg cond=al abort
-	str\cond\()b \reg, [\ptr], #1
-	.endm
-
-	.macro enter reg1 reg2
-	stmdb sp!, {r0, \reg1, \reg2}
-	.endm
 
-	.macro exit reg1 reg2
-	ldmfd sp!, {r0, \reg1, \reg2}
-	.endm
-
-/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
+/*
+ * XXX
+ * the _memcpy function which this calls is actually a _memmove
+ * variant which handles overlaps...  That should be fixed.
+ */
 
 ENTRY(memcpy)
-
-		enter	r4, lr
-
-		subs	r2, r2, #4
-		blt	8f
-		ands	ip, r0, #3
-	PLD(	pld	[r1, #0]		)
-		bne	9f
-		ands	ip, r1, #3
-		bne	10f
-
-1:		subs	r2, r2, #(28)
-		stmfd	sp!, {r5 - r8}
-		blt	5f
-
-	CALGN(	ands	ip, r0, #31		)
-	CALGN(	rsb	r3, ip, #32		)
-	CALGN(	sbcnes	r4, r3, r2		)  @ C is always set here
-	CALGN(	bcs	2f			)
-	CALGN(	adr	r4, 6f			)
-	CALGN(	subs	r2, r2, r3		)  @ C gets set
-	CALGN(	add	pc, r4, ip		)
-
-	PLD(	pld	[r1, #0]		)
-2:	PLD(	subs	r2, r2, #96		)
-	PLD(	pld	[r1, #28]		)
-	PLD(	blt	4f			)
-	PLD(	pld	[r1, #60]		)
-	PLD(	pld	[r1, #92]		)
-
-3:	PLD(	pld	[r1, #124]		)
-4:		ldr8w	r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
-		subs	r2, r2, #32
-		str8w	r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
-		bge	3b
-	PLD(	cmn	r2, #96			)
-	PLD(	bge	4b			)
-
-5:		ands	ip, r2, #28
-		rsb	ip, ip, #32
-#if LDR1W_SHIFT > 0
-		lsl	ip, ip, #LDR1W_SHIFT
-#endif
-		addne	pc, pc, ip		@ C is always clear here
-		b	7f
-6:
-		.rept	(1 << LDR1W_SHIFT)
-		W(nop)
-		.endr
-		ldr1w	r1, r3, abort=20f
-		ldr1w	r1, r4, abort=20f
-		ldr1w	r1, r5, abort=20f
-		ldr1w	r1, r6, abort=20f
-		ldr1w	r1, r7, abort=20f
-		ldr1w	r1, r8, abort=20f
-		ldr1w	r1, lr, abort=20f
-
-#if LDR1W_SHIFT < STR1W_SHIFT
-		lsl	ip, ip, #STR1W_SHIFT - LDR1W_SHIFT
-#elif LDR1W_SHIFT > STR1W_SHIFT
-		lsr	ip, ip, #LDR1W_SHIFT - STR1W_SHIFT
-#endif
-		add	pc, pc, ip
-		nop
-		.rept	(1 << STR1W_SHIFT)
-		W(nop)
-		.endr
-		str1w	r0, r3, abort=20f
-		str1w	r0, r4, abort=20f
-		str1w	r0, r5, abort=20f
-		str1w	r0, r6, abort=20f
-		str1w	r0, r7, abort=20f
-		str1w	r0, r8, abort=20f
-		str1w	r0, lr, abort=20f
-
-	CALGN(	bcs	2b			)
-
-7:		ldmfd	sp!, {r5 - r8}
-
-8:		movs	r2, r2, lsl #31
-		ldr1b	r1, r3, ne, abort=21f
-		ldr1b	r1, r4, cs, abort=21f
-		ldr1b	r1, ip, cs, abort=21f
-		str1b	r0, r3, ne, abort=21f
-		str1b	r0, r4, cs, abort=21f
-		str1b	r0, ip, cs, abort=21f
-
-		exit	r4, pc
-
-9:		rsb	ip, ip, #4
-		cmp	ip, #2
-		ldr1b	r1, r3, gt, abort=21f
-		ldr1b	r1, r4, ge, abort=21f
-		ldr1b	r1, lr, abort=21f
-		str1b	r0, r3, gt, abort=21f
-		str1b	r0, r4, ge, abort=21f
-		subs	r2, r2, ip
-		str1b	r0, lr, abort=21f
-		blt	8b
-		ands	ip, r1, #3
-		beq	1b
-
-10:		bic	r1, r1, #3
-		cmp	ip, #2
-		ldr1w	r1, lr, abort=21f
-		beq	17f
-		bgt	18f
-
-
-		.macro	forward_copy_shift pull push
-
-		subs	r2, r2, #28
-		blt	14f
-
-	CALGN(	ands	ip, r0, #31		)
-	CALGN(	rsb	ip, ip, #32		)
-	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
-	CALGN(	subcc	r2, r2, ip		)
-	CALGN(	bcc	15f			)
-
-11:		stmfd	sp!, {r5 - r9}
-
-	PLD(	pld	[r1, #0]		)
-	PLD(	subs	r2, r2, #96		)
-	PLD(	pld	[r1, #28]		)
-	PLD(	blt	13f			)
-	PLD(	pld	[r1, #60]		)
-	PLD(	pld	[r1, #92]		)
-
-12:	PLD(	pld	[r1, #124]		)
-13:		ldr4w	r1, r4, r5, r6, r7, abort=19f
-		mov	r3, lr, pull #\pull
-		subs	r2, r2, #32
-		ldr4w	r1, r8, r9, ip, lr, abort=19f
-		orr	r3, r3, r4, push #\push
-		mov	r4, r4, pull #\pull
-		orr	r4, r4, r5, push #\push
-		mov	r5, r5, pull #\pull
-		orr	r5, r5, r6, push #\push
-		mov	r6, r6, pull #\pull
-		orr	r6, r6, r7, push #\push
-		mov	r7, r7, pull #\pull
-		orr	r7, r7, r8, push #\push
-		mov	r8, r8, pull #\pull
-		orr	r8, r8, r9, push #\push
-		mov	r9, r9, pull #\pull
-		orr	r9, r9, ip, push #\push
-		mov	ip, ip, pull #\pull
-		orr	ip, ip, lr, push #\push
-		str8w	r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
-		bge	12b
-	PLD(	cmn	r2, #96			)
-	PLD(	bge	13b			)
-
-		ldmfd	sp!, {r5 - r9}
-
-14:		ands	ip, r2, #28
-		beq	16f
-
-15:		mov	r3, lr, pull #\pull
-		ldr1w	r1, lr, abort=21f
-		subs	ip, ip, #4
-		orr	r3, r3, lr, push #\push
-		str1w	r0, r3, abort=21f
-		bgt	15b
-	CALGN(	cmp	r2, #0			)
-	CALGN(	bge	11b			)
-
-16:		sub	r1, r1, #(\push / 8)
-		b	8b
-
-		.endm
-
-
-		forward_copy_shift	pull=8	push=24
-
-17:		forward_copy_shift	pull=16	push=16
-
-18:		forward_copy_shift	pull=24	push=8
-ENDPROC(memcpy)
+	stmfd	sp!, {r0, lr}
+	bl	_memcpy
+	ldmfd	sp!, {r0, pc}
diff --git a/payloads/libpayload/arch/arm/memmove.S b/payloads/libpayload/arch/arm/memmove.S
index bd5f8f1..f071068 100644
--- a/payloads/libpayload/arch/arm/memmove.S
+++ b/payloads/libpayload/arch/arm/memmove.S
@@ -1,197 +1,38 @@
-/*
- *  linux/arch/arm/lib/memmove.S
+/*	$OpenBSD: memmove.S,v 1.3 2008/06/26 05:42:04 ray Exp $	*/
+/*	$NetBSD: memmove.S,v 1.3 2003/04/05 23:08:52 bjh21 Exp $	*/
+
+/*-
+ * Copyright (c) 1997 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Neil A. Carson and Mark Brinicombe
  *
- *  Author:	Nicolas Pitre
- *  Created:	Sep 28, 2005
- *  Copyright:	(C) MontaVista Software Inc.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
  *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <arch/asm.h>
-#include "asmlib.h"
-
-/*
- * Prototype: void *memmove(void *dest, const void *src, size_t n);
- *
- * Note:
- *
- * If the memory regions don't overlap, we simply branch to memcpy which is
- * normally a bit faster. Otherwise the copy is done going downwards.  This
- * is a transposition of the code from copy_template.S but with the copy
- * occurring in the opposite direction.
- */
 
 ENTRY(memmove)
-
-		subs	ip, r0, r1
-		cmphi	r2, ip
-		bls	memcpy
-
-		stmfd	sp!, {r0, r4, lr}
-		add	r1, r1, r2
-		add	r0, r0, r2
-		subs	r2, r2, #4
-		blt	8f
-		ands	ip, r0, #3
-	PLD(	pld	[r1, #-4]		)
-		bne	9f
-		ands	ip, r1, #3
-		bne	10f
-
-1:		subs	r2, r2, #(28)
-		stmfd	sp!, {r5 - r8}
-		blt	5f
-
-	CALGN(	ands	ip, r0, #31		)
-	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
-	CALGN(	bcs	2f			)
-	CALGN(	adr	r4, 6f			)
-	CALGN(	subs	r2, r2, ip		)  @ C is set here
-	CALGN(	rsb	ip, ip, #32		)
-	CALGN(	add	pc, r4, ip		)
-
-	PLD(	pld	[r1, #-4]		)
-2:	PLD(	subs	r2, r2, #96		)
-	PLD(	pld	[r1, #-32]		)
-	PLD(	blt	4f			)
-	PLD(	pld	[r1, #-64]		)
-	PLD(	pld	[r1, #-96]		)
-
-3:	PLD(	pld	[r1, #-128]		)
-4:		ldmdb	r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
-		subs	r2, r2, #32
-		stmdb	r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
-		bge	3b
-	PLD(	cmn	r2, #96			)
-	PLD(	bge	4b			)
-
-5:		ands	ip, r2, #28
-		rsb	ip, ip, #32
-		addne	pc, pc, ip		@ C is always clear here
-		b	7f
-6:		W(nop)
-		W(ldr)	r3, [r1, #-4]!
-		W(ldr)	r4, [r1, #-4]!
-		W(ldr)	r5, [r1, #-4]!
-		W(ldr)	r6, [r1, #-4]!
-		W(ldr)	r7, [r1, #-4]!
-		W(ldr)	r8, [r1, #-4]!
-		W(ldr)	lr, [r1, #-4]!
-
-		add	pc, pc, ip
-		nop
-		W(nop)
-		W(str)	r3, [r0, #-4]!
-		W(str)	r4, [r0, #-4]!
-		W(str)	r5, [r0, #-4]!
-		W(str)	r6, [r0, #-4]!
-		W(str)	r7, [r0, #-4]!
-		W(str)	r8, [r0, #-4]!
-		W(str)	lr, [r0, #-4]!
-
-	CALGN(	bcs	2b			)
-
-7:		ldmfd	sp!, {r5 - r8}
-
-8:		movs	r2, r2, lsl #31
-		ldrneb	r3, [r1, #-1]!
-		ldrcsb	r4, [r1, #-1]!
-		ldrcsb	ip, [r1, #-1]
-		strneb	r3, [r0, #-1]!
-		strcsb	r4, [r0, #-1]!
-		strcsb	ip, [r0, #-1]
-		ldmfd	sp!, {r0, r4, pc}
-
-9:		cmp	ip, #2
-		ldrgtb	r3, [r1, #-1]!
-		ldrgeb	r4, [r1, #-1]!
-		ldrb	lr, [r1, #-1]!
-		strgtb	r3, [r0, #-1]!
-		strgeb	r4, [r0, #-1]!
-		subs	r2, r2, ip
-		strb	lr, [r0, #-1]!
-		blt	8b
-		ands	ip, r1, #3
-		beq	1b
-
-10:		bic	r1, r1, #3
-		cmp	ip, #2
-		ldr	r3, [r1, #0]
-		beq	17f
-		blt	18f
-
-
-		.macro	backward_copy_shift push pull
-
-		subs	r2, r2, #28
-		blt	14f
-
-	CALGN(	ands	ip, r0, #31		)
-	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
-	CALGN(	subcc	r2, r2, ip		)
-	CALGN(	bcc	15f			)
-
-11:		stmfd	sp!, {r5 - r9}
-
-	PLD(	pld	[r1, #-4]		)
-	PLD(	subs	r2, r2, #96		)
-	PLD(	pld	[r1, #-32]		)
-	PLD(	blt	13f			)
-	PLD(	pld	[r1, #-64]		)
-	PLD(	pld	[r1, #-96]		)
-
-12:	PLD(	pld	[r1, #-128]		)
-13:		ldmdb   r1!, {r7, r8, r9, ip}
-		mov     lr, r3, push #\push
-		subs    r2, r2, #32
-		ldmdb   r1!, {r3, r4, r5, r6}
-		orr     lr, lr, ip, pull #\pull
-		mov     ip, ip, push #\push
-		orr     ip, ip, r9, pull #\pull
-		mov     r9, r9, push #\push
-		orr     r9, r9, r8, pull #\pull
-		mov     r8, r8, push #\push
-		orr     r8, r8, r7, pull #\pull
-		mov     r7, r7, push #\push
-		orr     r7, r7, r6, pull #\pull
-		mov     r6, r6, push #\push
-		orr     r6, r6, r5, pull #\pull
-		mov     r5, r5, push #\push
-		orr     r5, r5, r4, pull #\pull
-		mov     r4, r4, push #\push
-		orr     r4, r4, r3, pull #\pull
-		stmdb   r0!, {r4 - r9, ip, lr}
-		bge	12b
-	PLD(	cmn	r2, #96			)
-	PLD(	bge	13b			)
-
-		ldmfd	sp!, {r5 - r9}
-
-14:		ands	ip, r2, #28
-		beq	16f
-
-15:		mov     lr, r3, push #\push
-		ldr	r3, [r1, #-4]!
-		subs	ip, ip, #4
-		orr	lr, lr, r3, pull #\pull
-		str	lr, [r0, #-4]!
-		bgt	15b
-	CALGN(	cmp	r2, #0			)
-	CALGN(	bge	11b			)
-
-16:		add	r1, r1, #(\pull / 8)
-		b	8b
-
-		.endm
-
-
-		backward_copy_shift	push=8	pull=24
-
-17:		backward_copy_shift	push=16	pull=16
-
-18:		backward_copy_shift	push=24	pull=8
-
-ENDPROC(memmove)
+	stmfd	sp!, {r0, lr}
+	bl	_memcpy
+	ldmfd	sp!, {r0, pc}
diff --git a/payloads/libpayload/arch/arm/memset.S b/payloads/libpayload/arch/arm/memset.S
index 0c1102d..b0a2bc9 100644
--- a/payloads/libpayload/arch/arm/memset.S
+++ b/payloads/libpayload/arch/arm/memset.S
@@ -1,121 +1,127 @@
+/*	$OpenBSD: memset.S,v 1.2 2004/02/01 05:40:52 drahn Exp $	*/
+/*	$NetBSD: memset.S,v 1.3 2003/04/05 23:08:52 bjh21 Exp $	*/
+
 /*
- *  linux/arch/arm/lib/memset.S
- *
- *  Copyright (C) 1995-2000 Russell King
+ * Copyright (c) 1995 Mark Brinicombe.
+ * All rights reserved.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by Mark Brinicombe.
+ * 4. The name of the company nor the name of the author may be used to
+ *    endorse or promote products derived from this software without specific
+ *    prior written permission.
  *
- *  ASM optimised string functions
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
  */
 
 #include <arch/asm.h>
-#include "asmlib.h"
 
-ENTRY(memset)
-	ands	r3, r0, #3		@ 1 unaligned?
-	mov	ip, r0			@ preserve r0 as return value
-	bne	6f			@ 1
 /*
- * we know that the pointer in ip is aligned to a word boundary.
+ * Sets a block of memory to the specified value
+ *
+ * On entry:
+ *   r0 - dest address
+ *   r1 - byte to write
+ *   r2 - number of bytes to write
+ *
+ * On exit:
+ *   r0 - dest address
  */
-1:	orr	r1, r1, r1, lsl #8
-	orr	r1, r1, r1, lsl #16
-	mov	r3, r1
-	cmp	r2, #16
-	blt	4f
 
-#if ! CALGN(1)+0
+ENTRY(memset)
+	stmfd	sp!, {r0}		/* Remember address for return value */
+	and	r1, r1, #0x000000ff	/* We write bytes */
 
-/*
- * We need 2 extra registers for this loop - use r8 and the LR
- */
-	stmfd	sp!, {r8, lr}
-	mov	r8, r1
-	mov	lr, r1
-
-2:	subs	r2, r2, #64
-	stmgeia	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
-	stmgeia	ip!, {r1, r3, r8, lr}
-	stmgeia	ip!, {r1, r3, r8, lr}
-	stmgeia	ip!, {r1, r3, r8, lr}
-	bgt	2b
-	ldmeqfd	sp!, {r8, pc}		@ Now <64 bytes to go.
-/*
- * No need to correct the count; we're only testing bits from now on
- */
-	tst	r2, #32
-	stmneia	ip!, {r1, r3, r8, lr}
-	stmneia	ip!, {r1, r3, r8, lr}
-	tst	r2, #16
-	stmneia	ip!, {r1, r3, r8, lr}
-	ldmfd	sp!, {r8, lr}
+	cmp	r2, #0x00000004		/* Do we have less than 4 bytes */
+	blt	.Lmemset_lessthanfour
 
-#else
+	/* Ok first we will word align the address */
 
-/*
- * This version aligns the destination pointer in order to write
- * whole cache lines at once.
- */
+	ands	r3, r0, #0x00000003	/* Get the bottom two bits */
+	beq	.Lmemset_addraligned	/* The address is word aligned */
 
-	stmfd	sp!, {r4-r8, lr}
-	mov	r4, r1
-	mov	r5, r1
-	mov	r6, r1
-	mov	r7, r1
-	mov	r8, r1
-	mov	lr, r1
-
-	cmp	r2, #96
-	tstgt	ip, #31
-	ble	3f
-
-	and	r8, ip, #31
-	rsb	r8, r8, #32
-	sub	r2, r2, r8
-	movs	r8, r8, lsl #(32 - 4)
-	stmcsia	ip!, {r4, r5, r6, r7}
-	stmmiia	ip!, {r4, r5}
-	tst	r8, #(1 << 30)
-	mov	r8, r1
-	strne	r1, [ip], #4
-
-3:	subs	r2, r2, #64
-	stmgeia	ip!, {r1, r3-r8, lr}
-	stmgeia	ip!, {r1, r3-r8, lr}
-	bgt	3b
-	ldmeqfd	sp!, {r4-r8, pc}
-
-	tst	r2, #32
-	stmneia	ip!, {r1, r3-r8, lr}
-	tst	r2, #16
-	stmneia	ip!, {r4-r7}
-	ldmfd	sp!, {r4-r8, lr}
-
-#endif
-
-4:	tst	r2, #8
-	stmneia	ip!, {r1, r3}
-	tst	r2, #4
-	strne	r1, [ip], #4
-/*
- * When we get here, we've got less than 4 bytes to zero.  We
- * may have an unaligned pointer as well.
- */
-5:	tst	r2, #2
-	strneb	r1, [ip], #1
-	strneb	r1, [ip], #1
-	tst	r2, #1
-	strneb	r1, [ip], #1
-	mov	pc, lr
-
-6:	subs	r2, r2, #4		@ 1 do we have enough
-	blt	5b			@ 1 bytes to align with?
-	cmp	r3, #2			@ 1
-	strltb	r1, [ip], #1		@ 1
-	strleb	r1, [ip], #1		@ 1
-	strb	r1, [ip], #1		@ 1
-	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
-	b	1b
-ENDPROC(memset)
+	rsb	r3, r3, #0x00000004
+	sub	r2, r2, r3
+	cmp	r3, #0x00000002
+	strb	r1, [r0], #0x0001	/* Set 1 byte */
+	strgeb	r1, [r0], #0x0001	/* Set another byte */
+	strgtb	r1, [r0], #0x0001	/* and a third */
+
+	cmp	r2, #0x00000004
+	blt	.Lmemset_lessthanfour
+
+	/* Now we must be word aligned */
+
+.Lmemset_addraligned:
+
+	orr	r3, r1, r1, lsl #8	/* Repeat the byte into a word */
+	orr	r3, r3, r3, lsl #16
+
+	/* We know we have at least 4 bytes ... */
+
+	cmp	r2, #0x00000020		/* If less than 32 then use words */
+	blt	.Lmemset_lessthan32
+
+	/* We have at least 32 so lets use quad words */
+
+	stmfd	sp!, {r4-r6}		/* Store registers */
+	mov	r4, r3			/* Duplicate data */
+	mov	r5, r3
+	mov	r6, r3
+
+.Lmemset_loop16:
+	stmia	r0!, {r3-r6}		/* Store 16 bytes */
+	sub	r2, r2, #0x00000010	/* Adjust count */
+	cmp	r2, #0x00000010		/* Still got at least 16 bytes ? */
+	bgt	.Lmemset_loop16
+
+	ldmfd	sp!, {r4-r6}		/* Restore registers */
+
+	/* Do we need to set some words as well ? */
+
+	cmp	r2, #0x00000004
+	blt	.Lmemset_lessthanfour
+
+	/* Have either less than 16 or less than 32 depending on route taken */
+
+.Lmemset_lessthan32:
+
+	/* We have at least 4 bytes so copy as words */
+
+.Lmemset_loop4:
+	str	r3, [r0], #0x0004
+	sub	r2, r2, #0x0004
+	cmp	r2, #0x00000004
+	bge	.Lmemset_loop4
+
+.Lmemset_lessthanfour:
+	cmp	r2, #0x00000000
+	ldmeqfd	sp!, {r0}
+	moveq	pc, lr			/* Zero length so exit */
+
+	cmp	r2, #0x00000002
+	strb	r1, [r0], #0x0001	/* Set 1 byte */
+	strgeb	r1, [r0], #0x0001	/* Set another byte */
+	strgtb	r1, [r0], #0x0001	/* and a third */
+
+	ldmfd	sp!, {r0}
+	mov	pc, lr			/* Exit */
diff --git a/payloads/libpayload/arch/x86/Makefile.inc b/payloads/libpayload/arch/x86/Makefile.inc
index 87b3e9e..19784f5 100644
--- a/payloads/libpayload/arch/x86/Makefile.inc
+++ b/payloads/libpayload/arch/x86/Makefile.inc
@@ -34,8 +34,9 @@ libc-y += exec.S virtual.c
 libc-y += selfboot.c
 libc-y += exception_asm.S exception.c
 
-# Will fall back to default_memXXX() in libc/memory.c if GPL not allowed.
-libc-$(CONFIG_LP_GPL) += string.c
+libc-y += memset.S
+# also contains memcpy
+libc-y += memmove.S
 
 libgdb-y += gdb.c
 
diff --git a/payloads/libpayload/arch/x86/memmove.S b/payloads/libpayload/arch/x86/memmove.S
new file mode 100644
index 0000000..3e9e57f
--- /dev/null
+++ b/payloads/libpayload/arch/x86/memmove.S
@@ -0,0 +1,106 @@
+/*	$OpenBSD: memmove.S,v 1.5 2014/12/02 03:07:13 tedu Exp $	*/
+
+/*-
+ * Copyright (c) 1993, 1994, 1995 Charles M. Hannum.  All rights reserved.
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Emulate bcopy() by swapping the first two arguments, and jumping
+ * into memmove(), which handles overlapping regions.
+ */
+//ENTRY(bcopy)
+	pushl	%esi
+	pushl	%edi
+	movl	12(%esp),%esi
+	movl	16(%esp),%edi
+	jmp	docopy
+
+/*
+ * memmove(caddr_t dst, caddr_t src, size_t len);
+ * Copy len bytes, coping with overlapping space.
+ */
+	.globl memmove
+memmove:
+	pushl	%esi
+	pushl	%edi
+	movl	12(%esp),%edi
+	movl	16(%esp),%esi
+docopy:
+	movl	20(%esp),%ecx
+	movl	%edi,%eax
+	subl	%esi,%eax
+	cmpl	%ecx,%eax		# overlapping?
+	jb	1f
+	jmp	docopyf			# nope
+/*
+ * memcpy() doesn't worry about overlap and always copies forward
+ */
+	.globl memcpy
+memcpy:
+	pushl	%esi
+	pushl	%edi
+	movl	12(%esp),%edi
+	movl	16(%esp),%esi
+	movl	20(%esp),%ecx
+docopyf:
+	movl	%edi,%eax		# setup return value for memcpy/memmove
+	shrl	$2,%ecx			# copy by 32-bit words
+	rep
+	movsl
+	movl	20(%esp),%ecx
+	andl	$3,%ecx			# any bytes left?
+	rep
+	movsb
+	popl	%edi
+	popl	%esi
+	ret
+
+1:	movl	%edi,%eax		# setup return value for memmove
+	addl	%ecx,%edi		# copy backward
+	addl	%ecx,%esi
+	std
+	andl	$3,%ecx			# any fractional bytes?
+	decl	%edi
+	decl	%esi
+	rep
+	movsb
+	movl	20(%esp),%ecx		# copy remainder by 32-bit words
+	shrl	$2,%ecx
+	subl	$3,%esi
+	subl	$3,%edi
+	rep
+	movsl
+	popl	%edi
+	popl	%esi
+	cld
+	ret
+
diff --git a/payloads/libpayload/arch/x86/memset.S b/payloads/libpayload/arch/x86/memset.S
new file mode 100644
index 0000000..1a8cbd9
--- /dev/null
+++ b/payloads/libpayload/arch/x86/memset.S
@@ -0,0 +1,54 @@
+/*	$OpenBSD: memset.S,v 1.4 2007/05/25 20:32:29 krw Exp $ */
+/*
+ * Written by J.T. Conklin <jtc at netbsd.org>.
+ * Public domain.
+ */
+
+	.globl memset
+memset:
+	pushl	%edi
+	pushl	%ebx
+	movl	12(%esp),%edi
+	movzbl	16(%esp),%eax		/* unsigned char, zero extend */
+	movl	20(%esp),%ecx
+	pushl	%edi			/* push address of buffer */
+
+	cld				/* set fill direction forward */
+
+	/*
+	 * if the string is too short, it's really not worth the overhead
+	 * of aligning to word boundaries, etc.  So we jump to a plain
+	 * unaligned set.
+	 */
+	cmpl	$0x0f,%ecx
+	jle	L1
+
+	movb	%al,%ah			/* copy char to all bytes in word */
+	movl	%eax,%edx
+	sall	$16,%eax
+	orl	%edx,%eax
+
+	movl	%edi,%edx		/* compute misalignment */
+	negl	%edx
+	andl	$3,%edx
+	movl	%ecx,%ebx
+	subl	%edx,%ebx
+
+	movl	%edx,%ecx		/* set until word aligned */
+	rep
+	stosb
+
+	movl	%ebx,%ecx
+	shrl	$2,%ecx			/* set by words */
+	rep
+	stosl
+
+	movl	%ebx,%ecx		/* set remainder by bytes */
+	andl	$3,%ecx
+L1:	rep
+	stosb
+
+	popl	%eax			/* pop address of buffer */
+	popl	%ebx
+	popl	%edi
+	ret
diff --git a/payloads/libpayload/arch/x86/string.c b/payloads/libpayload/arch/x86/string.c
deleted file mode 100644
index 60de812..0000000
--- a/payloads/libpayload/arch/x86/string.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (C) 1991,1992,1993,1997,1998,2003, 2005 Free Software Foundation, Inc.
- * This file is part of the GNU C Library.
- * Copyright (c) 2011 The Chromium OS Authors.
- *
- * See file CREDITS for list of people who contributed to this
- * project.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc.
- */
-
-/* From glibc-2.14, sysdeps/i386/memset.c */
-
-#include <stdint.h>
-
-#include "string.h"
-
-typedef uint32_t op_t;
-
-void *memset(void *dstpp, int c, size_t len)
-{
-	int d0;
-	unsigned long int dstp = (unsigned long int) dstpp;
-
-	/* This explicit register allocation improves code very much indeed. */
-	register op_t x asm("ax");
-
-	x = (unsigned char) c;
-
-	/* Clear the direction flag, so filling will move forward.  */
-	asm volatile("cld");
-
-	/* This threshold value is optimal.  */
-	if (len >= 12) {
-		/* Fill X with four copies of the char we want to fill with. */
-		x |= (x << 8);
-		x |= (x << 16);
-
-		/* Adjust LEN for the bytes handled in the first loop.  */
-		len -= (-dstp) % sizeof(op_t);
-
-		/*
-		 * There are at least some bytes to set. No need to test for
-		 * LEN == 0 in this alignment loop.
-		 */
-
-		/* Fill bytes until DSTP is aligned on a longword boundary. */
-		asm volatile(
-			"rep\n"
-			"stosb" /* %0, %2, %3 */ :
-			"=D" (dstp), "=c" (d0) :
-			"0" (dstp), "1" ((-dstp) % sizeof(op_t)), "a" (x) :
-			"memory");
-
-		/* Fill longwords.  */
-		asm volatile(
-			"rep\n"
-			"stosl" /* %0, %2, %3 */ :
-			"=D" (dstp), "=c" (d0) :
-			"0" (dstp), "1" (len / sizeof(op_t)), "a" (x) :
-			"memory");
-		len %= sizeof(op_t);
-	}
-
-	/* Write the last few bytes. */
-	asm volatile(
-		"rep\n"
-		"stosb" /* %0, %2, %3 */ :
-		"=D" (dstp), "=c" (d0) :
-		"0" (dstp), "1" (len), "a" (x) :
-		"memory");
-
-	return dstpp;
-}
-
-void *memcpy(void *dest, const void *src, size_t n)
-{
-	unsigned long d0, d1, d2;
-
-	asm volatile(
-		"rep ; movsl\n\t"
-		"movl %4,%%ecx\n\t"
-		"rep ; movsb\n\t"
-		: "=&c" (d0), "=&D" (d1), "=&S" (d2)
-		: "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src)
-		: "memory"
-	);
-
-	return dest;
-}
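
Since the patch is explicitly marked untested, a quick standalone sanity
check along these lines (hypothetical test code, not part of the patch)
would confirm that the replacement routines keep the expected C calling
convention (each function returns its destination pointer) and that
memmove still handles overlapping regions:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char src[32] = "abcdefghijklmnop";
	char dst[32];

	/* memcpy and memset must return their first argument. */
	if (memcpy(dst, src, 17) != dst) {
		puts("memcpy: wrong return value");
		return 1;
	}
	if (memset(dst, 'x', 8) != dst) {
		puts("memset: wrong return value");
		return 1;
	}

	/* Overlapping regions: memmove must copy as if through a
	 * temporary buffer, so the original bytes survive the shift. */
	if (memmove(src + 2, src, 10) != src + 2) {
		puts("memmove: wrong return value");
		return 1;
	}
	if (memcmp(src + 2, "abcdefghij", 10) != 0) {
		puts("memmove: overlap handled incorrectly");
		return 1;
	}

	puts("all checks passed");
	return 0;
}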


