The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/support.S

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2004 Olivier Houchard
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 /*
   27  * Copyright 2003 Wasabi Systems, Inc.
   28  * All rights reserved.
   29  *
   30  * Written by Steve C. Woodford for Wasabi Systems, Inc.
   31  *
   32  * Redistribution and use in source and binary forms, with or without
   33  * modification, are permitted provided that the following conditions
   34  * are met:
   35  * 1. Redistributions of source code must retain the above copyright
   36  *    notice, this list of conditions and the following disclaimer.
   37  * 2. Redistributions in binary form must reproduce the above copyright
   38  *    notice, this list of conditions and the following disclaimer in the
   39  *    documentation and/or other materials provided with the distribution.
   40  * 3. All advertising materials mentioning features or use of this software
   41  *    must display the following acknowledgement:
   42  *      This product includes software developed for the NetBSD Project by
   43  *      Wasabi Systems, Inc.
   44  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
   45  *    or promote products derived from this software without specific prior
   46  *    written permission.
   47  *
   48  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
   49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   50  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   51  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
   52  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   53  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   54  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   55  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   56  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   57  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   58  * POSSIBILITY OF SUCH DAMAGE.
   59  */
   60 /*
   61  * Copyright (c) 1997 The NetBSD Foundation, Inc.
   62  * All rights reserved.
   63  *
   64  * This code is derived from software contributed to The NetBSD Foundation
   65  * by Neil A. Carson and Mark Brinicombe
   66  *
   67  * Redistribution and use in source and binary forms, with or without
   68  * modification, are permitted provided that the following conditions
   69  * are met:
   70  * 1. Redistributions of source code must retain the above copyright
   71  *    notice, this list of conditions and the following disclaimer.
   72  * 2. Redistributions in binary form must reproduce the above copyright
   73  *    notice, this list of conditions and the following disclaimer in the
   74  *    documentation and/or other materials provided with the distribution.
   75  * 3. All advertising materials mentioning features or use of this software
   76  *    must display the following acknowledgement:
   77  *      This product includes software developed by the NetBSD
   78  *      Foundation, Inc. and its contributors.
   79  * 4. Neither the name of The NetBSD Foundation nor the names of its
   80  *    contributors may be used to endorse or promote products derived
   81  *    from this software without specific prior written permission.
   82  *
   83  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   84  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   85  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   86  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   87  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   88  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   89  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   90  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   91  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   92  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   93  * POSSIBILITY OF SUCH DAMAGE.
   94  */
   95 
   96 #include <machine/asm.h>
   97 #include <machine/asmacros.h>
   98 __FBSDID("$FreeBSD: releng/8.1/sys/arm/arm/support.S 175255 2008-01-12 21:11:43Z cognet $");
   99 
  100 #include "assym.s"
  101 
/*
 * Literal pool: addresses of optional platform-assisted memcpy/bzero
 * hooks and their minimum-size thresholds.  bzero() below loads
 * _arm_bzero / _min_bzero_size through these words and, when the hook
 * pointer is non-NULL and the request is large enough, hands the work
 * off to it.  Presumably a platform driver installs the hooks at boot;
 * confirm against the definitions of _arm_bzero/_arm_memcpy elsewhere.
 */
   102 .L_arm_memcpy:
   103         .word   _C_LABEL(_arm_memcpy)
   104 .L_arm_bzero:
   105         .word   _C_LABEL(_arm_bzero)
   106 .L_min_memcpy_size:
   107         .word   _C_LABEL(_min_memcpy_size)
   108 .L_min_bzero_size:
   109         .word   _C_LABEL(_min_bzero_size)
  110 /*
  111  * memset: Sets a block of memory to the specified value
  112  *
  113  * On entry:
  114  *   r0 - dest address
  115  *   r1 - byte to write
  116  *   r2 - number of bytes to write
  117  *
  118  * On exit:
  119  *   r0 - dest address
  120  */
  121 /* LINTSTUB: Func: void bzero(void *, size_t) */
  122 ENTRY(bzero)
  123         ldr     r3, .L_arm_bzero
  124         ldr     r3, [r3]
  125         cmp     r3, #0
  126         beq     .Lnormal0
  127         ldr     r2, .L_min_bzero_size
  128         ldr     r2, [r2]
  129         cmp     r1, r2
  130         blt     .Lnormal0
  131         stmfd   sp!, {r0, r1, lr}
  132         mov     r2, #0
  133         mov     lr, pc
  134         mov     pc, r3
  135         cmp     r0, #0
  136         ldmfd   sp!, {r0, r1, lr}
  137         RETeq
  138 .Lnormal0:
  139         mov     r3, #0x00
  140         b       do_memset
  141 
/*
 * memset(void *dst, int c, size_t len) -- returns dst.
 * Entry: r0 = dst, r1 = c, r2 = len.
 *
 * do_memset is the shared core (also entered from bzero above) with:
 *   r0 = dst (never modified, so it is the return value),
 *   r3 = fill byte, r1 = remaining count, ip = roving store pointer.
 * Strategy: align ip to a word (and, on ARMv5E, to 8 bytes), replicate
 * the byte across a 32-bit word, then store in 128/32/16/4-byte chunks
 * before mopping up the 0-3 tail bytes.  Note "strged"/"strgeb" etc.
 * are pre-UAL spellings of conditionally executed strd/strb.
 */
   142 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
   143 ENTRY(memset)
   144         and     r3, r1, #0xff           /* We deal with bytes */
   145         mov     r1, r2                  /* r1 = count, freeing r2 as scratch */
   146 do_memset:
   147         cmp     r1, #0x04               /* Do we have less than 4 bytes */
   148         mov     ip, r0                  /* ip walks; r0 stays = dst for return */
   149         blt     .Lmemset_lessthanfour
   150 
   151         /* Ok first we will word align the address */
   152         ands    r2, ip, #0x03           /* Get the bottom two bits */
   153         bne     .Lmemset_wordunaligned  /* The address is not word aligned */
   154 
   155         /* We are now word aligned */
   156 .Lmemset_wordaligned:
   157         orr     r3, r3, r3, lsl #8      /* Extend value to 16-bits */
   158 #ifdef _ARM_ARCH_5E
   159         tst     ip, #0x04               /* Quad-align for armv5e */
   160 #else
   161         cmp     r1, #0x10
   162 #endif
   163         orr     r3, r3, r3, lsl #16     /* Extend value to 32-bits */
   164 #ifdef _ARM_ARCH_5E
   165         subn)e   r1, r1, #0x04           /* Quad-align if necessary */
   166         strne   r3, [ip], #0x04
   167         cmp     r1, #0x10
   168 #endif
   169         blt     .Lmemset_loop4          /* If less than 16 then use words */
   170         mov     r2, r3                  /* Duplicate data */
   171         cmp     r1, #0x80               /* If < 128 then skip the big loop */
   172         blt     .Lmemset_loop32
   173 
   174         /* Do 128 bytes at a time */
   175 .Lmemset_loop128:
   176         subs    r1, r1, #0x80           /* GE below <=> a full 128 bytes remained */
   177 #ifdef _ARM_ARCH_5E
   178         strged  r2, [ip], #0x08         /* strdge: 8-byte store from r2:r3 pair */
   179         strged  r2, [ip], #0x08
   180         strged  r2, [ip], #0x08
   181         strged  r2, [ip], #0x08
   182         strged  r2, [ip], #0x08
   183         strged  r2, [ip], #0x08
   184         strged  r2, [ip], #0x08
   185         strged  r2, [ip], #0x08
   186         strged  r2, [ip], #0x08
   187         strged  r2, [ip], #0x08
   188         strged  r2, [ip], #0x08
   189         strged  r2, [ip], #0x08
   190         strged  r2, [ip], #0x08
   191         strged  r2, [ip], #0x08
   192         strged  r2, [ip], #0x08
   193         strged  r2, [ip], #0x08
   194 #else
   195         stmgeia ip!, {r2-r3}
   196         stmgeia ip!, {r2-r3}
   197         stmgeia ip!, {r2-r3}
   198         stmgeia ip!, {r2-r3}
   199         stmgeia ip!, {r2-r3}
   200         stmgeia ip!, {r2-r3}
   201         stmgeia ip!, {r2-r3}
   202         stmgeia ip!, {r2-r3}
   203         stmgeia ip!, {r2-r3}
   204         stmgeia ip!, {r2-r3}
   205         stmgeia ip!, {r2-r3}
   206         stmgeia ip!, {r2-r3}
   207         stmgeia ip!, {r2-r3}
   208         stmgeia ip!, {r2-r3}
   209         stmgeia ip!, {r2-r3}
   210         stmgeia ip!, {r2-r3}
   211 #endif
   212         bgt     .Lmemset_loop128
   213         RETeq                   /* Zero length so just exit */
   214 
   215         add     r1, r1, #0x80           /* Adjust for extra sub */
   216 
   217         /* Do 32 bytes at a time */
   218 .Lmemset_loop32:
   219         subs    r1, r1, #0x20
   220 #ifdef _ARM_ARCH_5E
   221         strged  r2, [ip], #0x08
   222         strged  r2, [ip], #0x08
   223         strged  r2, [ip], #0x08
   224         strged  r2, [ip], #0x08
   225 #else
   226         stmgeia ip!, {r2-r3}
   227         stmgeia ip!, {r2-r3}
   228         stmgeia ip!, {r2-r3}
   229         stmgeia ip!, {r2-r3}
   230 #endif
   231         bgt     .Lmemset_loop32
   232         RETeq                   /* Zero length so just exit */
   233 
   234         adds    r1, r1, #0x10           /* Partially adjust for extra sub */
   235 
   236         /* Deal with 16 bytes or more */
   237 #ifdef _ARM_ARCH_5E
   238         strged  r2, [ip], #0x08
   239         strged  r2, [ip], #0x08
   240 #else
   241         stmgeia ip!, {r2-r3}
   242         stmgeia ip!, {r2-r3}
   243 #endif
   244         RETeq                   /* Zero length so just exit */
   245 
   246         addlt   r1, r1, #0x10           /* Possibly adjust for extra sub */
   247 
   248         /* We have at least 4 bytes so copy as words */
   249 .Lmemset_loop4:
   250         subs    r1, r1, #0x04
   251         strge   r3, [ip], #0x04
   252         bgt     .Lmemset_loop4
   253         RETeq                   /* Zero length so just exit */
   254 
   255 #ifdef _ARM_ARCH_5E
   256         /* Compensate for 64-bit alignment check */
   257         adds    r1, r1, #0x04
   258         RETeq
   259         cmp     r1, #2
   260 #else
   261         cmp     r1, #-2                 /* r1 is count-4 here: -2 <=> 2 bytes left */
   262 #endif
   263 
   264         strb    r3, [ip], #0x01         /* Set 1 byte */
   265         strgeb  r3, [ip], #0x01         /* Set another byte */
   266         strgtb  r3, [ip]                /* and a third */
   267         RET                     /* Exit */
   268 
   269 .Lmemset_wordunaligned:
   270         rsb     r2, r2, #0x004          /* r2 = 4 - misalignment = 1..3 bytes to fill */
   271         strb    r3, [ip], #0x01         /* Set 1 byte */
   272         cmp     r2, #0x02
   273         strgeb  r3, [ip], #0x01         /* Set another byte */
   274         sub     r1, r1, r2              /* count -= alignment bytes just stored */
   275         strgtb  r3, [ip], #0x01         /* and a third */
   276         cmp     r1, #0x04               /* More than 4 bytes left? */
   277         bge     .Lmemset_wordaligned    /* Yup */
   278 
   279 .Lmemset_lessthanfour:
   280         cmp     r1, #0x00
   281         RETeq                   /* Zero length so exit */
   282         strb    r3, [ip], #0x01         /* Set 1 byte */
   283         cmp     r1, #0x02
   284         strgeb  r3, [ip], #0x01         /* Set another byte */
   285         strgtb  r3, [ip]                /* and a third */
   286         RET                     /* Exit */
  287 
/*
 * bcmp(const void *b1, const void *b2, size_t len)
 * Entry: r0 = b1, r1 = b2, r2 = len.
 * Despite the name, this returns the signed difference of the first
 * mismatching byte pair (memcmp semantics), not merely zero/non-zero.
 * Fast paths: a dedicated len==6 routine (common for MAC addresses in
 * the network stack), and a word-at-a-time loop when both pointers
 * share the same alignment.  "eornes"/"ldreqb" etc. are pre-UAL
 * spellings of conditionally executed eors/ldrb.
 */
   288 ENTRY(bcmp)
   289         mov     ip, r0                  /* ip = b1; free r0 for the result */
   290         cmp     r2, #0x06
   291         beq     .Lmemcmp_6bytes
   292         mov     r0, #0x00
   293 
   294         /* Are both addresses aligned the same way? */
   295         cmp     r2, #0x00
   296         eornes  r3, ip, r1              /* if len != 0: r3 = b1 ^ b2, Z = (b1 == b2) */
   297         RETeq                   /* len == 0, or same addresses! */
   298         tst     r3, #0x03
   299         subne   r2, r2, #0x01
   300         bne     .Lmemcmp_bytewise2      /* Badly aligned. Do it the slow way */
   301 
   302         /* Word-align the addresses, if necessary */
   303         sub     r3, r1, #0x05           /* r3 = (b2 - 5) & 3 selects 3/2/1-byte ... */
   304         ands    r3, r3, #0x03           /* ... pre-alignment stanza below */
   305         add     r3, r3, r3, lsl #1      /* r3 *= 3 (insns per stanza step) */
   306         addne   pc, pc, r3, lsl #3      /* computed branch: skip r3*24 bytes; each */
   307         nop                             /* stanza below is 6 insns = 24 bytes */
   308 
   309         /* Compare up to 3 bytes */
   310         ldrb    r0, [ip], #0x01
   311         ldrb    r3, [r1], #0x01
   312         subs    r0, r0, r3
   313         RETne
   314         subs    r2, r2, #0x01
   315         RETeq
   316 
   317         /* Compare up to 2 bytes */
   318         ldrb    r0, [ip], #0x01
   319         ldrb    r3, [r1], #0x01
   320         subs    r0, r0, r3
   321         RETne
   322         subs    r2, r2, #0x01
   323         RETeq
   324 
   325         /* Compare 1 byte */
   326         ldrb    r0, [ip], #0x01
   327         ldrb    r3, [r1], #0x01
   328         subs    r0, r0, r3
   329         RETne
   330         subs    r2, r2, #0x01
   331         RETeq
   332 
   333         /* Compare 4 bytes at a time, if possible */
   334         subs    r2, r2, #0x04
   335         bcc     .Lmemcmp_bytewise
   336 .Lmemcmp_word_aligned:
   337         ldr     r0, [ip], #0x04
   338         ldr     r3, [r1], #0x04
   339         subs    r2, r2, #0x04           /* CS = full word still pending after this */
   340         cmpcs   r0, r3                  /* only compare while count hasn't borrowed */
   341         beq     .Lmemcmp_word_aligned
   342         sub     r0, r0, r3
   343 
   344         /* Correct for extra subtraction, and check if done */
   345         adds    r2, r2, #0x04
   346         cmpeq   r0, #0x00               /* If done, did all bytes match? */
   347         RETeq                   /* Yup. Just return */
   348 
   349         /* Re-do the final word byte-wise */
   350         sub     ip, ip, #0x04           /* back both pointers up one word so the */
   351         sub     r1, r1, #0x04           /* mismatching byte's difference is exact */
   352 
   353 .Lmemcmp_bytewise:
   354         add     r2, r2, #0x03
   355 .Lmemcmp_bytewise2:
   356         ldrb    r0, [ip], #0x01
   357         ldrb    r3, [r1], #0x01
   358         subs    r2, r2, #0x01
   359         cmpcs   r0, r3
   360         beq     .Lmemcmp_bytewise2
   361         sub     r0, r0, r3
   362         RET
   363 
   364         /*
   365          * 6 byte compares are very common, thanks to the network stack.
   366          * This code is hand-scheduled to reduce the number of stalls for
   367          * load results. Everything else being equal, this will be ~32%
   368          * faster than a byte-wise memcmp.
   369          */
   370         .align  5
   371 .Lmemcmp_6bytes:
   372         ldrb    r3, [r1, #0x00]         /* r3 = b2#0 */
   373         ldrb    r0, [ip, #0x00]         /* r0 = b1#0 */
   374         ldrb    r2, [r1, #0x01]         /* r2 = b2#1 */
   375         subs    r0, r0, r3              /* r0 = b1#0 - b2#0 */
   376         ldreqb  r3, [ip, #0x01]         /* r3 = b1#1 */
   377         RETne                   /* Return if mismatch on #0 */
   378         subs    r0, r3, r2              /* r0 = b1#1 - b2#1 */
   379         ldreqb  r3, [r1, #0x02]         /* r3 = b2#2 */
   380         ldreqb  r0, [ip, #0x02]         /* r0 = b1#2 */
   381         RETne                   /* Return if mismatch on #1 */
   382         ldrb    r2, [r1, #0x03]         /* r2 = b2#3 */
   383         subs    r0, r0, r3              /* r0 = b1#2 - b2#2 */
   384         ldreqb  r3, [ip, #0x03]         /* r3 = b1#3 */
   385         RETne                   /* Return if mismatch on #2 */
   386         subs    r0, r3, r2              /* r0 = b1#3 - b2#3 */
   387         ldreqb  r3, [r1, #0x04]         /* r3 = b2#4 */
   388         ldreqb  r0, [ip, #0x04]         /* r0 = b1#4 */
   389         RETne                   /* Return if mismatch on #3 */
   390         ldrb    r2, [r1, #0x05]         /* r2 = b2#5 */
   391         subs    r0, r0, r3              /* r0 = b1#4 - b2#4 */
   392         ldreqb  r3, [ip, #0x05]         /* r3 = b1#5 */
   393         RETne                   /* Return if mismatch on #4 */
   394         sub     r0, r3, r2              /* r0 = b1#5 - b2#5 */
   395         RET
  396 
/*
 * bcopy(const void *src, void *dst, size_t len)
 * bcopy's argument order is (src, dst); memmove takes (dst, src).
 * Swap r0 and r1 with the three-XOR trick (no scratch register needed)
 * and fall straight through into memmove below.
 */
   397 ENTRY(bcopy)
   398         /* switch the source and destination registers */
   399         eor     r0, r1, r0 
   400         eor     r1, r0, r1 
   401         eor     r0, r1, r0 
  402 ENTRY(memmove)
  403         /* Do the buffers overlap? */
  404         cmp     r0, r1
  405         RETeq           /* Bail now if src/dst are the same */
  406         subcc   r3, r0, r1      /* if (dst > src) r3 = dst - src */
  407         subcs   r3, r1, r0      /* if (src > dsr) r3 = src - dst */
  408         cmp     r3, r2          /* if (r3 < len) we have an overlap */
  409         bcc     PIC_SYM(_C_LABEL(memcpy), PLT)
  410 
  411         /* Determine copy direction */
  412         cmp     r1, r0
  413         bcc     .Lmemmove_backwards
  414 
  415         moveq   r0, #0                  /* Quick abort for len=0 */
  416         RETeq
  417 
  418         stmdb   sp!, {r0, lr}           /* memmove() returns dest addr */
  419         subs    r2, r2, #4
  420         blt     .Lmemmove_fl4           /* less than 4 bytes */
  421         ands    r12, r0, #3
  422         bne     .Lmemmove_fdestul       /* oh unaligned destination addr */
  423         ands    r12, r1, #3
  424         bne     .Lmemmove_fsrcul                /* oh unaligned source addr */
  425 
  426 .Lmemmove_ft8:
  427         /* We have aligned source and destination */
  428         subs    r2, r2, #8
  429         blt     .Lmemmove_fl12          /* less than 12 bytes (4 from above) */
  430         subs    r2, r2, #0x14         
  431         blt     .Lmemmove_fl32          /* less than 32 bytes (12 from above) */
  432         stmdb   sp!, {r4}               /* borrow r4 */
  433 
  434         /* blat 32 bytes at a time */
  435         /* XXX for really big copies perhaps we should use more registers */
  436 .Lmemmove_floop32:      
  437         ldmia   r1!, {r3, r4, r12, lr}
  438         stmia   r0!, {r3, r4, r12, lr}
  439         ldmia   r1!, {r3, r4, r12, lr}
  440         stmia   r0!, {r3, r4, r12, lr}
  441         subs    r2, r2, #0x20         
  442         bge     .Lmemmove_floop32
  443 
  444         cmn     r2, #0x10
  445         ldmgeia r1!, {r3, r4, r12, lr}  /* blat a remaining 16 bytes */
  446         stmgeia r0!, {r3, r4, r12, lr}
  447         subge   r2, r2, #0x10         
  448         ldmia   sp!, {r4}               /* return r4 */
  449 
  450 .Lmemmove_fl32:
  451         adds    r2, r2, #0x14         
  452 
  453         /* blat 12 bytes at a time */
  454 .Lmemmove_floop12:
  455         ldmgeia r1!, {r3, r12, lr}
  456         stmgeia r0!, {r3, r12, lr}
  457         subges  r2, r2, #0x0c         
  458         bge     .Lmemmove_floop12
  459 
  460 .Lmemmove_fl12:
  461         adds    r2, r2, #8
  462         blt     .Lmemmove_fl4
  463 
  464         subs    r2, r2, #4
  465         ldrlt   r3, [r1], #4
  466         strlt   r3, [r0], #4
  467         ldmgeia r1!, {r3, r12}
  468         stmgeia r0!, {r3, r12}
  469         subge   r2, r2, #4
  470 
  471 .Lmemmove_fl4:
  472         /* less than 4 bytes to go */
  473         adds    r2, r2, #4
  474         ldmeqia sp!, {r0, pc}           /* done */
  475 
  476         /* copy the crud byte at a time */
  477         cmp     r2, #2
  478         ldrb    r3, [r1], #1
  479         strb    r3, [r0], #1
  480         ldrgeb  r3, [r1], #1
  481         strgeb  r3, [r0], #1
  482         ldrgtb  r3, [r1], #1
  483         strgtb  r3, [r0], #1
  484         ldmia   sp!, {r0, pc}
  485 
  486         /* erg - unaligned destination */
  487 .Lmemmove_fdestul:
  488         rsb     r12, r12, #4
  489         cmp     r12, #2
  490 
  491         /* align destination with byte copies */
  492         ldrb    r3, [r1], #1
  493         strb    r3, [r0], #1
  494         ldrgeb  r3, [r1], #1
  495         strgeb  r3, [r0], #1
  496         ldrgtb  r3, [r1], #1
  497         strgtb  r3, [r0], #1
  498         subs    r2, r2, r12
  499         blt     .Lmemmove_fl4           /* less the 4 bytes */
  500 
  501         ands    r12, r1, #3
  502         beq     .Lmemmove_ft8           /* we have an aligned source */
  503 
  504         /* erg - unaligned source */
  505         /* This is where it gets nasty ... */
  506 .Lmemmove_fsrcul:
  507         bic     r1, r1, #3
  508         ldr     lr, [r1], #4
  509         cmp     r12, #2
  510         bgt     .Lmemmove_fsrcul3
  511         beq     .Lmemmove_fsrcul2
  512         cmp     r2, #0x0c            
  513         blt     .Lmemmove_fsrcul1loop4
  514         sub     r2, r2, #0x0c         
  515         stmdb   sp!, {r4, r5}
  516 
  517 .Lmemmove_fsrcul1loop16:
  518 #ifdef __ARMEB__
  519         mov     r3, lr, lsl #8
  520 #else
  521         mov     r3, lr, lsr #8
  522 #endif
  523         ldmia   r1!, {r4, r5, r12, lr}
  524 #ifdef __ARMEB__
  525         orr     r3, r3, r4, lsr #24
  526         mov     r4, r4, lsl #8
  527         orr     r4, r4, r5, lsr #24
  528         mov     r5, r5, lsl #8
  529         orr     r5, r5, r12, lsr #24
  530         mov     r12, r12, lsl #8
  531         orr     r12, r12, lr, lsr #24
  532 #else
  533         orr     r3, r3, r4, lsl #24
  534         mov     r4, r4, lsr #8
  535         orr     r4, r4, r5, lsl #24
  536         mov     r5, r5, lsr #8
  537         orr     r5, r5, r12, lsl #24
  538         mov     r12, r12, lsr #8
  539         orr     r12, r12, lr, lsl #24
  540 #endif
  541         stmia   r0!, {r3-r5, r12}
  542         subs    r2, r2, #0x10         
  543         bge     .Lmemmove_fsrcul1loop16
  544         ldmia   sp!, {r4, r5}
  545         adds    r2, r2, #0x0c         
  546         blt     .Lmemmove_fsrcul1l4
  547 
  548 .Lmemmove_fsrcul1loop4:
  549 #ifdef __ARMEB__
  550         mov     r12, lr, lsl #8
  551 #else
  552         mov     r12, lr, lsr #8
  553 #endif
  554         ldr     lr, [r1], #4
  555 #ifdef __ARMEB__
  556         orr     r12, r12, lr, lsr #24
  557 #else
  558         orr     r12, r12, lr, lsl #24
  559 #endif
  560         str     r12, [r0], #4
  561         subs    r2, r2, #4
  562         bge     .Lmemmove_fsrcul1loop4
  563 
  564 .Lmemmove_fsrcul1l4:
  565         sub     r1, r1, #3
  566         b       .Lmemmove_fl4
  567 
  568 .Lmemmove_fsrcul2:
  569         cmp     r2, #0x0c            
  570         blt     .Lmemmove_fsrcul2loop4
  571         sub     r2, r2, #0x0c         
  572         stmdb   sp!, {r4, r5}
  573 
  574 .Lmemmove_fsrcul2loop16:
  575 #ifdef __ARMEB__
  576         mov     r3, lr, lsl #16
  577 #else
  578         mov     r3, lr, lsr #16
  579 #endif
  580         ldmia   r1!, {r4, r5, r12, lr}
  581 #ifdef __ARMEB__
  582         orr     r3, r3, r4, lsr #16
  583         mov     r4, r4, lsl #16
  584         orr     r4, r4, r5, lsr #16
  585         mov     r5, r5, lsl #16
  586         orr     r5, r5, r12, lsr #16
  587         mov     r12, r12, lsl #16
  588         orr     r12, r12, lr, lsr #16
  589 #else
  590         orr     r3, r3, r4, lsl #16
  591         mov     r4, r4, lsr #16
  592         orr     r4, r4, r5, lsl #16
  593         mov     r5, r5, lsr #16
  594         orr     r5, r5, r12, lsl #16
  595         mov     r12, r12, lsr #16
  596         orr     r12, r12, lr, lsl #16
  597 #endif
  598         stmia   r0!, {r3-r5, r12}
  599         subs    r2, r2, #0x10         
  600         bge     .Lmemmove_fsrcul2loop16
  601         ldmia   sp!, {r4, r5}
  602         adds    r2, r2, #0x0c         
  603         blt     .Lmemmove_fsrcul2l4
  604 
  605 .Lmemmove_fsrcul2loop4:
  606 #ifdef __ARMEB__
  607         mov     r12, lr, lsl #16
  608 #else
  609         mov     r12, lr, lsr #16
  610 #endif
  611         ldr     lr, [r1], #4
  612 #ifdef __ARMEB__
  613         orr     r12, r12, lr, lsr #16
  614 #else
  615         orr     r12, r12, lr, lsl #16
  616 #endif
  617         str     r12, [r0], #4
  618         subs    r2, r2, #4
  619         bge     .Lmemmove_fsrcul2loop4
  620 
  621 .Lmemmove_fsrcul2l4:
  622         sub     r1, r1, #2
  623         b       .Lmemmove_fl4
  624 
  625 .Lmemmove_fsrcul3:
  626         cmp     r2, #0x0c            
  627         blt     .Lmemmove_fsrcul3loop4
  628         sub     r2, r2, #0x0c         
  629         stmdb   sp!, {r4, r5}
  630 
  631 .Lmemmove_fsrcul3loop16:
  632 #ifdef __ARMEB__
  633         mov     r3, lr, lsl #24
  634 #else
  635         mov     r3, lr, lsr #24
  636 #endif
  637         ldmia   r1!, {r4, r5, r12, lr}
  638 #ifdef __ARMEB__
  639         orr     r3, r3, r4, lsr #8
  640         mov     r4, r4, lsl #24
  641         orr     r4, r4, r5, lsr #8
  642         mov     r5, r5, lsl #24
  643         orr     r5, r5, r12, lsr #8
  644         mov     r12, r12, lsl #24
  645         orr     r12, r12, lr, lsr #8
  646 #else
  647         orr     r3, r3, r4, lsl #8
  648         mov     r4, r4, lsr #24
  649         orr     r4, r4, r5, lsl #8
  650         mov     r5, r5, lsr #24
  651         orr     r5, r5, r12, lsl #8
  652         mov     r12, r12, lsr #24
  653         orr     r12, r12, lr, lsl #8
  654 #endif
  655         stmia   r0!, {r3-r5, r12}
  656         subs    r2, r2, #0x10         
  657         bge     .Lmemmove_fsrcul3loop16
  658         ldmia   sp!, {r4, r5}
  659         adds    r2, r2, #0x0c         
  660         blt     .Lmemmove_fsrcul3l4
  661 
  662 .Lmemmove_fsrcul3loop4:
  663 #ifdef __ARMEB__
  664         mov     r12, lr, lsl #24
  665 #else
  666         mov     r12, lr, lsr #24
  667 #endif
  668         ldr     lr, [r1], #4
  669 #ifdef __ARMEB__
  670         orr     r12, r12, lr, lsr #8
  671 #else
  672         orr     r12, r12, lr, lsl #8
  673 #endif
  674         str     r12, [r0], #4
  675         subs    r2, r2, #4
  676         bge     .Lmemmove_fsrcul3loop4
  677 
  678 .Lmemmove_fsrcul3l4:
  679         sub     r1, r1, #1
  680         b       .Lmemmove_fl4
  681 
  682 .Lmemmove_backwards:
  683         add     r1, r1, r2
  684         add     r0, r0, r2
  685         subs    r2, r2, #4
  686         blt     .Lmemmove_bl4           /* less than 4 bytes */
  687         ands    r12, r0, #3
  688         bne     .Lmemmove_bdestul       /* oh unaligned destination addr */
  689         ands    r12, r1, #3
  690         bne     .Lmemmove_bsrcul                /* oh unaligned source addr */
  691 
  692 .Lmemmove_bt8:
  693         /* We have aligned source and destination */
  694         subs    r2, r2, #8
  695         blt     .Lmemmove_bl12          /* less than 12 bytes (4 from above) */
  696         stmdb   sp!, {r4, lr}
  697         subs    r2, r2, #0x14           /* less than 32 bytes (12 from above) */
  698         blt     .Lmemmove_bl32
  699 
  700         /* blat 32 bytes at a time */
  701         /* XXX for really big copies perhaps we should use more registers */
  702 .Lmemmove_bloop32:
  703         ldmdb   r1!, {r3, r4, r12, lr}
  704         stmdb   r0!, {r3, r4, r12, lr}
  705         ldmdb   r1!, {r3, r4, r12, lr}
  706         stmdb   r0!, {r3, r4, r12, lr}
  707         subs    r2, r2, #0x20         
  708         bge     .Lmemmove_bloop32
  709 
  710 .Lmemmove_bl32:
  711         cmn     r2, #0x10            
  712         ldmgedb r1!, {r3, r4, r12, lr}  /* blat a remaining 16 bytes */
  713         stmgedb r0!, {r3, r4, r12, lr}
  714         subge   r2, r2, #0x10         
  715         adds    r2, r2, #0x14         
  716         ldmgedb r1!, {r3, r12, lr}      /* blat a remaining 12 bytes */
  717         stmgedb r0!, {r3, r12, lr}
  718         subge   r2, r2, #0x0c         
  719         ldmia   sp!, {r4, lr}
  720 
  721 .Lmemmove_bl12:
  722         adds    r2, r2, #8
  723         blt     .Lmemmove_bl4
  724         subs    r2, r2, #4
  725         ldrlt   r3, [r1, #-4]!
  726         strlt   r3, [r0, #-4]!
  727         ldmgedb r1!, {r3, r12}
  728         stmgedb r0!, {r3, r12}
  729         subge   r2, r2, #4
  730 
  731 .Lmemmove_bl4:
	/* less than 4 bytes to go */
	/*
	 * Tail of the backwards (descending-address) memmove path.
	 * Here r0/r1 point one byte past the data still to be moved and
	 * r2 holds the remaining count biased by -4; restore it first.
	 */
	adds	r2, r2, #4
	RETeq			/* done */

	/* copy the crud byte at a time */
	/* cmp primes conditional execution: GE = >=2 left, GT = 3 left */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!		/* second byte, if any */
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!		/* third byte, if any */
	strgtb	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	/* r12 = destination misalignment (1-3); GE/GT select 2nd/3rd byte */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	subs	r2, r2, r12		/* account for the alignment bytes */
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3		/* source word-aligned now? */
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_bsrcul:
	/*
	 * Word-align the source, keeping the straddling partial word in
	 * r3.  r12 (1-3) is the source offset within that word and picks
	 * one of three shift-and-merge loops below.
	 */
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	/* fall through: source offset 3; bulk path needs >= 12 bytes */
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* offset-3 bulk loop: 16 bytes/pass, merging via 8/24-bit shifts */
.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c		/* undo the earlier bias */
	blt	.Lmemmove_bsrcul3l4

	/* offset-3 leftovers: one word per pass */
.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* restore true (unaligned) src */
	b	.Lmemmove_bl4

	/* source offset 2 within its word */
.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* offset-2 bulk loop: 16 bytes/pass, merging via 16-bit shifts */
.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

	/* offset-2 leftovers: one word per pass */
.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* restore true (unaligned) src */
	b	.Lmemmove_bl4

	/* source offset 1 within its word */
.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/*
	 * Offset-1 bulk loop, merging via 24/8-bit shifts.  NB: despite
	 * the "loop32" name it moves 16 bytes per iteration (see the
	 * "subs r2, r2, #0x10" below), like its siblings above.
	 */
.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

	/* offset-1 leftovers: one word per pass */
.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* restore true (unaligned) src */
	b	.Lmemmove_bl4
  939 
  940 #if !defined(_ARM_ARCH_5E)
  941 ENTRY(memcpy)
  942         /* save leaf functions having to store this away */
  943         /* Do not check arm_memcpy if we're running from flash */
  944 #ifdef FLASHADDR
  945 #if FLASHADDR > PHYSADDR
  946         ldr     r3, =FLASHADDR
  947         cmp     r3, pc
  948         bls     .Lnormal
  949 #else
  950         ldr     r3, =FLASHADDR
  951         cmp     r3, pc
  952         bhi     .Lnormal
  953 #endif
  954 #endif
  955         ldr     r3, .L_arm_memcpy
  956         ldr     r3, [r3]
  957         cmp     r3, #0
  958         beq     .Lnormal
  959         ldr     r3, .L_min_memcpy_size
  960         ldr     r3, [r3]
  961         cmp     r2, r3
  962         blt     .Lnormal
  963         stmfd   sp!, {r0-r2, r4, lr}
  964         mov     r3, #0
  965         ldr     r4, .L_arm_memcpy
  966         mov     lr, pc
  967         ldr     pc, [r4]
  968         cmp     r0, #0
  969         ldmfd   sp!, {r0-r2, r4, lr}
  970         RETeq
  971 
  972 .Lnormal:
  973         stmdb   sp!, {r0, lr}           /* memcpy() returns dest addr */
  974 
  975         subs    r2, r2, #4
  976         blt     .Lmemcpy_l4             /* less than 4 bytes */
  977         ands    r12, r0, #3
  978         bne     .Lmemcpy_destul         /* oh unaligned destination addr */
  979         ands    r12, r1, #3
  980         bne     .Lmemcpy_srcul          /* oh unaligned source addr */
  981 
  982 .Lmemcpy_t8:
  983         /* We have aligned source and destination */
  984         subs    r2, r2, #8
  985         blt     .Lmemcpy_l12            /* less than 12 bytes (4 from above) */
  986         subs    r2, r2, #0x14         
  987         blt     .Lmemcpy_l32            /* less than 32 bytes (12 from above) */
  988         stmdb   sp!, {r4}               /* borrow r4 */
  989 
  990         /* blat 32 bytes at a time */
  991         /* XXX for really big copies perhaps we should use more registers */
  992 .Lmemcpy_loop32:        
  993         ldmia   r1!, {r3, r4, r12, lr}
  994         stmia   r0!, {r3, r4, r12, lr}
  995         ldmia   r1!, {r3, r4, r12, lr}
  996         stmia   r0!, {r3, r4, r12, lr}
  997         subs    r2, r2, #0x20         
  998         bge     .Lmemcpy_loop32
  999 
 1000         cmn     r2, #0x10
 1001         ldmgeia r1!, {r3, r4, r12, lr}  /* blat a remaining 16 bytes */
 1002         stmgeia r0!, {r3, r4, r12, lr}
 1003         subge   r2, r2, #0x10         
 1004         ldmia   sp!, {r4}               /* return r4 */
 1005 
 1006 .Lmemcpy_l32:
 1007         adds    r2, r2, #0x14         
 1008 
 1009         /* blat 12 bytes at a time */
 1010 .Lmemcpy_loop12:
 1011         ldmgeia r1!, {r3, r12, lr}
 1012         stmgeia r0!, {r3, r12, lr}
 1013         subges  r2, r2, #0x0c         
 1014         bge     .Lmemcpy_loop12
 1015 
 1016 .Lmemcpy_l12:
 1017         adds    r2, r2, #8
 1018         blt     .Lmemcpy_l4
 1019 
 1020         subs    r2, r2, #4
 1021         ldrlt   r3, [r1], #4
 1022         strlt   r3, [r0], #4
 1023         ldmgeia r1!, {r3, r12}
 1024         stmgeia r0!, {r3, r12}
 1025         subge   r2, r2, #4
 1026 
 1027 .Lmemcpy_l4:
 1028         /* less than 4 bytes to go */
 1029         adds    r2, r2, #4
 1030 #ifdef __APCS_26_
 1031         ldmeqia sp!, {r0, pc}^          /* done */
 1032 #else
 1033         ldmeqia sp!, {r0, pc}           /* done */
 1034 #endif
 1035         /* copy the crud byte at a time */
 1036         cmp     r2, #2
 1037         ldrb    r3, [r1], #1
 1038         strb    r3, [r0], #1
 1039         ldrgeb  r3, [r1], #1
 1040         strgeb  r3, [r0], #1
 1041         ldrgtb  r3, [r1], #1
 1042         strgtb  r3, [r0], #1
 1043         ldmia   sp!, {r0, pc}
 1044 
 1045         /* erg - unaligned destination */
 1046 .Lmemcpy_destul:
 1047         rsb     r12, r12, #4
 1048         cmp     r12, #2
 1049 
 1050         /* align destination with byte copies */
 1051         ldrb    r3, [r1], #1
 1052         strb    r3, [r0], #1
 1053         ldrgeb  r3, [r1], #1
 1054         strgeb  r3, [r0], #1
 1055         ldrgtb  r3, [r1], #1
 1056         strgtb  r3, [r0], #1
 1057         subs    r2, r2, r12
 1058         blt     .Lmemcpy_l4             /* less the 4 bytes */
 1059 
 1060         ands    r12, r1, #3
 1061         beq     .Lmemcpy_t8             /* we have an aligned source */
 1062 
 1063         /* erg - unaligned source */
 1064         /* This is where it gets nasty ... */
 1065 .Lmemcpy_srcul:
 1066         bic     r1, r1, #3
 1067         ldr     lr, [r1], #4
 1068         cmp     r12, #2
 1069         bgt     .Lmemcpy_srcul3
 1070         beq     .Lmemcpy_srcul2
 1071         cmp     r2, #0x0c            
 1072         blt     .Lmemcpy_srcul1loop4
 1073         sub     r2, r2, #0x0c         
 1074         stmdb   sp!, {r4, r5}
 1075 
 1076 .Lmemcpy_srcul1loop16:
 1077         mov     r3, lr, lsr #8
 1078         ldmia   r1!, {r4, r5, r12, lr}
 1079         orr     r3, r3, r4, lsl #24
 1080         mov     r4, r4, lsr #8
 1081         orr     r4, r4, r5, lsl #24
 1082         mov     r5, r5, lsr #8
 1083         orr     r5, r5, r12, lsl #24
 1084         mov     r12, r12, lsr #8
 1085         orr     r12, r12, lr, lsl #24
 1086         stmia   r0!, {r3-r5, r12}
 1087         subs    r2, r2, #0x10         
 1088         bge     .Lmemcpy_srcul1loop16
 1089         ldmia   sp!, {r4, r5}
 1090         adds    r2, r2, #0x0c         
 1091         blt     .Lmemcpy_srcul1l4
 1092 
 1093 .Lmemcpy_srcul1loop4:
 1094         mov     r12, lr, lsr #8
 1095         ldr     lr, [r1], #4
 1096         orr     r12, r12, lr, lsl #24
 1097         str     r12, [r0], #4
 1098         subs    r2, r2, #4
 1099         bge     .Lmemcpy_srcul1loop4
 1100 
 1101 .Lmemcpy_srcul1l4:
 1102         sub     r1, r1, #3
 1103         b       .Lmemcpy_l4
 1104 
 1105 .Lmemcpy_srcul2:
 1106         cmp     r2, #0x0c            
 1107         blt     .Lmemcpy_srcul2loop4
 1108         sub     r2, r2, #0x0c         
 1109         stmdb   sp!, {r4, r5}
 1110 
 1111 .Lmemcpy_srcul2loop16:
 1112         mov     r3, lr, lsr #16
 1113         ldmia   r1!, {r4, r5, r12, lr}
 1114         orr     r3, r3, r4, lsl #16
 1115         mov     r4, r4, lsr #16
 1116         orr     r4, r4, r5, lsl #16
 1117         mov     r5, r5, lsr #16
 1118         orr     r5, r5, r12, lsl #16
 1119         mov     r12, r12, lsr #16
 1120         orr     r12, r12, lr, lsl #16
 1121         stmia   r0!, {r3-r5, r12}
 1122         subs    r2, r2, #0x10         
 1123         bge     .Lmemcpy_srcul2loop16
 1124         ldmia   sp!, {r4, r5}
 1125         adds    r2, r2, #0x0c         
 1126         blt     .Lmemcpy_srcul2l4
 1127 
 1128 .Lmemcpy_srcul2loop4:
 1129         mov     r12, lr, lsr #16
 1130         ldr     lr, [r1], #4
 1131         orr     r12, r12, lr, lsl #16
 1132         str     r12, [r0], #4
 1133         subs    r2, r2, #4
 1134         bge     .Lmemcpy_srcul2loop4
 1135 
 1136 .Lmemcpy_srcul2l4:
 1137         sub     r1, r1, #2
 1138         b       .Lmemcpy_l4
 1139 
 1140 .Lmemcpy_srcul3:
 1141         cmp     r2, #0x0c            
 1142         blt     .Lmemcpy_srcul3loop4
 1143         sub     r2, r2, #0x0c         
 1144         stmdb   sp!, {r4, r5}
 1145 
 1146 .Lmemcpy_srcul3loop16:
 1147         mov     r3, lr, lsr #24
 1148         ldmia   r1!, {r4, r5, r12, lr}
 1149         orr     r3, r3, r4, lsl #8
 1150         mov     r4, r4, lsr #24
 1151         orr     r4, r4, r5, lsl #8
 1152         mov     r5, r5, lsr #24
 1153         orr     r5, r5, r12, lsl #8
 1154         mov     r12, r12, lsr #24
 1155         orr     r12, r12, lr, lsl #8
 1156         stmia   r0!, {r3-r5, r12}
 1157         subs    r2, r2, #0x10         
 1158         bge     .Lmemcpy_srcul3loop16
 1159         ldmia   sp!, {r4, r5}
 1160         adds    r2, r2, #0x0c         
 1161         blt     .Lmemcpy_srcul3l4
 1162 
 1163 .Lmemcpy_srcul3loop4:
 1164         mov     r12, lr, lsr #24
 1165         ldr     lr, [r1], #4
 1166         orr     r12, r12, lr, lsl #8
 1167         str     r12, [r0], #4
 1168         subs    r2, r2, #4
 1169         bge     .Lmemcpy_srcul3loop4
 1170 
 1171 .Lmemcpy_srcul3l4:
 1172         sub     r1, r1, #1
 1173         b       .Lmemcpy_l4
 1174 #else
 1175 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
 1176 ENTRY(memcpy)
 1177         pld     [r1]
 1178         cmp     r2, #0x0c
 1179         ble     .Lmemcpy_short          /* <= 12 bytes */
 1180 #ifdef FLASHADDR
 1181 #if FLASHADDR > PHYSADDR
 1182         ldr     r3, =FLASHADDR
 1183         cmp     r3, pc
 1184         bls     .Lnormal
 1185 #else
 1186         ldr     r3, =FLASHADDR
 1187         cmp     r3, pc
 1188         bhi     .Lnormal
 1189 #endif
 1190 #endif
 1191         ldr     r3, .L_arm_memcpy
 1192         ldr     r3, [r3]
 1193         cmp     r3, #0
 1194         beq     .Lnormal
 1195         ldr     r3, .L_min_memcpy_size
 1196         ldr     r3, [r3]
 1197         cmp     r2, r3
 1198         blt     .Lnormal
 1199         stmfd   sp!, {r0-r2, r4, lr}
 1200         mov     r3, #0
 1201         ldr     r4, .L_arm_memcpy
 1202         mov     lr, pc
 1203         ldr     pc, [r4]
 1204         cmp     r0, #0
 1205         ldmfd   sp!, {r0-r2, r4, lr}
 1206         RETeq
 1207 .Lnormal:
 1208         mov     r3, r0                  /* We must not clobber r0 */
 1209 
 1210         /* Word-align the destination buffer */
 1211         ands    ip, r3, #0x03           /* Already word aligned? */
 1212         beq     .Lmemcpy_wordaligned    /* Yup */
 1213         cmp     ip, #0x02
 1214         ldrb    ip, [r1], #0x01
 1215         sub     r2, r2, #0x01
 1216         strb    ip, [r3], #0x01
 1217         ldrleb  ip, [r1], #0x01
 1218         suble   r2, r2, #0x01
 1219         strleb  ip, [r3], #0x01
 1220         ldrltb  ip, [r1], #0x01
 1221         sublt   r2, r2, #0x01
 1222         strltb  ip, [r3], #0x01
 1223 
 1224         /* Destination buffer is now word aligned */
 1225 .Lmemcpy_wordaligned:
 1226         ands    ip, r1, #0x03           /* Is src also word-aligned? */
 1227         bne     .Lmemcpy_bad_align      /* Nope. Things just got bad */
 1228 
 1229         /* Quad-align the destination buffer */
 1230         tst     r3, #0x07               /* Already quad aligned? */
 1231         ldrne   ip, [r1], #0x04
 1232         stmfd   sp!, {r4-r9}            /* Free up some registers */
 1233         subne   r2, r2, #0x04
 1234         strne   ip, [r3], #0x04
 1235 
 1236         /* Destination buffer quad aligned, source is at least word aligned */
 1237         subs    r2, r2, #0x80
 1238         blt     .Lmemcpy_w_lessthan128
 1239 
 1240         /* Copy 128 bytes at a time */
 1241 .Lmemcpy_w_loop128:
 1242         ldr     r4, [r1], #0x04         /* LD:00-03 */
 1243         ldr     r5, [r1], #0x04         /* LD:04-07 */
 1244         pld     [r1, #0x18]             /* Prefetch 0x20 */
 1245         ldr     r6, [r1], #0x04         /* LD:08-0b */
 1246         ldr     r7, [r1], #0x04         /* LD:0c-0f */
 1247         ldr     r8, [r1], #0x04         /* LD:10-13 */
 1248         ldr     r9, [r1], #0x04         /* LD:14-17 */
 1249         strd    r4, [r3], #0x08         /* ST:00-07 */
 1250         ldr     r4, [r1], #0x04         /* LD:18-1b */
 1251         ldr     r5, [r1], #0x04         /* LD:1c-1f */
 1252         strd    r6, [r3], #0x08         /* ST:08-0f */
 1253         ldr     r6, [r1], #0x04         /* LD:20-23 */
 1254         ldr     r7, [r1], #0x04         /* LD:24-27 */
 1255         pld     [r1, #0x18]             /* Prefetch 0x40 */
 1256         strd    r8, [r3], #0x08         /* ST:10-17 */
 1257         ldr     r8, [r1], #0x04         /* LD:28-2b */
 1258         ldr     r9, [r1], #0x04         /* LD:2c-2f */
 1259         strd    r4, [r3], #0x08         /* ST:18-1f */
 1260         ldr     r4, [r1], #0x04         /* LD:30-33 */
 1261         ldr     r5, [r1], #0x04         /* LD:34-37 */
 1262         strd    r6, [r3], #0x08         /* ST:20-27 */
 1263         ldr     r6, [r1], #0x04         /* LD:38-3b */
 1264         ldr     r7, [r1], #0x04         /* LD:3c-3f */
 1265         strd    r8, [r3], #0x08         /* ST:28-2f */
 1266         ldr     r8, [r1], #0x04         /* LD:40-43 */
 1267         ldr     r9, [r1], #0x04         /* LD:44-47 */
 1268         pld     [r1, #0x18]             /* Prefetch 0x60 */
 1269         strd    r4, [r3], #0x08         /* ST:30-37 */
 1270         ldr     r4, [r1], #0x04         /* LD:48-4b */
 1271         ldr     r5, [r1], #0x04         /* LD:4c-4f */
 1272         strd    r6, [r3], #0x08         /* ST:38-3f */
 1273         ldr     r6, [r1], #0x04         /* LD:50-53 */
 1274         ldr     r7, [r1], #0x04         /* LD:54-57 */
 1275         strd    r8, [r3], #0x08         /* ST:40-47 */
 1276         ldr     r8, [r1], #0x04         /* LD:58-5b */
 1277         ldr     r9, [r1], #0x04         /* LD:5c-5f */
 1278         strd    r4, [r3], #0x08         /* ST:48-4f */
 1279         ldr     r4, [r1], #0x04         /* LD:60-63 */
 1280         ldr     r5, [r1], #0x04         /* LD:64-67 */
 1281         pld     [r1, #0x18]             /* Prefetch 0x80 */
 1282         strd    r6, [r3], #0x08         /* ST:50-57 */
 1283         ldr     r6, [r1], #0x04         /* LD:68-6b */
 1284         ldr     r7, [r1], #0x04         /* LD:6c-6f */
 1285         strd    r8, [r3], #0x08         /* ST:58-5f */
 1286         ldr     r8, [r1], #0x04         /* LD:70-73 */
 1287         ldr     r9, [r1], #0x04         /* LD:74-77 */
 1288         strd    r4, [r3], #0x08         /* ST:60-67 */
 1289         ldr     r4, [r1], #0x04         /* LD:78-7b */
 1290         ldr     r5, [r1], #0x04         /* LD:7c-7f */
 1291         strd    r6, [r3], #0x08         /* ST:68-6f */
 1292         strd    r8, [r3], #0x08         /* ST:70-77 */
 1293         subs    r2, r2, #0x80
 1294         strd    r4, [r3], #0x08         /* ST:78-7f */
 1295         bge     .Lmemcpy_w_loop128
 1296 
 1297 .Lmemcpy_w_lessthan128:
 1298         adds    r2, r2, #0x80           /* Adjust for extra sub */
 1299         ldmeqfd sp!, {r4-r9}
 1300         RETeq                   /* Return now if done */
 1301         subs    r2, r2, #0x20
 1302         blt     .Lmemcpy_w_lessthan32
 1303 
 1304         /* Copy 32 bytes at a time */
 1305 .Lmemcpy_w_loop32:
 1306         ldr     r4, [r1], #0x04
 1307         ldr     r5, [r1], #0x04
 1308         pld     [r1, #0x18]
 1309         ldr     r6, [r1], #0x04
 1310         ldr     r7, [r1], #0x04
 1311         ldr     r8, [r1], #0x04
 1312         ldr     r9, [r1], #0x04
 1313         strd    r4, [r3], #0x08
 1314         ldr     r4, [r1], #0x04
 1315         ldr     r5, [r1], #0x04
 1316         strd    r6, [r3], #0x08
 1317         strd    r8, [r3], #0x08
 1318         subs    r2, r2, #0x20
 1319         strd    r4, [r3], #0x08
 1320         bge     .Lmemcpy_w_loop32
 1321 
 1322 .Lmemcpy_w_lessthan32:
 1323         adds    r2, r2, #0x20           /* Adjust for extra sub */
 1324         ldmeqfd sp!, {r4-r9}
 1325         RETeq                   /* Return now if done */
 1326 
 1327         and     r4, r2, #0x18
 1328         rsbs    r4, r4, #0x18
 1329         addne   pc, pc, r4, lsl #1
 1330         nop
 1331 
 1332         /* At least 24 bytes remaining */
 1333         ldr     r4, [r1], #0x04
 1334         ldr     r5, [r1], #0x04
 1335         sub     r2, r2, #0x08
 1336         strd    r4, [r3], #0x08
 1337 
 1338         /* At least 16 bytes remaining */
 1339         ldr     r4, [r1], #0x04
 1340         ldr     r5, [r1], #0x04
 1341         sub     r2, r2, #0x08
 1342         strd    r4, [r3], #0x08
 1343 
 1344         /* At least 8 bytes remaining */
 1345         ldr     r4, [r1], #0x04
 1346         ldr     r5, [r1], #0x04
 1347         subs    r2, r2, #0x08
 1348         strd    r4, [r3], #0x08
 1349 
 1350         /* Less than 8 bytes remaining */
 1351         ldmfd   sp!, {r4-r9}
 1352         RETeq                   /* Return now if done */
 1353         subs    r2, r2, #0x04
 1354         ldrge   ip, [r1], #0x04
 1355         strge   ip, [r3], #0x04
 1356         RETeq                   /* Return now if done */
 1357         addlt   r2, r2, #0x04
 1358         ldrb    ip, [r1], #0x01
 1359         cmp     r2, #0x02
 1360         ldrgeb  r2, [r1], #0x01
 1361         strb    ip, [r3], #0x01
 1362         ldrgtb  ip, [r1]
 1363         strgeb  r2, [r3], #0x01
 1364         strgtb  ip, [r3]
 1365         RET
 1366 
 1367 
 1368 /*
 1369  * At this point, it has not been possible to word align both buffers.
 1370  * The destination buffer is word aligned, but the source buffer is not.
 1371  */
 1372 .Lmemcpy_bad_align:
 1373         stmfd   sp!, {r4-r7}
 1374         bic     r1, r1, #0x03
 1375         cmp     ip, #2
 1376         ldr     ip, [r1], #0x04
 1377         bgt     .Lmemcpy_bad3
 1378         beq     .Lmemcpy_bad2
 1379         b       .Lmemcpy_bad1
 1380 
 1381 .Lmemcpy_bad1_loop16:
 1382 #ifdef __ARMEB__
 1383         mov     r4, ip, lsl #8
 1384 #else
 1385         mov     r4, ip, lsr #8
 1386 #endif
 1387         ldr     r5, [r1], #0x04
 1388         pld     [r1, #0x018]
 1389         ldr     r6, [r1], #0x04
 1390         ldr     r7, [r1], #0x04
 1391         ldr     ip, [r1], #0x04
 1392 #ifdef __ARMEB__
 1393         orr     r4, r4, r5, lsr #24
 1394         mov     r5, r5, lsl #8
 1395         orr     r5, r5, r6, lsr #24
 1396         mov     r6, r6, lsl #8
 1397         orr     r6, r6, r7, lsr #24
 1398         mov     r7, r7, lsl #8
 1399         orr     r7, r7, ip, lsr #24
 1400 #else
 1401         orr     r4, r4, r5, lsl #24
 1402         mov     r5, r5, lsr #8
 1403         orr     r5, r5, r6, lsl #24
 1404         mov     r6, r6, lsr #8
 1405         orr     r6, r6, r7, lsl #24
 1406         mov     r7, r7, lsr #8
 1407         orr     r7, r7, ip, lsl #24
 1408 #endif
 1409         str     r4, [r3], #0x04
 1410         str     r5, [r3], #0x04
 1411         str     r6, [r3], #0x04
 1412         str     r7, [r3], #0x04
 1413 .Lmemcpy_bad1:
 1414         subs    r2, r2, #0x10         
 1415         bge     .Lmemcpy_bad1_loop16
 1416 
 1417         adds    r2, r2, #0x10         
 1418         ldmeqfd sp!, {r4-r7}
 1419         RETeq                   /* Return now if done */
 1420         subs    r2, r2, #0x04
 1421         sublt   r1, r1, #0x03
 1422         blt     .Lmemcpy_bad_done
 1423 
 1424 .Lmemcpy_bad1_loop4:
 1425 #ifdef __ARMEB__
 1426         mov     r4, ip, lsl #8
 1427 #else
 1428         mov     r4, ip, lsr #8
 1429 #endif
 1430         ldr     ip, [r1], #0x04
 1431         subs    r2, r2, #0x04
 1432 #ifdef __ARMEB__
 1433         orr     r4, r4, ip, lsr #24
 1434 #else
 1435         orr     r4, r4, ip, lsl #24
 1436 #endif
 1437         str     r4, [r3], #0x04
 1438         bge     .Lmemcpy_bad1_loop4
 1439         sub     r1, r1, #0x03
 1440         b       .Lmemcpy_bad_done
 1441 
 1442 .Lmemcpy_bad2_loop16:
 1443 #ifdef __ARMEB__
 1444         mov     r4, ip, lsl #16
 1445 #else
 1446         mov     r4, ip, lsr #16
 1447 #endif
 1448         ldr     r5, [r1], #0x04
 1449         pld     [r1, #0x018]
 1450         ldr     r6, [r1], #0x04
 1451         ldr     r7, [r1], #0x04
 1452         ldr     ip, [r1], #0x04
 1453 #ifdef __ARMEB__
 1454         orr     r4, r4, r5, lsr #16
 1455         mov     r5, r5, lsl #16
 1456         orr     r5, r5, r6, lsr #16
 1457         mov     r6, r6, lsl #16
 1458         orr     r6, r6, r7, lsr #16
 1459         mov     r7, r7, lsl #16
 1460         orr     r7, r7, ip, lsr #16
 1461 #else
 1462         orr     r4, r4, r5, lsl #16
 1463         mov     r5, r5, lsr #16
 1464         orr     r5, r5, r6, lsl #16
 1465         mov     r6, r6, lsr #16
 1466         orr     r6, r6, r7, lsl #16
 1467         mov     r7, r7, lsr #16
 1468         orr     r7, r7, ip, lsl #16
 1469 #endif
 1470         str     r4, [r3], #0x04
 1471         str     r5, [r3], #0x04
 1472         str     r6, [r3], #0x04
 1473         str     r7, [r3], #0x04
 1474 .Lmemcpy_bad2:
 1475         subs    r2, r2, #0x10         
 1476         bge     .Lmemcpy_bad2_loop16
 1477 
 1478         adds    r2, r2, #0x10         
 1479         ldmeqfd sp!, {r4-r7}
 1480         RETeq                   /* Return now if done */
 1481         subs    r2, r2, #0x04
 1482         sublt   r1, r1, #0x02
 1483         blt     .Lmemcpy_bad_done
 1484 
 1485 .Lmemcpy_bad2_loop4:
 1486 #ifdef __ARMEB__
 1487         mov     r4, ip, lsl #16
 1488 #else
 1489         mov     r4, ip, lsr #16
 1490 #endif
 1491         ldr     ip, [r1], #0x04
 1492         subs    r2, r2, #0x04
 1493 #ifdef __ARMEB__
 1494         orr     r4, r4, ip, lsr #16
 1495 #else
 1496         orr     r4, r4, ip, lsl #16
 1497 #endif
 1498         str     r4, [r3], #0x04
 1499         bge     .Lmemcpy_bad2_loop4
 1500         sub     r1, r1, #0x02
 1501         b       .Lmemcpy_bad_done
 1502 
 1503 .Lmemcpy_bad3_loop16:
 1504 #ifdef __ARMEB__
 1505         mov     r4, ip, lsl #24
 1506 #else
 1507         mov     r4, ip, lsr #24
 1508 #endif
 1509         ldr     r5, [r1], #0x04
 1510         pld     [r1, #0x018]
 1511         ldr     r6, [r1], #0x04
 1512         ldr     r7, [r1], #0x04
 1513         ldr     ip, [r1], #0x04
 1514 #ifdef __ARMEB__
 1515         orr     r4, r4, r5, lsr #8
 1516         mov     r5, r5, lsl #24
 1517         orr     r5, r5, r6, lsr #8
 1518         mov     r6, r6, lsl #24
 1519         orr     r6, r6, r7, lsr #8
 1520         mov     r7, r7, lsl #24
 1521         orr     r7, r7, ip, lsr #8
 1522 #else
 1523         orr     r4, r4, r5, lsl #8
 1524         mov     r5, r5, lsr #24
 1525         orr     r5, r5, r6, lsl #8
 1526         mov     r6, r6, lsr #24
 1527         orr     r6, r6, r7, lsl #8
 1528         mov     r7, r7, lsr #24
 1529         orr     r7, r7, ip, lsl #8
 1530 #endif
 1531         str     r4, [r3], #0x04
 1532         str     r5, [r3], #0x04
 1533         str     r6, [r3], #0x04
 1534         str     r7, [r3], #0x04
 1535 .Lmemcpy_bad3:
 1536         subs    r2, r2, #0x10         
 1537         bge     .Lmemcpy_bad3_loop16
 1538 
 1539         adds    r2, r2, #0x10         
 1540         ldmeqfd sp!, {r4-r7}
 1541         RETeq                   /* Return now if done */
 1542         subs    r2, r2, #0x04
 1543         sublt   r1, r1, #0x01
 1544         blt     .Lmemcpy_bad_done
 1545 
 1546 .Lmemcpy_bad3_loop4:
 1547 #ifdef __ARMEB__
 1548         mov     r4, ip, lsl #24
 1549 #else
 1550         mov     r4, ip, lsr #24
 1551 #endif
 1552         ldr     ip, [r1], #0x04
 1553         subs    r2, r2, #0x04
 1554 #ifdef __ARMEB__
 1555         orr     r4, r4, ip, lsr #8
 1556 #else
 1557         orr     r4, r4, ip, lsl #8
 1558 #endif
 1559         str     r4, [r3], #0x04
 1560         bge     .Lmemcpy_bad3_loop4
 1561         sub     r1, r1, #0x01
 1562 
 1563 .Lmemcpy_bad_done:
 1564         ldmfd   sp!, {r4-r7}
 1565         adds    r2, r2, #0x04
 1566         RETeq
 1567         ldrb    ip, [r1], #0x01
 1568         cmp     r2, #0x02
 1569         ldrgeb  r2, [r1], #0x01
 1570         strb    ip, [r3], #0x01
 1571         ldrgtb  ip, [r1]
 1572         strgeb  r2, [r3], #0x01
 1573         strgtb  ip, [r3]
 1574         RET
 1575 
 1576 
 1577 /*
 1578  * Handle short copies (less than 16 bytes), possibly misaligned.
 1579  * Some of these are *very* common, thanks to the network stack,
 1580  * and so are handled specially.
 1581  */
 1582 .Lmemcpy_short:
 1583         add     pc, pc, r2, lsl #2
 1584         nop
 1585         RET                     /* 0x00 */
 1586         b       .Lmemcpy_bytewise       /* 0x01 */
 1587         b       .Lmemcpy_bytewise       /* 0x02 */
 1588         b       .Lmemcpy_bytewise       /* 0x03 */
 1589         b       .Lmemcpy_4              /* 0x04 */
 1590         b       .Lmemcpy_bytewise       /* 0x05 */
 1591         b       .Lmemcpy_6              /* 0x06 */
 1592         b       .Lmemcpy_bytewise       /* 0x07 */
 1593         b       .Lmemcpy_8              /* 0x08 */
 1594         b       .Lmemcpy_bytewise       /* 0x09 */
 1595         b       .Lmemcpy_bytewise       /* 0x0a */
 1596         b       .Lmemcpy_bytewise       /* 0x0b */
 1597         b       .Lmemcpy_c              /* 0x0c */
 1598 .Lmemcpy_bytewise:
 1599         mov     r3, r0                  /* We must not clobber r0 */
 1600         ldrb    ip, [r1], #0x01
 1601 1:      subs    r2, r2, #0x01
 1602         strb    ip, [r3], #0x01
 1603         ldrneb  ip, [r1], #0x01
 1604         bne     1b
 1605         RET
 1606 
 1607 /******************************************************************************
 1608  * Special case for 4 byte copies
 1609  */
 1610 #define LMEMCPY_4_LOG2  6       /* 64 bytes */
 1611 #define LMEMCPY_4_PAD   .align LMEMCPY_4_LOG2
 1612         LMEMCPY_4_PAD
 1613 .Lmemcpy_4:
 1614         and     r2, r1, #0x03
 1615         orr     r2, r2, r0, lsl #2
 1616         ands    r2, r2, #0x0f
 1617         sub     r3, pc, #0x14
 1618         addne   pc, r3, r2, lsl #LMEMCPY_4_LOG2
 1619 
 1620 /*
 1621  * 0000: dst is 32-bit aligned, src is 32-bit aligned
 1622  */
 1623         ldr     r2, [r1]
 1624         str     r2, [r0]
 1625         RET
 1626         LMEMCPY_4_PAD
 1627 
 1628 /*
 1629  * 0001: dst is 32-bit aligned, src is 8-bit aligned
 1630  */
 1631         ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
 1632         ldr     r2, [r1, #3]            /* BE:r2 = 3xxx  LE:r2 = xxx3 */
 1633 #ifdef __ARMEB__
 1634         mov     r3, r3, lsl #8          /* r3 = 012. */
 1635         orr     r3, r3, r2, lsr #24     /* r3 = 0123 */
 1636 #else
 1637         mov     r3, r3, lsr #8          /* r3 = .210 */
 1638         orr     r3, r3, r2, lsl #24     /* r3 = 3210 */
 1639 #endif
 1640         str     r3, [r0]
 1641         RET
 1642         LMEMCPY_4_PAD
 1643 
 1644 /*
 1645  * 0010: dst is 32-bit aligned, src is 16-bit aligned
 1646  */
 1647 #ifdef __ARMEB__
 1648         ldrh    r3, [r1]
 1649         ldrh    r2, [r1, #0x02]
 1650 #else
 1651         ldrh    r3, [r1, #0x02]
 1652         ldrh    r2, [r1]
 1653 #endif
 1654         orr     r3, r2, r3, lsl #16
 1655         str     r3, [r0]
 1656         RET
 1657         LMEMCPY_4_PAD
 1658 
 1659 /*
 1660  * 0011: dst is 32-bit aligned, src is 8-bit aligned
 1661  */
 1662         ldr     r3, [r1, #-3]           /* BE:r3 = xxx0  LE:r3 = 0xxx */
 1663         ldr     r2, [r1, #1]            /* BE:r2 = 123x  LE:r2 = x321 */
 1664 #ifdef __ARMEB__
 1665         mov     r3, r3, lsl #24         /* r3 = 0... */
 1666         orr     r3, r3, r2, lsr #8      /* r3 = 0123 */
 1667 #else
 1668         mov     r3, r3, lsr #24         /* r3 = ...0 */
 1669         orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
 1670 #endif
 1671         str     r3, [r0]
 1672         RET
 1673         LMEMCPY_4_PAD
 1674 
 1675 /*
 1676  * 0100: dst is 8-bit aligned, src is 32-bit aligned
 1677  */
 1678         ldr     r2, [r1]
 1679 #ifdef __ARMEB__
 1680         strb    r2, [r0, #0x03]
 1681         mov     r3, r2, lsr #8
 1682         mov     r1, r2, lsr #24
 1683         strb    r1, [r0]
 1684 #else
 1685         strb    r2, [r0]
 1686         mov     r3, r2, lsr #8
 1687         mov     r1, r2, lsr #24
 1688         strb    r1, [r0, #0x03]
 1689 #endif
 1690         strh    r3, [r0, #0x01]
 1691         RET
 1692         LMEMCPY_4_PAD
 1693 
 1694 /*
 1695  * 0101: dst is 8-bit aligned, src is 8-bit aligned
 1696  */
 1697         ldrb    r2, [r1]
 1698         ldrh    r3, [r1, #0x01]
 1699         ldrb    r1, [r1, #0x03]
 1700         strb    r2, [r0]
 1701         strh    r3, [r0, #0x01]
 1702         strb    r1, [r0, #0x03]
 1703         RET
 1704         LMEMCPY_4_PAD
 1705 
 1706 /*
 1707  * 0110: dst is 8-bit aligned, src is 16-bit aligned
 1708  */
 1709         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
 1710         ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
 1711 #ifdef __ARMEB__
 1712         mov     r1, r2, lsr #8          /* r1 = ...0 */
 1713         strb    r1, [r0]
 1714         mov     r2, r2, lsl #8          /* r2 = .01. */
 1715         orr     r2, r2, r3, lsr #8      /* r2 = .012 */
 1716 #else
 1717         strb    r2, [r0]
 1718         mov     r2, r2, lsr #8          /* r2 = ...1 */
 1719         orr     r2, r2, r3, lsl #8      /* r2 = .321 */
 1720         mov     r3, r3, lsr #8          /* r3 = ...3 */
 1721 #endif
 1722         strh    r2, [r0, #0x01]
 1723         strb    r3, [r0, #0x03]
 1724         RET
 1725         LMEMCPY_4_PAD
 1726 
 1727 /*
 1728  * 0111: dst is 8-bit aligned, src is 8-bit aligned
 1729  */
 1730         ldrb    r2, [r1]
 1731         ldrh    r3, [r1, #0x01]
 1732         ldrb    r1, [r1, #0x03]
 1733         strb    r2, [r0]
 1734         strh    r3, [r0, #0x01]
 1735         strb    r1, [r0, #0x03]
 1736         RET
 1737         LMEMCPY_4_PAD
 1738 
 1739 /*
 1740  * 1000: dst is 16-bit aligned, src is 32-bit aligned
 1741  */
 1742         ldr     r2, [r1]
 1743 #ifdef __ARMEB__
 1744         strh    r2, [r0, #0x02]
 1745         mov     r3, r2, lsr #16
 1746         strh    r3, [r0]
 1747 #else
 1748         strh    r2, [r0]
 1749         mov     r3, r2, lsr #16
 1750         strh    r3, [r0, #0x02]
 1751 #endif
 1752         RET
 1753         LMEMCPY_4_PAD
 1754 
 1755 /*
 1756  * 1001: dst is 16-bit aligned, src is 8-bit aligned
 1757  */
 1758         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
 1759         ldr     r3, [r1, #3]            /* BE:r3 = 3xxx  LE:r3 = xxx3 */
 1760         mov     r1, r2, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
 1761         strh    r1, [r0]
 1762 #ifdef __ARMEB__
 1763         mov     r2, r2, lsl #8          /* r2 = 012. */
 1764         orr     r2, r2, r3, lsr #24     /* r2 = 0123 */
 1765 #else
 1766         mov     r2, r2, lsr #24         /* r2 = ...2 */
 1767         orr     r2, r2, r3, lsl #8      /* r2 = xx32 */
 1768 #endif
 1769         strh    r2, [r0, #0x02]
 1770         RET
 1771         LMEMCPY_4_PAD
 1772 
 1773 /*
 1774  * 1010: dst is 16-bit aligned, src is 16-bit aligned
 1775  */
 1776         ldrh    r2, [r1]
 1777         ldrh    r3, [r1, #0x02]
 1778         strh    r2, [r0]
 1779         strh    r3, [r0, #0x02]
 1780         RET
 1781         LMEMCPY_4_PAD
 1782 
 1783 /*
 1784  * 1011: dst is 16-bit aligned, src is 8-bit aligned
 1785  */
 1786         ldr     r3, [r1, #1]            /* BE:r3 = 123x  LE:r3 = x321 */
 1787         ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
 1788         mov     r1, r3, lsr #8          /* BE:r1 = .123  LE:r1 = .x32 */
 1789         strh    r1, [r0, #0x02]
 1790 #ifdef __ARMEB__
 1791         mov     r3, r3, lsr #24         /* r3 = ...1 */
 1792         orr     r3, r3, r2, lsl #8      /* r3 = xx01 */
 1793 #else
 1794         mov     r3, r3, lsl #8          /* r3 = 321. */
 1795         orr     r3, r3, r2, lsr #24     /* r3 = 3210 */
 1796 #endif
 1797         strh    r3, [r0]
 1798         RET
 1799         LMEMCPY_4_PAD
 1800 
 1801 /*
 1802  * 1100: dst is 8-bit aligned, src is 32-bit aligned
 1803  */
 1804         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
 1805 #ifdef __ARMEB__
 1806         strb    r2, [r0, #0x03]
 1807         mov     r3, r2, lsr #8
 1808         mov     r1, r2, lsr #24
 1809         strh    r3, [r0, #0x01]
 1810         strb    r1, [r0]
 1811 #else
 1812         strb    r2, [r0]
 1813         mov     r3, r2, lsr #8
 1814         mov     r1, r2, lsr #24
 1815         strh    r3, [r0, #0x01]
 1816         strb    r1, [r0, #0x03]
 1817 #endif
 1818         RET
 1819         LMEMCPY_4_PAD
 1820 
 1821 /*
 1822  * 1101: dst is 8-bit aligned, src is 8-bit aligned
 1823  */
 1824         ldrb    r2, [r1]
 1825         ldrh    r3, [r1, #0x01]
 1826         ldrb    r1, [r1, #0x03]
 1827         strb    r2, [r0]
 1828         strh    r3, [r0, #0x01]
 1829         strb    r1, [r0, #0x03]
 1830         RET
 1831         LMEMCPY_4_PAD
 1832 
 1833 /*
 1834  * 1110: dst is 8-bit aligned, src is 16-bit aligned
 1835  */
 1836 #ifdef __ARMEB__
 1837         ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
 1838         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
 1839         strb    r3, [r0, #0x03]
 1840         mov     r3, r3, lsr #8          /* r3 = ...2 */
 1841         orr     r3, r3, r2, lsl #8      /* r3 = .012 */
 1842         strh    r3, [r0, #0x01]
 1843         mov     r2, r2, lsr #8          /* r2 = ...0 */
 1844         strb    r2, [r0]
 1845 #else
 1846         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
 1847         ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
 1848         strb    r2, [r0]
 1849         mov     r2, r2, lsr #8          /* r2 = ...1 */
 1850         orr     r2, r2, r3, lsl #8      /* r2 = .321 */
 1851         strh    r2, [r0, #0x01]
 1852         mov     r3, r3, lsr #8          /* r3 = ...3 */
 1853         strb    r3, [r0, #0x03]
 1854 #endif
 1855         RET
 1856         LMEMCPY_4_PAD
 1857 
 1858 /*
 1859  * 1111: dst is 8-bit aligned, src is 8-bit aligned
 1860  */
 1861         ldrb    r2, [r1]
 1862         ldrh    r3, [r1, #0x01]
 1863         ldrb    r1, [r1, #0x03]
 1864         strb    r2, [r0]
 1865         strh    r3, [r0, #0x01]
 1866         strb    r1, [r0, #0x03]
 1867         RET
 1868         LMEMCPY_4_PAD
 1869 
 1870 
 1871 /******************************************************************************
 1872  * Special case for 6 byte copies
 1873  */
 1874 #define LMEMCPY_6_LOG2  6       /* 64 bytes */
 1875 #define LMEMCPY_6_PAD   .align LMEMCPY_6_LOG2
 1876         LMEMCPY_6_PAD
 1877 .Lmemcpy_6:
      /* r0 = dst, r1 = src.  Jump-table dispatch on                        */
      /* ((dst & 3) << 2) | (src & 3): index 0 falls through to the first   */
      /* case; a nonzero index branches index * 64 bytes into the table.    */
 1878         and     r2, r1, #0x03
 1879         orr     r2, r2, r0, lsl #2
 1880         ands    r2, r2, #0x0f
 1881         sub     r3, pc, #0x14
 1882         addne   pc, r3, r2, lsl #LMEMCPY_6_LOG2
 1883 
 1884 /*
 1885  * 0000: dst is 32-bit aligned, src is 32-bit aligned
 1886  */
 1887         ldr     r2, [r1]
 1888         ldrh    r3, [r1, #0x04]
 1889         str     r2, [r0]
 1890         strh    r3, [r0, #0x04]
 1891         RET
 1892         LMEMCPY_6_PAD
 1893 
 1894 /*
 1895  * 0001: dst is 32-bit aligned, src is 8-bit aligned
 1896  */
 1897         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
 1898         ldr     r3, [r1, #0x03]         /* BE:r3 = 345x  LE:r3 = x543 */
 1899 #ifdef __ARMEB__
 1900         mov     r2, r2, lsl #8          /* r2 = 012. */
 1901         orr     r2, r2, r3, lsr #24     /* r2 = 0123 */
 1902 #else
 1903         mov     r2, r2, lsr #8          /* r2 = .210 */
 1904         orr     r2, r2, r3, lsl #24     /* r2 = 3210 */
 1905 #endif
 1906         mov     r3, r3, lsr #8          /* BE:r3 = .345  LE:r3 = .x54 */
 1907         str     r2, [r0]
 1908         strh    r3, [r0, #0x04]
 1909         RET
 1910         LMEMCPY_6_PAD
 1911 
 1912 /*
 1913  * 0010: dst is 32-bit aligned, src is 16-bit aligned
 1914  */
 1915         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
 1916         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
 1917 #ifdef __ARMEB__
 1918         mov     r1, r3, lsr #16         /* r1 = ..23 */
 1919         orr     r1, r1, r2, lsl #16     /* r1 = 0123 */
 1920         str     r1, [r0]
 1921         strh    r3, [r0, #0x04]
 1922 #else
 1923         mov     r1, r3, lsr #16         /* r1 = ..54 */
 1924         orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
 1925         str     r2, [r0]
 1926         strh    r1, [r0, #0x04]
 1927 #endif
 1928         RET
 1929         LMEMCPY_6_PAD
 1930 
 1931 /*
 1932  * 0011: dst is 32-bit aligned, src is 8-bit aligned
 1933  */
 1934         ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
 1935         ldr     r3, [r1, #1]            /* BE:r3 = 1234  LE:r3 = 4321 */
 1936         ldr     r1, [r1, #5]            /* BE:r1 = 5xxx  LE:r1 = xxx5 */
 1937 #ifdef __ARMEB__
 1938         mov     r2, r2, lsl #24         /* r2 = 0... */
 1939         orr     r2, r2, r3, lsr #8      /* r2 = 0123 */
 1940         mov     r3, r3, lsl #8          /* r3 = 234. */
 1941         orr     r1, r3, r1, lsr #24     /* r1 = 2345 */
 1942 #else
 1943         mov     r2, r2, lsr #24         /* r2 = ...0 */
 1944         orr     r2, r2, r3, lsl #8      /* r2 = 3210 */
 1945         mov     r1, r1, lsl #8          /* r1 = xx5. */
 1946         orr     r1, r1, r3, lsr #24     /* r1 = xx54 */
 1947 #endif
 1948         str     r2, [r0]
 1949         strh    r1, [r0, #0x04]
 1950         RET
 1951         LMEMCPY_6_PAD
 1952 
 1953 /*
 1954  * 0100: dst is 8-bit aligned, src is 32-bit aligned
 1955  */
 1956         ldr     r3, [r1]                /* BE:r3 = 0123  LE:r3 = 3210 */
 1957         ldrh    r2, [r1, #0x04]         /* BE:r2 = ..45  LE:r2 = ..54 */
 1958         mov     r1, r3, lsr #8          /* BE:r1 = .012  LE:r1 = .321 */
 1959         strh    r1, [r0, #0x01]
 1960 #ifdef __ARMEB__
 1961         mov     r1, r3, lsr #24         /* r1 = ...0 */
 1962         strb    r1, [r0]
 1963         mov     r3, r3, lsl #8          /* r3 = 123. */
 1964         orr     r3, r3, r2, lsr #8      /* r3 = 1234 */
 1965 #else
 1966         strb    r3, [r0]
 1967         mov     r3, r3, lsr #24         /* r3 = ...3 */
 1968         orr     r3, r3, r2, lsl #8      /* r3 = .543 */
 1969         mov     r2, r2, lsr #8          /* r2 = ...5 */
 1970 #endif
 1971         strh    r3, [r0, #0x03]
 1972         strb    r2, [r0, #0x05]
 1973         RET
 1974         LMEMCPY_6_PAD
 1975 
 1976 /*
 1977  * 0101: dst is 8-bit aligned, src is 8-bit aligned
 1978  */
 1979         ldrb    r2, [r1]
 1980         ldrh    r3, [r1, #0x01]
 1981         ldrh    ip, [r1, #0x03]
 1982         ldrb    r1, [r1, #0x05]
 1983         strb    r2, [r0]
 1984         strh    r3, [r0, #0x01]
 1985         strh    ip, [r0, #0x03]
 1986         strb    r1, [r0, #0x05]
 1987         RET
 1988         LMEMCPY_6_PAD
 1989 
 1990 /*
 1991  * 0110: dst is 8-bit aligned, src is 16-bit aligned
 1992  */
 1993         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
 1994         ldr     r1, [r1, #0x02]         /* BE:r1 = 2345  LE:r1 = 5432 */
 1995 #ifdef __ARMEB__
 1996         mov     r3, r2, lsr #8          /* r3 = ...0 */
 1997         strb    r3, [r0]
 1998         strb    r1, [r0, #0x05]
 1999         mov     r3, r1, lsr #8          /* r3 = .234 */
 2000         strh    r3, [r0, #0x03]
 2001         mov     r3, r2, lsl #8          /* r3 = .01. */
 2002         orr     r3, r3, r1, lsr #24     /* r3 = .012 */
 2003         strh    r3, [r0, #0x01]
 2004 #else
 2005         strb    r2, [r0]
 2006         mov     r3, r1, lsr #24
 2007         strb    r3, [r0, #0x05]
 2008         mov     r3, r1, lsr #8          /* r3 = .543 */
 2009         strh    r3, [r0, #0x03]
 2010         mov     r3, r2, lsr #8          /* r3 = ...1 */
 2011         orr     r3, r3, r1, lsl #8      /* r3 = 4321 */
 2012         strh    r3, [r0, #0x01]
 2013 #endif
 2014         RET
 2015         LMEMCPY_6_PAD
 2016 
 2017 /*
 2018  * 0111: dst is 8-bit aligned, src is 8-bit aligned
 2019  */
 2020         ldrb    r2, [r1]
 2021         ldrh    r3, [r1, #0x01]
 2022         ldrh    ip, [r1, #0x03]
 2023         ldrb    r1, [r1, #0x05]
 2024         strb    r2, [r0]
 2025         strh    r3, [r0, #0x01]
 2026         strh    ip, [r0, #0x03]
 2027         strb    r1, [r0, #0x05]
 2028         RET
 2029         LMEMCPY_6_PAD
 2030 
 2031 /*
 2032  * 1000: dst is 16-bit aligned, src is 32-bit aligned
 2033  */
 2034 #ifdef __ARMEB__
 2035         ldr     r2, [r1]                /* r2 = 0123 */
 2036         ldrh    r3, [r1, #0x04]         /* r3 = ..45 */
 2037         mov     r1, r2, lsr #16         /* r1 = ..01 */
 2038         orr     r3, r3, r2, lsl#16      /* r3 = 2345 */
 2039         strh    r1, [r0]
 2040         str     r3, [r0, #0x02]
 2041 #else
 2042         ldrh    r2, [r1, #0x04]         /* r2 = ..54 */
 2043         ldr     r3, [r1]                /* r3 = 3210 */
 2044         mov     r2, r2, lsl #16         /* r2 = 54.. */
 2045         orr     r2, r2, r3, lsr #16     /* r2 = 5432 */
 2046         strh    r3, [r0]
 2047         str     r2, [r0, #0x02]
 2048 #endif
 2049         RET
 2050         LMEMCPY_6_PAD
 2051 
 2052 /*
 2053  * 1001: dst is 16-bit aligned, src is 8-bit aligned
 2054  */
 2055         ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
 2056         ldr     r2, [r1, #3]            /* BE:r2 = 345x  LE:r2 = x543 */
 2057         mov     r1, r3, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
 2058 #ifdef __ARMEB__
 2059         mov     r2, r2, lsr #8          /* r2 = .345 */
 2060         orr     r2, r2, r3, lsl #24     /* r2 = 2345 */
 2061 #else
 2062         mov     r2, r2, lsl #8          /* r2 = 543. */
 2063         orr     r2, r2, r3, lsr #24     /* r2 = 5432 */
 2064 #endif
 2065         strh    r1, [r0]
 2066         str     r2, [r0, #0x02]
 2067         RET
 2068         LMEMCPY_6_PAD
 2069 
 2070 /*
 2071  * 1010: dst is 16-bit aligned, src is 16-bit aligned
 2072  */
 2073         ldrh    r2, [r1]
 2074         ldr     r3, [r1, #0x02]
 2075         strh    r2, [r0]
 2076         str     r3, [r0, #0x02]
 2077         RET
 2078         LMEMCPY_6_PAD
 2079 
 2080 /*
 2081  * 1011: dst is 16-bit aligned, src is 8-bit aligned
 2082  */
 2083         ldrb    r3, [r1]                /* r3 = ...0 */
 2084         ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
 2085         ldrb    r1, [r1, #0x05]         /* r1 = ...5 */
 2086 #ifdef __ARMEB__
 2087         mov     r3, r3, lsl #8          /* r3 = ..0. */
 2088         orr     r3, r3, r2, lsr #24     /* r3 = ..01 */
 2089         orr     r1, r1, r2, lsl #8      /* r1 = 2345 */
 2090 #else
 2091         orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
 2092         mov     r1, r1, lsl #24         /* r1 = 5... */
 2093         orr     r1, r1, r2, lsr #8      /* r1 = 5432 */
 2094 #endif
 2095         strh    r3, [r0]
 2096         str     r1, [r0, #0x02]
 2097         RET
 2098         LMEMCPY_6_PAD
 2099 
 2100 /*
 2101  * 1100: dst is 8-bit aligned, src is 32-bit aligned
 2102  */
 2103         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
 2104         ldrh    r1, [r1, #0x04]         /* BE:r1 = ..45  LE:r1 = ..54 */
 2105 #ifdef __ARMEB__
 2106         mov     r3, r2, lsr #24         /* r3 = ...0 */
 2107         strb    r3, [r0]
 2108         mov     r2, r2, lsl #8          /* r2 = 123. */
 2109         orr     r2, r2, r1, lsr #8      /* r2 = 1234 */
 2110 #else
 2111         strb    r2, [r0]
 2112         mov     r2, r2, lsr #8          /* r2 = .321 */
 2113         orr     r2, r2, r1, lsl #24     /* r2 = 4321 */
 2114         mov     r1, r1, lsr #8          /* r1 = ...5 */
 2115 #endif
 2116         str     r2, [r0, #0x01]
 2117         strb    r1, [r0, #0x05]
 2118         RET
 2119         LMEMCPY_6_PAD
 2120 
 2121 /*
 2122  * 1101: dst is 8-bit aligned, src is 8-bit aligned
 2123  */
 2124         ldrb    r2, [r1]
 2125         ldrh    r3, [r1, #0x01]
 2126         ldrh    ip, [r1, #0x03]
 2127         ldrb    r1, [r1, #0x05]
 2128         strb    r2, [r0]
 2129         strh    r3, [r0, #0x01]
 2130         strh    ip, [r0, #0x03]
 2131         strb    r1, [r0, #0x05]
 2132         RET
 2133         LMEMCPY_6_PAD
 2134 
 2135 /*
 2136  * 1110: dst is 8-bit aligned, src is 16-bit aligned
 2137  */
 2138         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
 2139         ldr     r1, [r1, #0x02]         /* BE:r1 = 2345  LE:r1 = 5432 */
 2140 #ifdef __ARMEB__
 2141         mov     r3, r2, lsr #8          /* r3 = ...0 */
 2142         strb    r3, [r0]
 2143         mov     r2, r2, lsl #24         /* r2 = 1... */
 2144         orr     r2, r2, r1, lsr #8      /* r2 = 1234 */
 2145 #else
 2146         strb    r2, [r0]
 2147         mov     r2, r2, lsr #8          /* r2 = ...1 */
 2148         orr     r2, r2, r1, lsl #8      /* r2 = 4321 */
 2149         mov     r1, r1, lsr #24         /* r1 = ...5 */
 2150 #endif
 2151         str     r2, [r0, #0x01]
 2152         strb    r1, [r0, #0x05]
 2153         RET
 2154         LMEMCPY_6_PAD
 2155 
 2156 /*
 2157  * 1111: dst is 8-bit aligned, src is 8-bit aligned
 2158  */
 2159         ldrb    r2, [r1]
 2160         ldr     r3, [r1, #0x01]
 2161         ldrb    r1, [r1, #0x05]
 2162         strb    r2, [r0]
 2163         str     r3, [r0, #0x01]
 2164         strb    r1, [r0, #0x05]
 2165         RET
 2166         LMEMCPY_6_PAD
 2167 
 2168 
 2169 /******************************************************************************
 2170  * Special case for 8 byte copies
 2171  */
 2172 #define LMEMCPY_8_LOG2  6       /* 64 bytes */
 2173 #define LMEMCPY_8_PAD   .align LMEMCPY_8_LOG2
 2174         LMEMCPY_8_PAD
 2175 .Lmemcpy_8:
      /* r0 = dst, r1 = src.  Jump-table dispatch on                        */
      /* ((dst & 3) << 2) | (src & 3): index 0 falls through to the first   */
      /* case; a nonzero index branches index * 64 bytes into the table.    */
 2176         and     r2, r1, #0x03
 2177         orr     r2, r2, r0, lsl #2
 2178         ands    r2, r2, #0x0f
 2179         sub     r3, pc, #0x14
 2180         addne   pc, r3, r2, lsl #LMEMCPY_8_LOG2
 2181 
 2182 /*
 2183  * 0000: dst is 32-bit aligned, src is 32-bit aligned
 2184  */
 2185         ldr     r2, [r1]
 2186         ldr     r3, [r1, #0x04]
 2187         str     r2, [r0]
 2188         str     r3, [r0, #0x04]
 2189         RET
 2190         LMEMCPY_8_PAD
 2191 
 2192 /*
 2193  * 0001: dst is 32-bit aligned, src is 8-bit aligned
 2194  */
 2195         ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
 2196         ldr     r2, [r1, #0x03]         /* BE:r2 = 3456  LE:r2 = 6543 */
 2197         ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
 2198 #ifdef __ARMEB__
 2199         mov     r3, r3, lsl #8          /* r3 = 012. */
 2200         orr     r3, r3, r2, lsr #24     /* r3 = 0123 */
 2201         orr     r2, r1, r2, lsl #8      /* r2 = 4567 */
 2202 #else
 2203         mov     r3, r3, lsr #8          /* r3 = .210 */
 2204         orr     r3, r3, r2, lsl #24     /* r3 = 3210 */
 2205         mov     r1, r1, lsl #24         /* r1 = 7... */
 2206         orr     r2, r1, r2, lsr #8      /* r2 = 7654 */
 2207 #endif
 2208         str     r3, [r0]
 2209         str     r2, [r0, #0x04]
 2210         RET
 2211         LMEMCPY_8_PAD
 2212 
 2213 /*
 2214  * 0010: dst is 32-bit aligned, src is 16-bit aligned
 2215  */
 2216         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
 2217         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
 2218         ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
 2219 #ifdef __ARMEB__
 2220         mov     r2, r2, lsl #16         /* r2 = 01.. */
 2221         orr     r2, r2, r3, lsr #16     /* r2 = 0123 */
 2222         orr     r3, r1, r3, lsl #16     /* r3 = 4567 */
 2223 #else
 2224         orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
 2225         mov     r3, r3, lsr #16         /* r3 = ..54 */
 2226         orr     r3, r3, r1, lsl #16     /* r3 = 7654 */
 2227 #endif
 2228         str     r2, [r0]
 2229         str     r3, [r0, #0x04]
 2230         RET
 2231         LMEMCPY_8_PAD
 2232 
 2233 /*
 2234  * 0011: dst is 32-bit aligned, src is 8-bit aligned
 2235  */
 2236         ldrb    r3, [r1]                /* r3 = ...0 */
 2237         ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
 2238         ldr     r1, [r1, #0x05]         /* BE:r1 = 567x  LE:r1 = x765 */
 2239 #ifdef __ARMEB__
 2240         mov     r3, r3, lsl #24         /* r3 = 0... */
 2241         orr     r3, r3, r2, lsr #8      /* r3 = 0123 */
 2242         mov     r2, r2, lsl #24         /* r2 = 4... */
 2243         orr     r2, r2, r1, lsr #8      /* r2 = 4567 */
 2244 #else
 2245         orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
 2246         mov     r2, r2, lsr #24         /* r2 = ...4 */
 2247         orr     r2, r2, r1, lsl #8      /* r2 = 7654 */
 2248 #endif
 2249         str     r3, [r0]
 2250         str     r2, [r0, #0x04]
 2251         RET
 2252         LMEMCPY_8_PAD
 2253 
 2254 /*
 2255  * 0100: dst is 8-bit aligned, src is 32-bit aligned
 2256  */
 2257         ldr     r3, [r1]                /* BE:r3 = 0123  LE:r3 = 3210 */
 2258         ldr     r2, [r1, #0x04]         /* BE:r2 = 4567  LE:r2 = 7654 */
 2259 #ifdef __ARMEB__
 2260         mov     r1, r3, lsr #24         /* r1 = ...0 */
 2261         strb    r1, [r0]
 2262         mov     r1, r3, lsr #8          /* r1 = .012 */
 2263         strb    r2, [r0, #0x07]
 2264         mov     r3, r3, lsl #24         /* r3 = 3... */
 2265         orr     r3, r3, r2, lsr #8      /* r3 = 3456 */
 2266 #else
 2267         strb    r3, [r0]
 2268         mov     r1, r2, lsr #24         /* r1 = ...7 */
 2269         strb    r1, [r0, #0x07]
 2270         mov     r1, r3, lsr #8          /* r1 = .321 */
 2271         mov     r3, r3, lsr #24         /* r3 = ...3 */
 2272         orr     r3, r3, r2, lsl #8      /* r3 = 6543 */
 2273 #endif
 2274         strh    r1, [r0, #0x01]
 2275         str     r3, [r0, #0x03]
 2276         RET
 2277         LMEMCPY_8_PAD
 2278 
 2279 /*
 2280  * 0101: dst is 8-bit aligned, src is 8-bit aligned
 2281  */
 2282         ldrb    r2, [r1]
 2283         ldrh    r3, [r1, #0x01]
 2284         ldr     ip, [r1, #0x03]
 2285         ldrb    r1, [r1, #0x07]
 2286         strb    r2, [r0]
 2287         strh    r3, [r0, #0x01]
 2288         str     ip, [r0, #0x03]
 2289         strb    r1, [r0, #0x07]
 2290         RET
 2291         LMEMCPY_8_PAD
 2292 
 2293 /*
 2294  * 0110: dst is 8-bit aligned, src is 16-bit aligned
 2295  */
 2296         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
 2297         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
 2298         ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
 2299 #ifdef __ARMEB__
 2300         mov     ip, r2, lsr #8          /* ip = ...0 */
 2301         strb    ip, [r0]
 2302         mov     ip, r2, lsl #8          /* ip = .01. */
 2303         orr     ip, ip, r3, lsr #24     /* ip = .012 */
 2304         strb    r1, [r0, #0x07]
 2305         mov     r3, r3, lsl #8          /* r3 = 345. */
 2306         orr     r3, r3, r1, lsr #8      /* r3 = 3456 */
 2307 #else
 2308         strb    r2, [r0]                /* 0 */
 2309         mov     ip, r1, lsr #8          /* ip = ...7 */
 2310         strb    ip, [r0, #0x07]         /* 7 */
 2311         mov     ip, r2, lsr #8          /* ip = ...1 */
 2312         orr     ip, ip, r3, lsl #8      /* ip = 4321 */
 2313         mov     r3, r3, lsr #8          /* r3 = .543 */
 2314         orr     r3, r3, r1, lsl #24     /* r3 = 6543 */
 2315 #endif
 2316         strh    ip, [r0, #0x01]
 2317         str     r3, [r0, #0x03]
 2318         RET
 2319         LMEMCPY_8_PAD
 2320 
 2321 /*
 2322  * 0111: dst is 8-bit aligned, src is 8-bit aligned
 2323  */
 2324         ldrb    r3, [r1]                /* r3 = ...0 */
 2325         ldr     ip, [r1, #0x01]         /* BE:ip = 1234  LE:ip = 4321 */
 2326         ldrh    r2, [r1, #0x05]         /* BE:r2 = ..56  LE:r2 = ..65 */
 2327         ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
 2328         strb    r3, [r0]
 2329         mov     r3, ip, lsr #16         /* BE:r3 = ..12  LE:r3 = ..43 */
 2330 #ifdef __ARMEB__
 2331         strh    r3, [r0, #0x01]
 2332         orr     r2, r2, ip, lsl #16     /* r2 = 3456 */
 2333 #else
 2334         strh    ip, [r0, #0x01]
 2335         orr     r2, r3, r2, lsl #16     /* r2 = 6543 */
 2336 #endif
 2337         str     r2, [r0, #0x03]
 2338         strb    r1, [r0, #0x07]
 2339         RET
 2340         LMEMCPY_8_PAD
 2341 
 2342 /*
 2343  * 1000: dst is 16-bit aligned, src is 32-bit aligned
 2344  */
 2345         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
 2346         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
 2347         mov     r1, r2, lsr #16         /* BE:r1 = ..01  LE:r1 = ..32 */
 2348 #ifdef __ARMEB__
 2349         strh    r1, [r0]
 2350         mov     r1, r3, lsr #16         /* r1 = ..45 */
 2351         orr     r2, r1 ,r2, lsl #16     /* r2 = 2345 */
 2352 #else
 2353         strh    r2, [r0]
 2354         orr     r2, r1, r3, lsl #16     /* r2 = 5432 */
 2355         mov     r3, r3, lsr #16         /* r3 = ..76 */
 2356 #endif
 2357         str     r2, [r0, #0x02]
 2358         strh    r3, [r0, #0x06]
 2359         RET
 2360         LMEMCPY_8_PAD
 2361 
 2362 /*
 2363  * 1001: dst is 16-bit aligned, src is 8-bit aligned
 2364  */
 2365         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
 2366         ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
 2367         ldrb    ip, [r1, #0x07]         /* ip = ...7 */
 2368         mov     r1, r2, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
 2369         strh    r1, [r0]
 2370 #ifdef __ARMEB__
 2371         mov     r1, r2, lsl #24         /* r1 = 2... */
 2372         orr     r1, r1, r3, lsr #8      /* r1 = 2345 */
 2373         orr     r3, ip, r3, lsl #8      /* r3 = 4567 */
 2374 #else
 2375         mov     r1, r2, lsr #24         /* r1 = ...2 */
 2376         orr     r1, r1, r3, lsl #8      /* r1 = 5432 */
 2377         mov     r3, r3, lsr #24         /* r3 = ...6 */
 2378         orr     r3, r3, ip, lsl #8      /* r3 = ..76 */
 2379 #endif
 2380         str     r1, [r0, #0x02]
 2381         strh    r3, [r0, #0x06]
 2382         RET
 2383         LMEMCPY_8_PAD
 2384 
 2385 /*
 2386  * 1010: dst is 16-bit aligned, src is 16-bit aligned
 2387  */
 2388         ldrh    r2, [r1]
 2389         ldr     ip, [r1, #0x02]
 2390         ldrh    r3, [r1, #0x06]
 2391         strh    r2, [r0]
 2392         str     ip, [r0, #0x02]
 2393         strh    r3, [r0, #0x06]
 2394         RET
 2395         LMEMCPY_8_PAD
 2396 
 2397 /*
 2398  * 1011: dst is 16-bit aligned, src is 8-bit aligned
 2399  */
 2400         ldr     r3, [r1, #0x05]         /* BE:r3 = 567x  LE:r3 = x765 */
 2401         ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
 2402         ldrb    ip, [r1]                /* ip = ...0 */
 2403         mov     r1, r3, lsr #8          /* BE:r1 = .567  LE:r1 = .x76 */
 2404         strh    r1, [r0, #0x06]
 2405 #ifdef __ARMEB__
 2406         mov     r3, r3, lsr #24         /* r3 = ...5 */
 2407         orr     r3, r3, r2, lsl #8      /* r3 = 2345 */
 2408         mov     r2, r2, lsr #24         /* r2 = ...1 */
 2409         orr     r2, r2, ip, lsl #8      /* r2 = ..01 */
 2410 #else
 2411         mov     r3, r3, lsl #24         /* r3 = 5... */
 2412         orr     r3, r3, r2, lsr #8      /* r3 = 5432 */
 2413         orr     r2, ip, r2, lsl #8      /* r2 = 3210 */
 2414 #endif
 2415         str     r3, [r0, #0x02]
 2416         strh    r2, [r0]
 2417         RET
 2418         LMEMCPY_8_PAD
 2419 
 2420 /*
 2421  * 1100: dst is 8-bit aligned, src is 32-bit aligned
 2422  */
 2423         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
 2424         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
 2425         mov     r1, r3, lsr #8          /* BE:r1 = .456  LE:r1 = .765 */
 2426         strh    r1, [r0, #0x05]
 2427 #ifdef __ARMEB__
 2428         strb    r3, [r0, #0x07]
 2429         mov     r1, r2, lsr #24         /* r1 = ...0 */
 2430         strb    r1, [r0]
 2431         mov     r2, r2, lsl #8          /* r2 = 123. */
 2432         orr     r2, r2, r3, lsr #24     /* r2 = 1234 */
 2433         str     r2, [r0, #0x01]
 2434 #else
 2435         strb    r2, [r0]
 2436         mov     r1, r3, lsr #24         /* r1 = ...7 */
 2437         strb    r1, [r0, #0x07]
 2438         mov     r2, r2, lsr #8          /* r2 = .321 */
 2439         orr     r2, r2, r3, lsl #24     /* r2 = 4321 */
 2440         str     r2, [r0, #0x01]
 2441 #endif
 2442         RET
 2443         LMEMCPY_8_PAD
 2444 
 2445 /*
 2446  * 1101: dst is 8-bit aligned, src is 8-bit aligned
 2447  */
 2448         ldrb    r3, [r1]                /* r3 = ...0 */
 2449         ldrh    r2, [r1, #0x01]         /* BE:r2 = ..12  LE:r2 = ..21 */
 2450         ldr     ip, [r1, #0x03]         /* BE:ip = 3456  LE:ip = 6543 */
 2451         ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
 2452         strb    r3, [r0]
 2453         mov     r3, ip, lsr #16         /* BE:r3 = ..34  LE:r3 = ..65 */
 2454 #ifdef __ARMEB__
 2455         strh    ip, [r0, #0x05]
 2456         orr     r2, r3, r2, lsl #16     /* r2 = 1234 */
 2457 #else
 2458         strh    r3, [r0, #0x05]
 2459         orr     r2, r2, ip, lsl #16     /* r2 = 4321 */
 2460 #endif
 2461         str     r2, [r0, #0x01]
 2462         strb    r1, [r0, #0x07]
 2463         RET
 2464         LMEMCPY_8_PAD
 2465 
 2466 /*
 2467  * 1110: dst is 8-bit aligned, src is 16-bit aligned
 2468  */
 2469         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
 2470         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
 2471         ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
 2472 #ifdef __ARMEB__
 2473         mov     ip, r2, lsr #8          /* ip = ...0 */
 2474         strb    ip, [r0]
 2475         mov     ip, r2, lsl #24         /* ip = 1... */
 2476         orr     ip, ip, r3, lsr #8      /* ip = 1234 */
 2477         strb    r1, [r0, #0x07]
 2478         mov     r1, r1, lsr #8          /* r1 = ...6 */
 2479         orr     r1, r1, r3, lsl #8      /* r1 = 3456 */
 2480 #else
 2481         strb    r2, [r0]
 2482         mov     ip, r2, lsr #8          /* ip = ...1 */
 2483         orr     ip, ip, r3, lsl #8      /* ip = 4321 */
 2484         mov     r2, r1, lsr #8          /* r2 = ...7 */
 2485         strb    r2, [r0, #0x07]
 2486         mov     r1, r1, lsl #8          /* r1 = .76. */
 2487         orr     r1, r1, r3, lsr #24     /* r1 = .765 */
 2488 #endif
 2489         str     ip, [r0, #0x01]
 2490         strh    r1, [r0, #0x05]
 2491         RET
 2492         LMEMCPY_8_PAD
 2493 
 2494 /*
 2495  * 1111: dst is 8-bit aligned, src is 8-bit aligned
 2496  */
 2497         ldrb    r2, [r1]
 2498         ldr     ip, [r1, #0x01]
 2499         ldrh    r3, [r1, #0x05]
 2500         ldrb    r1, [r1, #0x07]
 2501         strb    r2, [r0]
 2502         str     ip, [r0, #0x01]
 2503         strh    r3, [r0, #0x05]
 2504         strb    r1, [r0, #0x07]
 2505         RET
 2506         LMEMCPY_8_PAD
 2507 
 2508 /******************************************************************************
 2509  * Special case for 12 byte copies
 2510  */
 2511 #define LMEMCPY_C_LOG2  7       /* 128 bytes */
 2512 #define LMEMCPY_C_PAD   .align LMEMCPY_C_LOG2
 2513         LMEMCPY_C_PAD
 2514 .Lmemcpy_c:
 2515         and     r2, r1, #0x03
 2516         orr     r2, r2, r0, lsl #2
 2517         ands    r2, r2, #0x0f
 2518         sub     r3, pc, #0x14
 2519         addne   pc, r3, r2, lsl #LMEMCPY_C_LOG2
 2520 
 2521 /*
 2522  * 0000: dst is 32-bit aligned, src is 32-bit aligned
 2523  */
 2524         ldr     r2, [r1]                /* fully aligned case: three */
 2525         ldr     r3, [r1, #0x04]         /* word loads ...            */
 2526         ldr     r1, [r1, #0x08]
 2527         str     r2, [r0]                /* ... and three word stores */
 2528         str     r3, [r0, #0x04]
 2529         str     r1, [r0, #0x08]
 2530         RET
 2531         LMEMCPY_C_PAD
 2532 
 2533 /*
 2534  * 0001: dst is 32-bit aligned, src is 8-bit aligned
 * Hex digits 0-B in the comments name the twelve bytes being copied;
 * 'x' is a byte outside the copy.  src & 3 == 1, so [r1, #-1] is a
 * word-aligned load: the stray 'x' byte it reads lies in the same
 * aligned word as byte 0 and can never cross a page boundary.
 2535  */
 2536         ldrb    r2, [r1, #0xb]          /* r2 = ...B */
 2537         ldr     ip, [r1, #0x07]         /* BE:ip = 789A  LE:ip = A987 */
 2538         ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
 2539         ldr     r1, [r1, #-1]           /* BE:r1 = x012  LE:r1 = 210x */
 2540 #ifdef __ARMEB__
 2541         orr     r2, r2, ip, lsl #8      /* r2 = 89AB */
 2542         str     r2, [r0, #0x08]
 2543         mov     r2, ip, lsr #24         /* r2 = ...7 */
 2544         orr     r2, r2, r3, lsl #8      /* r2 = 4567 */
 2545         mov     r1, r1, lsl #8          /* r1 = 012. */
 2546         orr     r1, r1, r3, lsr #24     /* r1 = 0123 */
 2547 #else
 2548         mov     r2, r2, lsl #24         /* r2 = B... */
 2549         orr     r2, r2, ip, lsr #8      /* r2 = BA98 */
 2550         str     r2, [r0, #0x08]
 2551         mov     r2, ip, lsl #24         /* r2 = 7... */
 2552         orr     r2, r2, r3, lsr #8      /* r2 = 7654 */
 2553         mov     r1, r1, lsr #8          /* r1 = .210 */
 2554         orr     r1, r1, r3, lsl #24     /* r1 = 3210 */
 2555 #endif
 2556         str     r2, [r0, #0x04]
 2557         str     r1, [r0]
 2558         RET
 2559         LMEMCPY_C_PAD
 2560 
 2561 /*
 2562  * 0010: dst is 32-bit aligned, src is 16-bit aligned
 * Digits 0-B in the comments name the twelve bytes being copied.
 * 2+4+4+2 source bytes are loaded on their natural alignment and
 * shifted/merged into three whole words for the aligned destination.
 2563  */
 2564         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
 2565         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
 2566         ldr     ip, [r1, #0x06]         /* BE:ip = 6789  LE:ip = 9876 */
 2567         ldrh    r1, [r1, #0x0a]         /* BE:r1 = ..AB  LE:r1 = ..BA */
 2568 #ifdef __ARMEB__
 2569         mov     r2, r2, lsl #16         /* r2 = 01.. */
 2570         orr     r2, r2, r3, lsr #16     /* r2 = 0123 */
 2571         str     r2, [r0]
 2572         mov     r3, r3, lsl #16         /* r3 = 45.. */
 2573         orr     r3, r3, ip, lsr #16     /* r3 = 4567 */
 2574         orr     r1, r1, ip, lsl #16     /* r1 = 89AB */
 2575 #else
 2576         orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
 2577         str     r2, [r0]
 2578         mov     r3, r3, lsr #16         /* r3 = ..54 */
 2579         orr     r3, r3, ip, lsl #16     /* r3 = 7654 */
 2580         mov     r1, r1, lsl #16         /* r1 = BA.. */
 2581         orr     r1, r1, ip, lsr #16     /* r1 = BA98 */
 2582 #endif
 2583         str     r3, [r0, #0x04]
 2584         str     r1, [r0, #0x08]
 2585         RET
 2586         LMEMCPY_C_PAD
 2587 
 2588 /*
 2589  * 0011: dst is 32-bit aligned, src is 8-bit aligned
 * src & 3 == 3, so the word loads at #0x01/#0x05/#0x09 are aligned.
 * The last one reads one byte past the copy ('x'); an aligned word
 * access cannot cross a page boundary, so it cannot fault.
 2590  */
 2591         ldrb    r2, [r1]                /* r2 = ...0 */
 2592         ldr     r3, [r1, #0x01]         /* BE:r3 = 1234  LE:r3 = 4321 */
 2593         ldr     ip, [r1, #0x05]         /* BE:ip = 5678  LE:ip = 8765 */
 2594         ldr     r1, [r1, #0x09]         /* BE:r1 = 9ABx  LE:r1 = xBA9 */
 2595 #ifdef __ARMEB__
 2596         mov     r2, r2, lsl #24         /* r2 = 0... */
 2597         orr     r2, r2, r3, lsr #8      /* r2 = 0123 */
 2598         str     r2, [r0]
 2599         mov     r3, r3, lsl #24         /* r3 = 4... */
 2600         orr     r3, r3, ip, lsr #8      /* r3 = 4567 */
 2601         mov     r1, r1, lsr #8          /* r1 = .9AB */
 2602         orr     r1, r1, ip, lsl #24     /* r1 = 89AB */
 2603 #else
 2604         orr     r2, r2, r3, lsl #8      /* r2 = 3210 */
 2605         str     r2, [r0]
 2606         mov     r3, r3, lsr #24         /* r3 = ...4 */
 2607         orr     r3, r3, ip, lsl #8      /* r3 = 7654 */
 2608         mov     r1, r1, lsl #8          /* r1 = BA9. */
 2609         orr     r1, r1, ip, lsr #24     /* r1 = BA98 */
 2610 #endif
 2611         str     r3, [r0, #0x04]
 2612         str     r1, [r0, #0x08]
 2613         RET
 2614         LMEMCPY_C_PAD
 2615 
 2616 /*
 2617  * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
 * dst & 3 == 1: store byte 0 singly, bytes 1-2 as a halfword
 * (dst+1 is 16-bit aligned), then dst+3/dst+7 take word stores and
 * the final byte goes out singly.
 2618  */
 2619         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
 2620         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
 2621         ldr     ip, [r1, #0x08]         /* BE:ip = 89AB  LE:ip = BA98 */
 2622         mov     r1, r2, lsr #8          /* BE:r1 = .012  LE:r1 = .321 */
 2623         strh    r1, [r0, #0x01]         /* bytes 1-2 */
 2624 #ifdef __ARMEB__
 2625         mov     r1, r2, lsr #24         /* r1 = ...0 */
 2626         strb    r1, [r0]
 2627         mov     r1, r2, lsl #24         /* r1 = 3... */
 2628         orr     r2, r1, r3, lsr #8      /* r2 = 3456 */
 2629         mov     r1, r3, lsl #24         /* r1 = 7... */
 2630         orr     r1, r1, ip, lsr #8      /* r1 = 789A */
 2631 #else
 2632         strb    r2, [r0]                /* low byte of 3210 is byte 0 */
 2633         mov     r1, r2, lsr #24         /* r1 = ...3 */
 2634         orr     r2, r1, r3, lsl #8      /* r2 = 6543 */
 2635         mov     r1, r3, lsr #24         /* r1 = ...7 */
 2636         orr     r1, r1, ip, lsl #8      /* r1 = A987 */
 2637         mov     ip, ip, lsr #24         /* ip = ...B */
 2638 #endif
 2639         str     r2, [r0, #0x03]         /* dst+3 is word-aligned */
 2640         str     r1, [r0, #0x07]
 2641         strb    ip, [r0, #0x0b]         /* BE: low byte of 89AB is B */
 2642         RET
 2643         LMEMCPY_C_PAD
 2644 
 2645 /*
 2646  * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
 * Both pointers share offset 1, so after the leading byte and the
 * halfword at +1 every remaining access is naturally aligned on
 * both sides.
 2647  */
 2648         ldrb    r2, [r1]                /* byte 0 */
 2649         ldrh    r3, [r1, #0x01]         /* bytes 1-2 (halfword-aligned) */
 2650         ldr     ip, [r1, #0x03]         /* bytes 3-6 (word-aligned) */
 2651         strb    r2, [r0]
 2652         ldr     r2, [r1, #0x07]         /* bytes 7-10 */
 2653         ldrb    r1, [r1, #0x0b]         /* byte 11 */
 2654         strh    r3, [r0, #0x01]
 2655         str     ip, [r0, #0x03]
 2656         str     r2, [r0, #0x07]
 2657         strb    r1, [r0, #0x0b]
 2658         RET
 2659         LMEMCPY_C_PAD
 2660 
 2661 /*
 2662  * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
 * Digits 0-B in the comments name the twelve bytes being copied.
 2663  */
 2664         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
 2665         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
 2666         ldr     ip, [r1, #0x06]         /* BE:ip = 6789  LE:ip = 9876 */
 2667         ldrh    r1, [r1, #0x0a]         /* BE:r1 = ..AB  LE:r1 = ..BA */
 2668 #ifdef __ARMEB__
 2669         mov     r2, r2, ror #8          /* r2 = 1..0 */
 2670         strb    r2, [r0]                /* low byte: 0 */
 2671         mov     r2, r2, lsr #16         /* r2 = ..1. */
 2672         orr     r2, r2, r3, lsr #24     /* r2 = ..12 */
 2673         strh    r2, [r0, #0x01]
 2674         mov     r2, r3, lsl #8          /* r2 = 345. */
 2675         orr     r3, r2, ip, lsr #24     /* r3 = 3456 */
 2676         mov     r2, ip, lsl #8          /* r2 = 789. */
 2677         orr     r2, r2, r1, lsr #8      /* r2 = 789A */
 2678 #else
 2679         strb    r2, [r0]                /* low byte: 0 */
 2680         mov     r2, r2, lsr #8          /* r2 = ...1 */
 2681         orr     r2, r2, r3, lsl #8      /* r2 = 4321 */
 2682         strh    r2, [r0, #0x01]
 2683         mov     r2, r3, lsr #8          /* r2 = .543 */
 2684         orr     r3, r2, ip, lsl #24     /* r3 = 6543 */
 2685         mov     r2, ip, lsr #8          /* r2 = .987 */
 2686         orr     r2, r2, r1, lsl #24     /* r2 = A987 */
 2687         mov     r1, r1, lsr #8          /* r1 = ...B */
 2688 #endif
 2689         str     r3, [r0, #0x03]         /* dst+3 is word-aligned */
 2690         str     r2, [r0, #0x07]
 2691         strb    r1, [r0, #0x0b]
 2692         RET
 2693         LMEMCPY_C_PAD
 2694 
 2695 /*
 2696  * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
 * src & 3 == 3, so the word loads below are aligned; the one at
 * #0x09 reads one byte past the copy ('x'), which is harmless
 * because an aligned word access never crosses a page boundary.
 2697  */
 2698         ldrb    r2, [r1]
 2699         ldr     r3, [r1, #0x01]         /* BE:r3 = 1234  LE:r3 = 4321 */
 2700         ldr     ip, [r1, #0x05]         /* BE:ip = 5678  LE:ip = 8765 */
 2701         ldr     r1, [r1, #0x09]         /* BE:r1 = 9ABx  LE:r1 = xBA9 */
 2702         strb    r2, [r0]
 2703 #ifdef __ARMEB__
 2704         mov     r2, r3, lsr #16         /* r2 = ..12 */
 2705         strh    r2, [r0, #0x01]
 2706         mov     r3, r3, lsl #16         /* r3 = 34.. */
 2707         orr     r3, r3, ip, lsr #16     /* r3 = 3456 */
 2708         mov     ip, ip, lsl #16         /* ip = 78.. */
 2709         orr     ip, ip, r1, lsr #16     /* ip = 789A */
 2710         mov     r1, r1, lsr #8          /* r1 = .9AB */
 2711 #else
 2712         strh    r3, [r0, #0x01]         /* low half of 4321 = bytes 1-2 */
 2713         mov     r3, r3, lsr #16         /* r3 = ..43 */
 2714         orr     r3, r3, ip, lsl #16     /* r3 = 6543 */
 2715         mov     ip, ip, lsr #16         /* ip = ..87 */
 2716         orr     ip, ip, r1, lsl #16     /* ip = A987 */
 2717         mov     r1, r1, lsr #16         /* r1 = ..xB */
 2718 #endif
 2719         str     r3, [r0, #0x03]         /* dst+3 is word-aligned */
 2720         str     ip, [r0, #0x07]
 2721         strb    r1, [r0, #0x0b]         /* low byte: B */
 2722         RET
 2723         LMEMCPY_C_PAD
 2724 
 2725 /*
 2726  * 1000: dst is 16-bit aligned, src is 32-bit aligned
 * Copy as halfword, word, word, halfword on the destination side;
 * dst+2 and dst+6 are word-aligned.
 2727  */
 2728         ldr     ip, [r1]                /* BE:ip = 0123  LE:ip = 3210 */
 2729         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
 2730         ldr     r2, [r1, #0x08]         /* BE:r2 = 89AB  LE:r2 = BA98 */
 2731         mov     r1, ip, lsr #16         /* BE:r1 = ..01  LE:r1 = ..32 */
 2732 #ifdef __ARMEB__
 2733         strh    r1, [r0]                /* bytes 0-1 */
 2734         mov     r1, ip, lsl #16         /* r1 = 23.. */
 2735         orr     r1, r1, r3, lsr #16     /* r1 = 2345 */
 2736         mov     r3, r3, lsl #16         /* r3 = 67.. */
 2737         orr     r3, r3, r2, lsr #16     /* r3 = 6789 */
 2738 #else
 2739         strh    ip, [r0]                /* low half of 3210 = bytes 0-1 */
 2740         orr     r1, r1, r3, lsl #16     /* r1 = 5432 */
 2741         mov     r3, r3, lsr #16         /* r3 = ..76 */
 2742         orr     r3, r3, r2, lsl #16     /* r3 = 9876 */
 2743         mov     r2, r2, lsr #16         /* r2 = ..BA */
 2744 #endif
 2745         str     r1, [r0, #0x02]
 2746         str     r3, [r0, #0x06]
 2747         strh    r2, [r0, #0x0a]         /* BE: low half of 89AB is AB */
 2748         RET
 2749         LMEMCPY_C_PAD
 2750 
 2751 /*
 2752  * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
 * [r1, #-1] is a word-aligned load (src & 3 == 1); the stray 'x'
 * byte it reads lies in the same aligned word as byte 0, so it
 * cannot fault on a page boundary.
 2753  */
 2754         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
 2755         ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
 2756         mov     ip, r2, lsr #8          /* BE:ip = .x01  LE:ip = .210 */
 2757         strh    ip, [r0]                /* bytes 0-1 */
 2758         ldr     ip, [r1, #0x07]         /* BE:ip = 789A  LE:ip = A987 */
 2759         ldrb    r1, [r1, #0x0b]         /* r1 = ...B */
 2760 #ifdef __ARMEB__
 2761         mov     r2, r2, lsl #24         /* r2 = 2... */
 2762         orr     r2, r2, r3, lsr #8      /* r2 = 2345 */
 2763         mov     r3, r3, lsl #24         /* r3 = 6... */
 2764         orr     r3, r3, ip, lsr #8      /* r3 = 6789 */
 2765         orr     r1, r1, ip, lsl #8      /* r1 = 89AB */
 2766 #else
 2767         mov     r2, r2, lsr #24         /* r2 = ...2 */
 2768         orr     r2, r2, r3, lsl #8      /* r2 = 5432 */
 2769         mov     r3, r3, lsr #24         /* r3 = ...6 */
 2770         orr     r3, r3, ip, lsl #8      /* r3 = 9876 */
 2771         mov     r1, r1, lsl #8          /* r1 = ..B. */
 2772         orr     r1, r1, ip, lsr #24     /* r1 = ..BA */
 2773 #endif
 2774         str     r2, [r0, #0x02]         /* dst+2 is word-aligned */
 2775         str     r3, [r0, #0x06]
 2776         strh    r1, [r0, #0x0a]
 2777         RET
 2778         LMEMCPY_C_PAD
 2779 
 2780 /*
 2781  * 1010: dst is 16-bit aligned, src is 16-bit aligned
 * Matching offsets: copy as halfword, word, word, halfword — all
 * accesses naturally aligned on both sides, no shifting needed.
 2782  */
 2783         ldrh    r2, [r1]                /* bytes 0-1 */
 2784         ldr     r3, [r1, #0x02]         /* bytes 2-5 (word-aligned) */
 2785         ldr     ip, [r1, #0x06]         /* bytes 6-9 */
 2786         ldrh    r1, [r1, #0x0a]         /* bytes 10-11 */
 2787         strh    r2, [r0]
 2788         str     r3, [r0, #0x02]
 2789         str     ip, [r0, #0x06]
 2790         strh    r1, [r0, #0x0a]
 2791         RET
 2792         LMEMCPY_C_PAD
 2793 
 2794 /*
 2795  * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
 * The load at [r1, #0x09] is word-aligned (src & 3 == 3) and reads
 * one byte past the copy ('x'); an aligned word access cannot cross
 * a page boundary, so this is safe.
 2796  */
 2797         ldr     r2, [r1, #0x09]         /* BE:r2 = 9ABx  LE:r2 = xBA9 */
 2798         ldr     r3, [r1, #0x05]         /* BE:r3 = 5678  LE:r3 = 8765 */
 2799         mov     ip, r2, lsr #8          /* BE:ip = .9AB  LE:ip = .xBA */
 2800         strh    ip, [r0, #0x0a]         /* bytes 10-11 */
 2801         ldr     ip, [r1, #0x01]         /* BE:ip = 1234  LE:ip = 4321 */
 2802         ldrb    r1, [r1]                /* r1 = ...0 */
 2803 #ifdef __ARMEB__
 2804         mov     r2, r2, lsr #24         /* r2 = ...9 */
 2805         orr     r2, r2, r3, lsl #8      /* r2 = 6789 */
 2806         mov     r3, r3, lsr #24         /* r3 = ...5 */
 2807         orr     r3, r3, ip, lsl #8      /* r3 = 2345 */
 2808         mov     r1, r1, lsl #8          /* r1 = ..0. */
 2809         orr     r1, r1, ip, lsr #24     /* r1 = ..01 */
 2810 #else
 2811         mov     r2, r2, lsl #24         /* r2 = 9... */
 2812         orr     r2, r2, r3, lsr #8      /* r2 = 9876 */
 2813         mov     r3, r3, lsl #24         /* r3 = 5... */
 2814         orr     r3, r3, ip, lsr #8      /* r3 = 5432 */
 2815         orr     r1, r1, ip, lsl #8      /* r1 = 3210 */
 2816 #endif
 2817         str     r2, [r0, #0x06]         /* dst+6 is word-aligned */
 2818         str     r3, [r0, #0x02]
 2819         strh    r1, [r0]                /* bytes 0-1 */
 2820         RET
 2821         LMEMCPY_C_PAD
 2822 
 2823 /*
 2824  * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
 * dst & 3 == 3: leading and trailing bytes are stored singly,
 * dst+1 and dst+5 take word stores, dst+9 a halfword store.
 2825  */
 2826         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
 2827         ldr     ip, [r1, #0x04]         /* BE:ip = 4567  LE:ip = 7654 */
 2828         ldr     r1, [r1, #0x08]         /* BE:r1 = 89AB  LE:r1 = BA98 */
 2829 #ifdef __ARMEB__
 2830         mov     r3, r2, lsr #24         /* r3 = ...0 */
 2831         strb    r3, [r0]
 2832         mov     r2, r2, lsl #8          /* r2 = 123. */
 2833         orr     r2, r2, ip, lsr #24     /* r2 = 1234 */
 2834         str     r2, [r0, #0x01]
 2835         mov     r2, ip, lsl #8          /* r2 = 567. */
 2836         orr     r2, r2, r1, lsr #24     /* r2 = 5678 */
 2837         str     r2, [r0, #0x05]
 2838         mov     r2, r1, lsr #8          /* r2 = ..9A */
 2839         strh    r2, [r0, #0x09]
 2840         strb    r1, [r0, #0x0b]         /* low byte of 89AB is B */
 2841 #else
 2842         strb    r2, [r0]                /* low byte of 3210 is 0 */
 2843         mov     r3, r2, lsr #8          /* r3 = .321 */
 2844         orr     r3, r3, ip, lsl #24     /* r3 = 4321 */
 2845         str     r3, [r0, #0x01]
 2846         mov     r3, ip, lsr #8          /* r3 = .765 */
 2847         orr     r3, r3, r1, lsl #24     /* r3 = 8765 */
 2848         str     r3, [r0, #0x05]
 2849         mov     r1, r1, lsr #8          /* r1 = .BA9 */
 2850         strh    r1, [r0, #0x09]
 2851         mov     r1, r1, lsr #16         /* r1 = ...B */
 2852         strb    r1, [r0, #0x0b]
 2853 #endif
 2854         RET
 2855         LMEMCPY_C_PAD
 2856 
 2857 /*
 2858  * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
 * [r1, #-1] is a word-aligned load (src & 3 == 1); the stray 'x'
 * byte lies in the same aligned word as byte 0 and cannot fault.
 2859  */
 2860         ldrb    r2, [r1, #0x0b]         /* r2 = ...B */
 2861         ldr     r3, [r1, #0x07]         /* BE:r3 = 789A  LE:r3 = A987 */
 2862         ldr     ip, [r1, #0x03]         /* BE:ip = 3456  LE:ip = 6543 */
 2863         ldr     r1, [r1, #-1]           /* BE:r1 = x012  LE:r1 = 210x */
 2864         strb    r2, [r0, #0x0b]
 2865 #ifdef __ARMEB__
 2866         strh    r3, [r0, #0x09]         /* low half of 789A = bytes 9-10 */
 2867         mov     r3, r3, lsr #16         /* r3 = ..78 */
 2868         orr     r3, r3, ip, lsl #16     /* r3 = 5678 */
 2869         mov     ip, ip, lsr #16         /* ip = ..34 */
 2870         orr     ip, ip, r1, lsl #16     /* ip = 1234 */
 2871         mov     r1, r1, lsr #16         /* r1 = ..x0 */
 2872 #else
 2873         mov     r2, r3, lsr #16         /* r2 = ..A9 */
 2874         strh    r2, [r0, #0x09]
 2875         mov     r3, r3, lsl #16         /* r3 = 87.. */
 2876         orr     r3, r3, ip, lsr #16     /* r3 = 8765 */
 2877         mov     ip, ip, lsl #16         /* ip = 43.. */
 2878         orr     ip, ip, r1, lsr #16     /* ip = 4321 */
 2879         mov     r1, r1, lsr #8          /* r1 = .210 */
 2880 #endif
 2881         str     r3, [r0, #0x05]         /* dst+5 is word-aligned */
 2882         str     ip, [r0, #0x01]
 2883         strb    r1, [r0]                /* low byte: 0 */
 2884         RET
 2885         LMEMCPY_C_PAD
 2886 
 2887 /*
 2888  * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
 * The required shuffle differs enough between byte orders that the
 * whole sequence, memory accesses included, is duplicated per
 * endianness rather than sharing the loads/stores.
 2889  */
 2890 #ifdef __ARMEB__
 2891         ldrh    r2, [r1, #0x0a]         /* r2 = ..AB */
 2892         ldr     ip, [r1, #0x06]         /* ip = 6789 */
 2893         ldr     r3, [r1, #0x02]         /* r3 = 2345 */
 2894         ldrh    r1, [r1]                /* r1 = ..01 */
 2895         strb    r2, [r0, #0x0b]         /* low byte: B */
 2896         mov     r2, r2, lsr #8          /* r2 = ...A */
 2897         orr     r2, r2, ip, lsl #8      /* r2 = 789A */
 2898         mov     ip, ip, lsr #8          /* ip = .678 */
 2899         orr     ip, ip, r3, lsl #24     /* ip = 5678 */
 2900         mov     r3, r3, lsr #8          /* r3 = .234 */
 2901         orr     r3, r3, r1, lsl #24     /* r3 = 1234 */
 2902         mov     r1, r1, lsr #8          /* r1 = ...0 */
 2903         strb    r1, [r0]
 2904         str     r3, [r0, #0x01]         /* dst+1 is word-aligned */
 2905         str     ip, [r0, #0x05]
 2906         strh    r2, [r0, #0x09]
 2907 #else
 2908         ldrh    r2, [r1]                /* r2 = ..10 */
 2909         ldr     r3, [r1, #0x02]         /* r3 = 5432 */
 2910         ldr     ip, [r1, #0x06]         /* ip = 9876 */
 2911         ldrh    r1, [r1, #0x0a]         /* r1 = ..BA */
 2912         strb    r2, [r0]                /* low byte: 0 */
 2913         mov     r2, r2, lsr #8          /* r2 = ...1 */
 2914         orr     r2, r2, r3, lsl #8      /* r2 = 4321 */
 2915         mov     r3, r3, lsr #24         /* r3 = ...5 */
 2916         orr     r3, r3, ip, lsl #8      /* r3 = 8765 */
 2917         mov     ip, ip, lsr #24         /* ip = ...9 */
 2918         orr     ip, ip, r1, lsl #8      /* ip = .BA9 */
 2919         mov     r1, r1, lsr #8          /* r1 = ...B */
 2920         str     r2, [r0, #0x01]         /* dst+1 is word-aligned */
 2921         str     r3, [r0, #0x05]
 2922         strh    ip, [r0, #0x09]
 2923         strb    r1, [r0, #0x0b]
 2924 #endif
 2925         RET
 2926         LMEMCPY_C_PAD
 2927 
 2928 /*
 2929  * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
 * Matching offsets: after the leading byte, src+1/dst+1 are
 * word-aligned, so the copy is byte, word, word, halfword, byte
 * with every access naturally aligned.
 2930  */
 2931         ldrb    r2, [r1]                /* byte 0 */
 2932         ldr     r3, [r1, #0x01]         /* bytes 1-4 (word-aligned) */
 2933         ldr     ip, [r1, #0x05]         /* bytes 5-8 */
 2934         strb    r2, [r0]
 2935         ldrh    r2, [r1, #0x09]         /* bytes 9-10 (halfword-aligned) */
 2936         ldrb    r1, [r1, #0x0b]         /* byte 11 */
 2937         str     r3, [r0, #0x01]
 2938         str     ip, [r0, #0x05]
 2939         strh    r2, [r0, #0x09]
 2940         strb    r1, [r0, #0x0b]
 2941         RET
 2942 #endif /* _ARM_ARCH_5E */
 2943 
 2944 #ifdef GPROF
 2945 
/*
 * Single-nop stub symbols compiled in only under GPROF.
 * NOTE(review): these look like the classic kernel-gprof address
 * markers delimiting user/trap/interrupt time (user, btrap, etrap,
 * bintr, eintr); nothing in this chunk shows them being called —
 * confirm against the profiling machinery before relying on that.
 */
 2946 ENTRY(user)
 2947         nop
 2948 ENTRY(btrap)
 2949         nop
 2950 ENTRY(etrap)
 2951         nop
 2952 ENTRY(bintr)
 2953         nop
 2954 ENTRY(eintr)
 2955         nop
 2956 
 2957 #endif

Cache object: 2565a246e52a2cf6a2fee1f062ca883c


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.