FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/support.S
1 /*-
2 * Copyright (c) 2004 Olivier Houchard
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26 /*
27 * Copyright 2003 Wasabi Systems, Inc.
28 * All rights reserved.
29 *
30 * Written by Steve C. Woodford for Wasabi Systems, Inc.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed for the NetBSD Project by
43 * Wasabi Systems, Inc.
44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
45 * or promote products derived from this software without specific prior
46 * written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58 * POSSIBILITY OF SUCH DAMAGE.
59 */
60 /*
61 * Copyright (c) 1997 The NetBSD Foundation, Inc.
62 * All rights reserved.
63 *
64 * This code is derived from software contributed to The NetBSD Foundation
65 * by Neil A. Carson and Mark Brinicombe
66 *
67 * Redistribution and use in source and binary forms, with or without
68 * modification, are permitted provided that the following conditions
69 * are met:
70 * 1. Redistributions of source code must retain the above copyright
71 * notice, this list of conditions and the following disclaimer.
72 * 2. Redistributions in binary form must reproduce the above copyright
73 * notice, this list of conditions and the following disclaimer in the
74 * documentation and/or other materials provided with the distribution.
75 *
76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
86 * POSSIBILITY OF SUCH DAMAGE.
87 */
88
89 #include <machine/asm.h>
90 __FBSDID("$FreeBSD$");
91
92 #include "assym.inc"
93
94 .syntax unified
95
96 /*
97 * memset: Sets a block of memory to the specified value
98 *
99 * On entry:
100 * r0 - dest address
101 * r1 - byte to write
102 * r2 - number of bytes to write
103 *
104 * On exit:
105 * r0 - dest address
106 */
107 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
108 ENTRY(memset)
/*
 * r3 = fill byte (low 8 bits of r1); r1 then takes over as the remaining
 * byte count so that r0 — the return value, i.e. the original dest
 * pointer — is never modified. ip is the running store pointer.
 */
109 and r3, r1, #0xff /* We deal with bytes */
110 mov r1, r2
111 do_memset:
112 cmp r1, #0x04 /* Do we have less than 4 bytes */
113 mov ip, r0
114 blt .Lmemset_lessthanfour
115
116 /* Ok first we will word align the address */
117 ands r2, ip, #0x03 /* Get the bottom two bits */
118 bne .Lmemset_wordunaligned /* The address is not word aligned */
119
120 /* We are now word aligned */
121 .Lmemset_wordaligned:
122 orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */
123 tst ip, #0x04 /* Quad-align for armv5e */
124 orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */
125 subne r1, r1, #0x04 /* Quad-align if necessary */
126 strne r3, [ip], #0x04
127 cmp r1, #0x10
128 blt .Lmemset_loop4 /* If less than 16 then use words */
129 mov r2, r3 /* Duplicate data */
130 cmp r1, #0x80 /* If < 128 then skip the big loop */
131 blt .Lmemset_loop32
132
/*
 * The strd forms below store the r2:r3 register pair (8 bytes per
 * instruction); r2 was duplicated from r3 above and ip was 8-byte
 * aligned by the quad-align step, as strd requires.
 */
133 /* Do 128 bytes at a time */
134 .Lmemset_loop128:
135 subs r1, r1, #0x80
136 strdge r2, [ip], #0x08
137 strdge r2, [ip], #0x08
138 strdge r2, [ip], #0x08
139 strdge r2, [ip], #0x08
140 strdge r2, [ip], #0x08
141 strdge r2, [ip], #0x08
142 strdge r2, [ip], #0x08
143 strdge r2, [ip], #0x08
144 strdge r2, [ip], #0x08
145 strdge r2, [ip], #0x08
146 strdge r2, [ip], #0x08
147 strdge r2, [ip], #0x08
148 strdge r2, [ip], #0x08
149 strdge r2, [ip], #0x08
150 strdge r2, [ip], #0x08
151 strdge r2, [ip], #0x08
152 bgt .Lmemset_loop128
153 RETeq /* Zero length so just exit */
154
/* Falls through here only with r1 negative (went below 0x80). */
155 add r1, r1, #0x80 /* Adjust for extra sub */
156
157 /* Do 32 bytes at a time */
158 .Lmemset_loop32:
159 subs r1, r1, #0x20
160 strdge r2, [ip], #0x08
161 strdge r2, [ip], #0x08
162 strdge r2, [ip], #0x08
163 strdge r2, [ip], #0x08
164 bgt .Lmemset_loop32
165 RETeq /* Zero length so just exit */
166
167 adds r1, r1, #0x10 /* Partially adjust for extra sub */
168
169 /* Deal with 16 bytes or more */
170 strdge r2, [ip], #0x08
171 strdge r2, [ip], #0x08
172 RETeq /* Zero length so just exit */
173
174 addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
175
176 /* We have at least 4 bytes so copy as words */
177 .Lmemset_loop4:
178 subs r1, r1, #0x04
179 strge r3, [ip], #0x04
180 bgt .Lmemset_loop4
181 RETeq /* Zero length so just exit */
182
183 /* Compensate for 64-bit alignment check */
184 adds r1, r1, #0x04
185 RETeq
/* 1..3 trailing bytes: flags from the cmp drive the conditional strb's. */
186 cmp r1, #2
187
188 strb r3, [ip], #0x01 /* Set 1 byte */
189 strbge r3, [ip], #0x01 /* Set another byte */
190 strbgt r3, [ip] /* and a third */
191 RET /* Exit */
192
/*
 * Entered with r2 = ip & 3 (nonzero). Store 4 - r2 leading bytes to
 * reach word alignment, then re-test the remaining length.
 */
193 .Lmemset_wordunaligned:
194 rsb r2, r2, #0x004
195 strb r3, [ip], #0x01 /* Set 1 byte */
196 cmp r2, #0x02
197 strbge r3, [ip], #0x01 /* Set another byte */
198 sub r1, r1, r2
199 strbgt r3, [ip], #0x01 /* and a third */
200 cmp r1, #0x04 /* More than 4 bytes left? */
201 bge .Lmemset_wordaligned /* Yup */
202
203 .Lmemset_lessthanfour:
204 cmp r1, #0x00
205 RETeq /* Zero length so exit */
206 strb r3, [ip], #0x01 /* Set 1 byte */
207 cmp r1, #0x02
208 strbge r3, [ip], #0x01 /* Set another byte */
209 strbgt r3, [ip] /* and a third */
210 RET /* Exit */
211 END(memset)
212
/*
 * int memcmp(const void *b1, const void *b2, size_t len)
 * In:  r0 = b1, r1 = b2, r2 = len.
 * Out: r0 = difference of first mismatching bytes/words, 0 if equal.
 * ip shadows b1 so r0 is free to build the return value.
 */
213 ENTRY(memcmp)
214 mov ip, r0
215 cmp r2, #0x06
216 beq .Lmemcmp_6bytes
217 mov r0, #0x00
218
219 /* Are both addresses aligned the same way? */
220 cmp r2, #0x00
221 eorsne r3, ip, r1
222 RETeq /* len == 0, or same addresses! */
223 tst r3, #0x03
224 subne r2, r2, #0x01
225 bne .Lmemcmp_bytewise2 /* Badly aligned. Do it the slow way */
226
/*
 * Computed goto: r3 = 3 * ((r1 - 5) & 3) (== 3 * ((r1 - 1) & 3)), then
 * pc advances by r3 * 8 = 24 bytes per unit, i.e. one 6-instruction
 * stanza per unit of misalignment. Each "Compare up to N bytes" stanza
 * below MUST therefore stay exactly 6 instructions long, and the nop
 * accounts for pc reading as current + 8 in ARM state.
 */
227 /* Word-align the addresses, if necessary */
228 sub r3, r1, #0x05
229 ands r3, r3, #0x03
230 add r3, r3, r3, lsl #1
231 addne pc, pc, r3, lsl #3
232 nop
233
234 /* Compare up to 3 bytes */
235 ldrb r0, [ip], #0x01
236 ldrb r3, [r1], #0x01
237 subs r0, r0, r3
238 RETne
239 subs r2, r2, #0x01
240 RETeq
241
242 /* Compare up to 2 bytes */
243 ldrb r0, [ip], #0x01
244 ldrb r3, [r1], #0x01
245 subs r0, r0, r3
246 RETne
247 subs r2, r2, #0x01
248 RETeq
249
250 /* Compare 1 byte */
251 ldrb r0, [ip], #0x01
252 ldrb r3, [r1], #0x01
253 subs r0, r0, r3
254 RETne
255 subs r2, r2, #0x01
256 RETeq
257
258 /* Compare 4 bytes at a time, if possible */
259 subs r2, r2, #0x04
260 bcc .Lmemcmp_bytewise
261 .Lmemcmp_word_aligned:
262 ldr r0, [ip], #0x04
263 ldr r3, [r1], #0x04
264 subs r2, r2, #0x04
265 cmpcs r0, r3
266 beq .Lmemcmp_word_aligned
267 sub r0, r0, r3
268
269 /* Correct for extra subtraction, and check if done */
270 adds r2, r2, #0x04
271 cmpeq r0, #0x00 /* If done, did all bytes match? */
272 RETeq /* Yup. Just return */
273
/*
 * A mismatching word was found: back both pointers up and recompare it
 * byte-by-byte so the return value reflects the first differing byte.
 */
274 /* Re-do the final word byte-wise */
275 sub ip, ip, #0x04
276 sub r1, r1, #0x04
277
278 .Lmemcmp_bytewise:
279 add r2, r2, #0x03
280 .Lmemcmp_bytewise2:
281 ldrb r0, [ip], #0x01
282 ldrb r3, [r1], #0x01
283 subs r2, r2, #0x01
284 cmpcs r0, r3
285 beq .Lmemcmp_bytewise2
286 sub r0, r0, r3
287 RET
288
289 /*
290 * 6 byte compares are very common, thanks to the network stack.
291 * This code is hand-scheduled to reduce the number of stalls for
292 * load results. Everything else being equal, this will be ~32%
293 * faster than a byte-wise memcmp.
294 */
295 .align 5
296 .Lmemcmp_6bytes:
297 ldrb r3, [r1, #0x00] /* r3 = b2#0 */
298 ldrb r0, [ip, #0x00] /* r0 = b1#0 */
299 ldrb r2, [r1, #0x01] /* r2 = b2#1 */
300 subs r0, r0, r3 /* r0 = b1#0 - b2#0 */
301 ldrbeq r3, [ip, #0x01] /* r3 = b1#1 */
302 RETne /* Return if mismatch on #0 */
303 subs r0, r3, r2 /* r0 = b1#1 - b2#1 */
304 ldrbeq r3, [r1, #0x02] /* r3 = b2#2 */
305 ldrbeq r0, [ip, #0x02] /* r0 = b1#2 */
306 RETne /* Return if mismatch on #1 */
307 ldrb r2, [r1, #0x03] /* r2 = b2#3 */
308 subs r0, r0, r3 /* r0 = b1#2 - b2#2 */
309 ldrbeq r3, [ip, #0x03] /* r3 = b1#3 */
310 RETne /* Return if mismatch on #2 */
311 subs r0, r3, r2 /* r0 = b1#3 - b2#3 */
312 ldrbeq r3, [r1, #0x04] /* r3 = b2#4 */
313 ldrbeq r0, [ip, #0x04] /* r0 = b1#4 */
314 RETne /* Return if mismatch on #3 */
315 ldrb r2, [r1, #0x05] /* r2 = b2#5 */
316 subs r0, r0, r3 /* r0 = b1#4 - b2#4 */
317 ldrbeq r3, [ip, #0x05] /* r3 = b1#5 */
318 RETne /* Return if mismatch on #4 */
319 sub r0, r3, r2 /* r0 = b1#5 - b2#5 */
320 RET
321 END(memcmp)
322
/*
 * void *memmove(void *dst, const void *src, size_t len)
 * In:  r0 = dst, r1 = src, r2 = len.  Out: r0 = dst.
 * Non-overlapping buffers are handed off (tail-called) to memcpy.
 * Overlapping buffers copy forwards when src > dst, backwards when
 * dst > src, so the not-yet-copied region is never overwritten.
 */
323 ENTRY(memmove)
324 /* Do the buffers overlap? */
325 cmp r0, r1
326 RETeq /* Bail now if src/dst are the same */
327 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */
328 subcs r3, r1, r0 /* if (src > dst) r3 = src - dst */
329 cmp r3, r2 /* if (r3 < len) we have an overlap */
330 bcc PIC_SYM(_C_LABEL(memcpy), PLT)
331
332 /* Determine copy direction */
333 cmp r1, r0
334 bcc .Lmemmove_backwards
335
/*
 * NOTE(review): EQ here would mean src == dst, but that case already
 * returned at the top of the function — this pair looks vestigial
 * (and the "len=0" comment does not match the flags); confirm against
 * upstream history before removing.
 */
336 moveq r0, #0 /* Quick abort for len=0 */
337 RETeq
338
/* Forward copy. r0/lr are saved so the final ldmia can both restore
 * the return value (original dst) and return in one instruction. */
339 stmdb sp!, {r0, lr} /* memmove() returns dest addr */
340 subs r2, r2, #4
341 blt .Lmemmove_fl4 /* less than 4 bytes */
342 ands r12, r0, #3
343 bne .Lmemmove_fdestul /* oh unaligned destination addr */
344 ands r12, r1, #3
345 bne .Lmemmove_fsrcul /* oh unaligned source addr */
346
347 .Lmemmove_ft8:
348 /* We have aligned source and destination */
349 subs r2, r2, #8
350 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
351 subs r2, r2, #0x14
352 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
353 stmdb sp!, {r4} /* borrow r4 */
354
355 /* blat 32 bytes at a time */
356 /* XXX for really big copies perhaps we should use more registers */
357 .Lmemmove_floop32:
358 ldmia r1!, {r3, r4, r12, lr}
359 stmia r0!, {r3, r4, r12, lr}
360 ldmia r1!, {r3, r4, r12, lr}
361 stmia r0!, {r3, r4, r12, lr}
362 subs r2, r2, #0x20
363 bge .Lmemmove_floop32
364
365 cmn r2, #0x10
366 ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
367 stmiage r0!, {r3, r4, r12, lr}
368 subge r2, r2, #0x10
369 ldmia sp!, {r4} /* return r4 */
370
371 .Lmemmove_fl32:
372 adds r2, r2, #0x14
373
374 /* blat 12 bytes at a time */
375 .Lmemmove_floop12:
376 ldmiage r1!, {r3, r12, lr}
377 stmiage r0!, {r3, r12, lr}
378 subsge r2, r2, #0x0c
379 bge .Lmemmove_floop12
380
381 .Lmemmove_fl12:
382 adds r2, r2, #8
383 blt .Lmemmove_fl4
384
385 subs r2, r2, #4
386 ldrlt r3, [r1], #4
387 strlt r3, [r0], #4
388 ldmiage r1!, {r3, r12}
389 stmiage r0!, {r3, r12}
390 subge r2, r2, #4
391
392 .Lmemmove_fl4:
393 /* less than 4 bytes to go */
394 adds r2, r2, #4
395 ldmiaeq sp!, {r0, pc} /* done */
396
397 /* copy the crud byte at a time */
398 cmp r2, #2
399 ldrb r3, [r1], #1
400 strb r3, [r0], #1
401 ldrbge r3, [r1], #1
402 strbge r3, [r0], #1
403 ldrbgt r3, [r1], #1
404 strbgt r3, [r0], #1
405 ldmia sp!, {r0, pc}
406
407 /* erg - unaligned destination */
408 .Lmemmove_fdestul:
/* r12 = dst & 3 here; copy 4 - r12 bytes to word-align dst. */
409 rsb r12, r12, #4
410 cmp r12, #2
411
412 /* align destination with byte copies */
413 ldrb r3, [r1], #1
414 strb r3, [r0], #1
415 ldrbge r3, [r1], #1
416 strbge r3, [r0], #1
417 ldrbgt r3, [r1], #1
418 strbgt r3, [r0], #1
419 subs r2, r2, r12
420 blt .Lmemmove_fl4 /* less the 4 bytes */
421
422 ands r12, r1, #3
423 beq .Lmemmove_ft8 /* we have an aligned source */
424
/*
 * Unaligned forward source: round src down to a word boundary, keep the
 * previously loaded word in lr, and merge consecutive words with
 * lsr/lsl shift pairs. The shift amounts (8/24, 16/16, 24/8) are
 * selected by the source misalignment (1, 2 or 3 bytes) in r12.
 */
425 /* erg - unaligned source */
426 /* This is where it gets nasty ... */
427 .Lmemmove_fsrcul:
428 bic r1, r1, #3
429 ldr lr, [r1], #4
430 cmp r12, #2
431 bgt .Lmemmove_fsrcul3
432 beq .Lmemmove_fsrcul2
433 cmp r2, #0x0c
434 blt .Lmemmove_fsrcul1loop4
435 sub r2, r2, #0x0c
436 stmdb sp!, {r4, r5}
437
438 .Lmemmove_fsrcul1loop16:
439 mov r3, lr, lsr #8
440 ldmia r1!, {r4, r5, r12, lr}
441 orr r3, r3, r4, lsl #24
442 mov r4, r4, lsr #8
443 orr r4, r4, r5, lsl #24
444 mov r5, r5, lsr #8
445 orr r5, r5, r12, lsl #24
446 mov r12, r12, lsr #8
447 orr r12, r12, lr, lsl #24
448 stmia r0!, {r3-r5, r12}
449 subs r2, r2, #0x10
450 bge .Lmemmove_fsrcul1loop16
451 ldmia sp!, {r4, r5}
452 adds r2, r2, #0x0c
453 blt .Lmemmove_fsrcul1l4
454
455 .Lmemmove_fsrcul1loop4:
456 mov r12, lr, lsr #8
457 ldr lr, [r1], #4
458 orr r12, r12, lr, lsl #24
459 str r12, [r0], #4
460 subs r2, r2, #4
461 bge .Lmemmove_fsrcul1loop4
462
463 .Lmemmove_fsrcul1l4:
/* Undo the round-down so r1 again points at the true source byte. */
464 sub r1, r1, #3
465 b .Lmemmove_fl4
466
467 .Lmemmove_fsrcul2:
468 cmp r2, #0x0c
469 blt .Lmemmove_fsrcul2loop4
470 sub r2, r2, #0x0c
471 stmdb sp!, {r4, r5}
472
473 .Lmemmove_fsrcul2loop16:
474 mov r3, lr, lsr #16
475 ldmia r1!, {r4, r5, r12, lr}
476 orr r3, r3, r4, lsl #16
477 mov r4, r4, lsr #16
478 orr r4, r4, r5, lsl #16
479 mov r5, r5, lsr #16
480 orr r5, r5, r12, lsl #16
481 mov r12, r12, lsr #16
482 orr r12, r12, lr, lsl #16
483 stmia r0!, {r3-r5, r12}
484 subs r2, r2, #0x10
485 bge .Lmemmove_fsrcul2loop16
486 ldmia sp!, {r4, r5}
487 adds r2, r2, #0x0c
488 blt .Lmemmove_fsrcul2l4
489
490 .Lmemmove_fsrcul2loop4:
491 mov r12, lr, lsr #16
492 ldr lr, [r1], #4
493 orr r12, r12, lr, lsl #16
494 str r12, [r0], #4
495 subs r2, r2, #4
496 bge .Lmemmove_fsrcul2loop4
497
498 .Lmemmove_fsrcul2l4:
499 sub r1, r1, #2
500 b .Lmemmove_fl4
501
502 .Lmemmove_fsrcul3:
503 cmp r2, #0x0c
504 blt .Lmemmove_fsrcul3loop4
505 sub r2, r2, #0x0c
506 stmdb sp!, {r4, r5}
507
508 .Lmemmove_fsrcul3loop16:
509 mov r3, lr, lsr #24
510 ldmia r1!, {r4, r5, r12, lr}
511 orr r3, r3, r4, lsl #8
512 mov r4, r4, lsr #24
513 orr r4, r4, r5, lsl #8
514 mov r5, r5, lsr #24
515 orr r5, r5, r12, lsl #8
516 mov r12, r12, lsr #24
517 orr r12, r12, lr, lsl #8
518 stmia r0!, {r3-r5, r12}
519 subs r2, r2, #0x10
520 bge .Lmemmove_fsrcul3loop16
521 ldmia sp!, {r4, r5}
522 adds r2, r2, #0x0c
523 blt .Lmemmove_fsrcul3l4
524
525 .Lmemmove_fsrcul3loop4:
526 mov r12, lr, lsr #24
527 ldr lr, [r1], #4
528 orr r12, r12, lr, lsl #8
529 str r12, [r0], #4
530 subs r2, r2, #4
531 bge .Lmemmove_fsrcul3loop4
532
533 .Lmemmove_fsrcul3l4:
534 sub r1, r1, #1
535 b .Lmemmove_fl4
536
/*
 * Backward copy (dst > src): point both registers one past the end and
 * copy downwards with decrement-before addressing. r0 is never
 * clobbered here, so a plain RET suffices on this path.
 */
537 .Lmemmove_backwards:
538 add r1, r1, r2
539 add r0, r0, r2
540 subs r2, r2, #4
541 blt .Lmemmove_bl4 /* less than 4 bytes */
542 ands r12, r0, #3
543 bne .Lmemmove_bdestul /* oh unaligned destination addr */
544 ands r12, r1, #3
545 bne .Lmemmove_bsrcul /* oh unaligned source addr */
546
547 .Lmemmove_bt8:
548 /* We have aligned source and destination */
549 subs r2, r2, #8
550 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
551 stmdb sp!, {r4, lr}
552 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
553 blt .Lmemmove_bl32
554
555 /* blat 32 bytes at a time */
556 /* XXX for really big copies perhaps we should use more registers */
557 .Lmemmove_bloop32:
558 ldmdb r1!, {r3, r4, r12, lr}
559 stmdb r0!, {r3, r4, r12, lr}
560 ldmdb r1!, {r3, r4, r12, lr}
561 stmdb r0!, {r3, r4, r12, lr}
562 subs r2, r2, #0x20
563 bge .Lmemmove_bloop32
564
565 .Lmemmove_bl32:
566 cmn r2, #0x10
567 ldmdbge r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
568 stmdbge r0!, {r3, r4, r12, lr}
569 subge r2, r2, #0x10
570 adds r2, r2, #0x14
571 ldmdbge r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
572 stmdbge r0!, {r3, r12, lr}
573 subge r2, r2, #0x0c
574 ldmia sp!, {r4, lr}
575
576 .Lmemmove_bl12:
577 adds r2, r2, #8
578 blt .Lmemmove_bl4
579 subs r2, r2, #4
580 ldrlt r3, [r1, #-4]!
581 strlt r3, [r0, #-4]!
582 ldmdbge r1!, {r3, r12}
583 stmdbge r0!, {r3, r12}
584 subge r2, r2, #4
585
586 .Lmemmove_bl4:
587 /* less than 4 bytes to go */
588 adds r2, r2, #4
589 RETeq /* done */
590
591 /* copy the crud byte at a time */
592 cmp r2, #2
593 ldrb r3, [r1, #-1]!
594 strb r3, [r0, #-1]!
595 ldrbge r3, [r1, #-1]!
596 strbge r3, [r0, #-1]!
597 ldrbgt r3, [r1, #-1]!
598 strbgt r3, [r0, #-1]!
599 RET
600
601 /* erg - unaligned destination */
602 .Lmemmove_bdestul:
/* r12 = dst & 3: exactly that many byte stores re-align dst downwards. */
603 cmp r12, #2
604
605 /* align destination with byte copies */
606 ldrb r3, [r1, #-1]!
607 strb r3, [r0, #-1]!
608 ldrbge r3, [r1, #-1]!
609 strbge r3, [r0, #-1]!
610 ldrbgt r3, [r1, #-1]!
611 strbgt r3, [r0, #-1]!
612 subs r2, r2, r12
613 blt .Lmemmove_bl4 /* less than 4 bytes to go */
614 ands r12, r1, #3
615 beq .Lmemmove_bt8 /* we have an aligned source */
616
/*
 * Unaligned backward source: mirror image of the forward merging loops —
 * src is rounded down, r3 carries the previously loaded word, and the
 * lsl/lsr shift pairs are swapped relative to the forward versions.
 */
617 /* erg - unaligned source */
618 /* This is where it gets nasty ... */
619 .Lmemmove_bsrcul:
620 bic r1, r1, #3
621 ldr r3, [r1, #0]
622 cmp r12, #2
623 blt .Lmemmove_bsrcul1
624 beq .Lmemmove_bsrcul2
625 cmp r2, #0x0c
626 blt .Lmemmove_bsrcul3loop4
627 sub r2, r2, #0x0c
628 stmdb sp!, {r4, r5, lr}
629
630 .Lmemmove_bsrcul3loop16:
631 mov lr, r3, lsl #8
632 ldmdb r1!, {r3-r5, r12}
633 orr lr, lr, r12, lsr #24
634 mov r12, r12, lsl #8
635 orr r12, r12, r5, lsr #24
636 mov r5, r5, lsl #8
637 orr r5, r5, r4, lsr #24
638 mov r4, r4, lsl #8
639 orr r4, r4, r3, lsr #24
640 stmdb r0!, {r4, r5, r12, lr}
641 subs r2, r2, #0x10
642 bge .Lmemmove_bsrcul3loop16
643 ldmia sp!, {r4, r5, lr}
644 adds r2, r2, #0x0c
645 blt .Lmemmove_bsrcul3l4
646
647 .Lmemmove_bsrcul3loop4:
648 mov r12, r3, lsl #8
649 ldr r3, [r1, #-4]!
650 orr r12, r12, r3, lsr #24
651 str r12, [r0, #-4]!
652 subs r2, r2, #4
653 bge .Lmemmove_bsrcul3loop4
654
655 .Lmemmove_bsrcul3l4:
/* Undo the round-down so r1 again points at the true source byte. */
656 add r1, r1, #3
657 b .Lmemmove_bl4
658
659 .Lmemmove_bsrcul2:
660 cmp r2, #0x0c
661 blt .Lmemmove_bsrcul2loop4
662 sub r2, r2, #0x0c
663 stmdb sp!, {r4, r5, lr}
664
665 .Lmemmove_bsrcul2loop16:
666 mov lr, r3, lsl #16
667 ldmdb r1!, {r3-r5, r12}
668 orr lr, lr, r12, lsr #16
669 mov r12, r12, lsl #16
670 orr r12, r12, r5, lsr #16
671 mov r5, r5, lsl #16
672 orr r5, r5, r4, lsr #16
673 mov r4, r4, lsl #16
674 orr r4, r4, r3, lsr #16
675 stmdb r0!, {r4, r5, r12, lr}
676 subs r2, r2, #0x10
677 bge .Lmemmove_bsrcul2loop16
678 ldmia sp!, {r4, r5, lr}
679 adds r2, r2, #0x0c
680 blt .Lmemmove_bsrcul2l4
681
682 .Lmemmove_bsrcul2loop4:
683 mov r12, r3, lsl #16
684 ldr r3, [r1, #-4]!
685 orr r12, r12, r3, lsr #16
686 str r12, [r0, #-4]!
687 subs r2, r2, #4
688 bge .Lmemmove_bsrcul2loop4
689
690 .Lmemmove_bsrcul2l4:
691 add r1, r1, #2
692 b .Lmemmove_bl4
693
694 .Lmemmove_bsrcul1:
695 cmp r2, #0x0c
696 blt .Lmemmove_bsrcul1loop4
697 sub r2, r2, #0x0c
698 stmdb sp!, {r4, r5, lr}
699
700 .Lmemmove_bsrcul1loop32:
701 mov lr, r3, lsl #24
702 ldmdb r1!, {r3-r5, r12}
703 orr lr, lr, r12, lsr #8
704 mov r12, r12, lsl #24
705 orr r12, r12, r5, lsr #8
706 mov r5, r5, lsl #24
707 orr r5, r5, r4, lsr #8
708 mov r4, r4, lsl #24
709 orr r4, r4, r3, lsr #8
710 stmdb r0!, {r4, r5, r12, lr}
711 subs r2, r2, #0x10
712 bge .Lmemmove_bsrcul1loop32
713 ldmia sp!, {r4, r5, lr}
714 adds r2, r2, #0x0c
715 blt .Lmemmove_bsrcul1l4
716
717 .Lmemmove_bsrcul1loop4:
718 mov r12, r3, lsl #24
719 ldr r3, [r1, #-4]!
720 orr r12, r12, r3, lsr #8
721 str r12, [r0, #-4]!
722 subs r2, r2, #4
723 bge .Lmemmove_bsrcul1loop4
724
725 .Lmemmove_bsrcul1l4:
726 add r1, r1, #1
727 b .Lmemmove_bl4
728 END(memmove)
729
730 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
731 ENTRY(memcpy)
732 pld [r1]
733 cmp r2, #0x0c
734 ble .Lmemcpy_short /* <= 12 bytes */
735 #ifdef FLASHADDR
736 #if FLASHADDR > PHYSADDR
737 ldr r3, =FLASHADDR
738 cmp r3, pc
739 bls .Lnormal
740 #else
741 ldr r3, =FLASHADDR
742 cmp r3, pc
743 bhi .Lnormal
744 #endif
745 #endif
746 mov r3, r0 /* We must not clobber r0 */
747
748 /* Word-align the destination buffer */
749 ands ip, r3, #0x03 /* Already word aligned? */
750 beq .Lmemcpy_wordaligned /* Yup */
751 cmp ip, #0x02
752 ldrb ip, [r1], #0x01
753 sub r2, r2, #0x01
754 strb ip, [r3], #0x01
755 ldrble ip, [r1], #0x01
756 suble r2, r2, #0x01
757 strble ip, [r3], #0x01
758 ldrblt ip, [r1], #0x01
759 sublt r2, r2, #0x01
760 strblt ip, [r3], #0x01
761
762 /* Destination buffer is now word aligned */
763 .Lmemcpy_wordaligned:
764 ands ip, r1, #0x03 /* Is src also word-aligned? */
765 bne .Lmemcpy_bad_align /* Nope. Things just got bad */
766
767 /* Quad-align the destination buffer */
768 tst r3, #0x07 /* Already quad aligned? */
769 ldrne ip, [r1], #0x04
770 stmfd sp!, {r4-r9} /* Free up some registers */
771 subne r2, r2, #0x04
772 strne ip, [r3], #0x04
773
774 /* Destination buffer quad aligned, source is at least word aligned */
775 subs r2, r2, #0x80
776 blt .Lmemcpy_w_lessthan128
777
778 /* Copy 128 bytes at a time */
779 .Lmemcpy_w_loop128:
780 ldr r4, [r1], #0x04 /* LD:00-03 */
781 ldr r5, [r1], #0x04 /* LD:04-07 */
782 pld [r1, #0x18] /* Prefetch 0x20 */
783 ldr r6, [r1], #0x04 /* LD:08-0b */
784 ldr r7, [r1], #0x04 /* LD:0c-0f */
785 ldr r8, [r1], #0x04 /* LD:10-13 */
786 ldr r9, [r1], #0x04 /* LD:14-17 */
787 strd r4, [r3], #0x08 /* ST:00-07 */
788 ldr r4, [r1], #0x04 /* LD:18-1b */
789 ldr r5, [r1], #0x04 /* LD:1c-1f */
790 strd r6, [r3], #0x08 /* ST:08-0f */
791 ldr r6, [r1], #0x04 /* LD:20-23 */
792 ldr r7, [r1], #0x04 /* LD:24-27 */
793 pld [r1, #0x18] /* Prefetch 0x40 */
794 strd r8, [r3], #0x08 /* ST:10-17 */
795 ldr r8, [r1], #0x04 /* LD:28-2b */
796 ldr r9, [r1], #0x04 /* LD:2c-2f */
797 strd r4, [r3], #0x08 /* ST:18-1f */
798 ldr r4, [r1], #0x04 /* LD:30-33 */
799 ldr r5, [r1], #0x04 /* LD:34-37 */
800 strd r6, [r3], #0x08 /* ST:20-27 */
801 ldr r6, [r1], #0x04 /* LD:38-3b */
802 ldr r7, [r1], #0x04 /* LD:3c-3f */
803 strd r8, [r3], #0x08 /* ST:28-2f */
804 ldr r8, [r1], #0x04 /* LD:40-43 */
805 ldr r9, [r1], #0x04 /* LD:44-47 */
806 pld [r1, #0x18] /* Prefetch 0x60 */
807 strd r4, [r3], #0x08 /* ST:30-37 */
808 ldr r4, [r1], #0x04 /* LD:48-4b */
809 ldr r5, [r1], #0x04 /* LD:4c-4f */
810 strd r6, [r3], #0x08 /* ST:38-3f */
811 ldr r6, [r1], #0x04 /* LD:50-53 */
812 ldr r7, [r1], #0x04 /* LD:54-57 */
813 strd r8, [r3], #0x08 /* ST:40-47 */
814 ldr r8, [r1], #0x04 /* LD:58-5b */
815 ldr r9, [r1], #0x04 /* LD:5c-5f */
816 strd r4, [r3], #0x08 /* ST:48-4f */
817 ldr r4, [r1], #0x04 /* LD:60-63 */
818 ldr r5, [r1], #0x04 /* LD:64-67 */
819 pld [r1, #0x18] /* Prefetch 0x80 */
820 strd r6, [r3], #0x08 /* ST:50-57 */
821 ldr r6, [r1], #0x04 /* LD:68-6b */
822 ldr r7, [r1], #0x04 /* LD:6c-6f */
823 strd r8, [r3], #0x08 /* ST:58-5f */
824 ldr r8, [r1], #0x04 /* LD:70-73 */
825 ldr r9, [r1], #0x04 /* LD:74-77 */
826 strd r4, [r3], #0x08 /* ST:60-67 */
827 ldr r4, [r1], #0x04 /* LD:78-7b */
828 ldr r5, [r1], #0x04 /* LD:7c-7f */
829 strd r6, [r3], #0x08 /* ST:68-6f */
830 strd r8, [r3], #0x08 /* ST:70-77 */
831 subs r2, r2, #0x80
832 strd r4, [r3], #0x08 /* ST:78-7f */
833 bge .Lmemcpy_w_loop128
834
835 .Lmemcpy_w_lessthan128:
836 adds r2, r2, #0x80 /* Adjust for extra sub */
837 ldmfdeq sp!, {r4-r9}
838 RETeq /* Return now if done */
839 subs r2, r2, #0x20
840 blt .Lmemcpy_w_lessthan32
841
842 /* Copy 32 bytes at a time */
843 .Lmemcpy_w_loop32:
844 ldr r4, [r1], #0x04
845 ldr r5, [r1], #0x04
846 pld [r1, #0x18]
847 ldr r6, [r1], #0x04
848 ldr r7, [r1], #0x04
849 ldr r8, [r1], #0x04
850 ldr r9, [r1], #0x04
851 strd r4, [r3], #0x08
852 ldr r4, [r1], #0x04
853 ldr r5, [r1], #0x04
854 strd r6, [r3], #0x08
855 strd r8, [r3], #0x08
856 subs r2, r2, #0x20
857 strd r4, [r3], #0x08
858 bge .Lmemcpy_w_loop32
859
860 .Lmemcpy_w_lessthan32:
861 adds r2, r2, #0x20 /* Adjust for extra sub */
862 ldmfdeq sp!, {r4-r9}
863 RETeq /* Return now if done */
864
865 and r4, r2, #0x18
866 rsbs r4, r4, #0x18
867 addne pc, pc, r4, lsl #1
868 nop
869
870 /* At least 24 bytes remaining */
871 ldr r4, [r1], #0x04
872 ldr r5, [r1], #0x04
873 sub r2, r2, #0x08
874 strd r4, [r3], #0x08
875
876 /* At least 16 bytes remaining */
877 ldr r4, [r1], #0x04
878 ldr r5, [r1], #0x04
879 sub r2, r2, #0x08
880 strd r4, [r3], #0x08
881
882 /* At least 8 bytes remaining */
883 ldr r4, [r1], #0x04
884 ldr r5, [r1], #0x04
885 subs r2, r2, #0x08
886 strd r4, [r3], #0x08
887
888 /* Less than 8 bytes remaining */
889 ldmfd sp!, {r4-r9}
890 RETeq /* Return now if done */
891 subs r2, r2, #0x04
892 ldrge ip, [r1], #0x04
893 strge ip, [r3], #0x04
894 RETeq /* Return now if done */
895 addlt r2, r2, #0x04
896 ldrb ip, [r1], #0x01
897 cmp r2, #0x02
898 ldrbge r2, [r1], #0x01
899 strb ip, [r3], #0x01
900 ldrbgt ip, [r1]
901 strbge r2, [r3], #0x01
902 strbgt ip, [r3]
903 RET
904 /* Place a literal pool here for the above ldr instructions to use */
905 .ltorg
906
907
908 /*
909 * At this point, it has not been possible to word align both buffers.
910 * The destination buffer is word aligned, but the source buffer is not.
911 */
912 .Lmemcpy_bad_align:
913 stmfd sp!, {r4-r7}
914 bic r1, r1, #0x03
915 cmp ip, #2
916 ldr ip, [r1], #0x04
917 bgt .Lmemcpy_bad3
918 beq .Lmemcpy_bad2
919 b .Lmemcpy_bad1
920
921 .Lmemcpy_bad1_loop16:
922 mov r4, ip, lsr #8
923 ldr r5, [r1], #0x04
924 pld [r1, #0x018]
925 ldr r6, [r1], #0x04
926 ldr r7, [r1], #0x04
927 ldr ip, [r1], #0x04
928 orr r4, r4, r5, lsl #24
929 mov r5, r5, lsr #8
930 orr r5, r5, r6, lsl #24
931 mov r6, r6, lsr #8
932 orr r6, r6, r7, lsl #24
933 mov r7, r7, lsr #8
934 orr r7, r7, ip, lsl #24
935 str r4, [r3], #0x04
936 str r5, [r3], #0x04
937 str r6, [r3], #0x04
938 str r7, [r3], #0x04
939 .Lmemcpy_bad1:
940 subs r2, r2, #0x10
941 bge .Lmemcpy_bad1_loop16
942
943 adds r2, r2, #0x10
944 ldmfdeq sp!, {r4-r7}
945 RETeq /* Return now if done */
946 subs r2, r2, #0x04
947 sublt r1, r1, #0x03
948 blt .Lmemcpy_bad_done
949
950 .Lmemcpy_bad1_loop4:
951 mov r4, ip, lsr #8
952 ldr ip, [r1], #0x04
953 subs r2, r2, #0x04
954 orr r4, r4, ip, lsl #24
955 str r4, [r3], #0x04
956 bge .Lmemcpy_bad1_loop4
957 sub r1, r1, #0x03
958 b .Lmemcpy_bad_done
959
960 .Lmemcpy_bad2_loop16:
961 mov r4, ip, lsr #16
962 ldr r5, [r1], #0x04
963 pld [r1, #0x018]
964 ldr r6, [r1], #0x04
965 ldr r7, [r1], #0x04
966 ldr ip, [r1], #0x04
967 orr r4, r4, r5, lsl #16
968 mov r5, r5, lsr #16
969 orr r5, r5, r6, lsl #16
970 mov r6, r6, lsr #16
971 orr r6, r6, r7, lsl #16
972 mov r7, r7, lsr #16
973 orr r7, r7, ip, lsl #16
974 str r4, [r3], #0x04
975 str r5, [r3], #0x04
976 str r6, [r3], #0x04
977 str r7, [r3], #0x04
978 .Lmemcpy_bad2:
979 subs r2, r2, #0x10
980 bge .Lmemcpy_bad2_loop16
981
982 adds r2, r2, #0x10
983 ldmfdeq sp!, {r4-r7}
984 RETeq /* Return now if done */
985 subs r2, r2, #0x04
986 sublt r1, r1, #0x02
987 blt .Lmemcpy_bad_done
988
989 .Lmemcpy_bad2_loop4:
990 mov r4, ip, lsr #16
991 ldr ip, [r1], #0x04
992 subs r2, r2, #0x04
993 orr r4, r4, ip, lsl #16
994 str r4, [r3], #0x04
995 bge .Lmemcpy_bad2_loop4
996 sub r1, r1, #0x02
997 b .Lmemcpy_bad_done
998
999 .Lmemcpy_bad3_loop16:
1000 mov r4, ip, lsr #24
1001 ldr r5, [r1], #0x04
1002 pld [r1, #0x018]
1003 ldr r6, [r1], #0x04
1004 ldr r7, [r1], #0x04
1005 ldr ip, [r1], #0x04
1006 orr r4, r4, r5, lsl #8
1007 mov r5, r5, lsr #24
1008 orr r5, r5, r6, lsl #8
1009 mov r6, r6, lsr #24
1010 orr r6, r6, r7, lsl #8
1011 mov r7, r7, lsr #24
1012 orr r7, r7, ip, lsl #8
1013 str r4, [r3], #0x04
1014 str r5, [r3], #0x04
1015 str r6, [r3], #0x04
1016 str r7, [r3], #0x04
1017 .Lmemcpy_bad3:
1018 subs r2, r2, #0x10
1019 bge .Lmemcpy_bad3_loop16
1020
1021 adds r2, r2, #0x10
1022 ldmfdeq sp!, {r4-r7}
1023 RETeq /* Return now if done */
1024 subs r2, r2, #0x04
1025 sublt r1, r1, #0x01
1026 blt .Lmemcpy_bad_done
1027
1028 .Lmemcpy_bad3_loop4:
1029 mov r4, ip, lsr #24
1030 ldr ip, [r1], #0x04
1031 subs r2, r2, #0x04
1032 orr r4, r4, ip, lsl #8
1033 str r4, [r3], #0x04
1034 bge .Lmemcpy_bad3_loop4
1035 sub r1, r1, #0x01
1036
1037 .Lmemcpy_bad_done:
1038 ldmfd sp!, {r4-r7}
1039 adds r2, r2, #0x04
1040 RETeq
1041 ldrb ip, [r1], #0x01
1042 cmp r2, #0x02
1043 ldrbge r2, [r1], #0x01
1044 strb ip, [r3], #0x01
1045 ldrbgt ip, [r1]
1046 strbge r2, [r3], #0x01
1047 strbgt ip, [r3]
1048 RET
1049
1050
1051 /*
1052 * Handle short copies (less than 16 bytes), possibly misaligned.
1053 * Some of these are *very* common, thanks to the network stack,
1054 * and so are handled specially.
1055 */
1056 .Lmemcpy_short:
1057 add pc, pc, r2, lsl #2
1058 nop
1059 RET /* 0x00 */
1060 b .Lmemcpy_bytewise /* 0x01 */
1061 b .Lmemcpy_bytewise /* 0x02 */
1062 b .Lmemcpy_bytewise /* 0x03 */
1063 b .Lmemcpy_4 /* 0x04 */
1064 b .Lmemcpy_bytewise /* 0x05 */
1065 b .Lmemcpy_6 /* 0x06 */
1066 b .Lmemcpy_bytewise /* 0x07 */
1067 b .Lmemcpy_8 /* 0x08 */
1068 b .Lmemcpy_bytewise /* 0x09 */
1069 b .Lmemcpy_bytewise /* 0x0a */
1070 b .Lmemcpy_bytewise /* 0x0b */
1071 b .Lmemcpy_c /* 0x0c */
1072 .Lmemcpy_bytewise:
1073 mov r3, r0 /* We must not clobber r0 */
1074 ldrb ip, [r1], #0x01
1075 1: subs r2, r2, #0x01
1076 strb ip, [r3], #0x01
1077 ldrbne ip, [r1], #0x01
1078 bne 1b
1079 RET
1080
1081 /******************************************************************************
1082 * Special case for 4 byte copies
1083 */
1084 #define LMEMCPY_4_LOG2 6 /* 64 bytes */
1085 #define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2
1086 LMEMCPY_4_PAD
1087 .Lmemcpy_4:
/*
 * Build a 4-bit case index in r2: bits [3:2] = dst (r0) & 3,
 * bits [1:0] = src (r1) & 3.  r3 = pc - 0x14 resolves to the
 * address of .Lmemcpy_4 itself (pc reads as this insn + 8, and
 * the sub is the fourth instruction), and every case below is
 * padded to a 64-byte slot, so case i lives at
 * .Lmemcpy_4 + (i << LMEMCPY_4_LOG2).  Case 0000 falls through.
 *
 * In the byte-layout comments below, digits name source bytes
 * (0 = first byte); "3210" means bytes packed little-endian with
 * byte 0 in the register's least-significant byte, and 'x'/'.'
 * mark don't-care positions.
 */
1088 and r2, r1, #0x03
1089 orr r2, r2, r0, lsl #2
1090 ands r2, r2, #0x0f
1091 sub r3, pc, #0x14
1092 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2
1093
1094 /*
1095 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1096 */
1097 ldr r2, [r1]
1098 str r2, [r0]
1099 RET
1100 LMEMCPY_4_PAD
1101
1102 /*
1103 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1104 */
1105 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1106 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */
1107 mov r3, r3, lsr #8 /* r3 = .210 */
1108 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1109 str r3, [r0]
1110 RET
1111 LMEMCPY_4_PAD
1112
1113 /*
1114 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1115 */
1116 ldrh r3, [r1, #0x02]
1117 ldrh r2, [r1]
1118 orr r3, r2, r3, lsl #16
1119 str r3, [r0]
1120 RET
1121 LMEMCPY_4_PAD
1122
1123 /*
1124 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1125 */
1126 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */
1127 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */
1128 mov r3, r3, lsr #24 /* r3 = ...0 */
1129 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1130 str r3, [r0]
1131 RET
1132 LMEMCPY_4_PAD
1133
1134 /*
1135 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1136 */
1137 ldr r2, [r1]
1138 strb r2, [r0]
1139 mov r3, r2, lsr #8
1140 mov r1, r2, lsr #24
1141 strb r1, [r0, #0x03]
1142 strh r3, [r0, #0x01]
1143 RET
1144 LMEMCPY_4_PAD
1145
1146 /*
1147 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1148 */
1149 ldrb r2, [r1]
1150 ldrh r3, [r1, #0x01]
1151 ldrb r1, [r1, #0x03]
1152 strb r2, [r0]
1153 strh r3, [r0, #0x01]
1154 strb r1, [r0, #0x03]
1155 RET
1156 LMEMCPY_4_PAD
1157
1158 /*
1159 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1160 */
1161 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1162 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1163 strb r2, [r0]
1164 mov r2, r2, lsr #8 /* r2 = ...1 */
1165 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1166 mov r3, r3, lsr #8 /* r3 = ...3 */
1167 strh r2, [r0, #0x01]
1168 strb r3, [r0, #0x03]
1169 RET
1170 LMEMCPY_4_PAD
1171
1172 /*
1173 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1174 */
1175 ldrb r2, [r1]
1176 ldrh r3, [r1, #0x01]
1177 ldrb r1, [r1, #0x03]
1178 strb r2, [r0]
1179 strh r3, [r0, #0x01]
1180 strb r1, [r0, #0x03]
1181 RET
1182 LMEMCPY_4_PAD
1183
1184 /*
1185 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1186 */
1187 ldr r2, [r1]
1188 strh r2, [r0]
1189 mov r3, r2, lsr #16
1190 strh r3, [r0, #0x02]
1191 RET
1192 LMEMCPY_4_PAD
1193
1194 /*
1195 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1196 */
1197 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1198 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */
1199 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1200 strh r1, [r0]
1201 mov r2, r2, lsr #24 /* r2 = ...2 */
1202 orr r2, r2, r3, lsl #8 /* r2 = xx32 */
1203 strh r2, [r0, #0x02]
1204 RET
1205 LMEMCPY_4_PAD
1206
1207 /*
1208 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1209 */
1210 ldrh r2, [r1]
1211 ldrh r3, [r1, #0x02]
1212 strh r2, [r0]
1213 strh r3, [r0, #0x02]
1214 RET
1215 LMEMCPY_4_PAD
1216
1217 /*
1218 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1219 */
1220 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */
1221 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1222 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */
1223 strh r1, [r0, #0x02]
1224 mov r3, r3, lsl #8 /* r3 = 321. */
1225 orr r3, r3, r2, lsr #24 /* r3 = 3210 */
1226 strh r3, [r0]
1227 RET
1228 LMEMCPY_4_PAD
1229
1230 /*
1231 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1232 */
1233 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1234 strb r2, [r0]
1235 mov r3, r2, lsr #8
1236 mov r1, r2, lsr #24
1237 strh r3, [r0, #0x01]
1238 strb r1, [r0, #0x03]
1239 RET
1240 LMEMCPY_4_PAD
1241
1242 /*
1243 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1244 */
1245 ldrb r2, [r1]
1246 ldrh r3, [r1, #0x01]
1247 ldrb r1, [r1, #0x03]
1248 strb r2, [r0]
1249 strh r3, [r0, #0x01]
1250 strb r1, [r0, #0x03]
1251 RET
1252 LMEMCPY_4_PAD
1253
1254 /*
1255 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1256 */
1257 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1258 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1259 strb r2, [r0]
1260 mov r2, r2, lsr #8 /* r2 = ...1 */
1261 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1262 strh r2, [r0, #0x01]
1263 mov r3, r3, lsr #8 /* r3 = ...3 */
1264 strb r3, [r0, #0x03]
1265 RET
1266 LMEMCPY_4_PAD
1267
1268 /*
1269 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1270 */
1271 ldrb r2, [r1]
1272 ldrh r3, [r1, #0x01]
1273 ldrb r1, [r1, #0x03]
1274 strb r2, [r0]
1275 strh r3, [r0, #0x01]
1276 strb r1, [r0, #0x03]
1277 RET
1278 LMEMCPY_4_PAD
1279
1280
1281 /******************************************************************************
1282 * Special case for 6 byte copies
1283 */
1284 #define LMEMCPY_6_LOG2 6 /* 64 bytes */
1285 #define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2
1286 LMEMCPY_6_PAD
1287 .Lmemcpy_6:
/*
 * Same dispatch scheme as .Lmemcpy_4: r2 = (dst & 3) << 2 | (src & 3),
 * r3 = pc - 0x14 = address of .Lmemcpy_6 (pc reads as this insn + 8),
 * and each alignment case sits in its own 64-byte slot at
 * .Lmemcpy_6 + (case << LMEMCPY_6_LOG2).  Case 0000 falls through.
 * Byte-layout comments use the convention described at .Lmemcpy_4.
 */
1288 and r2, r1, #0x03
1289 orr r2, r2, r0, lsl #2
1290 ands r2, r2, #0x0f
1291 sub r3, pc, #0x14
1292 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2
1293
1294 /*
1295 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1296 */
1297 ldr r2, [r1]
1298 ldrh r3, [r1, #0x04]
1299 str r2, [r0]
1300 strh r3, [r0, #0x04]
1301 RET
1302 LMEMCPY_6_PAD
1303
1304 /*
1305 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1306 */
1307 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1308 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */
1309 mov r2, r2, lsr #8 /* r2 = .210 */
1310 orr r2, r2, r3, lsl #24 /* r2 = 3210 */
1311 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */
1312 str r2, [r0]
1313 strh r3, [r0, #0x04]
1314 RET
1315 LMEMCPY_6_PAD
1316
1317 /*
1318 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1319 */
1320 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1321 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1322 mov r1, r3, lsr #16 /* r1 = ..54 */
1323 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1324 str r2, [r0]
1325 strh r1, [r0, #0x04]
1326 RET
1327 LMEMCPY_6_PAD
1328
1329 /*
1330 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1331 */
1332 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1333 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */
1334 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */
1335 mov r2, r2, lsr #24 /* r2 = ...0 */
1336 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1337 mov r1, r1, lsl #8 /* r1 = xx5. */
1338 orr r1, r1, r3, lsr #24 /* r1 = xx54 */
1339 str r2, [r0]
1340 strh r1, [r0, #0x04]
1341 RET
1342 LMEMCPY_6_PAD
1343
1344 /*
1345 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1346 */
1347 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1348 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */
1349 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1350 strh r1, [r0, #0x01]
1351 strb r3, [r0]
1352 mov r3, r3, lsr #24 /* r3 = ...3 */
1353 orr r3, r3, r2, lsl #8 /* r3 = .543 */
1354 mov r2, r2, lsr #8 /* r2 = ...5 */
1355 strh r3, [r0, #0x03]
1356 strb r2, [r0, #0x05]
1357 RET
1358 LMEMCPY_6_PAD
1359
1360 /*
1361 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1362 */
1363 ldrb r2, [r1]
1364 ldrh r3, [r1, #0x01]
1365 ldrh ip, [r1, #0x03]
1366 ldrb r1, [r1, #0x05]
1367 strb r2, [r0]
1368 strh r3, [r0, #0x01]
1369 strh ip, [r0, #0x03]
1370 strb r1, [r0, #0x05]
1371 RET
1372 LMEMCPY_6_PAD
1373
1374 /*
1375 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1376 */
1377 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1378 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1379 strb r2, [r0]
1380 mov r3, r1, lsr #24
1381 strb r3, [r0, #0x05]
1382 mov r3, r1, lsr #8 /* r3 = .543 */
1383 strh r3, [r0, #0x03]
1384 mov r3, r2, lsr #8 /* r3 = ...1 */
1385 orr r3, r3, r1, lsl #8 /* r3 = 4321 */
1386 strh r3, [r0, #0x01]
1387 RET
1388 LMEMCPY_6_PAD
1389
1390 /*
1391 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1392 */
1393 ldrb r2, [r1]
1394 ldrh r3, [r1, #0x01]
1395 ldrh ip, [r1, #0x03]
1396 ldrb r1, [r1, #0x05]
1397 strb r2, [r0]
1398 strh r3, [r0, #0x01]
1399 strh ip, [r0, #0x03]
1400 strb r1, [r0, #0x05]
1401 RET
1402 LMEMCPY_6_PAD
1403
1404 /*
1405 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1406 */
1407 ldrh r2, [r1, #0x04] /* r2 = ..54 */
1408 ldr r3, [r1] /* r3 = 3210 */
1409 mov r2, r2, lsl #16 /* r2 = 54.. */
1410 orr r2, r2, r3, lsr #16 /* r2 = 5432 */
1411 strh r3, [r0]
1412 str r2, [r0, #0x02]
1413 RET
1414 LMEMCPY_6_PAD
1415
1416 /*
1417 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1418 */
1419 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1420 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */
1421 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1422 mov r2, r2, lsl #8 /* r2 = 543. */
1423 orr r2, r2, r3, lsr #24 /* r2 = 5432 */
1424 strh r1, [r0]
1425 str r2, [r0, #0x02]
1426 RET
1427 LMEMCPY_6_PAD
1428
1429 /*
1430 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1431 */
1432 ldrh r2, [r1]
1433 ldr r3, [r1, #0x02]
1434 strh r2, [r0]
1435 str r3, [r0, #0x02]
1436 RET
1437 LMEMCPY_6_PAD
1438
1439 /*
1440 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1441 */
1442 ldrb r3, [r1] /* r3 = ...0 */
1443 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1444 ldrb r1, [r1, #0x05] /* r1 = ...5 */
1445 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1446 mov r1, r1, lsl #24 /* r1 = 5... */
1447 orr r1, r1, r2, lsr #8 /* r1 = 5432 */
1448 strh r3, [r0]
1449 str r1, [r0, #0x02]
1450 RET
1451 LMEMCPY_6_PAD
1452
1453 /*
1454 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1455 */
1456 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1457 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */
1458 strb r2, [r0]
1459 mov r2, r2, lsr #8 /* r2 = .321 */
1460 orr r2, r2, r1, lsl #24 /* r2 = 4321 */
1461 mov r1, r1, lsr #8 /* r1 = ...5 */
1462 str r2, [r0, #0x01]
1463 strb r1, [r0, #0x05]
1464 RET
1465 LMEMCPY_6_PAD
1466
1467 /*
1468 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1469 */
1470 ldrb r2, [r1]
1471 ldrh r3, [r1, #0x01]
1472 ldrh ip, [r1, #0x03]
1473 ldrb r1, [r1, #0x05]
1474 strb r2, [r0]
1475 strh r3, [r0, #0x01]
1476 strh ip, [r0, #0x03]
1477 strb r1, [r0, #0x05]
1478 RET
1479 LMEMCPY_6_PAD
1480
1481 /*
1482 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1483 */
1484 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1485 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1486 strb r2, [r0]
1487 mov r2, r2, lsr #8 /* r2 = ...1 */
1488 orr r2, r2, r1, lsl #8 /* r2 = 4321 */
1489 mov r1, r1, lsr #24 /* r1 = ...5 */
1490 str r2, [r0, #0x01]
1491 strb r1, [r0, #0x05]
1492 RET
1493 LMEMCPY_6_PAD
1494
1495 /*
1496 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1497 */
1498 ldrb r2, [r1]
1499 ldr r3, [r1, #0x01]
1500 ldrb r1, [r1, #0x05]
1501 strb r2, [r0]
1502 str r3, [r0, #0x01]
1503 strb r1, [r0, #0x05]
1504 RET
1505 LMEMCPY_6_PAD
1506
1507
1508 /******************************************************************************
1509 * Special case for 8 byte copies
1510 */
1511 #define LMEMCPY_8_LOG2 6 /* 64 bytes */
1512 #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
1513 LMEMCPY_8_PAD
1514 .Lmemcpy_8:
/*
 * Same dispatch scheme as .Lmemcpy_4: r2 = (dst & 3) << 2 | (src & 3),
 * r3 = pc - 0x14 = address of .Lmemcpy_8 (pc reads as this insn + 8),
 * and each alignment case sits in its own 64-byte slot at
 * .Lmemcpy_8 + (case << LMEMCPY_8_LOG2).  Case 0000 falls through.
 * Byte-layout comments use the convention described at .Lmemcpy_4.
 */
1515 and r2, r1, #0x03
1516 orr r2, r2, r0, lsl #2
1517 ands r2, r2, #0x0f
1518 sub r3, pc, #0x14
1519 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
1520
1521 /*
1522 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1523 */
1524 ldr r2, [r1]
1525 ldr r3, [r1, #0x04]
1526 str r2, [r0]
1527 str r3, [r0, #0x04]
1528 RET
1529 LMEMCPY_8_PAD
1530
1531 /*
1532 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1533 */
1534 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1535 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
1536 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1537 mov r3, r3, lsr #8 /* r3 = .210 */
1538 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1539 mov r1, r1, lsl #24 /* r1 = 7... */
1540 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
1541 str r3, [r0]
1542 str r2, [r0, #0x04]
1543 RET
1544 LMEMCPY_8_PAD
1545
1546 /*
1547 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1548 */
1549 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1550 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1551 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1552 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1553 mov r3, r3, lsr #16 /* r3 = ..54 */
1554 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
1555 str r2, [r0]
1556 str r3, [r0, #0x04]
1557 RET
1558 LMEMCPY_8_PAD
1559
1560 /*
1561 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1562 */
1563 ldrb r3, [r1] /* r3 = ...0 */
1564 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1565 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
1566 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1567 mov r2, r2, lsr #24 /* r2 = ...4 */
1568 orr r2, r2, r1, lsl #8 /* r2 = 7654 */
1569 str r3, [r0]
1570 str r2, [r0, #0x04]
1571 RET
1572 LMEMCPY_8_PAD
1573
1574 /*
1575 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1576 */
1577 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1578 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
1579 strb r3, [r0]
1580 mov r1, r2, lsr #24 /* r1 = ...7 */
1581 strb r1, [r0, #0x07]
1582 mov r1, r3, lsr #8 /* r1 = .321 */
1583 mov r3, r3, lsr #24 /* r3 = ...3 */
1584 orr r3, r3, r2, lsl #8 /* r3 = 6543 */
1585 strh r1, [r0, #0x01]
1586 str r3, [r0, #0x03]
1587 RET
1588 LMEMCPY_8_PAD
1589
1590 /*
1591 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1592 */
1593 ldrb r2, [r1]
1594 ldrh r3, [r1, #0x01]
1595 ldr ip, [r1, #0x03]
1596 ldrb r1, [r1, #0x07]
1597 strb r2, [r0]
1598 strh r3, [r0, #0x01]
1599 str ip, [r0, #0x03]
1600 strb r1, [r0, #0x07]
1601 RET
1602 LMEMCPY_8_PAD
1603
1604 /*
1605 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1606 */
1607 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1608 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1609 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1610 strb r2, [r0] /* 0 */
1611 mov ip, r1, lsr #8 /* ip = ...7 */
1612 strb ip, [r0, #0x07] /* 7 */
1613 mov ip, r2, lsr #8 /* ip = ...1 */
1614 orr ip, ip, r3, lsl #8 /* ip = 4321 */
1615 mov r3, r3, lsr #8 /* r3 = .543 */
1616 orr r3, r3, r1, lsl #24 /* r3 = 6543 */
1617 strh ip, [r0, #0x01]
1618 str r3, [r0, #0x03]
1619 RET
1620 LMEMCPY_8_PAD
1621
1622 /*
1623 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1624 */
1625 ldrb r3, [r1] /* r3 = ...0 */
1626 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
1627 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
1628 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1629 strb r3, [r0]
1630 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
1631 strh ip, [r0, #0x01]
1632 orr r2, r3, r2, lsl #16 /* r2 = 6543 */
1633 str r2, [r0, #0x03]
1634 strb r1, [r0, #0x07]
1635 RET
1636 LMEMCPY_8_PAD
1637
1638 /*
1639 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1640 */
1641 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1642 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1643 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
1644 strh r2, [r0]
1645 orr r2, r1, r3, lsl #16 /* r2 = 5432 */
1646 mov r3, r3, lsr #16 /* r3 = ..76 */
1647 str r2, [r0, #0x02]
1648 strh r3, [r0, #0x06]
1649 RET
1650 LMEMCPY_8_PAD
1651
1652 /*
1653 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1654 */
1655 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1656 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1657 ldrb ip, [r1, #0x07] /* ip = ...7 */
1658 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1659 strh r1, [r0]
1660 mov r1, r2, lsr #24 /* r1 = ...2 */
1661 orr r1, r1, r3, lsl #8 /* r1 = 5432 */
1662 mov r3, r3, lsr #24 /* r3 = ...6 */
1663 orr r3, r3, ip, lsl #8 /* r3 = ..76 */
1664 str r1, [r0, #0x02]
1665 strh r3, [r0, #0x06]
1666 RET
1667 LMEMCPY_8_PAD
1668
1669 /*
1670 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1671 */
1672 ldrh r2, [r1]
1673 ldr ip, [r1, #0x02]
1674 ldrh r3, [r1, #0x06]
1675 strh r2, [r0]
1676 str ip, [r0, #0x02]
1677 strh r3, [r0, #0x06]
1678 RET
1679 LMEMCPY_8_PAD
1680
1681 /*
1682 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1683 */
1684 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */
1685 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1686 ldrb ip, [r1] /* ip = ...0 */
1687 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */
1688 strh r1, [r0, #0x06]
1689 mov r3, r3, lsl #24 /* r3 = 5... */
1690 orr r3, r3, r2, lsr #8 /* r3 = 5432 */
1691 orr r2, ip, r2, lsl #8 /* r2 = 3210 */
1692 str r3, [r0, #0x02]
1693 strh r2, [r0]
1694 RET
1695 LMEMCPY_8_PAD
1696
1697 /*
1698 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1699 */
1700 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1701 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1702 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */
1703 strh r1, [r0, #0x05]
1704 strb r2, [r0]
1705 mov r1, r3, lsr #24 /* r1 = ...7 */
1706 strb r1, [r0, #0x07]
1707 mov r2, r2, lsr #8 /* r2 = .321 */
1708 orr r2, r2, r3, lsl #24 /* r2 = 4321 */
1709 str r2, [r0, #0x01]
1710 RET
1711 LMEMCPY_8_PAD
1712
1713 /*
1714 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1715 */
1716 ldrb r3, [r1] /* r3 = ...0 */
1717 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */
1718 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
1719 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1720 strb r3, [r0]
1721 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */
1722 strh r3, [r0, #0x05]
1723 orr r2, r2, ip, lsl #16 /* r2 = 4321 */
1724 str r2, [r0, #0x01]
1725 strb r1, [r0, #0x07]
1726 RET
1727 LMEMCPY_8_PAD
1728
1729 /*
1730 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1731 */
1732 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1733 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1734 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1735 strb r2, [r0]
1736 mov ip, r2, lsr #8 /* ip = ...1 */
1737 orr ip, ip, r3, lsl #8 /* ip = 4321 */
1738 mov r2, r1, lsr #8 /* r2 = ...7 */
1739 strb r2, [r0, #0x07]
1740 mov r1, r1, lsl #8 /* r1 = .76. */
1741 orr r1, r1, r3, lsr #24 /* r1 = .765 */
1742 str ip, [r0, #0x01]
1743 strh r1, [r0, #0x05]
1744 RET
1745 LMEMCPY_8_PAD
1746
1747 /*
1748 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1749 */
1750 ldrb r2, [r1]
1751 ldr ip, [r1, #0x01]
1752 ldrh r3, [r1, #0x05]
1753 ldrb r1, [r1, #0x07]
1754 strb r2, [r0]
1755 str ip, [r0, #0x01]
1756 strh r3, [r0, #0x05]
1757 strb r1, [r0, #0x07]
1758 RET
1759 LMEMCPY_8_PAD
1760
1760
1761 /******************************************************************************
1762 * Special case for 12 byte copies
1763 */
1764 #define LMEMCPY_C_LOG2 7 /* 128 bytes */
1765 #define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2
1766 LMEMCPY_C_PAD
1767 .Lmemcpy_c:
/*
 * Same dispatch scheme as .Lmemcpy_4: r2 = (dst & 3) << 2 | (src & 3),
 * r3 = pc - 0x14 = address of .Lmemcpy_c (pc reads as this insn + 8).
 * The 12-byte cases need more instructions, so each slot here is
 * 128 bytes (LMEMCPY_C_LOG2 = 7) rather than 64.  Case 0000 falls
 * through.  Byte-layout comments use the convention described at
 * .Lmemcpy_4, with hex digits A/B for source bytes 10 and 11.
 */
1768 and r2, r1, #0x03
1769 orr r2, r2, r0, lsl #2
1770 ands r2, r2, #0x0f
1771 sub r3, pc, #0x14
1772 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2
1773
1774 /*
1775 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1776 */
1777 ldr r2, [r1]
1778 ldr r3, [r1, #0x04]
1779 ldr r1, [r1, #0x08]
1780 str r2, [r0]
1781 str r3, [r0, #0x04]
1782 str r1, [r0, #0x08]
1783 RET
1784 LMEMCPY_C_PAD
1785
1786 /*
1787 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1788 */
1789 ldrb r2, [r1, #0xb] /* r2 = ...B */
1790 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
1791 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1792 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
1793 mov r2, r2, lsl #24 /* r2 = B... */
1794 orr r2, r2, ip, lsr #8 /* r2 = BA98 */
1795 str r2, [r0, #0x08]
1796 mov r2, ip, lsl #24 /* r2 = 7... */
1797 orr r2, r2, r3, lsr #8 /* r2 = 7654 */
1798 mov r1, r1, lsr #8 /* r1 = .210 */
1799 orr r1, r1, r3, lsl #24 /* r1 = 3210 */
1800 str r2, [r0, #0x04]
1801 str r1, [r0]
1802 RET
1803 LMEMCPY_C_PAD
1804
1805 /*
1806 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1807 */
1808 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1809 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1810 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
1811 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
1812 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1813 str r2, [r0]
1814 mov r3, r3, lsr #16 /* r3 = ..54 */
1815 orr r3, r3, ip, lsl #16 /* r3 = 7654 */
1816 mov r1, r1, lsl #16 /* r1 = BA.. */
1817 orr r1, r1, ip, lsr #16 /* r1 = BA98 */
1818 str r3, [r0, #0x04]
1819 str r1, [r0, #0x08]
1820 RET
1821 LMEMCPY_C_PAD
1822
1823 /*
1824 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1825 */
1826 ldrb r2, [r1] /* r2 = ...0 */
1827 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
1828 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
1829 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
1830 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1831 str r2, [r0]
1832 mov r3, r3, lsr #24 /* r3 = ...4 */
1833 orr r3, r3, ip, lsl #8 /* r3 = 7654 */
1834 mov r1, r1, lsl #8 /* r1 = BA9. */
1835 orr r1, r1, ip, lsr #24 /* r1 = BA98 */
1836 str r3, [r0, #0x04]
1837 str r1, [r0, #0x08]
1838 RET
1839 LMEMCPY_C_PAD
1840
1841 /*
1842 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
1843 */
1844 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1845 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1846 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */
1847 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1848 strh r1, [r0, #0x01]
1849 strb r2, [r0]
1850 mov r1, r2, lsr #24 /* r1 = ...3 */
1851 orr r2, r1, r3, lsl #8 /* r2 = 6543 */
1852 mov r1, r3, lsr #24 /* r1 = ...7 */
1853 orr r1, r1, ip, lsl #8 /* r1 = A987 */
1854 mov ip, ip, lsr #24 /* ip = ...B */
1855 str r2, [r0, #0x03]
1856 str r1, [r0, #0x07]
1857 strb ip, [r0, #0x0b]
1858 RET
1859 LMEMCPY_C_PAD
1860
1861 /*
1862 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
1863 */
1864 ldrb r2, [r1]
1865 ldrh r3, [r1, #0x01]
1866 ldr ip, [r1, #0x03]
1867 strb r2, [r0]
1868 ldr r2, [r1, #0x07]
1869 ldrb r1, [r1, #0x0b]
1870 strh r3, [r0, #0x01]
1871 str ip, [r0, #0x03]
1872 str r2, [r0, #0x07]
1873 strb r1, [r0, #0x0b]
1874 RET
1875 LMEMCPY_C_PAD
1876
1877 /*
1878 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
1879 */
1880 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1881 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1882 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
1883 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
1884 strb r2, [r0]
1885 mov r2, r2, lsr #8 /* r2 = ...1 */
1886 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
1887 strh r2, [r0, #0x01]
1888 mov r2, r3, lsr #8 /* r2 = .543 */
1889 orr r3, r2, ip, lsl #24 /* r3 = 6543 */
1890 mov r2, ip, lsr #8 /* r2 = .987 */
1891 orr r2, r2, r1, lsl #24 /* r2 = A987 */
1892 mov r1, r1, lsr #8 /* r1 = ...B */
1893 str r3, [r0, #0x03]
1894 str r2, [r0, #0x07]
1895 strb r1, [r0, #0x0b]
1896 RET
1897 LMEMCPY_C_PAD
1898
1899 /*
1900 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
1901 */
1902 ldrb r2, [r1]
1903 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
1904 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
1905 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
1906 strb r2, [r0]
1907 strh r3, [r0, #0x01]
1908 mov r3, r3, lsr #16 /* r3 = ..43 */
1909 orr r3, r3, ip, lsl #16 /* r3 = 6543 */
1910 mov ip, ip, lsr #16 /* ip = ..87 */
1911 orr ip, ip, r1, lsl #16 /* ip = A987 */
1912 mov r1, r1, lsr #16 /* r1 = ..xB */
1913 str r3, [r0, #0x03]
1914 str ip, [r0, #0x07]
1915 strb r1, [r0, #0x0b]
1916 RET
1917 LMEMCPY_C_PAD
1918
1919 /*
1920 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1921 */
1922 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */
1923 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1924 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */
1925 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
1926 strh ip, [r0]
1927 orr r1, r1, r3, lsl #16 /* r1 = 5432 */
1928 mov r3, r3, lsr #16 /* r3 = ..76 */
1929 orr r3, r3, r2, lsl #16 /* r3 = 9876 */
1930 mov r2, r2, lsr #16 /* r2 = ..BA */
1931 str r1, [r0, #0x02]
1932 str r3, [r0, #0x06]
1933 strh r2, [r0, #0x0a]
1934 RET
1935 LMEMCPY_C_PAD
1936
1937 /*
1938 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
1939 */
1940 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1941 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1942 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */
1943 strh ip, [r0]
1944 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
1945 ldrb r1, [r1, #0x0b] /* r1 = ...B */
1946 mov r2, r2, lsr #24 /* r2 = ...2 */
1947 orr r2, r2, r3, lsl #8 /* r2 = 5432 */
1948 mov r3, r3, lsr #24 /* r3 = ...6 */
1949 orr r3, r3, ip, lsl #8 /* r3 = 9876 */
1950 mov r1, r1, lsl #8 /* r1 = ..B. */
1951 orr r1, r1, ip, lsr #24 /* r1 = ..BA */
1952 str r2, [r0, #0x02]
1953 str r3, [r0, #0x06]
1954 strh r1, [r0, #0x0a]
1955 RET
1956 LMEMCPY_C_PAD
1957
1958 /*
1959 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1960 */
1961 ldrh r2, [r1]
1962 ldr r3, [r1, #0x02]
1963 ldr ip, [r1, #0x06]
1964 ldrh r1, [r1, #0x0a]
1965 strh r2, [r0]
1966 str r3, [r0, #0x02]
1967 str ip, [r0, #0x06]
1968 strh r1, [r0, #0x0a]
1969 RET
1970 LMEMCPY_C_PAD
1971
1972 /*
1973 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
1974 */
1975 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */
1976 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */
1977 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */
1978 strh ip, [r0, #0x0a]
1979 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
1980 ldrb r1, [r1] /* r1 = ...0 */
1981 mov r2, r2, lsl #24 /* r2 = 9... */
1982 orr r2, r2, r3, lsr #8 /* r2 = 9876 */
1983 mov r3, r3, lsl #24 /* r3 = 5... */
1984 orr r3, r3, ip, lsr #8 /* r3 = 5432 */
1985 orr r1, r1, ip, lsl #8 /* r1 = 3210 */
1986 str r2, [r0, #0x06]
1987 str r3, [r0, #0x02]
1988 strh r1, [r0]
1989 RET
1990 LMEMCPY_C_PAD
1991
1992 /*
1993 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
1994 */
1995 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1996 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */
1997 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */
1998 strb r2, [r0]
1999 mov r3, r2, lsr #8 /* r3 = .321 */
2000 orr r3, r3, ip, lsl #24 /* r3 = 4321 */
2001 str r3, [r0, #0x01]
2002 mov r3, ip, lsr #8 /* r3 = .765 */
2003 orr r3, r3, r1, lsl #24 /* r3 = 8765 */
2004 str r3, [r0, #0x05]
2005 mov r1, r1, lsr #8 /* r1 = .BA9 */
2006 strh r1, [r0, #0x09]
2007 mov r1, r1, lsr #16 /* r1 = ...B */
2008 strb r1, [r0, #0x0b]
2009 RET
2010 LMEMCPY_C_PAD
2011
2012 /*
2013 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2014 */
2015 ldrb r2, [r1, #0x0b] /* r2 = ...B */
2016 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */
2017 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2018 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2019 strb r2, [r0, #0x0b]
2020 mov r2, r3, lsr #16 /* r2 = ..A9 */
2021 strh r2, [r0, #0x09]
2022 mov r3, r3, lsl #16 /* r3 = 87.. */
2023 orr r3, r3, ip, lsr #16 /* r3 = 8765 */
2024 mov ip, ip, lsl #16 /* ip = 43.. */
2025 orr ip, ip, r1, lsr #16 /* ip = 4321 */
2026 mov r1, r1, lsr #8 /* r1 = .210 */
2027 str r3, [r0, #0x05]
2028 str ip, [r0, #0x01]
2029 strb r1, [r0]
2030 RET
2031 LMEMCPY_C_PAD
2032
2033 /*
2034 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2035 */
2036 ldrh r2, [r1] /* r2 = ..10 */
2037 ldr r3, [r1, #0x02] /* r3 = 5432 */
2038 ldr ip, [r1, #0x06] /* ip = 9876 */
2039 ldrh r1, [r1, #0x0a] /* r1 = ..BA */
2040 strb r2, [r0]
2041 mov r2, r2, lsr #8 /* r2 = ...1 */
2042 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2043 mov r3, r3, lsr #24 /* r3 = ...5 */
2044 orr r3, r3, ip, lsl #8 /* r3 = 8765 */
2045 mov ip, ip, lsr #24 /* ip = ...9 */
2046 orr ip, ip, r1, lsl #8 /* ip = .BA9 */
2047 mov r1, r1, lsr #8 /* r1 = ...B */
2048 str r2, [r0, #0x01]
2049 str r3, [r0, #0x05]
2050 strh ip, [r0, #0x09]
2051 strb r1, [r0, #0x0b]
2052 RET
2053 LMEMCPY_C_PAD
2054
2055 /*
2056 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2057 */
2058 ldrb r2, [r1]
2059 ldr r3, [r1, #0x01]
2060 ldr ip, [r1, #0x05]
2061 strb r2, [r0]
2062 ldrh r2, [r1, #0x09]
2063 ldrb r1, [r1, #0x0b]
2064 str r3, [r0, #0x01]
2065 str ip, [r0, #0x05]
2066 strh r2, [r0, #0x09]
2067 strb r1, [r0, #0x0b]
2068 RET
2069 END(memcpy)
Cache object: 9da32b34e4ea16013bec43389f94bb24
|