The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/arch/score/lib/checksum.S

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * arch/score/lib/checksum.S
    3  *
    4  * Score Processor version.
    5  *
    6  * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
    7  *  Lennox Wu <lennox.wu@sunplusct.com>
    8  *  Chen Liqin <liqin.chen@sunplusct.com>
    9  *
   10  * This program is free software; you can redistribute it and/or modify
   11  * it under the terms of the GNU General Public License as published by
   12  * the Free Software Foundation; either version 2 of the License, or
   13  * (at your option) any later version.
   14  *
   15  * This program is distributed in the hope that it will be useful,
   16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   18  * GNU General Public License for more details.
   19  *
   20  * You should have received a copy of the GNU General Public License
   21  * along with this program; if not, see the file COPYING, or write
   22  * to the Free Software Foundation, Inc.,
   23  * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   24  */
   25 #include <linux/linkage.h>
   26 /*
       * ADDC(sum, reg): add reg into sum with end-around carry.
       *
       * After the add, reg is compared with the new sum.  If reg is greater,
       * the addition wrapped around and 1 is added back into sum; otherwise
       * the bleu skips the increment.  reg itself is not modified.
       *
       * The expansion ends in the numeric local label 9.  It also executes a
       * cmp.c, so presumably any comparison result that was pending before
       * the macro is gone afterwards (callers re-test where they need it).
       */
   27 #define ADDC(sum,reg)                   \
   28         add     sum, sum, reg;          \
   29         cmp.c   reg, sum;               \
   30         bleu    9f;                     \
   31         addi    sum, 0x1;               \
   32 9:
   33 /*
       * CSUM_BIGCHUNK(src, offset, sum): add the 32 bytes that start at
       * src + offset into sum.
       *
       * The data is read as eight words, four at a time, into r8-r11 and
       * each word is accumulated with ADDC.  src is not advanced; the caller
       * bumps it after the last chunk.  Overwrites r8, r9, r10 and r11.
       */
   34 #define CSUM_BIGCHUNK(src, offset, sum)         \
   35         lw      r8, [src, offset + 0x00];       \
   36         lw      r9, [src, offset + 0x04];       \
   37         lw      r10, [src, offset + 0x08];      \
   38         lw      r11, [src, offset + 0x0c];      \
   39         ADDC(sum, r8);                          \
   40         ADDC(sum, r9);                          \
   41         ADDC(sum, r10);                         \
   42         ADDC(sum, r11);                         \
   43         lw      r8, [src, offset + 0x10];       \
   44         lw      r9, [src, offset + 0x14];       \
   45         lw      r10, [src, offset + 0x18];      \
   46         lw      r11, [src, offset + 0x1c];      \
   47         ADDC(sum, r8);                          \
   48         ADDC(sum, r9);                          \
   49         ADDC(sum, r10);                         \
   50         ADDC(sum, r11);                         \
   51 
   52 #define src r4          /* address of the next byte to sum; r4 also carries the result back */
   53 #define dest r5         /* alias not referenced below; the code uses r5 directly as the byte count */
   54 #define sum r27         /* running checksum accumulator */
   55 
   56         .text
   57 /*
       * small_csumcpy: sum the last few bytes, fold the result and return.
       *
       * Reached directly from csum_partial when len < 8 (unknown src
       * alignment), or from the end of csum_partial through small_memcpy
       * with the 0-3 trailing bytes.
       *
       * In:   r10 = bytes still to sum (copied into r5 here)
       *       r4 (src) = address of the next byte
       *       r27 (sum) = running sum
       *       r25 = 1 when csum_partial already consumed a leading odd byte
       *       r6 = partial checksum passed by the caller
       * Out:  r4 = sum folded to 16 bits (byte-swapped when the buffer
       *       started on an odd address) with r6 added in; control leaves
       *       through br r3 (presumably the return address).
       * Uses: r5, r8, r9, r26 as scratch.
       *
       * r5 is only decremented for the leading odd byte; after that its low
       * bits (4, 2, 1) select which pieces are still to be summed.
       */
   58 small_csumcpy:
   59         mv      r5, r10
   60         ldi     r9, 0x0
   61         cmpi.c  r25, 0x1                /* odd flag already set by csum_partial? */
   62         beq pass_small_set_t7   /* already set: skip the test; the "equal" result also takes the beq below */
   63         andri.c r25,r4 , 0x1    /* r25 = src & 1; zero means src is 2-byte aligned */
   64 
   65 pass_small_set_t7:
   66         beq     aligned                 /* src even, or the odd byte was consumed earlier */
   67         cmpi.c  r5, 0x0                 /* odd src: anything to sum at all? */
   68         beq     fold
   69         lbu     r9, [src]
   70         slli    r9,r9, 0x8      /* byte goes into bits 15..8 (little endian) */
   71         ADDC(sum, r9)
   72         addi    src, 0x1
   73         subi.c  r5, 0x1                 /* one byte consumed */
   74 
   75         /* src is 2-byte aligned from here on; r5 = bytes left (< 8) */
   76 aligned:
   77         andri.c r8, r5, 0x4     /* bit 2 of len set, i.e. at least 4 bytes left? */
   78         beq     len_less_4bytes
   79 
   80         /* At least 4 bytes to go: one lw when src is 4-byte aligned, else two lhu. */
   81         andri.c r8, src, 0x3    /* (src & 3) == 0: 4-byte aligned, so use lw */
   82         beq     four_byte_aligned
   83         lhu     r9, [src]
   84         addi    src, 2
   85         ADDC(sum, r9)
   86         lhu     r9, [src]
   87         addi    src, 2
   88         ADDC(sum, r9)
   89         b len_less_4bytes
   90 
   91 four_byte_aligned:              /* at least 4 bytes left and src 4-byte aligned */
   92         lw      r9, [src]
   93         addi    src, 4
   94         ADDC(sum, r9)
   95 
   96 len_less_4bytes:                /* src 2-byte aligned; (r5 & 3) bytes still to sum */
   97         andri.c r8, r5, 0x2
   98         beq     len_less_2bytes
   99         lhu     r9, [src]
  100         addi    src, 0x2        /* src += 2 */
  101         ADDC(sum, r9)
  102 
  103 len_less_2bytes:                /* at most one byte left */
  104         andri.c r8, r5, 0x1
  105         beq     fold            /* bit 0 clear: nothing left, go fold */
  106         lbu     r9, [src]       /* last byte, added unshifted */
  107 
  108 fold_ADDC:
  109         ADDC(sum, r9)
  110 fold:
  111         /*
               * Fold the sum to 16 bits: add the low half (shifted up by 16) to
               * the sum, keep the upper 16 bits of the result, and add 1 when
               * that addition wrapped.
               */
  112         slli    r26, sum, 16
  113         add     sum, sum, r26
  114         cmp.c   r26, sum
  115         srli    sum, sum, 16    /* sum = upper half; the bleu below tests the cmp.c above */
  116         bleu    1f              /* if r26<=sum: no carry */
  117         addi    sum, 0x1        /* r26>sum: carry out, add it back */
  118 1:
  119         /* odd start address (r25 != 0)? then swap the two bytes of the folded sum */
  120         cmpi.c  r25, 0x0
  121         beq     1f
  122         slli    r26, sum, 8
  123         srli    sum, sum, 8
  124         or      sum, sum, r26
  125         andi    sum, 0xffff     /* keep only the low 16 bits */
  126 1:
  127         .set    optimize
  128         /* Add the passed partial csum. */
  129         ADDC(sum, r6)
  130         mv      r4, sum         /* result is handed back in r4 */
  131         br      r3
  132         .set    volatile
  133 /*
       * csum_partial: accumulate the bytes of a buffer into a checksum using
       * end-around-carry additions (ADDC), fold the result to 16 bits and add
       * the caller's partial checksum.
       *
       * In:   r4 (src) = buffer address
       *       r5       = length in bytes
       *       r6       = partial checksum to add to the result
       * Out:  r4       = resulting checksum
       * (Presumably the C prototype is csum_partial(buff, len, sum); confirm
       * against the architecture's checksum header.)
       *
       * Flow:
       *   - len < 8: handled entirely by small_csumcpy.
       *   - Otherwise src is brought to 2- and then 4-byte alignment.
       *   - If fewer than 56 bytes remain, they are summed one word at a time.
       *   - Else src is aligned further (8, 16, 32 bytes), then 128-, 64- and
       *     32-byte chunks are summed, followed by the remaining whole words.
       *   - The last 0-3 bytes, the fold and the return are done by
       *     small_csumcpy.
       *
       * r5 is decremented only by the alignment steps; afterwards its bits
       * (>= 128, 0x40, 0x20, 0x1c, 0x3) select the remaining work.
       * Overwrites r5, r8-r11, r25, r26 and r27 (sum).
       */
  134         .align  5
  135 ENTRY(csum_partial)
  136         ldi sum, 0                      /* running sum starts at 0 */
  137         ldi r25, 0                      /* odd-start flag cleared */
  138         mv r10, r5                      /* small_csumcpy takes its byte count in r10 */
  139         cmpi.c  r5, 0x8
  140         blt     small_csumcpy           /* len < 8 (signed compare): short path */
  141         cmpi.c  r5, 0x0                 /* NOTE(review): len >= 8 here, so this zero test looks unreachable */
  142         beq     out
  143         andri.c r25, src, 0x1           /* r25 = src & 1: odd buffer? */
  144 
  145         beq     word_align              /* even address: no leading byte to take */
  146 hword_align:                            /* take 1 byte to reach 2-byte alignment */
  147         lbu     r8, [src]
  148         subi    r5, 0x1
  149         slli    r8, r8, 8               /* byte goes into bits 15..8 */
  150         ADDC(sum, r8)
  151         addi    src, 0x1
  152 
  153 word_align:                             /* src is 2-byte aligned */
  154         andri.c r8, src, 0x2            /* (src & 2) == 0: already 4-byte aligned */
  155         beq     dword_align
  156         lhu     r8, [src]               /* take 2 bytes to reach 4-byte alignment */
  157         subi    r5, 0x2
  158         ADDC(sum, r8)
  159         addi    src, 0x2
  160 
  161 dword_align:                            /* src is 4-byte aligned */
  162         mv      r26, r5                 /* r26 = len for do_end_words when len < 56; overwritten below otherwise */
  163         ldi     r8, 56
  164         cmp.c   r8, r5
  165         bgtu    do_end_words            /* 56 > len (unsigned): sum word by word */
  166         andri.c r26, src, 0x4
  167         beq     qword_align             /* already 8-byte aligned */
  168         lw      r8, [src]
  169         subi    r5, 0x4
  170         ADDC(sum, r8)
  171         addi    src, 0x4
  172 
  173 qword_align:                            /* src is 8-byte aligned */
  174         andri.c r26, src, 0x8
  175         beq     oword_align             /* already 16-byte aligned */
  176         lw      r8, [src, 0x0]
  177         lw      r9, [src, 0x4]
  178         subi    r5, 0x8                 /* len-=0x8 */
  179         ADDC(sum, r8)
  180         ADDC(sum, r9)
  181         addi    src, 0x8
  182 
  183 oword_align:                            /* src is 16-byte aligned */
  184         andri.c r26, src, 0x10
  185         beq     begin_movement          /* already 32-byte aligned */
  186         lw      r10, [src, 0x08]
  187         lw      r11, [src, 0x0c]
  188         lw      r8, [src, 0x00]
  189         lw      r9, [src, 0x04]
  190         ADDC(sum, r10)
  191         ADDC(sum, r11)
  192         ADDC(sum, r8)
  193         ADDC(sum, r9)
  194         subi    r5, 0x10
  195         addi    src, 0x10
  196 
  197 begin_movement:
  198         srli.c  r26, r5, 0x7            /* r26 = len / 128; zero when len < 128 */
  199         beq     1f                      /* len<128: no 128-byte blocks */
  200 
  201 /* r26 = number of 128-byte blocks, computed at begin_movement */
  202 move_128bytes:
  203         CSUM_BIGCHUNK(src, 0x00, sum)
  204         CSUM_BIGCHUNK(src, 0x20, sum)
  205         CSUM_BIGCHUNK(src, 0x40, sum)
  206         CSUM_BIGCHUNK(src, 0x60, sum)
  207         subi.c  r26, 0x01               /* one block done; result is tested by the bne below */
  208         addi    src, 0x80
  209         bne     move_128bytes
  210 
  211 1:      /* 128-byte blocks done: one 64-byte chunk if bit 6 of len is set */
  212         andri.c r10, r5, 0x40
  213         beq     1f
  214 
  215 move_64bytes:
  216         CSUM_BIGCHUNK(src, 0x00, sum)
  217         CSUM_BIGCHUNK(src, 0x20, sum)
  218         addi    src, 0x40
  219 
  220 1:                                      /* fewer than 64 bytes left */
  221         andri   r26, r5, 0x1c           /* r26 = len & 0x1c: bytes in the trailing whole words */
  222         andri.c r10, r5, 0x20
  223         beq     do_end_words            /* bit 5 clear: no 32-byte chunk */
  224 
  225 move_32bytes:
  226         CSUM_BIGCHUNK(src, 0x00, sum)
  227         andri   r26, r5, 0x1c           /* same value as computed before the chunk */
  228         addri   src, src, 0x20
  229 
  230 do_end_words:                           /* fewer than 32 bytes (chunk path) or fewer than 56 (from dword_align) */
  231         /* r26 = byte count to sum as whole words: len & 0x1c, or len itself when coming from dword_align */
  232         cmpi.c  r26, 0x0
  233         beq     maybe_end_cruft         /* no whole words to sum */
  234         srli    r26, r26, 0x2           /* bytes -> number of words */
  235 
  236 end_words:
  237         lw      r8, [src]
  238         subi.c  r26, 0x1                /* unit is 4 byte */
  239         ADDC(sum, r8)
  240         addi    src, 0x4
  241         cmpi.c  r26, 0x0                /* re-test the counter after ADDC's own cmp.c */
  242         bne     end_words               /* r26!=0 */
  243 
  244 maybe_end_cruft:                        /* fewer than 4 bytes left */
  245         andri   r10, r5, 0x3            /* r10 = len & 3 trailing bytes */
  246 
  247 small_memcpy:
  248         mv      r5, r10
  249         j       small_csumcpy           /* sums the trailing bytes, folds and returns */
  250 
  251 out:
  252         mv      r4, sum                 /* return sum in r4; r6 is not added on this path */
  253         br      r3
  254 
  255 END(csum_partial)

Cache object: c66c99623c5102c6f5d9474ab58fefdb


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.