The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/hyperv/utilities/unicode.h

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /* $NetBSD: unicode.h,v 1.1.1.1 2007/03/06 00:10:39 dillo Exp $ */
    2 
    3 /*-
    4  * Copyright (c) 2007 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Dieter Baron.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   29  * POSSIBILITY OF SUCH DAMAGE.
   30  *
   31  * $FreeBSD$
   32  */
   33 
   34 #include <sys/types.h>
   35 
   36 #define UNICODE_DECOMPOSE               0x01
   37 #define UNICODE_PRECOMPOSE              0x02
   38 #define UNICODE_UTF8_LATIN1_FALLBACK    0x03
   39 
   40 size_t utf8_to_utf16(uint16_t *, size_t, const char *, size_t, int, int *);
   41 size_t utf16_to_utf8(char *, size_t, const uint16_t *, size_t, int, int *);
   42 
   43 size_t
   44 utf8_to_utf16(uint16_t *dst, size_t dst_len,
   45               const char *src, size_t src_len,
   46               int flags, int *errp)
   47 {
   48     const unsigned char *s;
   49     size_t spos, dpos;
   50     int error;
   51     uint16_t c;
   52 
   53 #define IS_CONT(c)      (((c)&0xc0) == 0x80)
   54 
   55     error = 0;
   56     s = (const unsigned char *)src;
   57     spos = dpos = 0;
   58     while (spos<src_len) {
   59         if (s[spos] < 0x80)
   60             c = s[spos++];
   61         else if ((flags & UNICODE_UTF8_LATIN1_FALLBACK)
   62                  && (spos >= src_len || !IS_CONT(s[spos+1]))
   63                  && s[spos]>=0xa0) {
   64             /* not valid UTF-8, assume ISO 8859-1 */
   65             c = s[spos++];
   66         }
   67         else if (s[spos] < 0xc0 || s[spos] >= 0xf5) {
   68             /* continuation byte without lead byte
   69                or lead byte for codepoint above 0x10ffff */
   70             error++;
   71             spos++;
   72             continue;
   73         }
   74         else if (s[spos] < 0xe0) {
   75             if (spos >= src_len || !IS_CONT(s[spos+1])) {
   76                 spos++;
   77                 error++;
   78                 continue;
   79             }
   80             c = ((s[spos] & 0x3f) << 6) | (s[spos+1] & 0x3f);
   81             spos += 2;
   82             if (c < 0x80) {
   83                 /* overlong encoding */
   84                 error++;
   85                 continue;
   86             }
   87         }
   88         else if (s[spos] < 0xf0) {
   89             if (spos >= src_len-2
   90                 || !IS_CONT(s[spos+1]) || !IS_CONT(s[spos+2])) {
   91                 spos++;
   92                 error++;
   93                 continue;
   94             }
   95             c = ((s[spos] & 0x0f) << 12) | ((s[spos+1] & 0x3f) << 6)
   96                 | (s[spos+2] & 0x3f);
   97             spos += 3;
   98             if (c < 0x800 || (c & 0xdf00) == 0xd800 ) {
   99                 /* overlong encoding or encoded surrogate */
  100                 error++;
  101                 continue;
  102             }
  103         }
  104         else {
  105             uint32_t cc;
  106             /* UTF-16 surrogate pair */
  107 
  108             if (spos >= src_len-3 || !IS_CONT(s[spos+1])
  109                 || !IS_CONT(s[spos+2]) || !IS_CONT(s[spos+3])) {
  110                 spos++;
  111                 error++;
  112                 
  113                 continue;
  114             }
  115             cc = ((s[spos] & 0x03) << 18) | ((s[spos+1] & 0x3f) << 12)
  116                  | ((s[spos+2] & 0x3f) << 6) | (s[spos+3] & 0x3f);
  117             spos += 4;
  118             if (cc < 0x10000) {
  119                 /* overlong encoding */
  120                 error++;
  121                 continue;
  122             }
  123             if (dst && dpos < dst_len)
  124                 dst[dpos] = (0xd800 | ((cc-0x10000)>>10));
  125             dpos++;
  126             c = 0xdc00 | ((cc-0x10000) & 0x3ffff);
  127         }
  128 
  129         if (dst && dpos < dst_len)
  130             dst[dpos] = c;
  131         dpos++;
  132     }
  133     
  134     if (errp)
  135         *errp = error;
  136 
  137     return dpos;
  138 
  139 #undef IS_CONT
  140 }
  141 
  142 
  143 size_t
  144 utf16_to_utf8(char *dst, size_t dst_len,
  145               const uint16_t *src, size_t src_len,
  146               int flags, int *errp)
  147 {
  148     uint16_t spos, dpos;
  149     int error;
  150 
  151 #define CHECK_LENGTH(l) (dpos > dst_len-(l) ? dst=NULL : NULL)
  152 #define ADD_BYTE(b)     (dst ? dst[dpos] = (b) : 0, dpos++)
  153 
  154     error = 0;
  155     dpos = 0;
  156     for (spos=0; spos<src_len; spos++) {
  157         if (src[spos] < 0x80) {
  158             CHECK_LENGTH(1);
  159             ADD_BYTE(src[spos]);
  160         }
  161         else if (src[spos] < 0x800) {
  162             CHECK_LENGTH(2);
  163             ADD_BYTE(0xc0 | (src[spos]>>6));
  164             ADD_BYTE(0x80 | (src[spos] & 0x3f));
  165         }
  166         else if ((src[spos] & 0xdc00) == 0xd800) {
  167             uint32_t c;
  168             /* first surrogate */
  169             if (spos == src_len - 1 || (src[spos] & 0xdc00) != 0xdc00) {
  170                 /* no second surrogate present */
  171                 error++;
  172                 continue;
  173             }
  174             spos++;
  175             CHECK_LENGTH(4);
  176             c = (((src[spos]&0x3ff) << 10) | (src[spos+1]&0x3ff)) + 0x10000;
  177             ADD_BYTE(0xf0 | (c>>18));
  178             ADD_BYTE(0x80 | ((c>>12) & 0x3f));
  179             ADD_BYTE(0x80 | ((c>>6) & 0x3f));
  180             ADD_BYTE(0x80 | (c & 0x3f));
  181         }
  182         else if ((src[spos] & 0xdc00) == 0xdc00) {
  183             /* second surrogate without preceding first surrogate */
  184             error++;
  185         }
  186         else {
  187             CHECK_LENGTH(3);
  188             ADD_BYTE(0xe0 | src[spos]>>12);
  189             ADD_BYTE(0x80 | ((src[spos]>>6) & 0x3f));
  190             ADD_BYTE(0x80 | (src[spos] & 0x3f));
  191         }
  192     }
  193 
  194     if (errp)
  195         *errp = error;
  196 
  197     return dpos;
  198 
  199 #undef ADD_BYTE
  200 #undef CHECK_LENGTH
  201 }

Cache object: 76433921610b14eb5aaef2061d513ac5


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.