The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/bsd/hfs/hfs_encodings.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
    3  *
    4  * @APPLE_LICENSE_HEADER_START@
    5  * 
    6  * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
    7  * 
    8  * This file contains Original Code and/or Modifications of Original Code
    9  * as defined in and that are subject to the Apple Public Source License
   10  * Version 2.0 (the 'License'). You may not use this file except in
   11  * compliance with the License. Please obtain a copy of the License at
   12  * http://www.opensource.apple.com/apsl/ and read it before using this
   13  * file.
   14  * 
   15  * The Original Code and all software distributed under the License are
   16  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
   17  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
   18  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
   19  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
   20  * Please see the License for the specific language governing rights and
   21  * limitations under the License.
   22  * 
   23  * @APPLE_LICENSE_HEADER_END@
   24  */
   25 
   26 #include <sys/param.h>
   27 #include <sys/systm.h>
   28 #include <sys/kernel.h>
   29 #include <sys/lock.h>
   30 #include <sys/malloc.h>
   31 #include <sys/queue.h>
   32 #include <sys/utfconv.h>
   33 
   34 #include "hfs.h"
   35 
   36 
   37 /* hfs encoding converter list */
   38 SLIST_HEAD(encodinglst, hfs_encoding) hfs_encoding_list = {0};
   39 decl_simple_lock_data(,hfs_encoding_list_slock);
   40 
   41 
   42 /* hfs encoding converter entry */
   43 struct  hfs_encoding {
   44         SLIST_ENTRY(hfs_encoding)  link;
   45         int                     refcount;
   46         int                     kmod_id;
   47         UInt32                  encoding;
   48         hfs_to_unicode_func_t   get_unicode_func;
   49         unicode_to_hfs_func_t   get_hfsname_func;
   50 };
   51 
   52 /* XXX We should use an "official" interface! */
   53 extern kern_return_t kmod_destroy(host_priv_t host_priv, kmod_t id);
   54 extern struct host realhost;
   55 
   56 #define MAX_HFS_UNICODE_CHARS   (15*5)
   57 
   58 int mac_roman_to_unicode(const Str31 hfs_str, UniChar *uni_str, UInt32 maxCharLen, UInt32 *usedCharLen);
   59 
   60 static int unicode_to_mac_roman(UniChar *uni_str, UInt32 unicodeChars, Str31 hfs_str);
   61 
   62 
   63 void
   64 hfs_converterinit(void)
   65 {
   66         SLIST_INIT(&hfs_encoding_list);
   67         simple_lock_init(&hfs_encoding_list_slock);
   68 
   69         /*
   70          * add resident MacRoman converter and take a reference
   71          * since its always "loaded".
   72          */
   73         hfs_addconverter(0, kTextEncodingMacRoman, mac_roman_to_unicode, unicode_to_mac_roman);
   74         SLIST_FIRST(&hfs_encoding_list)->refcount++;
   75 }
   76 
   77 
   78 /*
   79  * hfs_addconverter - add an HFS encoding converter
   80  *
   81  * This is called exclusivly by kernel loadable modules
   82  * (like HFS_Japanese.kmod) to register hfs encoding
   83  * conversion routines.
   84  *
   85  */
   86 int
   87 hfs_addconverter(int id, UInt32 encoding, hfs_to_unicode_func_t get_unicode, unicode_to_hfs_func_t get_hfsname)
   88 {
   89         struct hfs_encoding *encp;
   90         
   91         MALLOC(encp, struct hfs_encoding *, sizeof(struct hfs_encoding), M_TEMP, M_WAITOK);
   92 
   93         simple_lock(&hfs_encoding_list_slock);
   94 
   95         encp->link.sle_next = NULL;
   96         encp->refcount = 0;
   97         encp->encoding = encoding;
   98         encp->get_unicode_func = get_unicode;
   99         encp->get_hfsname_func = get_hfsname;
  100         encp->kmod_id = id;
  101         SLIST_INSERT_HEAD(&hfs_encoding_list, encp, link);
  102 
  103         simple_unlock(&hfs_encoding_list_slock);
  104         return (0);
  105 }
  106 
  107 
  108 /*
  109  * hfs_remconverter - remove an HFS encoding converter
  110  *
  111  * Can be called by a kernel loadable module's finalize
  112  * routine to remove an encoding converter so that the
  113  * module (i.e. the code) can be unloaded.
  114  *
  115  * However, in the normal case, the removing and unloading
  116  * of these converters is done in hfs_relconverter.
  117  * The call is initiated from within the kernel during the unmounting of an hfs voulume.
  118  */
  119 int
  120 hfs_remconverter(int id, UInt32 encoding)
  121 {
  122         struct hfs_encoding *encp;
  123         int busy = 0;
  124 
  125         simple_lock(&hfs_encoding_list_slock);
  126         SLIST_FOREACH(encp, &hfs_encoding_list, link) {
  127                 if (encp->encoding == encoding && encp->kmod_id == id) {
  128                         encp->refcount--;
  129                         
  130                         /* if converter is no longer in use, release it */
  131                         if (encp->refcount <= 0 && encp->kmod_id != 0) {
  132                                 SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link);
  133                                 FREE(encp, M_TEMP);
  134                         } else {
  135                                 busy = 1;
  136                         }
  137                         break;
  138                 }
  139         }
  140         simple_unlock(&hfs_encoding_list_slock);
  141 
  142         return (busy);
  143 }
  144 
  145 
  146 /*
  147  * hfs_getconverter - get HFS encoding converters
  148  *
  149  * Normally called during the mounting of an hfs voulume.
  150  */
  151 int
  152 hfs_getconverter(UInt32 encoding, hfs_to_unicode_func_t *get_unicode, unicode_to_hfs_func_t *get_hfsname)
  153 {
  154         struct hfs_encoding *encp;
  155         int found = 0;
  156 
  157         simple_lock(&hfs_encoding_list_slock);
  158         SLIST_FOREACH(encp, &hfs_encoding_list, link) {
  159                 if (encp->encoding == encoding) {
  160                         found = 1;
  161                         *get_unicode = encp->get_unicode_func;
  162                         *get_hfsname = encp->get_hfsname_func;
  163                         ++encp->refcount;
  164                         break;
  165                 }
  166         }
  167         simple_unlock(&hfs_encoding_list_slock);
  168 
  169         if (!found) {
  170                 *get_unicode = NULL;
  171                 *get_hfsname = NULL;
  172                 return (EINVAL);
  173         }
  174         
  175         return (0);
  176 }
  177 
  178 
  179 /*
  180  * hfs_relconverter - release interest in an HFS encoding converter
  181  *
  182  * Normally called during the unmounting of an hfs voulume.
  183  */
  184 int
  185 hfs_relconverter(UInt32 encoding)
  186 {
  187         struct hfs_encoding *encp;
  188         int found = 0;
  189 
  190         simple_lock(&hfs_encoding_list_slock);
  191         SLIST_FOREACH(encp, &hfs_encoding_list, link) {
  192                 if (encp->encoding == encoding) {
  193                         found = 1;
  194                         encp->refcount--;
  195                         
  196                         /* if converter is no longer in use, release it */
  197                         if (encp->refcount <= 0 && encp->kmod_id != 0) {
  198                                 int id = encp->kmod_id;
  199 
  200                                 SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link);
  201                                 FREE(encp, M_TEMP);
  202                                 encp = NULL;
  203 
  204                                 simple_unlock(&hfs_encoding_list_slock);
  205                                 kmod_destroy((host_priv_t) host_priv_self(), id);
  206                                 simple_lock(&hfs_encoding_list_slock);
  207                         }
  208                         break;
  209                 }
  210         }
  211         simple_unlock(&hfs_encoding_list_slock);
  212 
  213         return (found ? 0 : EINVAL);
  214 }
  215 
  216 
  217 /*
  218  * Convert HFS encoded string into UTF-8
  219  *
  220  * Unicode output is fully decomposed
  221  * '/' chars are converted to ':'
  222  */
  223 int
  224 hfs_to_utf8(ExtendedVCB *vcb, Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
  225 {
  226         int error;
  227         UniChar uniStr[MAX_HFS_UNICODE_CHARS];
  228         ItemCount uniCount;
  229         size_t utf8len;
  230         hfs_to_unicode_func_t hfs_get_unicode = VCBTOHFS(vcb)->hfs_get_unicode;
  231 
  232         error = hfs_get_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
  233         
  234         if (uniCount == 0)
  235                 error = EINVAL;
  236 
  237         if (error == 0) {
  238                 error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
  239                 if (error == ENAMETOOLONG)
  240                         *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
  241                 else
  242                         *actualDstLen = utf8len;
  243         }
  244 
  245         return error;
  246 }
  247 
  248 
  249 /*
  250  * When an HFS name cannot be encoded with the current
  251  * volume encoding then MacRoman is used as a fallback.
  252  */
  253 int
  254 mac_roman_to_utf8(Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
  255 {
  256         int error;
  257         UniChar uniStr[MAX_HFS_UNICODE_CHARS];
  258         ItemCount uniCount;
  259         size_t utf8len;
  260 
  261         error = mac_roman_to_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
  262         
  263         if (uniCount == 0)
  264                 error = EINVAL;
  265 
  266         if (error == 0) {
  267                 error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
  268                 if (error == ENAMETOOLONG)
  269                         *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
  270                 else
  271                         *actualDstLen = utf8len;
  272         }
  273 
  274         return error;
  275 }
  276 
  277 
  278 /*
  279  * Convert Unicode string into HFS encoding
  280  *
  281  * ':' chars are converted to '/'
  282  * Assumes input represents fully decomposed Unicode
  283  */
  284 int
  285 unicode_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, u_int16_t* srcStr, Str31 dstStr, int retry)
  286 {
  287         int error;
  288         unicode_to_hfs_func_t hfs_get_hfsname = VCBTOHFS(vcb)->hfs_get_hfsname;
  289 
  290         error = hfs_get_hfsname(srcStr, srcLen/sizeof(UniChar), dstStr);
  291         if (error && retry) {
  292                 error = unicode_to_mac_roman(srcStr, srcLen/sizeof(UniChar), dstStr);
  293         }
  294         return error;
  295 }
  296 
  297 /*
  298  * Convert UTF-8 string into HFS encoding
  299  *
  300  * ':' chars are converted to '/'
  301  * Assumes input represents fully decomposed Unicode
  302  */
  303 int
  304 utf8_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr/*, int retry*/)
  305 {
  306         int error;
  307         UniChar uniStr[MAX_HFS_UNICODE_CHARS];
  308         size_t ucslen;
  309 
  310         error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
  311         if (error == 0)
  312                 error = unicode_to_hfs(vcb, ucslen, uniStr, dstStr, 1);
  313 
  314         return error;
  315 }
  316 
  317 int
  318 utf8_to_mac_roman(ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr)
  319 {
  320         int error;
  321         UniChar uniStr[MAX_HFS_UNICODE_CHARS];
  322         size_t ucslen;
  323 
  324         error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
  325         if (error == 0)
  326                 error = unicode_to_mac_roman(uniStr, ucslen/sizeof(UniChar), dstStr);
  327 
  328         return error;
  329 }
  330 
/*
 * HFS MacRoman to/from Unicode conversions are built into the kernel.
 * All other HFS encodings are loadable.
 */
  335 
  336 /* 0x00A0 - 0x00FF = Latin 1 Supplement (30 total) */
  337 static UInt8 gLatin1Table[] = {
  338   /*              0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
  339   /* 0x00A0 */  0xCA, 0xC1, 0xA2, 0xA3, 0xDB, 0xB4,  '?', 0xA4, 0xAC, 0xA9, 0xBB, 0xC7, 0xC2,  '?', 0xA8, 0xF8,
  340   /* 0x00B0 */  0xA1, 0XB1,  '?',  '?', 0xAB, 0xB5, 0xA6, 0xe1, 0xFC,  '?', 0xBC, 0xC8,  '?',  '?',  '?', 0xC0,
  341   /* 0x00C0 */   '?',  '?',  '?',  '?',  '?',  '?', 0xAE,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  342   /* 0x00D0 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xAF,  '?',  '?',  '?',  '?',  '?',  '?', 0xA7,
  343   /* 0x00E0 */   '?',  '?',  '?',  '?',  '?',  '?', 0xBE,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  344   /* 0x00F0 */   '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD6, 0xBF,  '?',  '?',  '?',  '?',  '?',  '?',  '?'
  345 };
  346 
  347 /* 0x02C0 - 0x02DF = Spacing Modifiers (8 total) */
  348 static UInt8 gSpaceModsTable[] = {
  349   /*              0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
  350   /* 0x02C0 */   '?',  '?',  '?',  '?',  '?',  '?', 0xF6, 0xFF,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  351   /* 0x02D0 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xF9, 0xFA, 0xFB, 0xFE, 0xF7, 0xFD,  '?',  '?'
  352 };
  353 
  354 /* 0x2010 - 0x20AF = General Punctuation (17 total) */
  355 static UInt8 gPunctTable[] = {
  356   /*              0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
  357   /* 0x2010 */   '?',  '?',  '?', 0xd0, 0xd1,  '?',  '?',  '?', 0xd4, 0xd5, 0xe2,  '?', 0xd2, 0xd3, 0xe3,  '?',
  358   /* 0x2020 */  0xa0, 0xe0, 0xa5,  '?',  '?',  '?', 0xc9,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  359   /* 0x2030 */  0xe4,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xdc, 0xdd,  '?',  '?',  '?',  '?',  '?',
  360   /* 0x2040 */   '?',  '?',  '?',  '?', 0xda,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  361   /* 0x2050 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  362   /* 0x2060 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  363   /* 0x2070 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  364   /* 0x2080 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  365   /* 0x2090 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  366   /* 0x20A0 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xdb,  '?',  '?',  '?'
  367 };
  368 
  369 /* 0x22xx = Mathematical Operators (11 total) */
  370 static UInt8 gMathTable[] = {
  371   /*              0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
  372   /* 0x2200 */   '?',  '?', 0xb6,  '?',  '?',  '?', 0xc6,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xb8,
  373   /* 0x2210 */   '?', 0xb7,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xc3,  '?',  '?',  '?', 0xb0,  '?',
  374   /* 0x2220 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xba,  '?',  '?',  '?',  '?',
  375   /* 0x2230 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  376   /* 0x2240 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xc5,  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  377   /* 0x2250 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  378   /* 0x2260 */  0xad,  '?',  '?',  '?', 0xb2, 0xb3,  '?',  '?'
  379 };
  380 
  381 /* */
  382 static UInt8 gReverseCombTable[] = {
  383   /*              0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
  384   /* 0x40 */    0xDA, 0x40, 0xDA, 0xDA, 0xDA, 0x56, 0xDA, 0xDA, 0xDA, 0x6C, 0xDA, 0xDA, 0xDA, 0xDA, 0x82, 0x98,
  385   /* 0x50 */    0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xAE, 0xDA, 0xDA, 0xDA, 0xC4, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,
  386   /* 0x60 */    0xDA, 0x4B, 0xDA, 0xDA, 0xDA, 0x61, 0xDA, 0xDA, 0xDA, 0x77, 0xDA, 0xDA, 0xDA, 0xDA, 0x8D, 0xA3,
  387   /* 0x70 */    0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xB9, 0xDA, 0xDA, 0xDA, 0xCF, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,
  388 
  389   /* Combining Diacritical Marks (0x0300 - 0x030A) */
  390   /*              0     1     2     3     4     5     6     7     8     9     A  */
  391   /*  'A'   */
  392   /* 0x0300 */  0xCB, 0xE7, 0xE5, 0xCC,  '?',  '?',  '?',  '?', 0x80,  '?', 0x81,
  393 
  394   /*  'a'   */
  395   /* 0x0300 */  0x88, 0x87, 0x89, 0x8B,  '?',  '?',  '?',  '?', 0x8A,  '?', 0x8C,
  396 
  397   /*  'E'   */
  398   /* 0x0300 */  0xE9, 0x83, 0xE6,  '?',  '?',  '?',  '?',  '?', 0xE8,  '?',  '?',
  399 
  400   /*  'e'   */
  401   /* 0x0300 */  0x8F, 0x8E, 0x90,  '?',  '?',  '?',  '?',  '?', 0x91,  '?',  '?',
  402 
  403   /*  'I'   */
  404   /* 0x0300 */  0xED, 0xEA, 0xEB,  '?',  '?',  '?',  '?',  '?', 0xEC,  '?',  '?',
  405 
  406   /*  'i'   */
  407   /* 0x0300 */  0x93, 0x92, 0x94,  '?',  '?',  '?',  '?',  '?', 0x95,  '?',  '?',
  408 
  409   /*  'N'   */
  410   /* 0x0300 */   '?',  '?',  '?', 0x84,  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  411 
  412   /*  'n'   */
  413   /* 0x0300 */   '?',  '?',  '?', 0x96,  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  414 
  415   /*  'O'   */
  416   /* 0x0300 */  0xF1, 0xEE, 0xEF, 0xCD,  '?',  '?',  '?',  '?', 0x85,  '?',  '?',
  417 
  418   /*  'o'   */
  419   /* 0x0300 */  0x98, 0x97, 0x99, 0x9B,  '?',  '?',  '?',  '?', 0x9A,  '?',  '?',
  420 
  421   /*  'U'   */
  422   /* 0x0300 */  0xF4, 0xF2, 0xF3,  '?',  '?',  '?',  '?',  '?', 0x86,  '?',  '?',
  423 
  424   /*  'u'   */
  425   /* 0x0300 */  0x9D, 0x9C, 0x9E,  '?',  '?',  '?',  '?',  '?', 0x9F,  '?',  '?',
  426 
  427   /*  'Y'   */
  428   /* 0x0300 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD9,  '?',  '?',
  429 
  430   /*  'y'   */
  431   /* 0x0300 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD8,  '?',  '?',
  432 
  433   /*  else  */
  434   /* 0x0300 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?'
  435 };
  436 
  437 
  438 /*
  439  * Convert Unicode string into HFS MacRoman encoding
  440  *
  441  * Assumes Unicode input is fully decomposed
  442  */
  443 static int unicode_to_mac_roman(UniChar *uni_str, UInt32 unicodeChars, Str31 hfs_str)
  444 {
  445         UInt8           *p;
  446         const UniChar   *u;
  447         UniChar         c;
  448         UniChar         mask;
  449         UInt16          inputChars;
  450         UInt16          pascalChars;
  451         OSErr           result = noErr;
  452         UInt8           lsb;
  453         UInt8           prevChar;
  454         UInt8           mc;
  455 
  456         mask = (UniChar) 0xFF80;
  457         p = &hfs_str[1];
  458         u = uni_str;
  459         inputChars = unicodeChars;
  460         pascalChars = prevChar = 0;
  461         
  462         while (inputChars) {
  463                 c = *(u++);
  464                 lsb = (UInt8) c;
  465 
  466                 /*
  467                  * If its not 7-bit ascii, then we need to map it
  468                  */
  469                 if ( c & mask ) {
  470                         mc = '?';
  471                         switch (c & 0xFF00) {
  472                         case 0x0000:
  473                                 if (lsb >= 0xA0)
  474                                         mc = gLatin1Table[lsb - 0xA0];
  475                                 break;
  476 
  477                         case 0x0200:
  478                                 if (lsb >= 0xC0 && lsb <= 0xDF)
  479                                         mc = gSpaceModsTable[lsb - 0xC0];
  480                                 break;
  481 
  482                         case 0x2000:
  483                                 if (lsb >= 0x10 && lsb <= 0xAF)
  484                                         mc = gPunctTable[lsb- 0x10];
  485                                 break;
  486 
  487                         case 0x2200:
  488                                 if (lsb <= 0x68)
  489                                         mc = gMathTable[lsb];
  490                                 break;
  491 
  492                         case 0x0300:
  493                                 if (c <= 0x030A) {
  494                                         if (prevChar >= 'A' && prevChar < 'z') {
  495                                                 mc = gReverseCombTable[gReverseCombTable[prevChar - 0x40] + lsb];
  496                                                 --p;    /* backup over base char */
  497                                                 --pascalChars;
  498                                         }
  499                                 } else {
  500                                         switch (c) {
  501                                         case 0x0327:    /* combining cedilla */
  502                                                 if (prevChar == 'C')
  503                                                         mc = 0x82;
  504                                                 else if (prevChar == 'c')
  505                                                         mc = 0x8D;
  506                                                 else
  507                                                         break;
  508                                                 --p;    /* backup over base char */
  509                                                 --pascalChars;
  510                                                 break;
  511 
  512                                         case 0x03A9: mc = 0xBD; break;  /* omega */
  513 
  514                                         case 0x03C0: mc = 0xB9; break;  /* pi */
  515                                         }
  516                                 }
  517                                 break;
  518                                 
  519                         default:
  520                                 switch (c) {
  521                                 case 0x0131: mc = 0xf5; break;  /* dotless i */
  522 
  523                                 case 0x0152: mc = 0xce; break;  /* OE */
  524 
  525                                 case 0x0153: mc = 0xcf; break;  /* oe */
  526 
  527                                 case 0x0192: mc = 0xc4; break;  /* Ä */
  528 
  529                                 case 0x2122: mc = 0xaa; break;  /* TM */
  530 
  531                                 case 0x25ca: mc = 0xd7; break;  /* diamond */
  532 
  533                                 case 0xf8ff: mc = 0xf0; break;  /* apple logo */
  534 
  535                                 case 0xfb01: mc = 0xde; break;  /* fi */
  536 
  537                                 case 0xfb02: mc = 0xdf; break;  /* fl */
  538                                 }
  539                         } /* end switch (c & 0xFF00) */
  540                         
  541                         /*
  542                          * If we have an unmapped character then we need to mangle the name...
  543                          */
  544                         if (mc == '?')
  545                                 result = kTECUsedFallbacksStatus;
  546                         
  547                         prevChar = 0;
  548                         lsb = mc;
  549 
  550                 } else {
  551                         prevChar = lsb;
  552                 }
  553 
  554                 if (pascalChars >= 31)
  555                         break;
  556 
  557                 *(p++) = lsb;
  558                 ++pascalChars;
  559                 --inputChars;
  560 
  561         } /* end while */
  562         
  563         hfs_str[0] = pascalChars;
  564         
  565         if (inputChars > 0)
  566                 result = ENAMETOOLONG;  /* ran out of room! */
  567 
  568         return result;
  569 }
  570 
  571 
  572 static UniChar gHiBitBaseUnicode[128] = {
  573   /* 0x80 */    0x0041, 0x0041, 0x0043, 0x0045, 0x004e, 0x004f, 0x0055, 0x0061, 
  574   /* 0x88 */    0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0063, 0x0065, 0x0065, 
  575   /* 0x90 */    0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069, 0x006e, 0x006f, 
  576   /* 0x98 */    0x006f, 0x006f, 0x006f, 0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 
  577   /* 0xa0 */    0x2020, 0x00b0, 0x00a2, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x00df, 
  578   /* 0xa8 */    0x00ae, 0x00a9, 0x2122, 0x00b4, 0x00a8, 0x2260, 0x00c6, 0x00d8, 
  579   /* 0xb0 */    0x221e, 0x00b1, 0x2264, 0x2265, 0x00a5, 0x00b5, 0x2202, 0x2211, 
  580   /* 0xb8 */    0x220f, 0x03c0, 0x222b, 0x00aa, 0x00ba, 0x03a9, 0x00e6, 0x00f8, 
  581   /* 0xc0 */    0x00bf, 0x00a1, 0x00ac, 0x221a, 0x0192, 0x2248, 0x2206, 0x00ab, 
  582   /* 0xc8 */    0x00bb, 0x2026, 0x00a0, 0x0041, 0x0041, 0x004f, 0x0152, 0x0153, 
  583   /* 0xd0 */    0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x25ca, 
  584   /* 0xd8 */    0x0079, 0x0059, 0x2044, 0x20ac, 0x2039, 0x203a, 0xfb01, 0xfb02, 
  585   /* 0xe0 */    0x2021, 0x00b7, 0x201a, 0x201e, 0x2030, 0x0041, 0x0045, 0x0041, 
  586   /* 0xe8 */    0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 0x004f, 0x004f, 
  587   /* 0xf0 */    0xf8ff, 0x004f, 0x0055, 0x0055, 0x0055, 0x0131, 0x02c6, 0x02dc, 
  588   /* 0xf8 */    0x00af, 0x02d8, 0x02d9, 0x02da, 0x00b8, 0x02dd, 0x02db, 0x02c7
  589 };
  590 
  591 static UniChar gHiBitCombUnicode[128] = {
  592   /* 0x80 */    0x0308, 0x030a, 0x0327, 0x0301, 0x0303, 0x0308, 0x0308, 0x0301, 
  593   /* 0x88 */    0x0300, 0x0302, 0x0308, 0x0303, 0x030a, 0x0327, 0x0301, 0x0300, 
  594   /* 0x90 */    0x0302, 0x0308, 0x0301, 0x0300, 0x0302, 0x0308, 0x0303, 0x0301, 
  595   /* 0x98 */    0x0300, 0x0302, 0x0308, 0x0303, 0x0301, 0x0300, 0x0302, 0x0308, 
  596   /* 0xa0 */    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  597   /* 0xa8 */    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
  598   /* 0xb0 */    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
  599   /* 0xb8 */    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
  600   /* 0xc0 */    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
  601   /* 0xc8 */    0x0000, 0x0000, 0x0000, 0x0300, 0x0303, 0x0303, 0x0000, 0x0000, 
  602   /* 0xd0 */    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
  603   /* 0xd8 */    0x0308, 0x0308, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
  604   /* 0xe0 */    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0302, 0x0302, 0x0301, 
  605   /* 0xe8 */    0x0308, 0x0300, 0x0301, 0x0302, 0x0308, 0x0300, 0x0301, 0x0302, 
  606   /* 0xf0 */    0x0000, 0x0300, 0x0301, 0x0302, 0x0300, 0x0000, 0x0000, 0x0000, 
  607   /* 0xf8 */    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
  608 };
  609 
  610 
  611 /*
  612  * Convert HFS MacRoman encoded string into Unicode
  613  *
  614  * Unicode output is fully decomposed
  615  */
  616 int
  617 mac_roman_to_unicode(const Str31 hfs_str, UniChar *uni_str,
  618                                 UInt32 maxCharLen, UInt32 *unicodeChars)
  619 {
  620         const UInt8  *p;
  621         UniChar  *u;
  622         UInt16  pascalChars;
  623         UInt8  c;
  624 
  625         p = hfs_str;
  626         u = uni_str;
  627 
  628         *unicodeChars = pascalChars = *(p++);   /* pick up length byte */
  629 
  630         while (pascalChars--) {
  631                 c = *(p++);
  632 
  633                 if ( (SInt8) c >= 0 ) {         /* check if seven bit ascii */
  634                         *(u++) = (UniChar) c;   /* just pad high byte with zero */
  635                 } else { /* its a hi bit character */
  636                         UniChar uc;
  637 
  638                         c &= 0x7F;
  639                         *(u++) = uc = gHiBitBaseUnicode[c];
  640                         
  641                         /*
  642                          * if the unicode character we get back is an alpha char
  643                          * then we must have an additional combining character
  644                          */
  645                         if ((uc <= (UniChar) 'z') && (uc >= (UniChar) 'A')) {
  646                                 *(u++) = gHiBitCombUnicode[c];
  647                                 ++(*unicodeChars);
  648                         }
  649                 }
  650         }
  651         
  652         return noErr;
  653 }
  654 

Cache object: 6a8c541042e6e3c6465361641569ad76


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.