The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/fs/umsdos/mangle.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  *  linux/fs/umsdos/mangle.c
    3  *
    4  *      Written 1993 by Jacques Gelinas 
    5  *
    6  * Control the mangling of file name to fit msdos name space.
    7  * Many optimisations by GLU == dglaude@is1.vub.ac.be (Glaude David)
    8  */
    9 
   10 #include <linux/errno.h>
   11 #include <linux/string.h>
   12 #include <linux/kernel.h>
   13 #include <linux/umsdos_fs.h>
   14 
   15 /* (This file is used outside of the kernel) */
   16 #ifndef __KERNEL__
   17 #define KERN_WARNING
   18 #endif
   19 
   20 /*
   21  * Complete the mangling of the MSDOS fake name
   22  * based on the position of the entry in the EMD file.
   23  * 
   24  * Simply complete the job of umsdos_parse; fill the extension.
   25  * 
   26  * Beware that info->f_pos must be set.
   27  */
   28 void umsdos_manglename (struct umsdos_info *info)
   29 {
   30         if (info->msdos_reject) {
   31                 /* #Specification: file name / non MSDOS conforming / mangling
   32                  * Each non MSDOS conforming file has a special extension
   33                  * build from the entry position in the EMD file.
   34                  * 
   35                  * This number is then transform in a base 32 number, where
   36                  * each digit is expressed like hexadecimal number, using
   37                  * digit and letter, except it uses 22 letters from 'a' to 'v'.
   38                  * The number 32 comes from 2**5. It is faster to split a binary
   39                  * number using a base which is a power of two. And I was 32
   40                  * when I started this project. Pick your answer :-) .
   41                  * 
   42                  * If the result is '', it is replace with '_', simply
   43                  * to make it odd.
   44                  * 
   45                  * This is true for the first two character of the extension.
   46                  * The last one is taken from a list of odd character, which
   47                  * are:
   48                  * 
   49                  * { } ( ) ! ` ^ & @
   50                  * 
   51                  * With this scheme, we can produce 9216 ( 9* 32 * 32)
   52                  * different extensions which should not clash with any useful
   53                  * extension already popular or meaningful. Since most directory
   54                  * have much less than 32 * 32 files in it, the first character
   55                  * of the extension of any mangled name will be {.
   56                  * 
   57                  * Here are the reason to do this (this kind of mangling).
   58                  * 
   59                  * -The mangling is deterministic. Just by the extension, we
   60                  * are able to locate the entry in the EMD file.
   61                  * 
   62                  * -By keeping to beginning of the file name almost unchanged,
   63                  * we are helping the MSDOS user.
   64                  * 
   65                  * -The mangling produces names not too ugly, so an msdos user
   66                  * may live with it (remember it, type it, etc...).
   67                  * 
   68                  * -The mangling produces names ugly enough so no one will
   69                  * ever think of using such a name in real life. This is not
   70                  * fool proof. I don't think there is a total solution to this.
   71                  */
   72                 int entry_num;
   73                 char *pt = info->fake.fname + info->fake.len;
   74                 /* lookup for encoding the last character of the extension 
   75                  * It contains valid character after the ugly one to make sure 
   76                  * even if someone overflows the 32 * 32 * 9 limit, it still 
   77                  * does something 
   78                  */
   79 #define SPECIAL_MANGLING '{','}','(',')','!','`','^','&','@'
   80                 static char lookup3[] =
   81                 {
   82                         SPECIAL_MANGLING,
   83                 /* This is the start of lookup12 */
   84                         '_', '1', '2', '3', '4', '5', '6', '7', '8', '9',
   85                         'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
   86                         'p', 'q', 'r', 's', 't', 'u', 'v'
   87                 };
   88 
   89 #define lookup12 (lookup3+9)
   90                 entry_num = info->f_pos / UMSDOS_REC_SIZE;
   91                 if (entry_num > (9* 32 * 32)){
   92                         printk (KERN_WARNING "UMSDOS: more than 9216 files in a directory.\n"
   93                                 "This may break the mangling strategy.\n"
   94                                 "Not a killer problem. See doc.\n");
   95                 }
   96                 *pt++ = '.';
   97                 *pt++ = lookup3 [(entry_num >> 10) & 31];
   98                 *pt++ = lookup12[(entry_num >> 5) & 31];
   99                 *pt++ = lookup12[entry_num & 31];
  100                 *pt = '\0';             /* help doing printk */ 
  101                 info->fake.len += 4;
  102                 info->msdos_reject = 0;         /* Avoid mangling twice */
  103         }
  104 }
  105 
  106 /*
  107  * Evaluate the record size needed to store of name of len character.
  108  * The value returned is a multiple of UMSDOS_REC_SIZE.
  109  */
  110 int umsdos_evalrecsize (int len)
  111 {
  112         struct umsdos_dirent dirent;
  113         int nbrec = 1 + ((len - 1 + (dirent.name - (char *) &dirent))
  114                          / UMSDOS_REC_SIZE);
  115 
  116         return nbrec * UMSDOS_REC_SIZE;
  117         /*
  118          * GLU        This should be inlined or something to speed it up to the max.
  119          * GLU        nbrec is absolutely not needed to return the value.
  120          */
  121 }
  122 #ifdef TEST
  123 int umsdos_evalrecsize_old (int len)
  124 {
  125         struct umsdos_dirent dirent;
  126         int size = len + (dirent.name - (char *) &dirent);
  127         int nbrec = size / UMSDOS_REC_SIZE;
  128         int extra = size % UMSDOS_REC_SIZE;
  129 
  130         if (extra > 0)
  131                 nbrec++;
  132         return nbrec * UMSDOS_REC_SIZE;
  133 }
  134 #endif
  135 
  136 
  137 /*
  138  * Fill the struct info with the full and msdos name of a file
  139  * Return 0 if all is OK, a negative error code otherwise.
  140  */
  141 int umsdos_parse (
  142                          const char *fname,
  143                          int len,
  144                          struct umsdos_info *info)
  145 {
  146         int ret = -ENAMETOOLONG;
  147 
  148         /* #Specification: file name / too long
  149          * If a file name exceed UMSDOS maxima, the file name is silently
  150          * truncated. This makes it conformant with the other file system
  151          * of Linux (minix and ext2 at least).
  152          */
  153         if (len > UMSDOS_MAXNAME)
  154                 len = UMSDOS_MAXNAME;
  155         {
  156                 const char *firstpt = NULL;     /* First place we saw a "." in fname */
  157 
  158                 /* #Specification: file name / non MSDOS conforming / base length 0
  159                  * file names beginning with a period '.' are invalid for MS-DOS.
  160                  * It needs absolutely a base name. So the file name is mangled
  161                  */
  162                 int ivldchar = fname[0] == '.';         /* At least one invalid character */
  163                 int msdos_len = len;
  164                 int base_len;
  165 
  166                 /*
  167                  * cardinal_per_size tells if there exists at least one
  168                  * DOS pseudo device on length n.  See the test below.
  169                  */
  170                 static const char cardinal_per_size[9] =
  171                 {
  172                         0, 0, 0, 1, 1, 0, 1, 0, 1
  173                 };
  174 
  175                 /*
  176                  * lkp translate all character to acceptable character (for DOS).
  177                  * When lkp[n] == n, it means also it is an acceptable one.
  178                  * So it serves both as a flag and as a translator.
  179                  */
  180                 static char lkp[256];
  181                 static char is_init = 0;
  182 
  183                 if (!is_init) {
  184                         /*
  185                          * Initialisation of the array is easier and less error
  186                          * prone like this.
  187                          */
  188                         int i;
  189                         static const char *spc = "\"*+,/:;<=>?[\\]|~";
  190 
  191                         is_init = 1;
  192                         for (i = 0; i <= 32; i++)
  193                                 lkp[i] = '#';
  194                         for (i = 33; i < 'A'; i++)
  195                                 lkp[i] = (char) i;
  196                         for (i = 'A'; i <= 'Z'; i++)
  197                                 lkp[i] = (char) (i + ('a' - 'A'));
  198                         for (i = 'Z' + 1; i < 127; i++)
  199                                 lkp[i] = (char) i;
  200                         for (i = 128; i < 256; i++)
  201                                 lkp[i] = '#';
  202 
  203                         lkp['.'] = '_';
  204                         while (*spc != '\0')
  205                                 lkp[(unsigned char) (*spc++)] = '#';
  206                 }
  207                 /*  GLU
  208                  * File names longer than 8+'.'+3 are invalid for MS-DOS,
  209                  * so the file name is to be mangled--no further test is needed.
  210                  * This speeds up handling of long names.
  211                  * The position of the last point is no more necessary anyway.
  212                  */
  213                 if (len <= (8 + 1 + 3)) {
  214                         const char *pt = fname;
  215                         const char *endpt = fname + len;
  216 
  217                         while (pt < endpt) {
  218                                 if (*pt == '.') {
  219                                         if (firstpt != NULL) {
  220                                                 /* 2 . in a file name. Reject */
  221                                                 ivldchar = 1;
  222                                                 break;
  223                                         } else {
  224                                                 int extlen = (int) (endpt - pt);
  225 
  226                                                 firstpt = pt;
  227                                                 if (firstpt - fname > 8) {
  228                                                         /* base name longer than 8: reject */
  229                                                         ivldchar = 1;
  230                                                         break;
  231                                                 } else if (extlen > 4) {
  232                                                         /* Extension longer than 4 (including .): reject */
  233                                                         ivldchar = 1;
  234                                                         break;
  235                                                 } else if (extlen == 1) {
  236                                                         /* #Specification: file name / non MSDOS conforming / last char == .
  237                                                          * If the last character of a file name is
  238                                                          * a period, mangling is applied. MS-DOS does
  239                                                          * not support those file names.
  240                                                          */
  241                                                         ivldchar = 1;
  242                                                         break;
  243                                                 } else if (extlen == 4) {
  244                                                         /* #Specification: file name / non MSDOS conforming / mangling clash
  245                                                          * To avoid clash with    the umsdos mangling, any file
  246                                                          * with a special character as the first character
  247                                                          * of the extension will be mangled. This solves the
  248                                                          * following problem:
  249                                                          * 
  250                                                          * #
  251                                                          * touch FILE
  252                                                          * # FILE is invalid for DOS, so mangling is applied
  253                                                          * # file.{_1 is created in the DOS directory
  254                                                          * touch file.{_1
  255                                                          * # To UMSDOS file point to a single DOS entry.
  256                                                          * # So file.{_1 has to be mangled.
  257                                                          * #
  258                                                          */
  259                                                         static char special[] =
  260                                                         {
  261                                                                 SPECIAL_MANGLING, '\0'
  262                                                         };
  263 
  264                                                         if (strchr (special, firstpt[1]) != NULL) {
  265                                                                 ivldchar = 1;
  266                                                                 break;
  267                                                         }
  268                                                 }
  269                                         }
  270                                 } else if (lkp[(unsigned char) (*pt)] != *pt) {
  271                                         ivldchar = 1;
  272                                         break;
  273                                 }
  274                                 pt++;
  275                         }
  276                 } else {
  277                         ivldchar = 1;
  278                 }
  279                 if (ivldchar
  280                     || (firstpt == NULL && len > 8)
  281                     || (len == UMSDOS_EMD_NAMELEN
  282                         && memcmp (fname, UMSDOS_EMD_FILE, UMSDOS_EMD_NAMELEN) == 0)) {
  283                         /* #Specification: file name / --linux-.---
  284                          * The name of the EMD file --linux-.--- is map to a mangled
  285                          * name. So UMSDOS does not restrict its use.
  286                          */
  287                         /* #Specification: file name / non MSDOS conforming / mangling
  288                          * Non MSDOS conforming file names must use some alias to fit
  289                          * in the MSDOS name space.
  290                          * 
  291                          * The strategy is simple. The name is simply truncated to
  292                          * 8 char. points are replace with underscore and a
  293                          * number is given as an extension. This number correspond
  294                          * to the entry number in the EMD file. The EMD file
  295                          * only need to carry the real name.
  296                          * 
  297                          * Upper case is also converted to lower case.
  298                          * Control character are converted to #.
  299                          * Spaces are converted to #.
  300                          * The following characters are also converted to #.
  301                          * #
  302                          * " * + , / : ; < = > ? [ \ ] | ~
  303                          * #
  304                          * 
  305                          * Sometimes the problem is not in MS-DOS itself but in
  306                          * command.com.
  307                          */
  308                         int i;
  309                         char *pt = info->fake.fname;
  310 
  311                         base_len = msdos_len = (msdos_len > 8) ? 8 : msdos_len;
  312                         /*
  313                          * There is no '.' any more so we know for a fact that
  314                          * the base length is the length.
  315                          */
  316                         memcpy (info->fake.fname, fname, msdos_len);
  317                         for (i = 0; i < msdos_len; i++, pt++)
  318                                 *pt = lkp[(unsigned char) (*pt)];
  319                         *pt = '\0';     /* GLU  We force null termination. */
  320                         info->msdos_reject = 1;
  321                         /*
  322                          * The numeric extension is added only when we know
  323                          * the position in the EMD file, in umsdos_newentry(),
  324                          * umsdos_delentry(), and umsdos_findentry().
  325                          * See umsdos_manglename().
  326                          */
  327                 } else {
  328                         /* Conforming MSDOS file name */
  329                         strncpy (info->fake.fname, fname, len);
  330                         info->msdos_reject = 0;
  331                         base_len = firstpt != NULL ? (int) (firstpt - fname) : len;
  332                 }
  333                 if (cardinal_per_size[base_len]) {
  334                         /* #Specification: file name / MSDOS devices / mangling
  335                          * To avoid unreachable file from MS-DOS, any MS-DOS conforming
  336                          * file with a basename equal to one of the MS-DOS pseudo
  337                          * devices will be mangled.
  338                          * 
  339                          * If a file such as "prn" was created, it would be unreachable
  340                          * under MS-DOS because "prn" is assumed to be the printer, even
  341                          * if the file does have an extension.
  342                          * 
  343                          * Since the extension is unimportant to MS-DOS, we must patch
  344                          * the basename also. We simply insert a minus '-'. To avoid
  345                          * conflict with valid file with a minus in front (such as
  346                          * "-prn"), we add an mangled extension like any other
  347                          * mangled file name.
  348                          * 
  349                          * Here is the list of DOS pseudo devices:
  350                          * 
  351                          * #
  352                          * "prn","con","aux","nul",
  353                          * "lpt1","lpt2","lpt3","lpt4",
  354                          * "com1","com2","com3","com4",
  355                          * "clock$"
  356                          * #
  357                          * 
  358                          * and some standard ones for common DOS programs
  359                          * 
  360                          * "emmxxxx0","xmsxxxx0","setverxx"
  361                          * 
  362                          * (Thanks to Chris Hall <cah17@phoenix.cambridge.ac.uk>
  363                          * for pointing these out to me).
  364                          * 
  365                          * Is there one missing?
  366                          */
  367                         /* This table must be ordered by length */
  368                         static const char *tbdev[] =
  369                         {
  370                                 "prn", "con", "aux", "nul",
  371                                 "lpt1", "lpt2", "lpt3", "lpt4",
  372                                 "com1", "com2", "com3", "com4",
  373                                 "clock$",
  374                                 "emmxxxx0", "xmsxxxx0", "setverxx"
  375                         };
  376 
  377                         /* Tell where to find in tbdev[], the first name of */
  378                         /* a certain length */
  379                         static const char start_ind_dev[9] =
  380                         {
  381                                 0, 0, 0, 4, 12, 12, 13, 13, 16
  382                         };
  383                         char basen[9];
  384                         int i;
  385 
  386                         for (i = start_ind_dev[base_len - 1]; i < start_ind_dev[base_len]; i++) {
  387                                 if (memcmp (info->fake.fname, tbdev[i], base_len) == 0) {
  388                                         memcpy (basen, info->fake.fname, base_len);
  389                                         basen[base_len] = '\0';         /* GLU  We force null termination. */
  390                                         /*
  391                                          * GLU        We do that only if necessary; we try to do the
  392                                          * GLU        simple thing in the usual circumstance. 
  393                                          */
  394                                         info->fake.fname[0] = '-';
  395                                         strcpy (info->fake.fname + 1, basen);   /* GLU  We already guaranteed a null would be at the end. */
  396                                         msdos_len = (base_len == 8) ? 8 : base_len + 1;
  397                                         info->msdos_reject = 1;
  398                                         break;
  399                                 }
  400                         }
  401                 }
  402                 info->fake.fname[msdos_len] = '\0';     /* Help doing printk */
  403                 /* GLU      This zero should (always?) be there already. */
  404                 info->fake.len = msdos_len;
  405                 /* Why not use info->fake.len everywhere? Is it longer?
  406                  */
  407                 memcpy (info->entry.name, fname, len);
  408                 info->entry.name[len] = '\0';   /* for printk */
  409                 info->entry.name_len = len;
  410                 ret = 0;
  411         }
  412         /*
  413          * Evaluate how many records are needed to store this entry.
  414          */
  415         info->recsize = umsdos_evalrecsize (len);
  416         return ret;
  417 }
  418 
  419 #ifdef TEST
  420 
  421 struct MANG_TEST {
  422         char *fname;            /* Name to validate */
  423         int msdos_reject;       /* Expected msdos_reject flag */
  424         char *msname;           /* Expected msdos name */
  425 };
  426 
  427 struct MANG_TEST tb[] =
  428 {
  429         "hello", 0, "hello",
  430         "hello.1", 0, "hello.1",
  431         "hello.1_", 0, "hello.1_",
  432         "prm", 0, "prm",
  433 
  434 #ifdef PROPOSITION
  435         "HELLO", 1, "hello",
  436         "Hello.1", 1, "hello.1",
  437         "Hello.c", 1, "hello.c",
  438 #else
  439 /*
  440  * I find the three examples below very unfortunate.  I propose to
  441  * convert them to lower case in a quick preliminary pass, then test
  442  * whether there are other troublesome characters.  I have not made
  443  * this change, because it is not easy, but I wanted to mention the 
  444  * principle.  Obviously something like that would increase the chance
  445  * of collisions, for example between "HELLO" and "Hello", but these
  446  * can be treated elsewhere along with the other collisions.
  447  */
  448 
  449         "HELLO", 1, "hello",
  450         "Hello.1", 1, "hello_1",
  451         "Hello.c", 1, "hello_c",
  452 #endif
  453 
  454         "hello.{_1", 1, "hello_{_",
  455         "hello\t", 1, "hello#",
  456         "hello.1.1", 1, "hello_1_",
  457         "hel,lo", 1, "hel#lo",
  458         "Salut.Tu.vas.bien?", 1, "salut_tu",
  459         ".profile", 1, "_profile",
  460         ".xv", 1, "_xv",
  461         "toto.", 1, "toto_",
  462         "clock$.x", 1, "-clock$",
  463         "emmxxxx0", 1, "-emmxxxx",
  464         "emmxxxx0.abcd", 1, "-emmxxxx",
  465         "aux", 1, "-aux",
  466         "prn", 1, "-prn",
  467         "prn.abc", 1, "-prn",
  468         "PRN", 1, "-prn",
  469   /* 
  470    * GLU        WARNING:  the results of these are different with my version
  471    * GLU        of mangling compared to the original one.
  472    * GLU        CAUSE:  the manner of calculating the baselen variable.
  473    * GLU                For you they are always 3.
  474    * GLU                For me they are respectively 7, 8, and 8.
  475 
  476    */
  477         "PRN.abc", 1, "prn_abc",
  478         "Prn.abcd", 1, "prn_abcd",
  479         "prn.abcd", 1, "prn_abcd",
  480         "Prn.abcdefghij", 1, "prn_abcd"
  481 };
  482 
  483 int main (int argc, char *argv[])
  484 {
  485         int i, rold, rnew;
  486 
  487         printf ("Testing the umsdos_parse.\n");
  488         for (i = 0; i < sizeof (tb) / sizeof (tb[0]); i++) {
  489                 struct MANG_TEST *pttb = tb + i;
  490                 struct umsdos_info info;
  491                 int ok = umsdos_parse (pttb->fname, strlen (pttb->fname), &info);
  492 
  493                 if (strcmp (info.fake.fname, pttb->msname) != 0) {
  494                         printf ("**** %s -> ", pttb->fname);
  495                         printf ("%s <> %s\n", info.fake.fname, pttb->msname);
  496                 } else if (info.msdos_reject != pttb->msdos_reject) {
  497                         printf ("**** %s -> %s ", pttb->fname, pttb->msname);
  498                         printf ("%d <> %d\n", info.msdos_reject, pttb->msdos_reject);
  499                 } else {
  500                         printf ("     %s -> %s %d\n", pttb->fname, pttb->msname
  501                                 ,pttb->msdos_reject);
  502                 }
  503         }
  504         printf ("Testing the new umsdos_evalrecsize.");
  505         for (i = 0; i < UMSDOS_MAXNAME; i++) {
  506                 rnew = umsdos_evalrecsize (i);
  507                 rold = umsdos_evalrecsize_old (i);
  508                 if (!(i % UMSDOS_REC_SIZE)) {
  509                         printf ("\n%d:\t", i);
  510                 }
  511                 if (rnew != rold) {
  512                         printf ("**** %d newres: %d != %d \n", i, rnew, rold);
  513                 } else {
  514                         printf (".");
  515                 }
  516         }
  517         printf ("\nEnd of Testing.\n");
  518 
  519         return 0;
  520 }
  521 
  522 #endif

Cache object: 1ec5a4107ec449471b3836c0e5112f79


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.