FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_scanf.c
1 /*-
2 * Copyright (c) 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Chris Torek.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * $FreeBSD: src/sys/kern/subr_scanf.c,v 1.13 1999/11/24 01:03:01 archie Exp $
33 * $DragonFly: src/sys/kern/subr_scanf.c,v 1.4 2006/12/13 21:58:50 dillon Exp $
34 * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp
35 * From: static char sccsid[] = "@(#)strtol.c 8.1 (Berkeley) 6/4/93";
36 * From: static char sccsid[] = "@(#)strtoul.c 8.1 (Berkeley) 6/4/93";
37 */
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/ctype.h>
42 #include <machine/limits.h>
43
44 /*
45 * Note that stdarg.h and the ANSI style va_start macro is used for both
46 * ANSI and traditional C compilers.
47 */
48 #include <machine/stdarg.h>
49
50 #define BUF 32 /* Size of the numeric conversion buffer, including the NUL. */
51
52 /*
53 * Flags used during conversion.
54 */
55 #define LONG 0x01 /* l: long */
56 #define SHORT 0x04 /* h: short */
57 #define SUPPRESS 0x08 /* *: suppress assignment */
58 #define POINTER 0x10 /* weird %p pointer (`fake hex') */
59 #define NOSKIP 0x20 /* do not skip blanks */
60 #define QUAD 0x400 /* q: quad_t */
61
62 /*
63 * The following are used in numeric conversions only:
64 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
65 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
66 * DPTOK/EXPOK deliberately share their bit values with PFXOK/NZDIGITS;
67 * kvsscanf() in this file only uses the integral set.
68 */
67 #define SIGNOK 0x40 /* +/- is (still) legal */
68 #define NDIGITS 0x80 /* no digits detected */
69
70 #define DPTOK 0x100 /* (float) decimal point is still legal */
71 #define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */
72
73 #define PFXOK 0x100 /* 0x prefix is (still) legal */
74 #define NZDIGITS 0x200 /* no zero digits detected */
75
76 /*
77 * Conversion types.
78 */
79 #define CT_CHAR 0 /* %c conversion */
80 #define CT_CCL 1 /* %[...] conversion */
81 #define CT_STRING 2 /* %s conversion */
82 #define CT_INT 3 /* integer, i.e., strtoq or strtouq */
83 typedef u_quad_t (*ccfntype)(const char *, char **, int); /* common signature used to call strtoq/strtouq */
84
85 static const u_char *__sccl(char *, const u_char *); /* builds the %[...] scanset table */
86
87 int
88 ksscanf(const char *ibuf, const char *fmt, ...)
89 {
90 __va_list ap;
91 int ret;
92
93 __va_start(ap, fmt);
94 ret = kvsscanf(ibuf, fmt, ap);
95 __va_end(ap);
96 return(ret);
97 }
98
99 int
100 kvsscanf(const char *inp, char const *fmt0, __va_list ap)
101 {
102 int inr;
103 const u_char *fmt = (const u_char *)fmt0;
104 int c; /* character from format, or conversion */
105 size_t width; /* field width, or 0 */
106 char *p; /* points into all kinds of strings */
107 int n; /* handy integer */
108 int flags; /* flags as defined above */
109 char *p0; /* saves original value of p when necessary */
110 int nassigned; /* number of fields assigned */
111 int nconversions; /* number of conversions */
112 int nread; /* number of characters consumed from fp */
113 int base; /* base argument to strtoq/strtouq */
114 ccfntype ccfn; /* conversion function (strtoq/strtouq) */
115 char ccltab[256]; /* character class table for %[...] */
116 char buf[BUF]; /* buffer for numeric conversions */
117
118 /* `basefix' is used to avoid `if' tests in the integer scanner */
119 static short basefix[17] =
120 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
121
122 inr = strlen(inp);
123
124 nassigned = 0;
125 nconversions = 0;
126 nread = 0;
127 base = 0; /* XXX just to keep gcc happy */
128 ccfn = NULL; /* XXX just to keep gcc happy */
129 for (;;) {
130 c = *fmt++;
131 if (c == 0)
132 return (nassigned);
133 if (isspace(c)) {
134 while (inr > 0 && isspace(*inp))
135 nread++, inr--, inp++;
136 continue;
137 }
138 if (c != '%')
139 goto literal;
140 width = 0;
141 flags = 0;
142 /*
143 * switch on the format. continue if done;
144 * break once format type is derived.
145 */
146 again: c = *fmt++;
147 switch (c) {
148 case '%':
149 literal:
150 if (inr <= 0)
151 goto input_failure;
152 if (*inp != c)
153 goto match_failure;
154 inr--, inp++;
155 nread++;
156 continue;
157
158 case '*':
159 flags |= SUPPRESS;
160 goto again;
161 case 'l':
162 flags |= LONG;
163 goto again;
164 case 'q':
165 flags |= QUAD;
166 goto again;
167 case 'h':
168 flags |= SHORT;
169 goto again;
170
171 case '': case '1': case '2': case '3': case '4':
172 case '5': case '6': case '7': case '8': case '9':
173 width = width * 10 + c - '';
174 goto again;
175
176 /*
177 * Conversions.
178 *
179 */
180 case 'd':
181 c = CT_INT;
182 ccfn = (ccfntype)strtoq;
183 base = 10;
184 break;
185
186 case 'i':
187 c = CT_INT;
188 ccfn = (ccfntype)strtoq;
189 base = 0;
190 break;
191
192 case 'o':
193 c = CT_INT;
194 ccfn = strtouq;
195 base = 8;
196 break;
197
198 case 'u':
199 c = CT_INT;
200 ccfn = strtouq;
201 base = 10;
202 break;
203
204 case 'x':
205 flags |= PFXOK; /* enable 0x prefixing */
206 c = CT_INT;
207 ccfn = strtouq;
208 base = 16;
209 break;
210
211 case 's':
212 c = CT_STRING;
213 break;
214
215 case '[':
216 fmt = __sccl(ccltab, fmt);
217 flags |= NOSKIP;
218 c = CT_CCL;
219 break;
220
221 case 'c':
222 flags |= NOSKIP;
223 c = CT_CHAR;
224 break;
225
226 case 'p': /* pointer format is like hex */
227 flags |= POINTER | PFXOK;
228 c = CT_INT;
229 ccfn = strtouq;
230 base = 16;
231 break;
232
233 case 'n':
234 nconversions++;
235 if (flags & SUPPRESS) /* ??? */
236 continue;
237 if (flags & SHORT)
238 *__va_arg(ap, short *) = nread;
239 else if (flags & LONG)
240 *__va_arg(ap, long *) = nread;
241 else if (flags & QUAD)
242 *__va_arg(ap, quad_t *) = nread;
243 else
244 *__va_arg(ap, int *) = nread;
245 continue;
246 }
247
248 /*
249 * We have a conversion that requires input.
250 */
251 if (inr <= 0)
252 goto input_failure;
253
254 /*
255 * Consume leading white space, except for formats
256 * that suppress this.
257 */
258 if ((flags & NOSKIP) == 0) {
259 while (isspace(*inp)) {
260 nread++;
261 if (--inr > 0)
262 inp++;
263 else
264 goto input_failure;
265 }
266 /*
267 * Note that there is at least one character in
268 * the buffer, so conversions that do not set NOSKIP
269 * can no longer result in an input failure.
270 */
271 }
272
273 /*
274 * Do the conversion.
275 */
276 switch (c) {
277
278 case CT_CHAR:
279 /* scan arbitrary characters (sets NOSKIP) */
280 if (width == 0)
281 width = 1;
282 if (flags & SUPPRESS) {
283 size_t sum = 0;
284 for (;;) {
285 if ((n = inr) < width) {
286 sum += n;
287 width -= n;
288 inp += n;
289 if (sum == 0)
290 goto input_failure;
291 break;
292 } else {
293 sum += width;
294 inr -= width;
295 inp += width;
296 break;
297 }
298 }
299 nread += sum;
300 } else {
301 bcopy(inp, __va_arg(ap, char *), width);
302 inr -= width;
303 inp += width;
304 nread += width;
305 nassigned++;
306 }
307 nconversions++;
308 break;
309
310 case CT_CCL:
311 /* scan a (nonempty) character class (sets NOSKIP) */
312 if (width == 0)
313 width = (size_t)~0; /* `infinity' */
314 /* take only those things in the class */
315 if (flags & SUPPRESS) {
316 n = 0;
317 while (ccltab[(unsigned char)*inp]) {
318 n++, inr--, inp++;
319 if (--width == 0)
320 break;
321 if (inr <= 0) {
322 if (n == 0)
323 goto input_failure;
324 break;
325 }
326 }
327 if (n == 0)
328 goto match_failure;
329 } else {
330 p0 = p = __va_arg(ap, char *);
331 while (ccltab[(unsigned char)*inp]) {
332 inr--;
333 *p++ = *inp++;
334 if (--width == 0)
335 break;
336 if (inr <= 0) {
337 if (p == p0)
338 goto input_failure;
339 break;
340 }
341 }
342 n = p - p0;
343 if (n == 0)
344 goto match_failure;
345 *p = 0;
346 nassigned++;
347 }
348 nread += n;
349 nconversions++;
350 break;
351
352 case CT_STRING:
353 /* like CCL, but zero-length string OK, & no NOSKIP */
354 if (width == 0)
355 width = (size_t)~0;
356 if (flags & SUPPRESS) {
357 n = 0;
358 while (!isspace(*inp)) {
359 n++, inr--, inp++;
360 if (--width == 0)
361 break;
362 if (inr <= 0)
363 break;
364 }
365 nread += n;
366 } else {
367 p0 = p = __va_arg(ap, char *);
368 while (!isspace(*inp)) {
369 inr--;
370 *p++ = *inp++;
371 if (--width == 0)
372 break;
373 if (inr <= 0)
374 break;
375 }
376 *p = 0;
377 nread += p - p0;
378 nassigned++;
379 }
380 nconversions++;
381 continue;
382
383 case CT_INT:
384 /* scan an integer as if by strtoq/strtouq */
385 #ifdef hardway
386 if (width == 0 || width > sizeof(buf) - 1)
387 width = sizeof(buf) - 1;
388 #else
389 /* size_t is unsigned, hence this optimisation */
390 if (--width > sizeof(buf) - 2)
391 width = sizeof(buf) - 2;
392 width++;
393 #endif
394 flags |= SIGNOK | NDIGITS | NZDIGITS;
395 for (p = buf; width; width--) {
396 c = *inp;
397 /*
398 * Switch on the character; `goto ok'
399 * if we accept it as a part of number.
400 */
401 switch (c) {
402
403 /*
404 * The digit 0 is always legal, but is
405 * special. For %i conversions, if no
406 * digits (zero or nonzero) have been
407 * scanned (only signs), we will have
408 * base==0. In that case, we should set
409 * it to 8 and enable 0x prefixing.
410 * Also, if we have not scanned zero digits
411 * before this, do not turn off prefixing
412 * (someone else will turn it off if we
413 * have scanned any nonzero digits).
414 */
415 case '':
416 if (base == 0) {
417 base = 8;
418 flags |= PFXOK;
419 }
420 if (flags & NZDIGITS)
421 flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
422 else
423 flags &= ~(SIGNOK|PFXOK|NDIGITS);
424 goto ok;
425
426 /* 1 through 7 always legal */
427 case '1': case '2': case '3':
428 case '4': case '5': case '6': case '7':
429 base = basefix[base];
430 flags &= ~(SIGNOK | PFXOK | NDIGITS);
431 goto ok;
432
433 /* digits 8 and 9 ok iff decimal or hex */
434 case '8': case '9':
435 base = basefix[base];
436 if (base <= 8)
437 break; /* not legal here */
438 flags &= ~(SIGNOK | PFXOK | NDIGITS);
439 goto ok;
440
441 /* letters ok iff hex */
442 case 'A': case 'B': case 'C':
443 case 'D': case 'E': case 'F':
444 case 'a': case 'b': case 'c':
445 case 'd': case 'e': case 'f':
446 /* no need to fix base here */
447 if (base <= 10)
448 break; /* not legal here */
449 flags &= ~(SIGNOK | PFXOK | NDIGITS);
450 goto ok;
451
452 /* sign ok only as first character */
453 case '+': case '-':
454 if (flags & SIGNOK) {
455 flags &= ~SIGNOK;
456 goto ok;
457 }
458 break;
459
460 /* x ok iff flag still set & 2nd char */
461 case 'x': case 'X':
462 if (flags & PFXOK && p == buf + 1) {
463 base = 16; /* if %i */
464 flags &= ~PFXOK;
465 goto ok;
466 }
467 break;
468 }
469
470 /*
471 * If we got here, c is not a legal character
472 * for a number. Stop accumulating digits.
473 */
474 break;
475 ok:
476 /*
477 * c is legal: store it and look at the next.
478 */
479 *p++ = c;
480 if (--inr > 0)
481 inp++;
482 else
483 break; /* end of input */
484 }
485 /*
486 * If we had only a sign, it is no good; push
487 * back the sign. If the number ends in `x',
488 * it was [sign] '' 'x', so push back the x
489 * and treat it as [sign] ''.
490 */
491 if (flags & NDIGITS) {
492 if (p > buf) {
493 inp--;
494 inr++;
495 }
496 goto match_failure;
497 }
498 c = ((u_char *)p)[-1];
499 if (c == 'x' || c == 'X') {
500 --p;
501 inp--;
502 inr++;
503 }
504 if ((flags & SUPPRESS) == 0) {
505 u_quad_t res;
506
507 *p = 0;
508 res = (*ccfn)(buf, NULL, base);
509 if (flags & POINTER)
510 *__va_arg(ap, void **) =
511 (void *)(uintptr_t)res;
512 else if (flags & SHORT)
513 *__va_arg(ap, short *) = res;
514 else if (flags & LONG)
515 *__va_arg(ap, long *) = res;
516 else if (flags & QUAD)
517 *__va_arg(ap, quad_t *) = res;
518 else
519 *__va_arg(ap, int *) = res;
520 nassigned++;
521 }
522 nread += p - buf;
523 nconversions++;
524 break;
525
526 }
527 }
528 input_failure:
529 return (nconversions != 0 ? nassigned : -1);
530 match_failure:
531 return (nassigned);
532 }
533
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL if the format ends before
 * the scanset is closed.  The table has a 1 wherever characters
 * should be considered part of the scanset.
 *
 * `tab' must have room for 256 entries, one per unsigned char value.
 */
static const u_char *
__sccl(char *tab, const u_char *fmt)
{
	int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */

	for (n = 0; n < 256; n++)
		tab[n] = v;

	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).  The comparison
			 * must therefore reject n == c as well: treating
			 * `a-a' as a range would mark c + 1, and for
			 * c == 255 that would write past the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			/* fill in the range (c < n is guaranteed here) */
			while (c < n)
				tab[++c] = v;
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
625
Cache object: 02ddcf8984fa38d72e27d4b89ea53597
|