1 /*
2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
3 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
4 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elishcer,
5 * All rights reserved.
6 * Copyright (c) 1982, 1986, 1991, 1993
7 * The Regents of the University of California. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/sysctl.h>
35 #include <sys/module.h>
36 #include <sys/malloc.h>
37 #include <sys/conf.h>
38 #include <sys/bio.h>
39 #include <sys/buf.h>
40 #include <sys/vnode.h>
41 #include <sys/queue.h>
42 #include <sys/device.h>
43 #include <sys/tree.h>
44 #include <sys/syslink_rpc.h>
45 #include <sys/proc.h>
46 #include <machine/stdarg.h>
47 #include <sys/devfs.h>
48 #include <sys/dsched.h>
49
50 #include <sys/thread2.h>
51 #include <sys/mplock2.h>
52
53 static int mpsafe_writes;
54 static int mplock_writes;
55 static int mpsafe_reads;
56 static int mplock_reads;
57 static int mpsafe_strategies;
58 static int mplock_strategies;
59
60 SYSCTL_INT(_kern, OID_AUTO, mpsafe_writes, CTLFLAG_RD, &mpsafe_writes,
61 0, "mpsafe writes");
62 SYSCTL_INT(_kern, OID_AUTO, mplock_writes, CTLFLAG_RD, &mplock_writes,
63 0, "non-mpsafe writes");
64 SYSCTL_INT(_kern, OID_AUTO, mpsafe_reads, CTLFLAG_RD, &mpsafe_reads,
65 0, "mpsafe reads");
66 SYSCTL_INT(_kern, OID_AUTO, mplock_reads, CTLFLAG_RD, &mplock_reads,
67 0, "non-mpsafe reads");
68 SYSCTL_INT(_kern, OID_AUTO, mpsafe_strategies, CTLFLAG_RD, &mpsafe_strategies,
69 0, "mpsafe strategies");
70 SYSCTL_INT(_kern, OID_AUTO, mplock_strategies, CTLFLAG_RD, &mplock_strategies,
71 0, "non-mpsafe strategies");
72
73 /*
74 * system link descriptors identify the command in the
75 * arguments structure.
76 */
77 #define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc)
78
79 #define DEVOP_DESC_INIT(name) \
80 struct syslink_desc DDESCNAME(name) = { \
81 __offsetof(struct dev_ops, __CONCAT(d_, name)), \
82 #name }
83
84 DEVOP_DESC_INIT(default);
85 DEVOP_DESC_INIT(open);
86 DEVOP_DESC_INIT(close);
87 DEVOP_DESC_INIT(read);
88 DEVOP_DESC_INIT(write);
89 DEVOP_DESC_INIT(ioctl);
90 DEVOP_DESC_INIT(dump);
91 DEVOP_DESC_INIT(psize);
92 DEVOP_DESC_INIT(mmap);
93 DEVOP_DESC_INIT(mmap_single);
94 DEVOP_DESC_INIT(strategy);
95 DEVOP_DESC_INIT(kqfilter);
96 DEVOP_DESC_INIT(revoke);
97 DEVOP_DESC_INIT(clone);
98
99 /*
100 * Misc default ops
101 */
102 struct dev_ops dead_dev_ops;
103
104 struct dev_ops default_dev_ops = {
105 { "null" },
106 .d_default = NULL, /* must be NULL */
107 .d_open = noopen,
108 .d_close = noclose,
109 .d_read = noread,
110 .d_write = nowrite,
111 .d_ioctl = noioctl,
112 .d_mmap = nommap,
113 .d_mmap_single = nommap_single,
114 .d_strategy = nostrategy,
115 .d_dump = nodump,
116 .d_psize = nopsize,
117 .d_kqfilter = nokqfilter,
118 .d_revoke = norevoke,
119 .d_clone = noclone
120 };
121
122 static __inline
123 int
124 dev_needmplock(cdev_t dev)
125 {
126 return((dev->si_ops->head.flags & D_MPSAFE) == 0);
127 }
128
129 /************************************************************************
130 * GENERAL DEVICE API FUNCTIONS *
131 ************************************************************************
132 *
133 * The MPSAFEness of these depends on dev->si_ops->head.flags
134 */
135 int
136 dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred)
137 {
138 struct dev_open_args ap;
139 int needmplock = dev_needmplock(dev);
140 int error;
141
142 ap.a_head.a_desc = &dev_open_desc;
143 ap.a_head.a_dev = dev;
144 ap.a_oflags = oflags;
145 ap.a_devtype = devtype;
146 ap.a_cred = cred;
147
148 if (needmplock)
149 get_mplock();
150 error = dev->si_ops->d_open(&ap);
151 if (needmplock)
152 rel_mplock();
153 return (error);
154 }
155
156 int
157 dev_dclose(cdev_t dev, int fflag, int devtype)
158 {
159 struct dev_close_args ap;
160 int needmplock = dev_needmplock(dev);
161 int error;
162
163 ap.a_head.a_desc = &dev_close_desc;
164 ap.a_head.a_dev = dev;
165 ap.a_fflag = fflag;
166 ap.a_devtype = devtype;
167
168 if (needmplock)
169 get_mplock();
170 error = dev->si_ops->d_close(&ap);
171 if (needmplock)
172 rel_mplock();
173 return (error);
174 }
175
176 int
177 dev_dread(cdev_t dev, struct uio *uio, int ioflag)
178 {
179 struct dev_read_args ap;
180 int needmplock = dev_needmplock(dev);
181 int error;
182
183 ap.a_head.a_desc = &dev_read_desc;
184 ap.a_head.a_dev = dev;
185 ap.a_uio = uio;
186 ap.a_ioflag = ioflag;
187
188 if (needmplock) {
189 get_mplock();
190 ++mplock_reads;
191 } else {
192 ++mpsafe_reads;
193 }
194 error = dev->si_ops->d_read(&ap);
195 if (needmplock)
196 rel_mplock();
197 if (error == 0)
198 dev->si_lastread = time_uptime;
199 return (error);
200 }
201
202 int
203 dev_dwrite(cdev_t dev, struct uio *uio, int ioflag)
204 {
205 struct dev_write_args ap;
206 int needmplock = dev_needmplock(dev);
207 int error;
208
209 dev->si_lastwrite = time_uptime;
210 ap.a_head.a_desc = &dev_write_desc;
211 ap.a_head.a_dev = dev;
212 ap.a_uio = uio;
213 ap.a_ioflag = ioflag;
214
215 if (needmplock) {
216 get_mplock();
217 ++mplock_writes;
218 } else {
219 ++mpsafe_writes;
220 }
221 error = dev->si_ops->d_write(&ap);
222 if (needmplock)
223 rel_mplock();
224 return (error);
225 }
226
227 int
228 dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred,
229 struct sysmsg *msg)
230 {
231 struct dev_ioctl_args ap;
232 int needmplock = dev_needmplock(dev);
233 int error;
234
235 ap.a_head.a_desc = &dev_ioctl_desc;
236 ap.a_head.a_dev = dev;
237 ap.a_cmd = cmd;
238 ap.a_data = data;
239 ap.a_fflag = fflag;
240 ap.a_cred = cred;
241 ap.a_sysmsg = msg;
242
243 if (needmplock)
244 get_mplock();
245 error = dev->si_ops->d_ioctl(&ap);
246 if (needmplock)
247 rel_mplock();
248 return (error);
249 }
250
251 int
252 dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot)
253 {
254 struct dev_mmap_args ap;
255 int needmplock = dev_needmplock(dev);
256 int error;
257
258 ap.a_head.a_desc = &dev_mmap_desc;
259 ap.a_head.a_dev = dev;
260 ap.a_offset = offset;
261 ap.a_nprot = nprot;
262
263 if (needmplock)
264 get_mplock();
265 error = dev->si_ops->d_mmap(&ap);
266 if (needmplock)
267 rel_mplock();
268
269 if (error == 0)
270 return(ap.a_result);
271 return(-1);
272 }
273
274 int
275 dev_dmmap_single(cdev_t dev, vm_ooffset_t *offset, vm_size_t size,
276 struct vm_object **object, int nprot)
277 {
278 struct dev_mmap_single_args ap;
279 int needmplock = dev_needmplock(dev);
280 int error;
281
282 ap.a_head.a_desc = &dev_mmap_single_desc;
283 ap.a_head.a_dev = dev;
284 ap.a_offset = offset;
285 ap.a_size = size;
286 ap.a_object = object;
287 ap.a_nprot = nprot;
288
289 if (needmplock)
290 get_mplock();
291 error = dev->si_ops->d_mmap_single(&ap);
292 if (needmplock)
293 rel_mplock();
294
295 return(error);
296 }
297
298 int
299 dev_dclone(cdev_t dev)
300 {
301 struct dev_clone_args ap;
302 int needmplock = dev_needmplock(dev);
303 int error;
304
305 ap.a_head.a_desc = &dev_clone_desc;
306 ap.a_head.a_dev = dev;
307
308 if (needmplock)
309 get_mplock();
310 error = dev->si_ops->d_clone(&ap);
311 if (needmplock)
312 rel_mplock();
313 return (error);
314 }
315
316 int
317 dev_drevoke(cdev_t dev)
318 {
319 struct dev_revoke_args ap;
320 int needmplock = dev_needmplock(dev);
321 int error;
322
323 ap.a_head.a_desc = &dev_revoke_desc;
324 ap.a_head.a_dev = dev;
325
326 if (needmplock)
327 get_mplock();
328 error = dev->si_ops->d_revoke(&ap);
329 if (needmplock)
330 rel_mplock();
331
332 return (error);
333 }
334
335 /*
336 * Core device strategy call, used to issue I/O on a device. There are
337 * two versions, a non-chained version and a chained version. The chained
338 * version reuses a BIO set up by vn_strategy(). The only difference is
339 * that, for now, we do not push a new tracking structure when chaining
340 * from vn_strategy. XXX this will ultimately have to change.
341 */
342 void
343 dev_dstrategy(cdev_t dev, struct bio *bio)
344 {
345 struct dev_strategy_args ap;
346 struct bio_track *track;
347 int needmplock = dev_needmplock(dev);
348
349 ap.a_head.a_desc = &dev_strategy_desc;
350 ap.a_head.a_dev = dev;
351 ap.a_bio = bio;
352
353 KKASSERT(bio->bio_track == NULL);
354 KKASSERT(bio->bio_buf->b_cmd != BUF_CMD_DONE);
355 if (bio->bio_buf->b_cmd == BUF_CMD_READ)
356 track = &dev->si_track_read;
357 else
358 track = &dev->si_track_write;
359 bio_track_ref(track);
360 bio->bio_track = track;
361
362 if (dsched_is_clear_buf_priv(bio->bio_buf))
363 dsched_new_buf(bio->bio_buf);
364
365 KKASSERT((bio->bio_flags & BIO_DONE) == 0);
366 if (needmplock) {
367 get_mplock();
368 ++mplock_strategies;
369 } else {
370 ++mpsafe_strategies;
371 }
372 (void)dev->si_ops->d_strategy(&ap);
373 if (needmplock)
374 rel_mplock();
375 }
376
377 void
378 dev_dstrategy_chain(cdev_t dev, struct bio *bio)
379 {
380 struct dev_strategy_args ap;
381 int needmplock = dev_needmplock(dev);
382
383 ap.a_head.a_desc = &dev_strategy_desc;
384 ap.a_head.a_dev = dev;
385 ap.a_bio = bio;
386
387 KKASSERT(bio->bio_track != NULL);
388 KKASSERT((bio->bio_flags & BIO_DONE) == 0);
389 if (needmplock)
390 get_mplock();
391 (void)dev->si_ops->d_strategy(&ap);
392 if (needmplock)
393 rel_mplock();
394 }
395
396 /*
397 * note: the disk layer is expected to set count, blkno, and secsize before
398 * forwarding the message.
399 */
400 int
401 dev_ddump(cdev_t dev, void *virtual, vm_offset_t physical, off_t offset,
402 size_t length)
403 {
404 struct dev_dump_args ap;
405 int needmplock = dev_needmplock(dev);
406 int error;
407
408 ap.a_head.a_desc = &dev_dump_desc;
409 ap.a_head.a_dev = dev;
410 ap.a_count = 0;
411 ap.a_blkno = 0;
412 ap.a_secsize = 0;
413 ap.a_virtual = virtual;
414 ap.a_physical = physical;
415 ap.a_offset = offset;
416 ap.a_length = length;
417
418 if (needmplock)
419 get_mplock();
420 error = dev->si_ops->d_dump(&ap);
421 if (needmplock)
422 rel_mplock();
423 return (error);
424 }
425
426 int64_t
427 dev_dpsize(cdev_t dev)
428 {
429 struct dev_psize_args ap;
430 int needmplock = dev_needmplock(dev);
431 int error;
432
433 ap.a_head.a_desc = &dev_psize_desc;
434 ap.a_head.a_dev = dev;
435
436 if (needmplock)
437 get_mplock();
438 error = dev->si_ops->d_psize(&ap);
439 if (needmplock)
440 rel_mplock();
441
442 if (error == 0)
443 return (ap.a_result);
444 return(-1);
445 }
446
447 /*
448 * Pass-thru to the device kqfilter.
449 *
450 * NOTE: We explicitly preset a_result to 0 so d_kqfilter() functions
451 * which return 0 do not have to bother setting a_result.
452 */
453 int
454 dev_dkqfilter(cdev_t dev, struct knote *kn)
455 {
456 struct dev_kqfilter_args ap;
457 int needmplock = dev_needmplock(dev);
458 int error;
459
460 ap.a_head.a_desc = &dev_kqfilter_desc;
461 ap.a_head.a_dev = dev;
462 ap.a_kn = kn;
463 ap.a_result = 0;
464
465 if (needmplock)
466 get_mplock();
467 error = dev->si_ops->d_kqfilter(&ap);
468 if (needmplock)
469 rel_mplock();
470
471 if (error == 0)
472 return(ap.a_result);
473 return(ENODEV);
474 }
475
476 /************************************************************************
477 * DEVICE HELPER FUNCTIONS *
478 ************************************************************************/
479
480 /*
481 * MPSAFE
482 */
483 int
484 dev_drefs(cdev_t dev)
485 {
486 return(dev->si_sysref.refcnt);
487 }
488
489 /*
490 * MPSAFE
491 */
492 const char *
493 dev_dname(cdev_t dev)
494 {
495 return(dev->si_ops->head.name);
496 }
497
498 /*
499 * MPSAFE
500 */
501 int
502 dev_dflags(cdev_t dev)
503 {
504 return(dev->si_ops->head.flags);
505 }
506
507 /*
508 * MPSAFE
509 */
510 int
511 dev_dmaj(cdev_t dev)
512 {
513 return(dev->si_ops->head.maj);
514 }
515
516 /*
517 * Used when forwarding a request through layers. The caller adjusts
518 * ap->a_head.a_dev and then calls this function.
519 */
520 int
521 dev_doperate(struct dev_generic_args *ap)
522 {
523 int (*func)(struct dev_generic_args *);
524 int needmplock = dev_needmplock(ap->a_dev);
525 int error;
526
527 func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset);
528
529 if (needmplock)
530 get_mplock();
531 error = func(ap);
532 if (needmplock)
533 rel_mplock();
534
535 return (error);
536 }
537
538 /*
539 * Used by the console intercept code only. Issue an operation through
540 * a foreign ops structure allowing the ops structure associated
541 * with the device to remain intact.
542 */
543 int
544 dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap)
545 {
546 int (*func)(struct dev_generic_args *);
547 int needmplock = ((ops->head.flags & D_MPSAFE) == 0);
548 int error;
549
550 func = *(void **)((char *)ops + ap->a_desc->sd_offset);
551
552 if (needmplock)
553 get_mplock();
554 error = func(ap);
555 if (needmplock)
556 rel_mplock();
557
558 return (error);
559 }
560
561 /*
562 * Convert a template dev_ops into the real thing by filling in
563 * uninitialized fields.
564 */
565 void
566 compile_dev_ops(struct dev_ops *ops)
567 {
568 int offset;
569
570 for (offset = offsetof(struct dev_ops, dev_ops_first_field);
571 offset <= offsetof(struct dev_ops, dev_ops_last_field);
572 offset += sizeof(void *)
573 ) {
574 void **func_p = (void **)((char *)ops + offset);
575 void **def_p = (void **)((char *)&default_dev_ops + offset);
576 if (*func_p == NULL) {
577 if (ops->d_default)
578 *func_p = ops->d_default;
579 else
580 *func_p = *def_p;
581 }
582 }
583 }
584
585 /************************************************************************
586 * MAJOR/MINOR SPACE FUNCTION *
587 ************************************************************************/
588
589 /*
590 * This makes a dev_ops entry visible to userland (e.g /dev/<blah>).
591 *
592 * Disk devices typically register their major, e.g. 'ad0', and then call
593 * into the disk label management code which overloads its own onto e.g. 'ad0'
594 * to support all the various slice and partition combinations.
595 *
596 * The mask/match supplied in this call are a full 32 bits and the same
597 * mask and match must be specified in a later dev_ops_remove() call to
598 * match this add. However, the match value for the minor number should never
599 * have any bits set in the major number's bit range (8-15). The mask value
600 * may be conveniently specified as -1 without creating any major number
601 * interference.
602 */
603
604 static
605 int
606 rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b)
607 {
608 if (a->maj < b->maj)
609 return(-1);
610 else if (a->maj > b->maj)
611 return(1);
612 return(0);
613 }
614
615 RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare, int, maj);
616
617 struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead);
618
/*
 * Hand the ops vector to devfs_destroy_dev_by_ops() with a minor of -1
 * and return its result.
 */
int
dev_ops_remove_all(struct dev_ops *ops)
{
	int rv;

	rv = devfs_destroy_dev_by_ops(ops, -1);
	return (rv);
}
624
/*
 * Hand the ops vector and the given minor number to
 * devfs_destroy_dev_by_ops() and return its result.
 */
int
dev_ops_remove_minor(struct dev_ops *ops, int minor)
{
	int rv;

	rv = devfs_destroy_dev_by_ops(ops, minor);
	return (rv);
}
630
631 struct dev_ops *
632 dev_ops_intercept(cdev_t dev, struct dev_ops *iops)
633 {
634 struct dev_ops *oops = dev->si_ops;
635
636 compile_dev_ops(iops);
637 iops->head.maj = oops->head.maj;
638 iops->head.data = oops->head.data;
639 iops->head.flags = oops->head.flags;
640 dev->si_ops = iops;
641 dev->si_flags |= SI_INTERCEPTED;
642
643 return (oops);
644 }
645
646 void
647 dev_ops_restore(cdev_t dev, struct dev_ops *oops)
648 {
649 struct dev_ops *iops = dev->si_ops;
650
651 dev->si_ops = oops;
652 dev->si_flags &= ~SI_INTERCEPTED;
653 iops->head.maj = 0;
654 iops->head.data = NULL;
655 iops->head.flags = 0;
656 }
657
658 /************************************************************************
659 * DEFAULT DEV OPS FUNCTIONS *
660 ************************************************************************/
661
662
663 /*
664 * Unsupported devswitch functions (e.g. for writing to read-only device).
665 * XXX may belong elsewhere.
666 */
/*
 * Default revoke handler: does nothing and reports success.
 */
int
norevoke(struct dev_revoke_args *ap)
{
	(void)ap;		/* take no action */
	return (0);
}
673
/*
 * Default clone handler: does nothing and returns 0, which allows the
 * clone.
 */
int
noclone(struct dev_clone_args *ap)
{
	(void)ap;		/* take no action */
	return (0);		/* allow the clone */
}
680
/*
 * Default open handler: the operation is unsupported, always ENODEV.
 */
int
noopen(struct dev_open_args *ap)
{
	(void)ap;
	return (ENODEV);
}
686
/*
 * Default close handler: the operation is unsupported, always ENODEV.
 */
int
noclose(struct dev_close_args *ap)
{
	(void)ap;
	return (ENODEV);
}
692
/*
 * Default read handler: the operation is unsupported, always ENODEV.
 */
int
noread(struct dev_read_args *ap)
{
	(void)ap;
	return (ENODEV);
}
698
/*
 * Default write handler: the operation is unsupported, always ENODEV.
 */
int
nowrite(struct dev_write_args *ap)
{
	(void)ap;
	return (ENODEV);
}
704
/*
 * Default ioctl handler: the operation is unsupported, always ENODEV.
 */
int
noioctl(struct dev_ioctl_args *ap)
{
	(void)ap;
	return (ENODEV);
}
710
/*
 * Default kqfilter handler: the operation is unsupported, always ENODEV.
 */
int
nokqfilter(struct dev_kqfilter_args *ap)
{
	(void)ap;
	return (ENODEV);
}
716
/*
 * Default mmap handler: the operation is unsupported, always ENODEV.
 */
int
nommap(struct dev_mmap_args *ap)
{
	(void)ap;
	return (ENODEV);
}
722
/*
 * Default mmap_single handler: the operation is unsupported, always
 * ENODEV.
 */
int
nommap_single(struct dev_mmap_single_args *ap)
{
	(void)ap;
	return (ENODEV);
}
728
729 int
730 nostrategy(struct dev_strategy_args *ap)
731 {
732 struct bio *bio = ap->a_bio;
733
734 bio->bio_buf->b_flags |= B_ERROR;
735 bio->bio_buf->b_error = EOPNOTSUPP;
736 biodone(bio);
737 return(0);
738 }
739
740 int
741 nopsize(struct dev_psize_args *ap)
742 {
743 ap->a_result = 0;
744 return(0);
745 }
746
/*
 * Default dump handler: the operation is unsupported, always ENODEV.
 */
int
nodump(struct dev_dump_args *ap)
{
	(void)ap;
	return (ENODEV);
}
752
/*
 * Open handler that always succeeds without inspecting its arguments.
 *
 * XXX this is probably bogus.  Any device that uses it isn't checking
 * the minor number.
 */
int
nullopen(struct dev_open_args *ap)
{
	(void)ap;
	return (0);
}
762
/*
 * Close handler that always succeeds without inspecting its arguments.
 */
int
nullclose(struct dev_close_args *ap)
{
	(void)ap;
	return (0);
}
768
Cache object: 6db3352490a17ca0cdf64669ca7edea7
|