1 /*
2 * Mach Operating System
3 * Copyright (c) 1993,1992,1991,1990,1989 Carnegie Mellon University
4 * All Rights Reserved.
5 *
6 * Permission to use, copy, modify and distribute this software and its
7 * documentation is hereby granted, provided that both the copyright
8 * notice and this permission notice appear in all copies of the
9 * software, derivative works or modified versions, and any portions
10 * thereof, and that both notices appear in supporting documentation.
11 *
12 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
13 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
14 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
15 *
16 * Carnegie Mellon requests users of this software to return to
17 *
18 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
19 * School of Computer Science
20 * Carnegie Mellon University
21 * Pittsburgh PA 15213-3890
22 *
23 * any improvements or extensions that they make and grant Carnegie Mellon
24 * the rights to redistribute these changes.
25 */
26 /*
27 * HISTORY
28 * $Log: ds_routines.c,v $
29 * Revision 2.36 93/11/17 16:32:53 dbg
30 * Declare continuation functions as returning 'no_return'.
31 * Added AST_KERNEL_CHECK to io_done thread loop. Added ANSI
32 * function prototypes.
33 * [93/05/21 dbg]
34 *
35 * Revision 2.35 93/08/10 15:10:38 mrt
36 * Added support for device write kernel traps.
37 * [93/06/11 cmaeda]
38 *
39 * Revision 2.34 93/05/15 18:53:00 mrt
40 * machparam.h -> machspl.h
41 *
42 * Revision 2.33 93/01/14 17:27:00 danner
43 * 64bit cleanup.
44 * [92/11/30 af]
45 *
46 * Return D_INVALID_SIZE instead of panicking in write routines when
47 * there is no data. This is the case when device_write() is given
48 * zero as the data length.
49 * [92/10/08 jvh]
50 *
51 * Revision 2.32 92/08/03 17:33:33 jfriedl
52 * removed silly prototypes
53 * [92/08/02 jfriedl]
54 *
55 * Revision 2.31 92/05/21 17:09:22 jfriedl
56 * Cleanup to quiet gcc warnings.
57 * [92/05/20 jfriedl]
58 *
59 * Revision 2.30 92/04/01 19:31:44 rpd
60 * Calculate minimum transfer size in device_write_get and
61 * pass it to kmem_io_map_copyout. This makes large block
62 * sizes work with tapes.
63 *
64 * Increase DEVICE_IO_MAP_SIZE to 2 megs
65 * [92/03/03 13:53:25 jeffreyh]
66 *
67 * Revision 2.29 92/03/10 16:25:21 jsb
68 * Check protection argument to device_map.
69 * [92/02/22 17:03:11 dlb]
70 *
71 * Merged in norma branch changes as of NORMA_MK7:
72 * Added ds_notify and ds_no_senders routines in preparation for using
73 * no-senders notifications to close unreferenced devices.
74 * Use remote_device() instead of norma_get_special_port().
75 *
76 * Revision 2.28 92/03/05 18:54:14 rpd
77 * Undid norma changes that were inadvertently picked up in 2.27.
78 * [92/03/05 rpd]
79 *
80 * Revision 2.27 92/02/23 19:49:11 elf
81 * Change panic message to match previous change.
82 * [92/02/21 10:11:55 dlb]
83 *
84 * Use page lists for device reads going to default pager.
85 * [92/02/20 15:16:19 dlb]
86 *
87 * Temporary change to return data read from devices as a page
88 * list instead of an entry list. The keep_wired logic has
89 * to be updated to convert the default pager to this mechanism
90 * when making this change permanent.
91 * [92/02/19 17:36:50 dlb]
92 *
93 * Revision 2.26 91/12/10 13:25:33 jsb
94 * Change ds_read_done to call vm_map_copyin_page_list directly
95 * if destination of reply is remote.
96 *
97 * Revision 2.25 91/11/14 16:52:41 rpd
98 * Replaced master_device_port_at_node call with
99 * call to norma_get_special_port.
100 * [91/11/00 00:00:00 jsb]
101 *
102 * Revision 2.24 91/10/09 16:05:37 af
103 * Fixed device_write_get to check kmem_io_map_copyout return code.
104 * Enabled wait_for_space in device_io_map.
105 * [91/09/17 rpd]
106 *
107 * Revision 2.23 91/09/12 16:37:22 bohman
108 * Changed device_write_inband() to not require a reply port.
109 * Fixed device_write_get() to allow inband calls. In this case,
110 * an io_inband buffer is allocated and the data is copied into it.
111 * Fixed device_write_dealloc() to correctly deallocate io_inband
112 * buffers.
113 * Fixed ds_read_done() to free io_inband buffers only if one was
114 * actually allocated.
115 * [91/09/11 17:06:50 bohman]
116 *
117 * Revision 2.22 91/08/28 11:11:16 jsb
118 * From rpd: increased DEVICE_IO_MAP_SIZE; documented why
119 * device_write_get cannot be used for in-band data.
120 * [91/08/22 15:28:19 jsb]
121 *
122 * In device_write_get, always set the wait parameter to something;
123 * by default, it's FALSE.
124 * [91/08/16 14:19:31 jsb]
125 *
126 * Support synchronous wait by writers when vm continuations are present.
127 * Optimize device_write_dealloc. Fix MP bug in iowait/iodone.
128 * Convert from bsize to dev_info entry in device op vector.
129 * [91/08/12 17:27:15 dlb]
130 *
131 * Page lists working reliably: delete old code.
132 * [91/08/06 17:16:09 dlb]
133 *
134 * Clean up and add continuation support for device_write page lists.
135 * [91/08/05 17:30:38 dlb]
136 *
137 * First version of support for vm page lists in device_write.
138 * Still needs cleanup and continuation support. Old code left
139 * under #ifdef 0.
140 * [91/07/31 14:42:24 dlb]
141 *
142 * Revision 2.21 91/08/24 11:55:43 af
143 * Spls definitions.
144 * [91/08/02 02:44:45 af]
145 *
146 * Revision 2.20 91/08/03 18:17:33 jsb
147 * Device_write_get doesn't need to do anything for loaned ior's.
148 * [91/08/02 12:13:15 jsb]
149 *
150 * Create the right flavor of copy object in ds_read_done.
151 * Replace NORMA_BOOT conditionals with NORMA_DEVICE.
152 * Free loaned ior's directly in iodone().
153 * [91/07/27 22:45:09 jsb]
154 *
155 * Revision 2.19 91/06/25 10:26:57 rpd
156 * Changed mach_port_t to ipc_port_t where appropriate.
157 * Removed device_reply_search and device_reply_terminate.
158 * [91/05/28 rpd]
159 *
160 * Revision 2.18 91/06/17 15:43:58 jsb
161 * Renamed NORMA conditionals.
162 * [91/06/17 09:58:59 jsb]
163 *
164 * Revision 2.17 91/05/18 14:29:52 rpd
165 * Added vm/memory_object.h.
166 * [91/03/22 rpd]
167 *
168 * Revision 2.16 91/05/14 15:47:34 mrt
169 * Correcting copyright
170 *
171 * Revision 2.15 91/03/16 14:43:02 rpd
172 * Updated for new kmem_alloc interface.
173 * [91/03/03 rpd]
174 * Added io_done_thread_continue.
175 * [91/02/13 rpd]
176 * Removed thread_swappable.
177 * [91/01/18 rpd]
178 *
179 * Revision 2.14 91/02/05 17:09:25 mrt
180 * Changed to new Mach copyright
181 * [91/01/31 17:28:57 mrt]
182 *
183 * Revision 2.13 91/01/08 15:09:38 rpd
184 * Added continuation argument to thread_block.
185 * [90/12/08 rpd]
186 *
187 * Revision 2.12 90/12/14 10:59:39 jsb
188 * Moved mechanism for mapping global to local device names
189 * to the machine-dependent function dev_forward_name.
190 * [90/12/14 09:37:18 jsb]
191 *
192 * Added device request forwarding to support inter-node device access.
193 * [90/12/14 08:30:53 jsb]
194 *
195 * Revision 2.11 90/10/25 14:44:32 rwd
196 * Let ds_device_write proceed w/o a valid reply port. This is used
197 * by the unix server ether_output routine.
198 * [90/10/22 rwd]
199 * Fixed ds_write_done to use ds_device_write_inband_reply
200 * when appropriate.
201 * [90/10/18 rpd]
202 * Check for invalid reply ports.
203 * [90/10/17 rwd]
204 *
205 * Revision 2.10 90/09/09 14:31:27 rpd
206 * Use decl_simple_lock_data.
207 * [90/08/30 rpd]
208 *
209 * Revision 2.9 90/06/02 14:48:00 rpd
210 * Cleaned up check for default pager in ds_read_done.
211 * [90/04/29 rpd]
212 *
213 * Fixed ds_read_done to leave memory wired if the read reply
214 * is being sent to the default pager.
215 * [90/04/05 rpd]
216 * Converted to new IPC. Purged MACH_XP_FPD.
217 * [90/03/26 21:55:28 rpd]
218 *
219 * Revision 2.8 90/02/22 20:02:12 dbg
220 * Use vm_map_copy routines.
221 * [90/01/25 dbg]
222 *
223 * Revision 2.7 90/01/11 11:42:01 dbg
224 * De-lint.
225 * [89/12/06 dbg]
226 *
227 * Revision 2.6 89/11/29 14:08:54 af
228 * iodone() should set the IO_DONE flag.
229 * [89/11/03 16:58:16 af]
230 *
231 * Revision 2.5 89/11/14 10:28:19 dbg
232 * Make read and write handle zero-length transfers correctly (used
233 * to implement select).
234 * [89/10/27 dbg]
235 *
236 * Revision 2.4 89/09/08 11:24:17 dbg
237 * Converted to run in kernel context.
238 * Add list of wired pages to tail of IOR allocated for write.
239 * Reorganized file: moved open/close to beginning, map to end.
240 * [89/08/23 dbg]
241 *
242 * Revision 2.3 89/08/31 16:18:46 rwd
243 * Added ds_read_inband and support
244 * [89/08/15 rwd]
245 *
246 * Revision 2.2 89/08/05 16:06:39 rwd
247 * Added ds_write_inband for use by tty and ds_device_map_device.
248 * [89/07/17 rwd]
249 *
250 * 12-Apr-89 David Golub (dbg) at Carnegie-Mellon University
251 * Added device_reply_terminate.
252 *
253 * 3-Mar-89 David Golub (dbg) at Carnegie-Mellon University
254 * Created.
255 *
256 */
257 /*
258 * Author: David B. Golub, Carnegie Mellon University
259 * Date: 3/89
260 */
261
262 #include <norma_device.h>
263
264 #include <mach/boolean.h>
265 #include <mach/kern_return.h>
266 #include <mach/mig_errors.h>
267 #include <mach/port.h>
268 #include <mach/vm_param.h>
269 #include <mach/notify.h>
270 #include <machine/machspl.h> /* spl definitions */
271
272 #include <ipc/ipc_port.h>
273 #include <ipc/ipc_space.h>
274
275 #include <kern/ast.h>
276 #include <kern/counters.h>
277 #include <kern/kern_io.h>
278 #include <kern/memory.h>
279 #include <kern/queue.h>
280 #include <kern/stack.h>
281 #include <kern/zalloc.h>
282 #include <kern/thread.h>
283 #include <kern/task.h>
284 #include <kern/sched_prim.h>
285
286 #include <vm/memory_object.h>
287 #include <vm/vm_map.h>
288 #include <vm/vm_kern.h>
289 #include <vm/vm_user.h>
290
291 #include <device/device_types.h>
292 #include <device/dev_hdr.h>
293 #include <device/conf.h>
294 #include <device/io_req.h>
295 #include <device/ds_routines.h>
296 #include <device/net_status.h>
297 #include <device/device_port.h>
298 #include <device/device_reply.h>
299
300 #include <machine/machspl.h>
301
302 /*
303 * Places to put memory being read to or written from.
304 */
305 vm_map_t device_io_map; /* for out-of-line data */
306 zone_t io_inband_zone; /* for in-line data */
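/*
 * device_io_map receives the wired, kernel-mapped copies of out-of-line
 * write data set up by device_write_get() and released again by
 * device_write_dealloc().  io_inband_zone supplies the small fixed-size
 * buffers used by the *_inband operations (and by device_read_alloc()
 * for inband reads).
 */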
307
308 io_return_t
309 ds_device_open(
310 ipc_port_t open_port,
311 ipc_port_t reply_port,
312 mach_msg_type_name_t reply_port_type,
313 dev_mode_t mode,
314 char * name,
315 device_t *device_p) /* out */
316 {
317 register device_t device;
318 register kern_return_t result;
319 register io_req_t ior;
320 ipc_port_t notify;
321
322 /*
323 * Open must be called on the master device port.
324 */
325 if (open_port != master_device_port)
326 return D_INVALID_OPERATION;
327
328 /*
329 * There must be a reply port.
330 */
331 if (!IP_VALID(reply_port)) {
332 printf("ds_* invalid reply port\n");
333 Debugger("ds_* reply_port");
334 return MIG_NO_REPLY; /* no sense in doing anything */
335 }
336
337 #if NORMA_DEVICE
338 /*
339 * Map global device name to <node> + local device name.
340 */
341 if (name[0] != '<') {
342 char namebuf[64];
343 extern char *dev_forward_name(char *, char *, unsigned int);
344
345 name = dev_forward_name(name, namebuf, sizeof(namebuf));
346 }
347 /*
348 * Look for explicit node specifier, e.g., <2>sd0a.
349 	 * If it names a remote node, forward the request to that node's
350 	 * device server; if it names this node, strip '<n>' and open locally.
351 *
352 * XXX should handle send-right reply_port as well as send-once XXX
353 */
354 if (name[0] == '<') {
355 char *n;
356 int node = 0;
357
358 for (n = &name[1]; *n != '>'; n++) {
359 			if (*n >= '0' && *n <= '9') {
360 				node = 10 * node + (*n - '0');
361 } else {
362 return D_NO_SUCH_DEVICE;
363 }
364 }
365 if (node == node_self()) {
366 name = &n[1]; /* skip trailing '>' */
367 } else {
368 forward_device_open_send(remote_device(node),
369 reply_port, mode, name);
370 return MIG_NO_REPLY;
371 }
372 }
373 #endif /* NORMA_DEVICE */
374
375 /*
376 * Find the device.
377 */
378 device = device_lookup(name);
379 if (device == DEVICE_NULL)
380 return D_NO_SUCH_DEVICE;
381
382 /*
383 * If the device is being opened or closed,
384 * wait for that operation to finish.
385 */
386 device_lock(device);
387 while (device->state == DEV_STATE_OPENING ||
388 device->state == DEV_STATE_CLOSING) {
389 device->io_wait = TRUE;
390 thread_sleep((event_t)device, simple_lock_addr(device->lock), TRUE);
391 device_lock(device);
392 }
393
394 /*
395 * If the device is already open, increment the open count
396 * and return.
397 */
398 if (device->state == DEV_STATE_OPEN) {
399
400 if (device->flag & D_EXCL_OPEN) {
401 /*
402 * Cannot open a second time.
403 */
404 device_unlock(device);
405 device_deallocate(device);
406 return D_ALREADY_OPEN;
407 }
408
409 device->open_count++;
410 device_unlock(device);
411 *device_p = device;
412 return D_SUCCESS;
413 /*
414 * Return deallocates device reference while acquiring
415 * port.
416 */
417 }
418
419 /*
420 * Allocate the device port and register the device before
421 * opening it.
422 */
423 device->state = DEV_STATE_OPENING;
424 device_unlock(device);
425
426 /*
427 * Allocate port, keeping a reference for it.
428 */
429 device->port = ipc_port_alloc_kernel();
430 if (device->port == IP_NULL) {
431 device_lock(device);
432 device->state = DEV_STATE_INIT;
433 device->port = IP_NULL;
434 if (device->io_wait) {
435 device->io_wait = FALSE;
436 thread_wakeup((event_t)device);
437 }
438 device_unlock(device);
439 device_deallocate(device);
440 return KERN_RESOURCE_SHORTAGE;
441 }
442
443 dev_port_enter(device);
444
445 /*
446 * Request no-senders notifications on device port.
447 */
448 notify = ipc_port_make_sonce(device->port);
449 ip_lock(device->port);
450 	ipc_port_nsrequest(device->port, 1, notify, &notify);
451 assert(notify == IP_NULL);
452
453 /*
454 * Open the device.
455 */
456 io_req_alloc(ior, 0);
457
458 ior->io_device = device;
459 ior->io_unit = device->dev_number;
460 ior->io_op = IO_OPEN | IO_CALL;
461 ior->io_mode = mode;
462 ior->io_error = 0;
463 ior->io_done = ds_open_done;
464 ior->io_reply_port = reply_port;
465 ior->io_reply_port_type = reply_port_type;
466
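	/*
	 * If the driver's d_open returns D_IO_QUEUED, the reply is
	 * deferred: the driver later calls iodone() on this ior and the
	 * io_done thread runs ds_open_done, which sends the reply and
	 * allows the ior to be freed.
	 */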
467 result = (*device->dev_ops->d_open)(device->dev_number, (int)mode, ior);
468 if (result == D_IO_QUEUED)
469 return MIG_NO_REPLY;
470
471 /*
472 * Return result via ds_open_done.
473 */
474 ior->io_error = result;
475 (void) ds_open_done(ior);
476
477 io_req_free(ior);
478
479 return MIG_NO_REPLY; /* reply already sent */
480 }
481
482 boolean_t
483 ds_open_done(
484 register io_req_t ior)
485 {
486 kern_return_t result;
487 register device_t device;
488
489 device = ior->io_device;
490 result = ior->io_error;
491
492 if (result != D_SUCCESS) {
493 /*
494 * Open failed. Deallocate port and device.
495 */
496 dev_port_remove(device);
497 ipc_port_dealloc_kernel(device->port);
498 device->port = IP_NULL;
499
500 device_lock(device);
501 device->state = DEV_STATE_INIT;
502 if (device->io_wait) {
503 device->io_wait = FALSE;
504 thread_wakeup((event_t)device);
505 }
506 device_unlock(device);
507
508 device_deallocate(device);
509 device = DEVICE_NULL;
510 }
511 else {
512 /*
513 * Open succeeded.
514 */
515 device_lock(device);
516 device->state = DEV_STATE_OPEN;
517 device->open_count = 1;
518 if (device->io_wait) {
519 device->io_wait = FALSE;
520 thread_wakeup((event_t)device);
521 }
522 device_unlock(device);
523
524 /* donate device reference to get port */
525 }
526 /*
527 * Must explicitly convert device to port, since
528 * device_reply interface is built as 'user' side
529 * (thus cannot get translation).
530 */
531 if (IP_VALID(ior->io_reply_port)) {
532 (void) ds_device_open_reply(ior->io_reply_port,
533 ior->io_reply_port_type,
534 result,
535 convert_device_to_port(device));
536 } else
537 device_deallocate(device);
538
539 return TRUE;
540 }
541
542 io_return_t
543 ds_device_close(
544 register device_t device)
545 {
546 if (device == DEVICE_NULL)
547 return D_NO_SUCH_DEVICE;
548
549 device_lock(device);
550
551 /*
552 * If device will remain open, do nothing.
553 */
554 if (--device->open_count > 0) {
555 device_unlock(device);
556 return D_SUCCESS;
557 }
558
559 /*
560 * If device is being closed, do nothing.
561 */
562 if (device->state == DEV_STATE_CLOSING) {
563 device_unlock(device);
564 return D_SUCCESS;
565 }
566
567 /*
568 * Mark device as closing, to prevent new IO.
569 * Outstanding IO will still be in progress.
570 */
571 device->state = DEV_STATE_CLOSING;
572 device_unlock(device);
573
574 /*
575 * ? wait for IO to end ?
576 * only if device wants to
577 */
578
579 /*
580 * Remove the device-port association.
581 */
582 dev_port_remove(device);
583 ipc_port_dealloc_kernel(device->port);
584
585 /*
586 * Close the device
587 */
588 (*device->dev_ops->d_close)(device->dev_number);
589
590 /*
591 * Finally mark it closed. If someone else is trying
592 * to open it, the open can now proceed.
593 */
594 device_lock(device);
595 device->state = DEV_STATE_INIT;
596 if (device->io_wait) {
597 device->io_wait = FALSE;
598 thread_wakeup((event_t)device);
599 }
600 device_unlock(device);
601
602 return D_SUCCESS;
603 }
604
605 /*
606 * Write to a device.
607 */
608 io_return_t
609 ds_device_write(
610 register device_t device,
611 ipc_port_t reply_port,
612 mach_msg_type_name_t reply_port_type,
613 dev_mode_t mode,
614 recnum_t recnum,
615 io_buf_ptr_t data,
616 mach_msg_type_number_t data_count,
617 int *bytes_written) /* out */
618 {
619 register io_req_t ior;
620 register io_return_t result;
621
622 /*
623 * Refuse if device is dead or not completely open.
624 */
625 if (device == DEVICE_NULL)
626 return D_NO_SUCH_DEVICE;
627
628 if (device->state != DEV_STATE_OPEN)
629 return D_NO_SUCH_DEVICE;
630
631 if (data == 0)
632 return D_INVALID_SIZE;
633
634 /*
635 * XXX Need logic to reject ridiculously big requests.
636 */
637
638 /* XXX note that a CLOSE may proceed at any point */
639
640 /*
641 * Package the write request for the device driver
642 */
643 io_req_alloc(ior, data_count);
644
645 ior->io_device = device;
646 ior->io_unit = device->dev_number;
647 ior->io_op = IO_WRITE | IO_CALL;
648 ior->io_mode = mode;
649 ior->io_recnum = recnum;
650 ior->io_data = data;
651 ior->io_count = data_count;
652 ior->io_total = data_count;
653 ior->io_alloc_size = 0;
654 ior->io_residual = 0;
655 ior->io_error = 0;
656 ior->io_done = ds_write_done;
657 ior->io_reply_port = reply_port;
658 ior->io_reply_port_type = reply_port_type;
659 ior->io_copy = VM_MAP_COPY_NULL;
660
661 /*
662 * The ior keeps an extra reference for the device.
663 */
664 device_reference(device);
665
666 /*
667 * And do the write ...
668 *
669 	 * device_write_dealloc returns false if there's more
670 * to do; it has updated the ior appropriately and expects
671 * its caller to reinvoke it on the device.
672 */
673
674 do {
675
676 result = (*device->dev_ops->d_write)(device->dev_number, ior);
677
678 /*
679 * If the IO was queued, delay reply until it is finished.
680 */
681 if (result == D_IO_QUEUED)
682 return MIG_NO_REPLY;
683
684 /*
685 * Discard the local mapping of the data.
686 */
687
688 } while (!device_write_dealloc(ior));
689
690 /*
691 * Return the number of bytes actually written.
692 */
693 *bytes_written = ior->io_total - ior->io_residual;
694
695 /*
696 * Remove the extra reference.
697 */
698 device_deallocate(device);
699
700 io_req_free(ior);
701 return result;
702 }
703
704 /*
705 * Write to a device, but memory is in message.
706 */
707 io_return_t
708 ds_device_write_inband(
709 register device_t device,
710 ipc_port_t reply_port,
711 mach_msg_type_name_t reply_port_type,
712 dev_mode_t mode,
713 recnum_t recnum,
714 io_buf_ptr_inband_t data,
715 mach_msg_type_number_t data_count,
716 int *bytes_written) /* out */
717 {
718 register io_req_t ior;
719 register io_return_t result;
720
721 /*
722 * Refuse if device is dead or not completely open.
723 */
724 if (device == DEVICE_NULL)
725 return D_NO_SUCH_DEVICE;
726
727 if (device->state != DEV_STATE_OPEN)
728 return D_NO_SUCH_DEVICE;
729
730 if (data == 0)
731 return D_INVALID_SIZE;
732
733 /* XXX note that a CLOSE may proceed at any point */
734
735 /*
736 * Package the write request for the device driver.
737 */
738 io_req_alloc(ior, 0);
739
740 ior->io_device = device;
741 ior->io_unit = device->dev_number;
742 ior->io_op = IO_WRITE | IO_CALL | IO_INBAND;
743 ior->io_mode = mode;
744 ior->io_recnum = recnum;
745 ior->io_data = data;
746 ior->io_count = data_count;
747 ior->io_total = data_count;
748 ior->io_alloc_size = 0;
749 ior->io_residual = 0;
750 ior->io_error = 0;
751 ior->io_done = ds_write_done;
752 ior->io_reply_port = reply_port;
753 ior->io_reply_port_type = reply_port_type;
754
755 /*
756 * The ior keeps an extra reference for the device.
757 */
758 device_reference(device);
759
760 /*
761 * And do the write.
762 */
763 result = (*device->dev_ops->d_write)(device->dev_number, ior);
764
765 /*
766 * If the IO was queued, delay reply until it is finished.
767 */
768 if (result == D_IO_QUEUED)
769 return MIG_NO_REPLY;
770
771 /*
772 * Return the number of bytes actually written.
773 */
774 *bytes_written = ior->io_total - ior->io_residual;
775
776 /*
777 * Remove the extra reference.
778 */
779 device_deallocate(device);
780
781 io_req_free(ior);
782 return result;
783 }
784
785 /*
786 * Wire down incoming memory to give to device.
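 *
 * For ordinary (non-loaned) requests carrying data, a successful return
 * leaves ior->io_data addressing wired kernel memory (an io_inband_zone
 * buffer for inband writes, space mapped into device_io_map otherwise)
 * and ior->io_alloc_size recording how much was set aside.  If an
 * out-of-line request has to be split, io_count is trimmed to the mapped
 * portion and *wait is set TRUE: the caller must finish this piece
 * synchronously and then call device_write_dealloc() to move on to the
 * remainder.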
787 */
788 kern_return_t
789 device_write_get(
790 register io_req_t ior,
791 boolean_t *wait)
792 {
793 vm_map_copy_t io_copy;
794 vm_offset_t new_addr;
795 register kern_return_t result;
796 int bsize;
797 vm_size_t min_size;
798
799 /*
800 * By default, caller does not have to wait.
801 */
802 *wait = FALSE;
803
804 /*
805 * Nothing to do if no data.
806 */
807 if (ior->io_count == 0)
808 return KERN_SUCCESS;
809
810 /*
811 * Loaned iors already have valid data.
812 */
813 if (ior->io_op & IO_LOANED)
814 return KERN_SUCCESS;
815
816 /*
817 * Inband case.
818 */
819 if (ior->io_op & IO_INBAND) {
820 assert(ior->io_count <= sizeof (io_buf_ptr_inband_t));
821 new_addr = zalloc(io_inband_zone);
822 bcopy((void *)ior->io_data, (void *)new_addr, ior->io_count);
823 ior->io_data = (io_buf_ptr_t)new_addr;
824 ior->io_alloc_size = sizeof (io_buf_ptr_inband_t);
825
826 return KERN_SUCCESS;
827 }
828
829 /*
830 * Figure out how much data to move this time. If the device
831 * won't return a block size, then we have to do the whole
832 * request in one shot (ditto if this is a block fragment),
833 * otherwise, move at least one block's worth.
834 */
835 result = (*ior->io_device->dev_ops->d_dev_info)(
836 ior->io_device->dev_number,
837 D_INFO_BLOCK_SIZE,
838 &bsize);
839
840 if (result != KERN_SUCCESS || ior->io_count < (vm_size_t) bsize)
841 min_size = (vm_size_t) ior->io_count;
842 else
843 min_size = (vm_size_t) bsize;
844
845 /*
846 * Map the pages from this page list into memory.
847 * io_data records location of data.
848 * io_alloc_size is the vm size of the region to deallocate.
849 */
850 io_copy = (vm_map_copy_t) ior->io_data;
851 result = kmem_io_map_copyout(device_io_map,
852 (vm_offset_t*)&ior->io_data, &new_addr,
853 &ior->io_alloc_size, io_copy, min_size);
854 if (result != KERN_SUCCESS)
855 return result;
856
857 if ((ior->io_data + ior->io_count) >
858 (((char *)new_addr) + ior->io_alloc_size)) {
859
860 /*
861 * Operation has to be split. Reset io_count for how
862 * much we can do this time.
863 */
864 assert(vm_map_copy_has_cont(io_copy));
865 assert(ior->io_count == io_copy->size);
866 ior->io_count = ior->io_alloc_size -
867 (ior->io_data - ((char *)new_addr));
868
869 /*
870 * Caller must wait synchronously.
871 */
872 ior->io_op &= ~IO_CALL;
873 *wait = TRUE;
874 }
875
876 ior->io_copy = io_copy; /* vm_map_copy to discard */
877 return KERN_SUCCESS;
878 }
879
880 /*
881 * Clean up memory allocated for IO.
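 *
 * Returns TRUE when the write is entirely finished.  Returns FALSE when
 * the copy object's continuation produced more data; in that case
 * io_data, io_count and io_recnum have been advanced and the caller is
 * expected to hand the ior back to the driver's d_write routine (as
 * ds_device_write and ds_write_done do).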
882 */
883 boolean_t
884 device_write_dealloc(
885 register io_req_t ior)
886 {
887 vm_map_copy_t new_copy = VM_MAP_COPY_NULL;
888 register
889 vm_map_copy_t io_copy;
890 kern_return_t result;
891 vm_offset_t size_to_do;
892 int bsize;
893
894 if (ior->io_alloc_size == 0)
895 return TRUE;
896
897 /*
898 * Inband case.
899 */
900 if (ior->io_op & IO_INBAND) {
901 zfree(io_inband_zone, (vm_offset_t)ior->io_data);
902
903 return TRUE;
904 }
905
906 if ((io_copy = ior->io_copy) == VM_MAP_COPY_NULL)
907 return TRUE;
908
909 /*
910 * To prevent a possible deadlock with the default pager,
911 * we have to release space in the device_io_map before
912 * we allocate any memory. (Which vm_map_copy_invoke_cont
913 * might do.) See the discussion in ds_init.
914 */
915
916 kmem_io_map_deallocate(device_io_map,
917 trunc_page(ior->io_data),
918 (vm_size_t) ior->io_alloc_size);
919
920 if (vm_map_copy_has_cont(io_copy)) {
921
922 /*
923 * Remember how much is left, then
924 * invoke or abort the continuation.
925 */
926 size_to_do = io_copy->size - ior->io_count;
927 if (ior->io_error == 0) {
928 vm_map_copy_invoke_cont(io_copy, &new_copy, &result);
929 }
930 else {
931 vm_map_copy_abort_cont(io_copy);
932 result = KERN_FAILURE;
933 }
934
935 if (result == KERN_SUCCESS && new_copy != VM_MAP_COPY_NULL) {
936 register int res;
937
938 /*
939 * We have a new continuation, reset the ior to
940 * represent the remainder of the request. Must
941 * adjust the recnum because drivers assume
942 * that the residual is zero.
943 */
944 ior->io_op &= ~IO_DONE;
945 ior->io_op |= IO_CALL;
946
947 res = (*ior->io_device->dev_ops->d_dev_info)(
948 ior->io_device->dev_number,
949 D_INFO_BLOCK_SIZE,
950 &bsize);
951
952 if (res != D_SUCCESS)
953 panic("device_write_dealloc: No block size");
954
955 ior->io_recnum += ior->io_count/bsize;
956 ior->io_count = new_copy->size;
957 }
958 else {
959
960 /*
961 			 * No continuation.  Add the amount we did not
962 			 * transfer to the residual.
963 */
964 ior->io_residual += size_to_do;
965 }
966 }
967
968 /*
969 * Clean up the state for the IO that just completed.
970 */
971 vm_map_copy_discard(ior->io_copy);
972 ior->io_copy = VM_MAP_COPY_NULL;
973 ior->io_data = (char *) new_copy;
974
975 /*
976 * Return FALSE if there's more IO to do.
977 */
978
979 return (new_copy == VM_MAP_COPY_NULL);
980 }
981
982 /*
983 * Send write completion message to client, and discard the data.
984 */
985 boolean_t
986 ds_write_done(
987 register io_req_t ior)
988 {
989 /*
990 * device_write_dealloc discards the data that has been
991 * written, but may decide that there is more to write.
992 */
993 while (!device_write_dealloc(ior)) {
994 register io_return_t result;
995 register device_t device;
996
997 /*
998 * More IO to do -- invoke it.
999 */
1000 device = ior->io_device;
1001 result = (*device->dev_ops->d_write)(device->dev_number, ior);
1002
1003 /*
1004 * If the IO was queued, return FALSE -- not done yet.
1005 */
1006 if (result == D_IO_QUEUED)
1007 return FALSE;
1008 }
1009
1010 /*
1011 * Now the write is really complete. Send reply.
1012 */
1013
1014 if (IP_VALID(ior->io_reply_port)) {
1015 (void) (*((ior->io_op & IO_INBAND) ?
1016 ds_device_write_reply_inband :
1017 ds_device_write_reply))(ior->io_reply_port,
1018 ior->io_reply_port_type,
1019 ior->io_error,
1020 (int) (ior->io_total -
1021 ior->io_residual));
1022 }
1023 device_deallocate(ior->io_device);
1024
1025 return TRUE;
1026 }
1027
1028 /*
1029 * Read from a device.
1030 */
1031 io_return_t
1032 ds_device_read(
1033 register device_t device,
1034 ipc_port_t reply_port,
1035 mach_msg_type_name_t reply_port_type,
1036 dev_mode_t mode,
1037 recnum_t recnum,
1038 int bytes_wanted,
1039 io_buf_ptr_t *data, /* out */
1040 mach_msg_type_number_t *data_count) /* out */
1041 {
1042 register io_req_t ior;
1043 register io_return_t result;
1044
1045 #ifdef lint
1046 *data = *data;
1047 *data_count = *data_count;
1048 #endif /* lint */
1049
1050 /*
1051 * Refuse if device is dead or not completely open.
1052 */
1053 if (device == DEVICE_NULL)
1054 return D_NO_SUCH_DEVICE;
1055
1056 if (device->state != DEV_STATE_OPEN)
1057 return D_NO_SUCH_DEVICE;
1058
1059 /* XXX note that a CLOSE may proceed at any point */
1060
1061 /*
1062 * There must be a reply port.
1063 */
1064 if (!IP_VALID(reply_port)) {
1065 printf("ds_* invalid reply port\n");
1066 Debugger("ds_* reply_port");
1067 return MIG_NO_REPLY; /* no sense in doing anything */
1068 }
1069
1070 /*
1071 * Package the read request for the device driver
1072 */
1073 io_req_alloc(ior, 0);
1074
1075 ior->io_device = device;
1076 ior->io_unit = device->dev_number;
1077 ior->io_op = IO_READ | IO_CALL;
1078 ior->io_mode = mode;
1079 ior->io_recnum = recnum;
1080 ior->io_data = 0; /* driver must allocate data */
1081 ior->io_count = bytes_wanted;
1082 ior->io_alloc_size = 0; /* no data allocated yet */
1083 ior->io_residual = 0;
1084 ior->io_error = 0;
1085 ior->io_done = ds_read_done;
1086 ior->io_reply_port = reply_port;
1087 ior->io_reply_port_type = reply_port_type;
1088
1089 /*
1090 * The ior keeps an extra reference for the device.
1091 */
1092 device_reference(device);
1093
1094 /*
1095 * And do the read.
1096 */
1097 result = (*device->dev_ops->d_read)(device->dev_number, ior);
1098
1099 /*
1100 * If the IO was queued, delay reply until it is finished.
1101 */
1102 if (result == D_IO_QUEUED)
1103 return MIG_NO_REPLY;
1104
1105 /*
1106 * Return result via ds_read_done.
1107 */
1108 ior->io_error = result;
1109 (void) ds_read_done(ior);
1110 io_req_free(ior);
1111
1112 return MIG_NO_REPLY; /* reply has already been sent. */
1113 }
1114
1115 /*
1116 * Read from a device, but return the data 'inband.'
1117 */
1118 io_return_t
1119 ds_device_read_inband(
1120 register device_t device,
1121 ipc_port_t reply_port,
1122 mach_msg_type_name_t reply_port_type,
1123 dev_mode_t mode,
1124 recnum_t recnum,
1125 int bytes_wanted,
1126 char *data, /* pointer to OUT array */
1127 mach_msg_type_number_t *data_count) /* out */
1128 {
1129 register io_req_t ior;
1130 register io_return_t result;
1131
1132 #ifdef lint
1133 *data = *data;
1134 *data_count = *data_count;
1135 #endif /* lint */
1136
1137 /*
1138 * Refuse if device is dead or not completely open.
1139 */
1140 if (device == DEVICE_NULL)
1141 return D_NO_SUCH_DEVICE;
1142
1143 if (device->state != DEV_STATE_OPEN)
1144 return D_NO_SUCH_DEVICE;
1145
1146 /* XXX note that a CLOSE may proceed at any point */
1147
1148 /*
1149 * There must be a reply port.
1150 */
1151 if (!IP_VALID(reply_port)) {
1152 printf("ds_* invalid reply port\n");
1153 Debugger("ds_* reply_port");
1154 return MIG_NO_REPLY; /* no sense in doing anything */
1155 }
1156
1157 /*
1158 * Package the read for the device driver
1159 */
1160 io_req_alloc(ior, 0);
1161
1162 ior->io_device = device;
1163 ior->io_unit = device->dev_number;
1164 ior->io_op = IO_READ | IO_CALL | IO_INBAND;
1165 ior->io_mode = mode;
1166 ior->io_recnum = recnum;
1167 ior->io_data = 0; /* driver must allocate data */
1168 ior->io_count =
1169 ((bytes_wanted < sizeof(io_buf_ptr_inband_t)) ?
1170 bytes_wanted : sizeof(io_buf_ptr_inband_t));
1171 ior->io_alloc_size = 0; /* no data allocated yet */
1172 ior->io_residual = 0;
1173 ior->io_error = 0;
1174 ior->io_done = ds_read_done;
1175 ior->io_reply_port = reply_port;
1176 ior->io_reply_port_type = reply_port_type;
1177
1178 /*
1179 * The ior keeps an extra reference for the device.
1180 */
1181 device_reference(device);
1182
1183 /*
1184 * Do the read.
1185 */
1186 result = (*device->dev_ops->d_read)(device->dev_number, ior);
1187
1188 /*
1189 * If the io was queued, delay reply until it is finished.
1190 */
1191 if (result == D_IO_QUEUED)
1192 return MIG_NO_REPLY;
1193
1194 /*
1195 * Return result, via ds_read_done.
1196 */
1197 ior->io_error = result;
1198 (void) ds_read_done(ior);
1199 io_req_free(ior);
1200
1201 return MIG_NO_REPLY; /* reply has already been sent. */
1202 }
1203
1204
1205 /*
1206 * Allocate wired-down memory for device read.
1207 */
1208 kern_return_t device_read_alloc(
1209 register io_req_t ior,
1210 register vm_size_t size)
1211 {
1212 vm_offset_t addr;
1213 kern_return_t kr;
1214
1215 /*
1216 * Nothing to do if no data.
1217 */
1218 if (ior->io_count == 0)
1219 return KERN_SUCCESS;
1220
1221 if (ior->io_op & IO_INBAND) {
1222 ior->io_data = (io_buf_ptr_t) zalloc(io_inband_zone);
1223 ior->io_alloc_size = sizeof(io_buf_ptr_inband_t);
1224 } else {
1225 size = round_page(size);
1226 kr = kmem_alloc(kernel_map, &addr, size);
1227 if (kr != KERN_SUCCESS)
1228 return kr;
1229
1230 ior->io_data = (io_buf_ptr_t) addr;
1231 ior->io_alloc_size = size;
1232 }
1233
1234 return KERN_SUCCESS;
1235 }
1236
1237 boolean_t ds_read_done(
1238 io_req_t ior)
1239 {
1240 vm_offset_t start_data, end_data;
1241 vm_offset_t start_sent, end_sent;
1242 register vm_size_t size_read;
1243
1244 if (ior->io_error)
1245 size_read = 0;
1246 else
1247 size_read = ior->io_count - ior->io_residual;
1248
1249 start_data = (vm_offset_t)ior->io_data;
1250 end_data = start_data + size_read;
1251
1252 start_sent = (ior->io_op & IO_INBAND) ? start_data :
1253 trunc_page(start_data);
1254 end_sent = (ior->io_op & IO_INBAND) ?
1255 start_data + ior->io_alloc_size : round_page(end_data);
1256
1257 /*
1258 * Zero memory that the device did not fill.
1259 */
1260 if (start_sent < start_data)
1261 bzero((char *)start_sent, start_data - start_sent);
1262 if (end_sent > end_data)
1263 bzero((char *)end_data, end_sent - end_data);
1264
1265
1266 /*
1267 * Touch the data being returned, to mark it dirty.
1268 * If the pages were filled by DMA, the pmap module
1269 * may think that they are clean.
1270 */
1271 {
1272 register vm_offset_t touch;
1273 register int c;
1274
1275 for (touch = start_sent; touch < end_sent; touch += PAGE_SIZE) {
1276 c = *(char *)touch;
1277 *(char *)touch = c;
1278 }
1279 }
1280
1281 /*
1282 * Send the data to the reply port - this
1283 * unwires and deallocates it.
1284 */
1285 if (ior->io_op & IO_INBAND) {
1286 (void)ds_device_read_reply_inband(ior->io_reply_port,
1287 ior->io_reply_port_type,
1288 ior->io_error,
1289 (char *) start_data,
1290 size_read);
1291 } else {
1292 vm_map_copy_t copy;
1293 kern_return_t kr;
1294
1295 kr = vm_map_copyin_page_list(kernel_map, start_data,
1296 size_read, TRUE, TRUE,
1297 				     &copy, FALSE);
1298
1299 if (kr != KERN_SUCCESS)
1300 panic("read_done: vm_map_copyin_page_list failed");
1301
1302 (void)ds_device_read_reply(ior->io_reply_port,
1303 ior->io_reply_port_type,
1304 ior->io_error,
1305 (char *) copy,
1306 size_read);
1307 }
1308
1309 /*
1310 * Free any memory that was allocated but not sent.
1311 */
1312 if (ior->io_count != 0) {
1313 if (ior->io_op & IO_INBAND) {
1314 if (ior->io_alloc_size > 0)
1315 zfree(io_inband_zone, (vm_offset_t)ior->io_data);
1316 } else {
1317 register vm_offset_t end_alloc;
1318
1319 end_alloc = start_sent + round_page(ior->io_alloc_size);
1320 if (end_alloc > end_sent)
1321 (void) vm_deallocate(kernel_map,
1322 end_sent,
1323 end_alloc - end_sent);
1324 }
1325 }
1326
1327 device_deallocate(ior->io_device);
1328
1329 return TRUE;
1330 }
1331
1332 io_return_t
1333 ds_device_set_status(
1334 register device_t device,
1335 dev_flavor_t flavor,
1336 dev_status_t status,
1337 mach_msg_type_number_t status_count)
1338 {
1339 /*
1340 * Refuse if device is dead or not completely open.
1341 */
1342 if (device == DEVICE_NULL)
1343 return D_NO_SUCH_DEVICE;
1344
1345 if (device->state != DEV_STATE_OPEN)
1346 return D_NO_SUCH_DEVICE;
1347
1348 /* XXX note that a CLOSE may proceed at any point */
1349
1350 return (*device->dev_ops->d_setstat)(device->dev_number,
1351 flavor,
1352 status,
1353 status_count);
1354 }
1355
1356 io_return_t
1357 ds_device_get_status(
1358 register device_t device,
1359 dev_flavor_t flavor,
1360 dev_status_t status, /* pointer to OUT array */
1361 mach_msg_type_number_t *status_count) /* out */
1362 {
1363 /*
1364 * Refuse if device is dead or not completely open.
1365 */
1366 if (device == DEVICE_NULL)
1367 return D_NO_SUCH_DEVICE;
1368
1369 if (device->state != DEV_STATE_OPEN)
1370 return D_NO_SUCH_DEVICE;
1371
1372 /* XXX note that a CLOSE may proceed at any point */
1373
1374 return (*device->dev_ops->d_getstat)(device->dev_number,
1375 flavor,
1376 status,
1377 status_count);
1378 }
1379
1380 io_return_t
1381 ds_device_set_filter(
1382 register device_t device,
1383 ipc_port_t receive_port,
1384 int priority,
1385 filter_t filter[], /* pointer to IN array */
1386 mach_msg_type_number_t filter_count)
1387 {
1388 /*
1389 * Refuse if device is dead or not completely open.
1390 */
1391 if (device == DEVICE_NULL)
1392 return D_NO_SUCH_DEVICE;
1393
1394 if (device->state != DEV_STATE_OPEN)
1395 return D_NO_SUCH_DEVICE;
1396
1397 /* XXX note that a CLOSE may proceed at any point */
1398
1399 /*
1400 * Request is absurd if no receive port is specified.
1401 */
1402 if (!IP_VALID(receive_port))
1403 return D_INVALID_OPERATION;
1404
1405 return (*device->dev_ops->d_async_in)(device->dev_number,
1406 receive_port,
1407 priority,
1408 filter,
1409 filter_count);
1410 }
1411
1412 io_return_t
1413 ds_device_map(
1414 register device_t device,
1415 vm_prot_t protection,
1416 vm_offset_t offset,
1417 vm_size_t size,
1418 ipc_port_t *pager, /* out */
1419 boolean_t unmap) /* ? */
1420 {
1421 #ifdef lint
1422 unmap = unmap;
1423 #endif /* lint */
1424 if (protection & ~VM_PROT_ALL)
1425 return KERN_INVALID_ARGUMENT;
1426 /*
1427 * Refuse if device is dead or not completely open.
1428 */
1429 if (device == DEVICE_NULL)
1430 return D_NO_SUCH_DEVICE;
1431
1432 if (device->state != DEV_STATE_OPEN)
1433 return D_NO_SUCH_DEVICE;
1434
1435 /* XXX note that a CLOSE may proceed at any point */
1436
1437 return device_pager_setup(device, protection, offset, size,
1438 (mach_port_t *)pager);
1439 }
1440
1441 /*
1442 * Doesn't do anything (yet).
1443 */
1444 void ds_no_senders(
1445 mach_no_senders_notification_t *notification)
1446 {
1447 printf("ds_no_senders called! device_port=0x%x count=%d\n",
1448 notification->not_header.msgh_remote_port,
1449 notification->not_count);
1450 }
1451
1452 boolean_t
1453 ds_notify(
1454 mach_msg_header_t *msg)
1455 {
1456 switch (msg->msgh_id) {
1457 case MACH_NOTIFY_NO_SENDERS:
1458 ds_no_senders((mach_no_senders_notification_t *) msg);
1459 return TRUE;
1460
1461 default:
1462 printf("ds_notify: strange notification %d\n", msg->msgh_id);
1463 return FALSE;
1464 }
1465 }
1466
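/*
 * Queue of completed io requests awaiting the io_done thread, which calls
 * each request's io_done routine (see iodone() and
 * io_done_thread_continue() below).
 */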
1467 queue_head_t io_done_list;
1468 decl_simple_lock_data(, io_done_list_lock)
1469
1470 #define splio splsched /* XXX must block ALL io devices */
1471
1472 void iodone(
1473 register io_req_t ior)
1474 {
1475 register spl_t s;
1476
1477 /*
1478 * If this ior was loaned to us, return it directly.
1479 */
1480 if (ior->io_op & IO_LOANED) {
1481 (*ior->io_done)(ior);
1482 return;
1483 }
1484 /*
1485 * If !IO_CALL, some thread is waiting for this. Must lock
1486 * structure to interlock correctly with iowait(). Else can
1487 * toss on queue for io_done thread to call completion.
1488 */
1489 s = splio();
1490 if ((ior->io_op & IO_CALL) == 0) {
1491 ior_lock(ior);
1492 ior->io_op |= IO_DONE;
1493 ior->io_op &= ~IO_WANTED;
1494 ior_unlock(ior);
1495 thread_wakeup((event_t)ior);
1496 } else {
1497 ior->io_op |= IO_DONE;
1498 simple_lock(&io_done_list_lock);
1499 enqueue_tail(&io_done_list, (queue_entry_t)ior);
1500 thread_wakeup((event_t)&io_done_list);
1501 simple_unlock(&io_done_list_lock);
1502 }
1503 splx(s);
1504 }
1505
1506 no_return io_done_thread_continue(void)
1507 {
1508 for (;;) {
1509 register spl_t s;
1510 register io_req_t ior;
1511
1512 s = splio();
1513 simple_lock(&io_done_list_lock);
1514 while ((ior = (io_req_t)dequeue_head(&io_done_list)) != 0) {
1515 simple_unlock(&io_done_list_lock);
1516 splx(s);
1517
1518 if ((*ior->io_done)(ior)) {
1519 /*
1520 * IO done - free io_req_elt
1521 */
1522 io_req_free(ior);
1523 }
1524 /* else routine has re-queued it somewhere */
1525
1526 AST_KERNEL_CHECK(cpu_number());
1527
1528 s = splio();
1529 simple_lock(&io_done_list_lock);
1530 }
1531
1532 assert_wait(&io_done_list, FALSE);
1533 simple_unlock(&io_done_list_lock);
1534 splx(s);
1535 counter(c_io_done_thread_block++);
1536 thread_block(io_done_thread_continue);
1537 }
1538 }
1539
1540 no_return io_done_thread(void)
1541 {
1542 /*
1543 * Set thread privileges and highest priority.
1544 */
1545 current_thread()->vm_privilege = TRUE;
1546 stack_privilege(current_thread());
1547 thread_set_own_priority(0);
1548
1549 io_done_thread_continue();
1550 /*NOTREACHED*/
1551 }
1552
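/* Kernel VA reserved for the device_io_map submap created in ds_init(). */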
1553 #define DEVICE_IO_MAP_SIZE (2 * 1024 * 1024)
1554
1555 void ds_trap_init(void); /* forward */
1556
1557 void ds_init(void)
1558 {
1559 vm_offset_t device_io_min, device_io_max;
1560
1561 queue_init(&io_done_list);
1562 simple_lock_init(&io_done_list_lock);
1563
1564 device_io_map = kmem_suballoc(kernel_map,
1565 &device_io_min,
1566 &device_io_max,
1567 DEVICE_IO_MAP_SIZE,
1568 FALSE);
1569 /*
1570 * If the kernel receives many device_write requests, the
1571 * device_io_map might run out of space. To prevent
1572 * device_write_get from failing in this case, we enable
1573 * wait_for_space on the map. This causes kmem_io_map_copyout
1574 * to block until there is sufficient space.
1575 * (XXX Large writes may be starved by small writes.)
1576 *
1577 * There is a potential deadlock problem with this solution,
1578 * if a device_write from the default pager has to wait
1579 * for the completion of a device_write which needs to wait
1580 * for memory allocation. Hence, once device_write_get
1581 * allocates space in device_io_map, no blocking memory
1582 * allocations should happen until device_write_dealloc
1583 * frees the space. (XXX A large write might starve
1584 * a small write from the default pager.)
1585 */
1586 device_io_map->wait_for_space = TRUE;
1587
1588 io_inband_zone = zinit(sizeof(io_buf_ptr_inband_t),
1589 1000 * sizeof(io_buf_ptr_inband_t),
1590 10 * sizeof(io_buf_ptr_inband_t),
1591 FALSE,
1592 "io inband read buffers");
1593
1594 ds_trap_init();
1595 }
1596
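/*
 * Wait for a synchronous (non-IO_CALL) request to complete.  iodone()
 * sets IO_DONE and wakes us up when the driver finishes.
 */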
1597 void iowait(
1598 io_req_t ior)
1599 {
1600 spl_t s;
1601
1602 s = splio();
1603 ior_lock(ior);
1604 while ((ior->io_op&IO_DONE)==0) {
1605 assert_wait((event_t)ior, FALSE);
1606 ior_unlock(ior);
1607 thread_block(CONTINUE_NULL);
1608 ior_lock(ior);
1609 }
1610 ior_unlock(ior);
1611 splx(s);
1612 }
1613
1614
1615 /*
1616 * Device trap support.
1617 */
1618
1619 /*
1620 * Memory Management
1621 *
1622 * This currently has a single pool of 2k wired buffers
1623 * since we only handle writes to an ethernet device.
1624 * Should be more general.
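 *
 * Each 2k buffer holds the struct io_req itself followed immediately by
 * the copied-in user data (see ds_device_write_trap), leaving
 * IOTRAP_REQSIZE - sizeof(struct io_req) bytes for data.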
1625 */
1626 #define IOTRAP_REQSIZE 2048
1627
1628 zone_t io_trap_zone;
1629
1630 /*
1631 * Initialization. Called from ds_init().
1632 */
1633 void
1634 ds_trap_init(void)
1635 {
1636 io_trap_zone = zinit(IOTRAP_REQSIZE,
1637 256 * IOTRAP_REQSIZE,
1638 16 * IOTRAP_REQSIZE,
1639 FALSE,
1640 "wired device trap buffers");
1641 }
1642
1643 /*
1644 * Allocate an io_req_t.
1645 * Currently zalloc's from io_trap_zone.
1646 *
1647 * Could have lists of different size zones.
1648 * Could call a device-specific routine.
1649 */
1650 io_req_t
1651 ds_trap_req_alloc(device_t device, vm_size_t data_size)
1652 {
1653 return (io_req_t) zalloc(io_trap_zone);
1654 }
1655
1656 /*
1657 * Called by iodone to release ior.
1658 */
1659 boolean_t
1660 ds_trap_write_done(io_req_t ior)
1661 {
1662 register device_t dev;
1663
1664 dev = ior->io_device;
1665
1666 /*
1667 * Should look at reply port and maybe send a message.
1668 */
1669 zfree(io_trap_zone, (vm_offset_t) ior);
1670
1671 /*
1672 * Give up device reference from ds_write_trap.
1673 */
1674 device_deallocate(dev);
1675 return TRUE;
1676 }
1677
1678 /*
1679 * Like device_write except that data is in user space.
1680 */
1681 io_return_t
1682 ds_device_write_trap(device_t device,
1683 dev_mode_t mode,
1684 recnum_t recnum,
1685 vm_offset_t data,
1686 mach_msg_type_number_t data_count)
1687 {
1688 io_req_t ior;
1689 io_return_t result;
1690
1691 /*
1692 * Refuse if device is dead or not completely open.
1693 */
1694 if (device == DEVICE_NULL)
1695 return D_NO_SUCH_DEVICE;
1696
1697 if (device->state != DEV_STATE_OPEN)
1698 return D_NO_SUCH_DEVICE;
1699
1700 /* XXX note that a CLOSE may proceed at any point */
1701
1702 /*
1703 * Get a buffer to hold the ioreq.
1704 */
1705 ior = ds_trap_req_alloc(device, data_count);
1706
1707 /*
1708 * Package the write request for the device driver.
1709 */
1710
1711 ior->io_device = device;
1712 ior->io_unit = device->dev_number;
1713 ior->io_op = IO_WRITE | IO_CALL | IO_LOANED;
1714 ior->io_mode = mode;
1715 ior->io_recnum = recnum;
1716 ior->io_data = (io_buf_ptr_t)
1717 (vm_offset_t)ior + sizeof(struct io_req);
1718 ior->io_count = data_count;
1719 ior->io_total = data_count;
1720 ior->io_alloc_size = 0;
1721 ior->io_residual = 0;
1722 ior->io_error = 0;
1723 ior->io_done = ds_trap_write_done;
1724 ior->io_reply_port = IP_NULL; /* XXX */
1725 ior->io_reply_port_type = 0; /* XXX */
1726
1727 /*
1728 * Copy the data from user space.
1729 */
1730 if (data_count > 0)
1731 copyin((char *)data, (char *)ior->io_data, data_count);
1732
1733 /*
1734 * The ior keeps an extra reference for the device.
1735 */
1736 device_reference(device);
1737
1738 /*
1739 * And do the write.
1740 */
1741 result = (*device->dev_ops->d_write)(device->dev_number, ior);
1742
1743 /*
1744 * If the IO was queued, delay reply until it is finished.
1745 */
1746 if (result == D_IO_QUEUED)
1747 return MIG_NO_REPLY;
1748
1749 /*
1750 * Remove the extra reference.
1751 */
1752 device_deallocate(device);
1753
1754 zfree(io_trap_zone, (vm_offset_t) ior);
1755 return result;
1756 }
1757
1758 io_return_t
1759 ds_device_writev_trap(device_t device,
1760 dev_mode_t mode,
1761 recnum_t recnum,
1762 io_buf_vec_t *iovec,
1763 vm_size_t iocount)
1764 {
1765 io_req_t ior;
1766 io_return_t result;
1767 io_buf_vec_t stack_iovec[16]; /* XXX */
1768 vm_size_t data_count;
1769 int i;
1770
1771 /*
1772 * Refuse if device is dead or not completely open.
1773 */
1774 if (device == DEVICE_NULL)
1775 return D_NO_SUCH_DEVICE;
1776
1777 if (device->state != DEV_STATE_OPEN)
1778 return D_NO_SUCH_DEVICE;
1779
1780 /* XXX note that a CLOSE may proceed at any point */
1781
1782 /*
1783 * Copyin user addresses.
1784 */
1785 if (iocount > 16)
1786 return KERN_INVALID_VALUE; /* lame */
1787 copyin((char *)iovec,
1788 (char *)stack_iovec,
1789 iocount * sizeof(io_buf_vec_t));
1790 for (data_count = 0, i = 0; i < iocount; i++)
1791 data_count += stack_iovec[i].count;
1792
1793 /*
1794 * Get a buffer to hold the ioreq.
1795 */
1796 ior = ds_trap_req_alloc(device, data_count);
1797
1798 /*
1799 * Package the write request for the device driver.
1800 */
1801
1802 ior->io_device = device;
1803 ior->io_unit = device->dev_number;
1804 ior->io_op = IO_WRITE | IO_CALL | IO_LOANED;
1805 ior->io_mode = mode;
1806 ior->io_recnum = recnum;
1807 ior->io_data = (io_buf_ptr_t)
1808 (vm_offset_t)ior + sizeof(struct io_req);
1809 ior->io_count = data_count;
1810 ior->io_total = data_count;
1811 ior->io_alloc_size = 0;
1812 ior->io_residual = 0;
1813 ior->io_error = 0;
1814 ior->io_done = ds_trap_write_done;
1815 ior->io_reply_port = IP_NULL; /* XXX */
1816 ior->io_reply_port_type = 0; /* XXX */
1817
1818 /*
1819 * Copy the data from user space.
1820 */
1821 if (data_count > 0) {
1822 vm_offset_t p;
1823
1824 p = (vm_offset_t) ior->io_data;
1825 for (i = 0; i < iocount; i++) {
1826 copyin((char *) stack_iovec[i].data,
1827 (char *) p,
1828 stack_iovec[i].count);
1829 p += stack_iovec[i].count;
1830 }
1831 }
1832
1833 /*
1834 * The ior keeps an extra reference for the device.
1835 */
1836 device_reference(device);
1837
1838 /*
1839 * And do the write.
1840 */
1841 result = (*device->dev_ops->d_write)(device->dev_number, ior);
1842
1843 /*
1844 * If the IO was queued, delay reply until it is finished.
1845 */
1846 if (result == D_IO_QUEUED)
1847 return MIG_NO_REPLY;
1848
1849 /*
1850 * Remove the extra reference.
1851 */
1852 device_deallocate(device);
1853
1854 zfree(io_trap_zone, (vm_offset_t) ior);
1855 return result;
1856 }