/* 
 * Mach Operating System
 * Copyright (c) 1993,1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 * 
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 * 
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 * 
 * Carnegie Mellon requests users of this software to return to
 * 
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 * 
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 * HISTORY
 * $Log:	ds_routines.c,v $
 * Revision 2.35  93/08/10  15:10:38  mrt
 * 	Added support for device write kernel traps.
 * 	[93/06/11  cmaeda]
 * 
 * Revision 2.34  93/05/15  18:53:00  mrt
 * 	machparam.h -> machspl.h
 * 
 * Revision 2.33  93/01/14  17:27:00  danner
 * 	64bit cleanup.
 * 	[92/11/30  af]
 * 
 * 	Return D_INVALID_SIZE instead of panicking in write routines when
 * 	there is no data.  This is the case when device_write() is given
 * 	zero as the data length.
 * 	[92/10/08  jvh]
 * 
 * Revision 2.32  92/08/03  17:33:33  jfriedl
 * 	removed silly prototypes
 * 	[92/08/02  jfriedl]
 * 
 * Revision 2.31  92/05/21  17:09:22  jfriedl
 * 	Cleanup to quiet gcc warnings.
 * 	[92/05/20  jfriedl]
 * 
 * Revision 2.30  92/04/01  19:31:44  rpd
 * 	Calculate minimum transfer size in device_write_get and
 * 	pass it to kmem_io_map_copyout.  This makes large block
 * 	sizes work with tapes.
 * 
 * 	Increase DEVICE_IO_MAP_SIZE to 2 megs
 * 	[92/03/03  13:53:25  jeffreyh]
 * 
 * Revision 2.29  92/03/10  16:25:21  jsb
 * 	Check protection argument to device_map.
 * 	[92/02/22  17:03:11  dlb]
 * 
 * 	Merged in norma branch changes as of NORMA_MK7:
 * 	Added ds_notify and ds_no_senders routines in preparation for using
 * 	no-senders notifications to close unreferenced devices.
 * 	Use remote_device() instead of norma_get_special_port().
 * 
 * Revision 2.28  92/03/05  18:54:14  rpd
 * 	Undid norma changes that were inadvertently picked up in 2.27.
 * 	[92/03/05  rpd]
 * 
 * Revision 2.27  92/02/23  19:49:11  elf
 * 	Change panic message to match previous change.
 * 	[92/02/21  10:11:55  dlb]
 * 
 * 	Use page lists for device reads going to default pager.
 * 	[92/02/20  15:16:19  dlb]
 * 
 * 	Temporary change to return data read from devices as a page
 * 	list instead of an entry list.  The keep_wired logic has
 * 	to be updated to convert the default pager to this mechanism
 * 	when making this change permanent.
 * 	[92/02/19  17:36:50  dlb]
 * 
 * Revision 2.26  91/12/10  13:25:33  jsb
 * 	Change ds_read_done to call vm_map_copyin_page_list directly
 * 	if destination of reply is remote.
 * 
 * Revision 2.25  91/11/14  16:52:41  rpd
 * 	Replaced master_device_port_at_node call with
 * 	call to norma_get_special_port.
 * 	[91/11/00  00:00:00  jsb]
 * 
 * Revision 2.24  91/10/09  16:05:37  af
 * 	Fixed device_write_get to check kmem_io_map_copyout return code.
 * 	Enabled wait_for_space in device_io_map.
 * 	[91/09/17  rpd]
 * 
 * Revision 2.23  91/09/12  16:37:22  bohman
 * 	Changed device_write_inband() to not require a reply port.
 * 	Fixed device_write_get() to allow inband calls.  In this case,
 * 	an io_inband buffer is allocated and the data is copied into it.
 * 	Fixed device_write_dealloc() to correctly deallocate io_inband
 * 	buffers.
 * 	Fixed ds_read_done() to free io_inband buffers only if one was
 * 	actually allocated.
 * 	[91/09/11  17:06:50  bohman]
 * 
 * Revision 2.22  91/08/28  11:11:16  jsb
 * 	From rpd: increased DEVICE_IO_MAP_SIZE; documented why
 * 	device_write_get cannot be used for in-band data.
 * 	[91/08/22  15:28:19  jsb]
 * 
 * 	In device_write_get, always set the wait parameter to something;
 * 	by default, it's FALSE.
 * 	[91/08/16  14:19:31  jsb]
 * 
 * 	Support synchronous wait by writers when vm continuations are present.
 * 	Optimize device_write_dealloc.  Fix MP bug in iowait/iodone.
 * 	Convert from bsize to dev_info entry in device op vector.
 * 	[91/08/12  17:27:15  dlb]
 * 
 * 	Page lists working reliably: delete old code.
 * 	[91/08/06  17:16:09  dlb]
 * 
 * 	Clean up and add continuation support for device_write page lists.
 * 	[91/08/05  17:30:38  dlb]
 * 
 * 	First version of support for vm page lists in device_write.
 * 	Still needs cleanup and continuation support.  Old code left
 * 	under #ifdef 0.
 * 	[91/07/31  14:42:24  dlb]
 * 
 * Revision 2.21  91/08/24  11:55:43  af
 * 	Spls definitions.
 * 	[91/08/02  02:44:45  af]
 * 
 * Revision 2.20  91/08/03  18:17:33  jsb
 * 	Device_write_get doesn't need to do anything for loaned ior's.
 * 	[91/08/02  12:13:15  jsb]
 * 
 * 	Create the right flavor of copy object in ds_read_done.
 * 	Replace NORMA_BOOT conditionals with NORMA_DEVICE.
 * 	Free loaned ior's directly in iodone().
 * 	[91/07/27  22:45:09  jsb]
 * 
 * Revision 2.19  91/06/25  10:26:57  rpd
 * 	Changed mach_port_t to ipc_port_t where appropriate.
 * 	Removed device_reply_search and device_reply_terminate.
 * 	[91/05/28  rpd]
 * 
 * Revision 2.18  91/06/17  15:43:58  jsb
 * 	Renamed NORMA conditionals.
 * 	[91/06/17  09:58:59  jsb]
 * 
 * Revision 2.17  91/05/18  14:29:52  rpd
 * 	Added vm/memory_object.h.
 * 	[91/03/22  rpd]
 * 
 * Revision 2.16  91/05/14  15:47:34  mrt
 * 	Correcting copyright
 * 
 * Revision 2.15  91/03/16  14:43:02  rpd
 * 	Updated for new kmem_alloc interface.
 * 	[91/03/03  rpd]
 * 	Added io_done_thread_continue.
 * 	[91/02/13  rpd]
 * 	Removed thread_swappable.
 * 	[91/01/18  rpd]
 * 
 * Revision 2.14  91/02/05  17:09:25  mrt
 * 	Changed to new Mach copyright
 * 	[91/01/31  17:28:57  mrt]
 * 
 * Revision 2.13  91/01/08  15:09:38  rpd
 * 	Added continuation argument to thread_block.
 * 	[90/12/08  rpd]
 * 
 * Revision 2.12  90/12/14  10:59:39  jsb
 * 	Moved mechanism for mapping global to local device names
 * 	to the machine-dependent function dev_forward_name.
 * 	[90/12/14  09:37:18  jsb]
 * 
 * 	Added device request forwarding to support inter-node device access.
 * 	[90/12/14  08:30:53  jsb]
 * 
 * Revision 2.11  90/10/25  14:44:32  rwd
 * 	Let ds_device_write proceed w/o a valid reply port.  This is used
 * 	by the unix server ether_output routine.
 * 	[90/10/22  rwd]
 * 	Fixed ds_write_done to use ds_device_write_inband_reply
 * 	when appropriate.
 * 	[90/10/18  rpd]
 * 	Check for invalid reply ports.
 * 	[90/10/17  rwd]
 * 
 * Revision 2.10  90/09/09  14:31:27  rpd
 * 	Use decl_simple_lock_data.
 * 	[90/08/30  rpd]
 * 
 * Revision 2.9  90/06/02  14:48:00  rpd
 * 	Cleaned up check for default pager in ds_read_done.
 * 	[90/04/29  rpd]
 * 
 * 	Fixed ds_read_done to leave memory wired if the read reply
 * 	is being sent to the default pager.
 * 	[90/04/05  rpd]
 * 	Converted to new IPC.  Purged MACH_XP_FPD.
 * 	[90/03/26  21:55:28  rpd]
 * 
 * Revision 2.8  90/02/22  20:02:12  dbg
 * 	Use vm_map_copy routines.
 * 	[90/01/25  dbg]
 * 
 * Revision 2.7  90/01/11  11:42:01  dbg
 * 	De-lint.
 * 	[89/12/06  dbg]
 * 
 * Revision 2.6  89/11/29  14:08:54  af
 * 	iodone() should set the IO_DONE flag.
 * 	[89/11/03  16:58:16  af]
 * 
 * Revision 2.5  89/11/14  10:28:19  dbg
 * 	Make read and write handle zero-length transfers correctly (used
 * 	to implement select).
 * 	[89/10/27  dbg]
 * 
 * Revision 2.4  89/09/08  11:24:17  dbg
 * 	Converted to run in kernel context.
 * 	Add list of wired pages to tail of IOR allocated for write.
 * 	Reorganized file: moved open/close to beginning, map to end.
 * 	[89/08/23  dbg]
 * 
 * Revision 2.3  89/08/31  16:18:46  rwd
 * 	Added ds_read_inband and support
 * 	[89/08/15  rwd]
 * 
 * Revision 2.2  89/08/05  16:06:39  rwd
 * 	Added ds_write_inband for use by tty and ds_device_map_device.
 * 	[89/07/17  rwd]
 * 
 * 12-Apr-89  David Golub (dbg) at Carnegie-Mellon University
 *	Added device_reply_terminate.
 *
 *  3-Mar-89  David Golub (dbg) at Carnegie-Mellon University
 *	Created.
 *
 */
/*
 * Author: David B. Golub, Carnegie Mellon University
 * Date: 	3/89
 */

#include <norma_device.h>

#include <mach/boolean.h>
#include <mach/kern_return.h>
#include <mach/mig_errors.h>
#include <mach/port.h>
#include <mach/vm_param.h>
#include <mach/notify.h>
#include <machine/machspl.h>		/* spl definitions */

#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <kern/ast.h>
#include <kern/counters.h>
#include <kern/queue.h>
#include <kern/zalloc.h>
#include <kern/thread.h>
#include <kern/task.h>
#include <kern/sched_prim.h>

#include <vm/memory_object.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <device/device_types.h>
#include <device/dev_hdr.h>
#include <device/conf.h>
#include <device/io_req.h>
#include <device/ds_routines.h>
#include <device/net_status.h>
#include <device/device_port.h>
#include <device/device_reply.h>

#include <machine/machspl.h>
io_return_t
ds_device_open(open_port, reply_port, reply_port_type,
	       mode, name, device_p)
	ipc_port_t	open_port;
	ipc_port_t	reply_port;
	mach_msg_type_name_t reply_port_type;
	dev_mode_t	mode;
	char *		name;
	device_t	*device_p;	/* out */
{
	register device_t	device;
	register kern_return_t	result;
	register io_req_t	ior;
	char			namebuf[64];
	ipc_port_t		notify;

	/*
	 * Open must be called on the master device port.
	 */
	if (open_port != master_device_port)
	    return (D_INVALID_OPERATION);

	/*
	 * There must be a reply port.
	 */
	if (!IP_VALID(reply_port)) {
	    printf("ds_* invalid reply port\n");
	    Debugger("ds_* reply_port");
	    return (MIG_NO_REPLY);	/* no sense in doing anything */
	}

#if	NORMA_DEVICE
	/*
	 * Map global device name to <node> + local device name.
	 */
	if (name[0] != '<') {
		extern char *dev_forward_name();

		name = dev_forward_name(name, namebuf, sizeof(namebuf));
	}
	/*
	 * Look for explicit node specifier, e.g., <2>sd0a.
	 * If found, then forward request to correct device server.
	 * If not found, then remove '<n>' and process locally.
	 *
	 * XXX should handle send-right reply_port as well as send-once XXX
	 */
	if (name[0] == '<') {
		char *n;
		int node = 0;

		for (n = &name[1]; *n != '>'; n++) {
			if (*n >= '0' && *n <= '9') {
				node = 10 * node + (*n - '0');
			} else {
				return (D_NO_SUCH_DEVICE);
			}
		}
		if (node == node_self()) {
			name = &n[1];	/* skip trailing '>' */
		} else {
			forward_device_open_send(remote_device(node),
						 reply_port, mode, name);
			return (MIG_NO_REPLY);
		}
	}
#endif	NORMA_DEVICE
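
	/*
	 * Example (illustrative, not from the original source): with
	 * node_self() == 2, an open of "<2>sd0a" strips the "<2>"
	 * prefix and opens "sd0a" locally, while "<3>sd0a" forwards
	 * the request to node 3's device server and this routine
	 * returns MIG_NO_REPLY, leaving the reply to the forwarded
	 * request.
	 */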

	/*
	 * Find the device.
	 */
	device = device_lookup(name);
	if (device == DEVICE_NULL)
	    return (D_NO_SUCH_DEVICE);

	/*
	 * If the device is being opened or closed,
	 * wait for that operation to finish.
	 */
	device_lock(device);
	while (device->state == DEV_STATE_OPENING ||
		device->state == DEV_STATE_CLOSING) {
	    device->io_wait = TRUE;
	    thread_sleep((event_t)device, simple_lock_addr(device->lock), TRUE);
	    device_lock(device);
	}

	/*
	 * If the device is already open, increment the open count
	 * and return.
	 */
	if (device->state == DEV_STATE_OPEN) {

	    if (device->flag & D_EXCL_OPEN) {
		/*
		 * Cannot open a second time.
		 */
		device_unlock(device);
		device_deallocate(device);
		return (D_ALREADY_OPEN);
	    }

	    device->open_count++;
	    device_unlock(device);
	    *device_p = device;
	    return (D_SUCCESS);
	    /*
	     * Return deallocates device reference while acquiring
	     * port.
	     */
	}

	/*
	 * Allocate the device port and register the device before
	 * opening it.
	 */
	device->state = DEV_STATE_OPENING;
	device_unlock(device);

	/*
	 * Allocate port, keeping a reference for it.
	 */
	device->port = ipc_port_alloc_kernel();
	if (device->port == IP_NULL) {
	    device_lock(device);
	    device->state = DEV_STATE_INIT;
	    device->port = IP_NULL;
	    if (device->io_wait) {
		device->io_wait = FALSE;
		thread_wakeup((event_t)device);
	    }
	    device_unlock(device);
	    device_deallocate(device);
	    return (KERN_RESOURCE_SHORTAGE);
	}

	dev_port_enter(device);

	/*
	 * Request no-senders notifications on device port.
	 */
	notify = ipc_port_make_sonce(device->port);
	ip_lock(device->port);
	ipc_port_nsrequest(device->port, 1, notify, &notify);
	assert(notify == IP_NULL);

	/*
	 * Open the device.
	 */
	io_req_alloc(ior, 0);

	ior->io_device = device;
	ior->io_unit = device->dev_number;
	ior->io_op = IO_OPEN | IO_CALL;
	ior->io_mode = mode;
	ior->io_error = 0;
	ior->io_done = ds_open_done;
	ior->io_reply_port = reply_port;
	ior->io_reply_port_type = reply_port_type;

	result = (*device->dev_ops->d_open)(device->dev_number, (int)mode, ior);
	if (result == D_IO_QUEUED)
	    return (MIG_NO_REPLY);

	/*
	 * Return result via ds_open_done.
	 */
	ior->io_error = result;
	(void) ds_open_done(ior);

	io_req_free(ior);

	return (MIG_NO_REPLY);	/* reply already sent */
}

boolean_t
ds_open_done(ior)
	register io_req_t	ior;
{
	kern_return_t		result;
	register device_t	device;

	device = ior->io_device;
	result = ior->io_error;

	if (result != D_SUCCESS) {
	    /*
	     * Open failed.  Deallocate port and device.
	     */
	    dev_port_remove(device);
	    ipc_port_dealloc_kernel(device->port);
	    device->port = IP_NULL;

	    device_lock(device);
	    device->state = DEV_STATE_INIT;
	    if (device->io_wait) {
		device->io_wait = FALSE;
		thread_wakeup((event_t)device);
	    }
	    device_unlock(device);

	    device_deallocate(device);
	    device = DEVICE_NULL;
	}
	else {
	    /*
	     * Open succeeded.
	     */
	    device_lock(device);
	    device->state = DEV_STATE_OPEN;
	    device->open_count = 1;
	    if (device->io_wait) {
		device->io_wait = FALSE;
		thread_wakeup((event_t)device);
	    }
	    device_unlock(device);

	    /* donate device reference to get port */
	}
	/*
	 * Must explicitly convert device to port, since
	 * device_reply interface is built as 'user' side
	 * (thus cannot get translation).
	 */
	if (IP_VALID(ior->io_reply_port)) {
	    (void) ds_device_open_reply(ior->io_reply_port,
					ior->io_reply_port_type,
					result,
					convert_device_to_port(device));
	} else
	    device_deallocate(device);

	return (TRUE);
}

io_return_t
ds_device_close(device)
	register device_t	device;
{
	if (device == DEVICE_NULL)
	    return (D_NO_SUCH_DEVICE);

	device_lock(device);

	/*
	 * If device will remain open, do nothing.
	 */
	if (--device->open_count > 0) {
	    device_unlock(device);
	    return (D_SUCCESS);
	}

	/*
	 * If device is being closed, do nothing.
	 */
	if (device->state == DEV_STATE_CLOSING) {
	    device_unlock(device);
	    return (D_SUCCESS);
	}

	/*
	 * Mark device as closing, to prevent new IO.
	 * Outstanding IO will still be in progress.
	 */
	device->state = DEV_STATE_CLOSING;
	device_unlock(device);

	/*
	 * ? wait for IO to end ?
	 *   only if device wants to
	 */

	/*
	 * Remove the device-port association.
	 */
	dev_port_remove(device);
	ipc_port_dealloc_kernel(device->port);

	/*
	 * Close the device
	 */
	(*device->dev_ops->d_close)(device->dev_number);

	/*
	 * Finally mark it closed.  If someone else is trying
	 * to open it, the open can now proceed.
	 */
	device_lock(device);
	device->state = DEV_STATE_INIT;
	if (device->io_wait) {
	    device->io_wait = FALSE;
	    thread_wakeup((event_t)device);
	}
	device_unlock(device);

	return (D_SUCCESS);
}

/*
 * Write to a device.
 */
io_return_t
ds_device_write(device, reply_port, reply_port_type, mode, recnum,
		data, data_count, bytes_written)
	register device_t	device;
	ipc_port_t		reply_port;
	mach_msg_type_name_t	reply_port_type;
	dev_mode_t		mode;
	recnum_t		recnum;
	io_buf_ptr_t		data;
	unsigned int		data_count;
	int			*bytes_written;	/* out */
{
	register io_req_t	ior;
	register io_return_t	result;

	/*
	 * Refuse if device is dead or not completely open.
	 */
	if (device == DEVICE_NULL)
	    return (D_NO_SUCH_DEVICE);

	if (device->state != DEV_STATE_OPEN)
	    return (D_NO_SUCH_DEVICE);

	if (data == 0)
	    return (D_INVALID_SIZE);

	/*
	 * XXX Need logic to reject ridiculously big requests.
	 */

	/* XXX note that a CLOSE may proceed at any point */

	/*
	 * Package the write request for the device driver
	 */
	io_req_alloc(ior, data_count);

	ior->io_device = device;
	ior->io_unit = device->dev_number;
	ior->io_op = IO_WRITE | IO_CALL;
	ior->io_mode = mode;
	ior->io_recnum = recnum;
	ior->io_data = data;
	ior->io_count = data_count;
	ior->io_total = data_count;
	ior->io_alloc_size = 0;
	ior->io_residual = 0;
	ior->io_error = 0;
	ior->io_done = ds_write_done;
	ior->io_reply_port = reply_port;
	ior->io_reply_port_type = reply_port_type;
	ior->io_copy = VM_MAP_COPY_NULL;

	/*
	 * The ior keeps an extra reference for the device.
	 */
	device_reference(device);

	/*
	 * And do the write ...
	 *
	 * device_write_dealloc returns FALSE if there's more
	 * to do; it has updated the ior appropriately and expects
	 * its caller to reinvoke it on the device.
	 */

	do {

	    result = (*device->dev_ops->d_write)(device->dev_number, ior);

	    /*
	     * If the IO was queued, delay reply until it is finished.
	     */
	    if (result == D_IO_QUEUED)
		return (MIG_NO_REPLY);

	    /*
	     * Discard the local mapping of the data.
	     */

	} while (!device_write_dealloc(ior));

	/*
	 * Return the number of bytes actually written.
	 */
	*bytes_written = ior->io_total - ior->io_residual;

	/*
	 * Remove the extra reference.
	 */
	device_deallocate(device);

	io_req_free(ior);
	return (result);
}

/*
 * Write to a device, but memory is in message.
 */
io_return_t
ds_device_write_inband(device, reply_port, reply_port_type, mode, recnum,
		       data, data_count, bytes_written)
	register device_t	device;
	ipc_port_t		reply_port;
	mach_msg_type_name_t	reply_port_type;
	dev_mode_t		mode;
	recnum_t		recnum;
	io_buf_ptr_inband_t	data;
	unsigned int		data_count;
	int			*bytes_written;	/* out */
{
	register io_req_t	ior;
	register io_return_t	result;

	/*
	 * Refuse if device is dead or not completely open.
	 */
	if (device == DEVICE_NULL)
	    return (D_NO_SUCH_DEVICE);

	if (device->state != DEV_STATE_OPEN)
	    return (D_NO_SUCH_DEVICE);

	if (data == 0)
	    return (D_INVALID_SIZE);

	/* XXX note that a CLOSE may proceed at any point */

	/*
	 * Package the write request for the device driver.
	 */
	io_req_alloc(ior, 0);

	ior->io_device = device;
	ior->io_unit = device->dev_number;
	ior->io_op = IO_WRITE | IO_CALL | IO_INBAND;
	ior->io_mode = mode;
	ior->io_recnum = recnum;
	ior->io_data = data;
	ior->io_count = data_count;
	ior->io_total = data_count;
	ior->io_alloc_size = 0;
	ior->io_residual = 0;
	ior->io_error = 0;
	ior->io_done = ds_write_done;
	ior->io_reply_port = reply_port;
	ior->io_reply_port_type = reply_port_type;

	/*
	 * The ior keeps an extra reference for the device.
	 */
	device_reference(device);

	/*
	 * And do the write.
	 */
	result = (*device->dev_ops->d_write)(device->dev_number, ior);

	/*
	 * If the IO was queued, delay reply until it is finished.
	 */
	if (result == D_IO_QUEUED)
	    return (MIG_NO_REPLY);

	/*
	 * Return the number of bytes actually written.
	 */
	*bytes_written = ior->io_total - ior->io_residual;

	/*
	 * Remove the extra reference.
	 */
	device_deallocate(device);

	io_req_free(ior);
	return (result);
}

/*
 * Wire down incoming memory to give to device.
 */
kern_return_t
device_write_get(ior, wait)
	register io_req_t	ior;
	boolean_t		*wait;
{
	vm_map_copy_t		io_copy;
	vm_offset_t		new_addr;
	register kern_return_t	result;
	int			bsize;
	vm_size_t		min_size;

	/*
	 * By default, caller does not have to wait.
	 */
	*wait = FALSE;

	/*
	 * Nothing to do if no data.
	 */
	if (ior->io_count == 0)
	    return (KERN_SUCCESS);

	/*
	 * Loaned iors already have valid data.
	 */
	if (ior->io_op & IO_LOANED)
	    return (KERN_SUCCESS);

	/*
	 * Inband case.
	 */
	if (ior->io_op & IO_INBAND) {
	    assert(ior->io_count <= sizeof (io_buf_ptr_inband_t));
	    new_addr = zalloc(io_inband_zone);
	    bcopy((void*)ior->io_data, (void*)new_addr, ior->io_count);
	    ior->io_data = (io_buf_ptr_t)new_addr;
	    ior->io_alloc_size = sizeof (io_buf_ptr_inband_t);

	    return (KERN_SUCCESS);
	}

	/*
	 * Figure out how much data to move this time.  If the device
	 * won't return a block size, then we have to do the whole
	 * request in one shot (ditto if this is a block fragment),
	 * otherwise, move at least one block's worth.
	 */
	result = (*ior->io_device->dev_ops->d_dev_info)(
			ior->io_device->dev_number,
			D_INFO_BLOCK_SIZE,
			&bsize);

	if (result != KERN_SUCCESS || ior->io_count < (vm_size_t) bsize)
	    min_size = (vm_size_t) ior->io_count;
	else
	    min_size = (vm_size_t) bsize;

	/*
	 * Map the pages from this page list into memory.
	 * io_data records location of data.
	 * io_alloc_size is the vm size of the region to deallocate.
	 */
	io_copy = (vm_map_copy_t) ior->io_data;
	result = kmem_io_map_copyout(device_io_map,
			(vm_offset_t*)&ior->io_data, &new_addr,
			&ior->io_alloc_size, io_copy, min_size);
	if (result != KERN_SUCCESS)
	    return (result);

	if ((ior->io_data + ior->io_count) >
	    (((char *)new_addr) + ior->io_alloc_size)) {

	    /*
	     * Operation has to be split.  Reset io_count for how
	     * much we can do this time.
	     */
	    assert(vm_map_copy_has_cont(io_copy));
	    assert(ior->io_count == io_copy->size);
	    ior->io_count = ior->io_alloc_size -
		(ior->io_data - ((char *)new_addr));

	    /*
	     * Caller must wait synchronously.
	     */
	    ior->io_op &= ~IO_CALL;
	    *wait = TRUE;
	}

	ior->io_copy = io_copy;		/* vm_map_copy to discard */
	return (KERN_SUCCESS);
}
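
/*
 * Illustrative sketch (not part of the original source): how a
 * driver's write entry point might pair device_write_get with the
 * synchronous-wait protocol above.  The driver name "xx" and its
 * start routine are hypothetical; only device_write_get, iowait,
 * iodone, and the io_req fields used below come from this file.
 */
#if 0
io_return_t
xxwrite(dev, ior)
	int		dev;
	io_req_t	ior;
{
	boolean_t	wait;
	kern_return_t	kr;

	/*
	 * Wire and map the caller's data.  device_write_get may
	 * split the request; if so, it clears IO_CALL and asks us
	 * to wait synchronously.
	 */
	kr = device_write_get(ior, &wait);
	if (kr != KERN_SUCCESS)
	    return (kr);

	xxstart(dev, ior);	/* hypothetical: start the transfer;
				   completion ends in iodone(ior) */

	if (wait) {
	    iowait(ior);	/* block until iodone(); our caller then
				   calls device_write_dealloc and
				   reinvokes d_write for the remainder */
	    return (ior->io_error);
	}
	return (D_IO_QUEUED);
}
#endif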

/*
 * Clean up memory allocated for IO.
 */
boolean_t
device_write_dealloc(ior)
	register io_req_t	ior;
{
	vm_map_copy_t	new_copy = VM_MAP_COPY_NULL;
	register
	vm_map_copy_t	io_copy;
	kern_return_t	result;
	vm_offset_t	size_to_do;
	int		bsize;

	if (ior->io_alloc_size == 0)
	    return (TRUE);

	/*
	 * Inband case.
	 */
	if (ior->io_op & IO_INBAND) {
	    zfree(io_inband_zone, (vm_offset_t)ior->io_data);

	    return (TRUE);
	}

	if ((io_copy = ior->io_copy) == VM_MAP_COPY_NULL)
	    return (TRUE);

	/*
	 * To prevent a possible deadlock with the default pager,
	 * we have to release space in the device_io_map before
	 * we allocate any memory.  (Which vm_map_copy_invoke_cont
	 * might do.)  See the discussion in ds_init.
	 */

	kmem_io_map_deallocate(device_io_map,
			       trunc_page(ior->io_data),
			       (vm_size_t) ior->io_alloc_size);

	if (vm_map_copy_has_cont(io_copy)) {

	    /*
	     * Remember how much is left, then
	     * invoke or abort the continuation.
	     */
	    size_to_do = io_copy->size - ior->io_count;
	    if (ior->io_error == 0) {
		vm_map_copy_invoke_cont(io_copy, &new_copy, &result);
	    }
	    else {
		vm_map_copy_abort_cont(io_copy);
		result = KERN_FAILURE;
	    }

	    if (result == KERN_SUCCESS && new_copy != VM_MAP_COPY_NULL) {
		register int	res;

		/*
		 * We have a new continuation, reset the ior to
		 * represent the remainder of the request.  Must
		 * adjust the recnum because drivers assume
		 * that the residual is zero.
		 */
		ior->io_op &= ~IO_DONE;
		ior->io_op |= IO_CALL;

		res = (*ior->io_device->dev_ops->d_dev_info)(
			ior->io_device->dev_number,
			D_INFO_BLOCK_SIZE,
			&bsize);

		if (res != D_SUCCESS)
		    panic("device_write_dealloc: No block size");

		ior->io_recnum += ior->io_count/bsize;
		ior->io_count = new_copy->size;
	    }
	    else {

		/*
		 * No continuation.  Add the amount we didn't
		 * get to the residual.
		 */
		ior->io_residual += size_to_do;
	    }
	}

	/*
	 * Clean up the state for the IO that just completed.
	 */
	vm_map_copy_discard(ior->io_copy);
	ior->io_copy = VM_MAP_COPY_NULL;
	ior->io_data = (char *) new_copy;

	/*
	 * Return FALSE if there's more IO to do.
	 */

	return(new_copy == VM_MAP_COPY_NULL);
}

/*
 * Send write completion message to client, and discard the data.
 */
boolean_t
ds_write_done(ior)
	register io_req_t	ior;
{
	/*
	 * device_write_dealloc discards the data that has been
	 * written, but may decide that there is more to write.
	 */
	while (!device_write_dealloc(ior)) {
	    register io_return_t	result;
	    register device_t		device;

	    /*
	     * More IO to do -- invoke it.
	     */
	    device = ior->io_device;
	    result = (*device->dev_ops->d_write)(device->dev_number, ior);

	    /*
	     * If the IO was queued, return FALSE -- not done yet.
	     */
	    if (result == D_IO_QUEUED)
		return (FALSE);
	}

	/*
	 * Now the write is really complete.  Send reply.
	 */

	if (IP_VALID(ior->io_reply_port)) {
	    (void) (*((ior->io_op & IO_INBAND) ?
		      ds_device_write_reply_inband :
		      ds_device_write_reply))(ior->io_reply_port,
					      ior->io_reply_port_type,
					      ior->io_error,
					      (int) (ior->io_total -
						     ior->io_residual));
	}
	device_deallocate(ior->io_device);

	return (TRUE);
}

/*
 * Read from a device.
 */
io_return_t
ds_device_read(device, reply_port, reply_port_type, mode, recnum,
	       bytes_wanted, data, data_count)
	register device_t	device;
	ipc_port_t		reply_port;
	mach_msg_type_name_t	reply_port_type;
	dev_mode_t		mode;
	recnum_t		recnum;
	int			bytes_wanted;
	io_buf_ptr_t		*data;		/* out */
	unsigned int		*data_count;	/* out */
{
	register io_req_t	ior;
	register io_return_t	result;

#ifdef lint
	*data = *data;
	*data_count = *data_count;
#endif lint

	/*
	 * Refuse if device is dead or not completely open.
	 */
	if (device == DEVICE_NULL)
	    return (D_NO_SUCH_DEVICE);

	if (device->state != DEV_STATE_OPEN)
	    return (D_NO_SUCH_DEVICE);

	/* XXX note that a CLOSE may proceed at any point */

	/*
	 * There must be a reply port.
	 */
	if (!IP_VALID(reply_port)) {
	    printf("ds_* invalid reply port\n");
	    Debugger("ds_* reply_port");
	    return (MIG_NO_REPLY);	/* no sense in doing anything */
	}

	/*
	 * Package the read request for the device driver
	 */
	io_req_alloc(ior, 0);

	ior->io_device = device;
	ior->io_unit = device->dev_number;
	ior->io_op = IO_READ | IO_CALL;
	ior->io_mode = mode;
	ior->io_recnum = recnum;
	ior->io_data = 0;		/* driver must allocate data */
	ior->io_count = bytes_wanted;
	ior->io_alloc_size = 0;		/* no data allocated yet */
	ior->io_residual = 0;
	ior->io_error = 0;
	ior->io_done = ds_read_done;
	ior->io_reply_port = reply_port;
	ior->io_reply_port_type = reply_port_type;

	/*
	 * The ior keeps an extra reference for the device.
	 */
	device_reference(device);

	/*
	 * And do the read.
	 */
	result = (*device->dev_ops->d_read)(device->dev_number, ior);

	/*
	 * If the IO was queued, delay reply until it is finished.
	 */
	if (result == D_IO_QUEUED)
	    return (MIG_NO_REPLY);

	/*
	 * Return result via ds_read_done.
	 */
	ior->io_error = result;
	(void) ds_read_done(ior);
	io_req_free(ior);

	return (MIG_NO_REPLY);	/* reply has already been sent. */
}

/*
 * Read from a device, but return the data 'inband.'
 */
io_return_t
ds_device_read_inband(device, reply_port, reply_port_type, mode, recnum,
		      bytes_wanted, data, data_count)
	register device_t	device;
	ipc_port_t		reply_port;
	mach_msg_type_name_t	reply_port_type;
	dev_mode_t		mode;
	recnum_t		recnum;
	int			bytes_wanted;
	char			*data;		/* pointer to OUT array */
	unsigned int		*data_count;	/* out */
{
	register io_req_t	ior;
	register io_return_t	result;

#ifdef lint
	*data = *data;
	*data_count = *data_count;
#endif lint

	/*
	 * Refuse if device is dead or not completely open.
	 */
	if (device == DEVICE_NULL)
	    return (D_NO_SUCH_DEVICE);

	if (device->state != DEV_STATE_OPEN)
	    return (D_NO_SUCH_DEVICE);

	/* XXX note that a CLOSE may proceed at any point */

	/*
	 * There must be a reply port.
	 */
	if (!IP_VALID(reply_port)) {
	    printf("ds_* invalid reply port\n");
	    Debugger("ds_* reply_port");
	    return (MIG_NO_REPLY);	/* no sense in doing anything */
	}

	/*
	 * Package the read for the device driver
	 */
	io_req_alloc(ior, 0);

	ior->io_device = device;
	ior->io_unit = device->dev_number;
	ior->io_op = IO_READ | IO_CALL | IO_INBAND;
	ior->io_mode = mode;
	ior->io_recnum = recnum;
	ior->io_data = 0;		/* driver must allocate data */
	ior->io_count =
	    ((bytes_wanted < sizeof(io_buf_ptr_inband_t)) ?
		bytes_wanted : sizeof(io_buf_ptr_inband_t));
	ior->io_alloc_size = 0;		/* no data allocated yet */
	ior->io_residual = 0;
	ior->io_error = 0;
	ior->io_done = ds_read_done;
	ior->io_reply_port = reply_port;
	ior->io_reply_port_type = reply_port_type;

	/*
	 * The ior keeps an extra reference for the device.
	 */
	device_reference(device);

	/*
	 * Do the read.
	 */
	result = (*device->dev_ops->d_read)(device->dev_number, ior);

	/*
	 * If the io was queued, delay reply until it is finished.
	 */
	if (result == D_IO_QUEUED)
	    return (MIG_NO_REPLY);

	/*
	 * Return result, via ds_read_done.
	 */
	ior->io_error = result;
	(void) ds_read_done(ior);
	io_req_free(ior);

	return (MIG_NO_REPLY);	/* reply has already been sent. */
}


/*
 * Allocate wired-down memory for device read.
 */
kern_return_t device_read_alloc(ior, size)
	register io_req_t	ior;
	register vm_size_t	size;
{
	vm_offset_t		addr;
	kern_return_t		kr;

	/*
	 * Nothing to do if no data.
	 */
	if (ior->io_count == 0)
	    return (KERN_SUCCESS);

	if (ior->io_op & IO_INBAND) {
	    ior->io_data = (io_buf_ptr_t) zalloc(io_inband_zone);
	    ior->io_alloc_size = sizeof(io_buf_ptr_inband_t);
	} else {
	    size = round_page(size);
	    kr = kmem_alloc(kernel_map, &addr, size);
	    if (kr != KERN_SUCCESS)
		return (kr);

	    ior->io_data = (io_buf_ptr_t) addr;
	    ior->io_alloc_size = size;
	}

	return (KERN_SUCCESS);
}
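
/*
 * Illustrative sketch (not part of the original source): a driver
 * read routine is expected to allocate its buffer with
 * device_read_alloc and finish through iodone(), which hands the
 * ior to ds_read_done below.  The driver name "xx" and its start
 * routine are hypothetical.
 */
#if 0
io_return_t
xxread(dev, ior)
	int		dev;
	io_req_t	ior;
{
	kern_return_t	kr;

	/* ds_device_read left io_data zero: allocate wired memory. */
	kr = device_read_alloc(ior, (vm_size_t) ior->io_count);
	if (kr != KERN_SUCCESS)
	    return (kr);

	/*
	 * Hypothetical: start the transfer into ior->io_data.  The
	 * interrupt side sets io_residual and io_error, then calls
	 * iodone(ior), which queues the ior for ds_read_done.
	 */
	xxstart_read(dev, ior);

	return (D_IO_QUEUED);
}
#endif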

boolean_t ds_read_done(ior)
	io_req_t	ior;
{
	vm_offset_t		start_data, end_data;
	vm_offset_t		start_sent, end_sent;
	register vm_size_t	size_read;

	if (ior->io_error)
	    size_read = 0;
	else
	    size_read = ior->io_count - ior->io_residual;

	start_data = (vm_offset_t)ior->io_data;
	end_data   = start_data + size_read;

	start_sent = (ior->io_op & IO_INBAND) ? start_data :
						trunc_page(start_data);
	end_sent   = (ior->io_op & IO_INBAND) ?
		start_data + ior->io_alloc_size : round_page(end_data);

	/*
	 * Zero memory that the device did not fill.
	 */
	if (start_sent < start_data)
	    bzero((char *)start_sent, start_data - start_sent);
	if (end_sent > end_data)
	    bzero((char *)end_data, end_sent - end_data);


	/*
	 * Touch the data being returned, to mark it dirty.
	 * If the pages were filled by DMA, the pmap module
	 * may think that they are clean.
	 */
	{
	    register vm_offset_t	touch;
	    register int		c;

	    for (touch = start_sent; touch < end_sent; touch += PAGE_SIZE) {
		c = *(char *)touch;
		*(char *)touch = c;
	    }
	}

	/*
	 * Send the data to the reply port - this
	 * unwires and deallocates it.
	 */
	if (ior->io_op & IO_INBAND) {
	    (void)ds_device_read_reply_inband(ior->io_reply_port,
					      ior->io_reply_port_type,
					      ior->io_error,
					      (char *) start_data,
					      size_read);
	} else {
	    vm_map_copy_t copy;
	    kern_return_t kr;

	    kr = vm_map_copyin_page_list(kernel_map, start_data,
					 size_read, TRUE, TRUE,
					 &copy, FALSE);

	    if (kr != KERN_SUCCESS)
		panic("read_done: vm_map_copyin_page_list failed");

	    (void)ds_device_read_reply(ior->io_reply_port,
				       ior->io_reply_port_type,
				       ior->io_error,
				       (char *) copy,
				       size_read);
	}

	/*
	 * Free any memory that was allocated but not sent.
	 */
	if (ior->io_count != 0) {
	    if (ior->io_op & IO_INBAND) {
		if (ior->io_alloc_size > 0)
		    zfree(io_inband_zone, (vm_offset_t)ior->io_data);
	    } else {
		register vm_offset_t	end_alloc;

		end_alloc = start_sent + round_page(ior->io_alloc_size);
		if (end_alloc > end_sent)
		    (void) vm_deallocate(kernel_map,
					 end_sent,
					 end_alloc - end_sent);
	    }
	}

	device_deallocate(ior->io_device);

	return (TRUE);
}

io_return_t
ds_device_set_status(device, flavor, status, status_count)
	register device_t	device;
	dev_flavor_t		flavor;
	dev_status_t		status;
	mach_msg_type_number_t	status_count;
{
	/*
	 * Refuse if device is dead or not completely open.
	 */
	if (device == DEVICE_NULL)
	    return (D_NO_SUCH_DEVICE);

	if (device->state != DEV_STATE_OPEN)
	    return (D_NO_SUCH_DEVICE);

	/* XXX note that a CLOSE may proceed at any point */

	return ((*device->dev_ops->d_setstat)(device->dev_number,
					      flavor,
					      status,
					      status_count));
}

io_return_t
ds_device_get_status(device, flavor, status, status_count)
	register device_t	device;
	dev_flavor_t		flavor;
	dev_status_t		status;		/* pointer to OUT array */
	mach_msg_type_number_t	*status_count;	/* out */
{
	/*
	 * Refuse if device is dead or not completely open.
	 */
	if (device == DEVICE_NULL)
	    return (D_NO_SUCH_DEVICE);

	if (device->state != DEV_STATE_OPEN)
	    return (D_NO_SUCH_DEVICE);

	/* XXX note that a CLOSE may proceed at any point */

	return ((*device->dev_ops->d_getstat)(device->dev_number,
					      flavor,
					      status,
					      status_count));
}

io_return_t
ds_device_set_filter(device, receive_port, priority, filter, filter_count)
	register device_t	device;
	ipc_port_t		receive_port;
	int			priority;
	filter_t		filter[];	/* pointer to IN array */
	unsigned int		filter_count;
{
	/*
	 * Refuse if device is dead or not completely open.
	 */
	if (device == DEVICE_NULL)
	    return (D_NO_SUCH_DEVICE);

	if (device->state != DEV_STATE_OPEN)
	    return (D_NO_SUCH_DEVICE);

	/* XXX note that a CLOSE may proceed at any point */

	/*
	 * Request is absurd if no receive port is specified.
	 */
	if (!IP_VALID(receive_port))
	    return (D_INVALID_OPERATION);

	return ((*device->dev_ops->d_async_in)(device->dev_number,
					       receive_port,
					       priority,
					       filter,
					       filter_count));
}

io_return_t
ds_device_map(device, protection, offset, size, pager, unmap)
	register device_t	device;
	vm_prot_t		protection;
	vm_offset_t		offset;
	vm_size_t		size;
	ipc_port_t		*pager;	/* out */
	boolean_t		unmap;	/* ? */
{
#ifdef lint
	unmap = unmap;
#endif lint
	if (protection & ~VM_PROT_ALL)
		return (KERN_INVALID_ARGUMENT);
	/*
	 * Refuse if device is dead or not completely open.
	 */
	if (device == DEVICE_NULL)
	    return (D_NO_SUCH_DEVICE);

	if (device->state != DEV_STATE_OPEN)
	    return (D_NO_SUCH_DEVICE);

	/* XXX note that a CLOSE may proceed at any point */

	return (device_pager_setup(device, protection, offset, size,
				   (mach_port_t*)pager));
}

/*
 * Doesn't do anything (yet).
 */
void ds_no_senders(notification)
	mach_no_senders_notification_t	*notification;
{
	printf("ds_no_senders called! device_port=0x%x count=%d\n",
	       notification->not_header.msgh_remote_port,
	       notification->not_count);
}

boolean_t
ds_notify(msg)
	mach_msg_header_t *msg;
{
	switch (msg->msgh_id) {
	    case MACH_NOTIFY_NO_SENDERS:
		ds_no_senders((mach_no_senders_notification_t *) msg);
		return TRUE;

	    default:
		printf("ds_notify: strange notification %d\n", msg->msgh_id);
		return FALSE;
	}
}

queue_head_t		io_done_list;
decl_simple_lock_data(,	io_done_list_lock)

#define	splio	splsched	/* XXX must block ALL io devices */

void iodone(ior)
	register io_req_t	ior;
{
	register spl_t		s;

	/*
	 * If this ior was loaned to us, return it directly.
	 */
	if (ior->io_op & IO_LOANED) {
		(*ior->io_done)(ior);
		return;
	}
	/*
	 * If !IO_CALL, some thread is waiting for this.  Must lock
	 * structure to interlock correctly with iowait().  Else can
	 * toss on queue for io_done thread to call completion.
	 */
	s = splio();
	if ((ior->io_op & IO_CALL) == 0) {
	    ior_lock(ior);
	    ior->io_op |= IO_DONE;
	    ior->io_op &= ~IO_WANTED;
	    ior_unlock(ior);
	    thread_wakeup((event_t)ior);
	} else {
	    ior->io_op |= IO_DONE;
	    simple_lock(&io_done_list_lock);
	    enqueue_tail(&io_done_list, (queue_entry_t)ior);
	    thread_wakeup((event_t)&io_done_list);
	    simple_unlock(&io_done_list_lock);
	}
	splx(s);
}

void io_done_thread_continue()
{
	for (;;) {
	    register spl_t	s;
	    register io_req_t	ior;

	    s = splio();
	    simple_lock(&io_done_list_lock);
	    while ((ior = (io_req_t)dequeue_head(&io_done_list)) != 0) {
		simple_unlock(&io_done_list_lock);
		(void) splx(s);

		if ((*ior->io_done)(ior)) {
		    /*
		     * IO done - free io_req_elt
		     */
		    io_req_free(ior);
		}
		/* else routine has re-queued it somewhere */

		s = splio();
		simple_lock(&io_done_list_lock);
	    }

	    assert_wait(&io_done_list, FALSE);
	    simple_unlock(&io_done_list_lock);
	    (void) splx(s);
	    counter(c_io_done_thread_block++);
	    thread_block(io_done_thread_continue);
	}
}

void io_done_thread()
{
	/*
	 * Set thread privileges and highest priority.
	 */
	current_thread()->vm_privilege = TRUE;
	stack_privilege(current_thread());
	thread_set_own_priority(0);

	io_done_thread_continue();
	/*NOTREACHED*/
}

#define	DEVICE_IO_MAP_SIZE	(2 * 1024 * 1024)

extern void ds_trap_init(void);		/* forward */

void ds_init()
{
	vm_offset_t device_io_min, device_io_max;

	queue_init(&io_done_list);
	simple_lock_init(&io_done_list_lock);

	device_io_map = kmem_suballoc(kernel_map,
				      &device_io_min,
				      &device_io_max,
				      DEVICE_IO_MAP_SIZE,
				      FALSE);
	/*
	 * If the kernel receives many device_write requests, the
	 * device_io_map might run out of space.  To prevent
	 * device_write_get from failing in this case, we enable
	 * wait_for_space on the map.  This causes kmem_io_map_copyout
	 * to block until there is sufficient space.
	 * (XXX Large writes may be starved by small writes.)
	 *
	 * There is a potential deadlock problem with this solution,
	 * if a device_write from the default pager has to wait
	 * for the completion of a device_write which needs to wait
	 * for memory allocation.  Hence, once device_write_get
	 * allocates space in device_io_map, no blocking memory
	 * allocations should happen until device_write_dealloc
	 * frees the space.  (XXX A large write might starve
	 * a small write from the default pager.)
	 */
	device_io_map->wait_for_space = TRUE;

	io_inband_zone = zinit(sizeof(io_buf_ptr_inband_t),
			    1000 * sizeof(io_buf_ptr_inband_t),
			    10 * sizeof(io_buf_ptr_inband_t),
			    FALSE,
			    "io inband read buffers");

	ds_trap_init();
}

void iowait(ior)
	io_req_t	ior;
{
	spl_t		s;

	s = splio();
	ior_lock(ior);
	while ((ior->io_op & IO_DONE) == 0) {
	    assert_wait((event_t)ior, FALSE);
	    ior_unlock(ior);
	    thread_block((void (*)()) 0);
	    ior_lock(ior);
	}
	ior_unlock(ior);
	splx(s);
}
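
/*
 * Illustrative sketch (not part of the original source): the
 * synchronous side of the iodone/iowait protocol.  A kernel-internal
 * request leaves IO_CALL clear, so iodone() wakes the waiter instead
 * of queueing the ior for the io_done thread.  The routine below is
 * hypothetical; the fields and calls are the ones used in this file.
 */
#if 0
io_return_t
xx_read_sync(device, recnum)
	device_t	device;
	recnum_t	recnum;
{
	io_req_t	ior;
	io_return_t	result;

	io_req_alloc(ior, 0);
	ior->io_device = device;
	ior->io_unit = device->dev_number;
	ior->io_op = IO_READ;	/* IO_CALL clear: wait synchronously */
	ior->io_recnum = recnum;
	ior->io_error = 0;
	/* remaining io_req fields set up as in ds_device_read above */

	if ((*device->dev_ops->d_read)(device->dev_number, ior) == D_IO_QUEUED)
	    iowait(ior);	/* blocks until the driver calls iodone(ior) */

	result = ior->io_error;
	io_req_free(ior);
	return (result);
}
#endif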


/*
 * Device trap support.
 */

/*
 * Memory Management
 *
 * This currently has a single pool of 2k wired buffers
 * since we only handle writes to an ethernet device.
 * Should be more general.
 */
#define IOTRAP_REQSIZE 2048

zone_t io_trap_zone;

/*
 * Initialization.  Called from ds_init().
 */
void
ds_trap_init(void)
{
	io_trap_zone = zinit(IOTRAP_REQSIZE,
			     256 * IOTRAP_REQSIZE,
			     16 * IOTRAP_REQSIZE,
			     FALSE,
			     "wired device trap buffers");
}

/*
 * Allocate an io_req_t.
 * Currently zalloc's from io_trap_zone.
 *
 * Could have lists of different size zones.
 * Could call a device-specific routine.
 */
io_req_t
ds_trap_req_alloc(device_t device, vm_size_t data_size)
{
	return (io_req_t) zalloc(io_trap_zone);
}

/*
 * Called by iodone to release ior.
 */
boolean_t
ds_trap_write_done(io_req_t ior)
{
	register device_t	dev;

	dev = ior->io_device;

	/*
	 * Should look at reply port and maybe send a message.
	 */
	zfree(io_trap_zone, ior);

	/*
	 * Give up device reference from ds_write_trap.
	 */
	device_deallocate(dev);
	return TRUE;
}

/*
 * Like device_write except that data is in user space.
 */
io_return_t
ds_device_write_trap(device_t device,
		     dev_mode_t mode,
		     recnum_t recnum,
		     vm_offset_t data,
		     vm_size_t data_count)
{
	io_req_t ior;
	io_return_t result;

	/*
	 * Refuse if device is dead or not completely open.
	 */
	if (device == DEVICE_NULL)
	    return (D_NO_SUCH_DEVICE);

	if (device->state != DEV_STATE_OPEN)
	    return (D_NO_SUCH_DEVICE);

	/* XXX note that a CLOSE may proceed at any point */

	/*
	 * Get a buffer to hold the ioreq.
	 */
	ior = ds_trap_req_alloc(device, data_count);

	/*
	 * Package the write request for the device driver.
	 */

	ior->io_device = device;
	ior->io_unit = device->dev_number;
	ior->io_op = IO_WRITE | IO_CALL | IO_LOANED;
	ior->io_mode = mode;
	ior->io_recnum = recnum;
	ior->io_data = (io_buf_ptr_t)
	    (vm_offset_t)ior + sizeof(struct io_req);
	ior->io_count = data_count;
	ior->io_total = data_count;
	ior->io_alloc_size = 0;
	ior->io_residual = 0;
	ior->io_error = 0;
	ior->io_done = ds_trap_write_done;
	ior->io_reply_port = IP_NULL;	/* XXX */
	ior->io_reply_port_type = 0;	/* XXX */

	/*
	 * Copy the data from user space.
	 */
	if (data_count > 0)
	    copyin((char *)data, (char *)ior->io_data, data_count);

	/*
	 * The ior keeps an extra reference for the device.
	 */
	device_reference(device);

	/*
	 * And do the write.
	 */
	result = (*device->dev_ops->d_write)(device->dev_number, ior);

	/*
	 * If the IO was queued, delay reply until it is finished.
	 */
	if (result == D_IO_QUEUED)
	    return (MIG_NO_REPLY);

	/*
	 * Remove the extra reference.
	 */
	device_deallocate(device);

	zfree(io_trap_zone, ior);
	return (result);
}

io_return_t
ds_device_writev_trap(device_t device,
		      dev_mode_t mode,
		      recnum_t recnum,
		      io_buf_vec_t *iovec,
		      vm_size_t iocount)
{
	io_req_t ior;
	io_return_t result;
	io_buf_vec_t	stack_iovec[16];	/* XXX */
	vm_size_t data_count;
	int i;

	/*
	 * Refuse if device is dead or not completely open.
	 */
	if (device == DEVICE_NULL)
	    return (D_NO_SUCH_DEVICE);

	if (device->state != DEV_STATE_OPEN)
	    return (D_NO_SUCH_DEVICE);

	/* XXX note that a CLOSE may proceed at any point */

	/*
	 * Copyin user addresses.
	 */
	if (iocount > 16)
	    return KERN_INVALID_VALUE;	/* lame */
	copyin((char *)iovec,
	       (char *)stack_iovec,
	       iocount * sizeof(io_buf_vec_t));
	for (data_count = 0, i = 0; i < iocount; i++)
	    data_count += stack_iovec[i].count;

	/*
	 * Get a buffer to hold the ioreq.
	 */
	ior = ds_trap_req_alloc(device, data_count);

	/*
	 * Package the write request for the device driver.
	 */

	ior->io_device = device;
	ior->io_unit = device->dev_number;
	ior->io_op = IO_WRITE | IO_CALL | IO_LOANED;
	ior->io_mode = mode;
	ior->io_recnum = recnum;
	ior->io_data = (io_buf_ptr_t)
	    (vm_offset_t)ior + sizeof(struct io_req);
	ior->io_count = data_count;
	ior->io_total = data_count;
	ior->io_alloc_size = 0;
	ior->io_residual = 0;
	ior->io_error = 0;
	ior->io_done = ds_trap_write_done;
	ior->io_reply_port = IP_NULL;	/* XXX */
	ior->io_reply_port_type = 0;	/* XXX */

	/*
	 * Copy the data from user space.
	 */
	if (data_count > 0) {
	    vm_offset_t p;

	    p = (vm_offset_t) ior->io_data;
	    for (i = 0; i < iocount; i++) {
		copyin((char *) stack_iovec[i].data,
		       (char *) p,
		       stack_iovec[i].count);
		p += stack_iovec[i].count;
	    }
	}

	/*
	 * The ior keeps an extra reference for the device.
	 */
	device_reference(device);

	/*
	 * And do the write.
	 */
	result = (*device->dev_ops->d_write)(device->dev_number, ior);

	/*
	 * If the IO was queued, delay reply until it is finished.
	 */
	if (result == D_IO_QUEUED)
	    return (MIG_NO_REPLY);

	/*
	 * Remove the extra reference.
	 */
	device_deallocate(device);

	zfree(io_trap_zone, ior);
	return (result);
}