1 /*-
2 * Coda: an Experimental Distributed File System
3 * Release 3.1
4 *
5 * Copyright (c) 1987-1998 Carnegie Mellon University
6 * All Rights Reserved
7 *
8 * Permission to use, copy, modify and distribute this software and its
9 * documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation, and
13 * that credit is given to Carnegie Mellon University in all documents
14 * and publicity pertaining to direct or indirect use of this code or its
15 * derivatives.
16 *
17 * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
18 * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
19 * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
20 * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
21 * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
22 * ANY DERIVATIVE WORK.
23 *
24 * Carnegie Mellon encourages users of this software to return any
25 * improvements or extensions that they make, and to grant Carnegie
26 * Mellon the rights to redistribute these changes without encumbrance.
27 *
28 * @(#) src/sys/coda/coda_psdev.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
29 */
30 /*-
31 * Mach Operating System
32 * Copyright (c) 1989 Carnegie-Mellon University
33 * All rights reserved. The CMU software License Agreement specifies
34 * the terms and conditions for use and redistribution.
35 */
36
/*
 * This code was written for the Coda filesystem at Carnegie Mellon
 * University.  Contributors include David Steere, James Kistler, and
 * M. Satyanarayanan. */
41
/*
 * These routines define the pseudo device for communication between Coda's
 * Venus and Minicache in Mach 2.6.  They used to be in cfs_subr.c, but I
 * moved them to make it easier to port the Minicache without porting coda.
 * -- DCS 10/12/94
 */
48
49 /*
50 * These routines are the device entry points for Venus.
51 */
52
53 #include <sys/cdefs.h>
54 __FBSDID("$FreeBSD$");
55
56 #include <sys/param.h>
57 #include <sys/systm.h>
58 #include <sys/capability.h>
59 #include <sys/conf.h>
60 #include <sys/ioccom.h>
61 #include <sys/kernel.h>
62 #include <sys/lock.h>
63 #include <sys/malloc.h>
64 #include <sys/file.h> /* must come after sys/malloc.h */
65 #include <sys/mount.h>
66 #include <sys/mutex.h>
67 #include <sys/poll.h>
68 #include <sys/proc.h>
69 #include <sys/filedesc.h>
70
71 #include <fs/coda/coda.h>
72 #include <fs/coda/cnode.h>
73 #include <fs/coda/coda_io.h>
74 #include <fs/coda/coda_psdev.h>
75
76 /*
77 * Variables to determine how Coda sleeps and whether or not it is
78 * interruptible when it does sleep waiting for Venus.
79 */
80 /* #define CTL_C */
81
82 #ifdef CTL_C
83 #include <sys/signalvar.h>
84 #endif
85
86 int coda_psdev_print_entry = 0;
87 static int outstanding_upcalls = 0;
88 int coda_call_sleep = PZERO - 1;
89 #ifdef CTL_C
90 int coda_pcatch = PCATCH;
91 #else
92 #endif
93
/*
 * Trace hook used at the top of the device entry points: logs the name of
 * the enclosing function when coda_psdev_print_entry is non-zero.
 */
#define	ENTRY								\
	do {								\
		if (coda_psdev_print_entry != 0)			\
			myprintf(("Entered %s\n", __func__));		\
	} while (0)
98
/*
 * One message exchanged with Venus.  coda_call() queues it on vc_requests,
 * vc_read() moves it to vc_replies once Venus has read it, and vc_write()
 * stores the matching reply into vm_data.
 */
struct vmsg {
	TAILQ_ENTRY(vmsg) vm_chain;	/* Linkage on vc_requests/vc_replies */
	caddr_t vm_data;	/* Request buffer; the reply is copied over it */
	u_short vm_flags;	/* VM_READ / VM_WRITE / VM_INTR */
	u_short vm_inSize; /* Size is at most 5000 bytes */
	u_short vm_outSize;	/* Reply size limit, then reply size */
	u_short vm_opcode; /* Copied from data to save ptr deref */
	int vm_unique;		/* Sequence number used to match the reply */
	caddr_t vm_sleep; /* Not used by Mach. */
				/* (its address is the sleep/wakeup channel) */
};
109
/*
 * Bits stored in vmsg.vm_flags.
 */
#define	VM_READ		0x01	/* Set by vc_read(): Venus has read the request. */
#define	VM_WRITE	0x02	/* Set by vc_write(): the reply has been stored. */
#define	VM_INTR		0x04	/* Unused. */
113
114 int
115 vc_open(struct cdev *dev, int flag, int mode, struct thread *td)
116 {
117 struct vcomm *vcp;
118 struct coda_mntinfo *mnt;
119
120 ENTRY;
121 mnt = dev2coda_mntinfo(dev);
122 KASSERT(mnt, ("Coda: tried to open uninitialized cfs device"));
123 vcp = &mnt->mi_vcomm;
124 if (VC_OPEN(vcp))
125 return (EBUSY);
126 bzero(&(vcp->vc_selproc), sizeof (struct selinfo));
127 TAILQ_INIT(&vcp->vc_requests);
128 TAILQ_INIT(&vcp->vc_replies);
129 MARK_VC_OPEN(vcp);
130 mnt->mi_vfsp = NULL;
131 mnt->mi_rootvp = NULL;
132 return (0);
133 }
134
135 int
136 vc_close(struct cdev *dev, int flag, int mode, struct thread *td)
137 {
138 struct vcomm *vcp;
139 struct vmsg *vmp, *nvmp = NULL;
140 struct coda_mntinfo *mi;
141 int err;
142
143 ENTRY;
144 mi = dev2coda_mntinfo(dev);
145 KASSERT(mi, ("Coda: closing unknown cfs device"));
146 vcp = &mi->mi_vcomm;
147 KASSERT(VC_OPEN(vcp), ("Coda: closing unopened cfs device"));
148
149 /*
150 * Prevent future operations on this vfs from succeeding by
151 * auto-unmounting any vfs mounted via this device. This frees user
152 * or sysadm from having to remember where all mount points are
153 * located. Put this before WAKEUPs to avoid queuing new messages
154 * between the WAKEUP and the unmount (which can happen if we're
155 * unlucky).
156 */
157 if (mi->mi_rootvp == NULL) {
158 /*
159 * Just a simple open/close with no mount.
160 */
161 MARK_VC_CLOSED(vcp);
162 return (0);
163 }
164
165 /*
166 * Let unmount know this is for real.
167 */
168 VTOC(mi->mi_rootvp)->c_flags |= C_UNMOUNTING;
169 coda_unmounting(mi->mi_vfsp);
170
171 /*
172 * Wakeup clients so they can return.
173 */
174 outstanding_upcalls = 0;
175 TAILQ_FOREACH_SAFE(vmp, &vcp->vc_requests, vm_chain, nvmp) {
176 /*
177 * Free signal request messages and don't wakeup cause no one
178 * is waiting.
179 */
180 if (vmp->vm_opcode == CODA_SIGNAL) {
181 CODA_FREE((caddr_t)vmp->vm_data,
182 (u_int)VC_IN_NO_DATA);
183 CODA_FREE((caddr_t)vmp, (u_int)sizeof(struct vmsg));
184 continue;
185 }
186 outstanding_upcalls++;
187 wakeup(&vmp->vm_sleep);
188 }
189 TAILQ_FOREACH(vmp, &vcp->vc_replies, vm_chain) {
190 outstanding_upcalls++;
191 wakeup(&vmp->vm_sleep);
192 }
193 MARK_VC_CLOSED(vcp);
194 if (outstanding_upcalls) {
195 #ifdef CODA_VERBOSE
196 printf("presleep: outstanding_upcalls = %d\n",
197 outstanding_upcalls);
198 #endif
199 (void) tsleep(&outstanding_upcalls, coda_call_sleep,
200 "coda_umount", 0);
201 #ifdef CODA_VERBOSE
202 printf("postsleep: outstanding_upcalls = %d\n",
203 outstanding_upcalls);
204 #endif
205 }
206 err = dounmount(mi->mi_vfsp, flag, td);
207 if (err)
208 myprintf(("Error %d unmounting vfs in vcclose(%s)\n", err,
209 devtoname(dev)));
210 return (0);
211 }
212
213 int
214 vc_read(struct cdev *dev, struct uio *uiop, int flag)
215 {
216 struct vcomm *vcp;
217 struct vmsg *vmp;
218 int error = 0;
219
220 ENTRY;
221 vcp = &dev2coda_mntinfo(dev)->mi_vcomm;
222
223 /*
224 * Get message at head of request queue.
225 */
226 vmp = TAILQ_FIRST(&vcp->vc_requests);
227 if (vmp == NULL)
228 return (0); /* Nothing to read */
229
230 /*
231 * Move the input args into userspace.
232 *
233 * XXXRW: This is not safe in the presence of >1 reader, as vmp is
234 * still on the head of the list.
235 */
236 uiop->uio_rw = UIO_READ;
237 error = uiomove(vmp->vm_data, vmp->vm_inSize, uiop);
238 if (error) {
239 myprintf(("vcread: error (%d) on uiomove\n", error));
240 error = EINVAL;
241 }
242 TAILQ_REMOVE(&vcp->vc_requests, vmp, vm_chain);
243
244 /*
245 * If request was a signal, free up the message and don't enqueue it
246 * in the reply queue.
247 */
248 if (vmp->vm_opcode == CODA_SIGNAL) {
249 if (codadebug)
250 myprintf(("vcread: signal msg (%d, %d)\n",
251 vmp->vm_opcode, vmp->vm_unique));
252 CODA_FREE((caddr_t)vmp->vm_data, (u_int)VC_IN_NO_DATA);
253 CODA_FREE((caddr_t)vmp, (u_int)sizeof(struct vmsg));
254 return (error);
255 }
256 vmp->vm_flags |= VM_READ;
257 TAILQ_INSERT_TAIL(&vcp->vc_replies, vmp, vm_chain);
258 return (error);
259 }
260
261 int
262 vc_write(struct cdev *dev, struct uio *uiop, int flag)
263 {
264 struct vcomm *vcp;
265 struct vmsg *vmp;
266 struct coda_out_hdr *out;
267 u_long seq;
268 u_long opcode;
269 int buf[2];
270 int error = 0;
271
272 ENTRY;
273 vcp = &dev2coda_mntinfo(dev)->mi_vcomm;
274
275 /*
276 * Peek at the opcode, unique without transfering the data.
277 */
278 uiop->uio_rw = UIO_WRITE;
279 error = uiomove((caddr_t)buf, sizeof(int) * 2, uiop);
280 if (error) {
281 myprintf(("vcwrite: error (%d) on uiomove\n", error));
282 return (EINVAL);
283 }
284 opcode = buf[0];
285 seq = buf[1];
286 if (codadebug)
287 myprintf(("vcwrite got a call for %ld.%ld\n", opcode, seq));
288 if (DOWNCALL(opcode)) {
289 union outputArgs pbuf;
290
291 /*
292 * Get the rest of the data.
293 */
294 uiop->uio_rw = UIO_WRITE;
295 error = uiomove((caddr_t)&pbuf.coda_purgeuser.oh.result,
296 sizeof(pbuf) - (sizeof(int)*2), uiop);
297 if (error) {
298 myprintf(("vcwrite: error (%d) on uiomove (Op %ld "
299 "seq %ld)\n", error, opcode, seq));
300 return (EINVAL);
301 }
302 return (handleDownCall(dev2coda_mntinfo(dev), opcode, &pbuf));
303 }
304
305 /*
306 * Look for the message on the (waiting for) reply queue.
307 */
308 TAILQ_FOREACH(vmp, &vcp->vc_replies, vm_chain) {
309 if (vmp->vm_unique == seq)
310 break;
311 }
312 if (vmp == NULL) {
313 if (codadebug)
314 myprintf(("vcwrite: msg (%ld, %ld) not found\n",
315 opcode, seq));
316 return (ESRCH);
317 }
318
319 /*
320 * Remove the message from the reply queue.
321 */
322 TAILQ_REMOVE(&vcp->vc_replies, vmp, vm_chain);
323
324 /*
325 * Move data into response buffer.
326 */
327 out = (struct coda_out_hdr *)vmp->vm_data;
328
329 /*
330 * Don't need to copy opcode and uniquifier.
331 *
332 * Get the rest of the data.
333 */
334 if (vmp->vm_outSize < uiop->uio_resid) {
335 myprintf(("vcwrite: more data than asked for (%d < %zd)\n",
336 vmp->vm_outSize, uiop->uio_resid));
337
338 /*
339 * Notify caller of the error.
340 */
341 wakeup(&vmp->vm_sleep);
342 return (EINVAL);
343 }
344
345 /*
346 * Save the value.
347 */
348 buf[0] = uiop->uio_resid;
349 uiop->uio_rw = UIO_WRITE;
350 error = uiomove((caddr_t) &out->result, vmp->vm_outSize -
351 (sizeof(int) * 2), uiop);
352 if (error) {
353 myprintf(("vcwrite: error (%d) on uiomove (op %ld seq %ld)\n",
354 error, opcode, seq));
355 return (EINVAL);
356 }
357
358 /*
359 * I don't think these are used, but just in case.
360 *
361 * XXX - aren't these two already correct? -bnoble
362 */
363 out->opcode = opcode;
364 out->unique = seq;
365 vmp->vm_outSize = buf[0]; /* Amount of data transferred? */
366 vmp->vm_flags |= VM_WRITE;
367 error = 0;
368 if (opcode == CODA_OPEN_BY_FD) {
369 struct coda_open_by_fd_out *tmp =
370 (struct coda_open_by_fd_out *)out;
371 struct file *fp;
372 struct vnode *vp = NULL;
373
374 if (tmp->oh.result == 0) {
375 error = getvnode(uiop->uio_td->td_proc->p_fd, CAP_WRITE,
376 tmp->fd, &fp);
377 if (!error) {
378 /*
379 * XXX: Since the whole driver runs with
380 * Giant, don't actually need to acquire it
381 * explicitly here yet.
382 */
383 mtx_lock(&Giant);
384 vp = fp->f_vnode;
385 VREF(vp);
386 fdrop(fp, uiop->uio_td);
387 mtx_unlock(&Giant);
388 }
389 }
390 tmp->vp = vp;
391 }
392 wakeup(&vmp->vm_sleep);
393 return (error);
394 }
395
396 int
397 vc_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
398 struct thread *t)
399 {
400
401 ENTRY;
402 switch(cmd) {
403 case CODARESIZE:
404 return (ENODEV);
405
406 case CODASTATS:
407 return (ENODEV);
408
409 case CODAPRINT:
410 return (ENODEV);
411
412 case CIOC_KERNEL_VERSION:
413 switch (*(u_int *)addr) {
414 case 0:
415 *(u_int *)addr = coda_kernel_version;
416 return (0);
417
418 case 1:
419 case 2:
420 if (coda_kernel_version != *(u_int *)addr)
421 return (ENOENT);
422 else
423 return (0);
424
425 default:
426 return (ENOENT);
427 }
428
429 default:
430 return (EINVAL);
431 }
432 }
433
434 int
435 vc_poll(struct cdev *dev, int events, struct thread *td)
436 {
437 struct vcomm *vcp;
438 int event_msk = 0;
439
440 ENTRY;
441 vcp = &dev2coda_mntinfo(dev)->mi_vcomm;
442 event_msk = events & (POLLIN|POLLRDNORM);
443 if (!event_msk)
444 return (0);
445 if (!TAILQ_EMPTY(&vcp->vc_requests))
446 return (events & (POLLIN|POLLRDNORM));
447 selrecord(td, &(vcp->vc_selproc));
448 return (0);
449 }
450
/*
 * Statistics.  coda_call() bumps ncalls and the per-opcode reqs[] counter
 * for every request it is handed.
 */
struct coda_clstat coda_clstat;
455
456 /*
457 * Key question: whether to sleep interuptably or uninteruptably when waiting
458 * for Venus. The former seems better (cause you can ^C a job), but then
459 * GNU-EMACS completion breaks. Use tsleep with no timeout, and no longjmp
460 * happens. But, when sleeping "uninterruptibly", we don't get told if it
461 * returns abnormally (e.g. kill -9).
462 */
463 int
464 coda_call(struct coda_mntinfo *mntinfo, int inSize, int *outSize,
465 caddr_t buffer)
466 {
467 struct vcomm *vcp;
468 struct vmsg *vmp;
469 int error;
470 #ifdef CTL_C
471 struct thread *td = curthread;
472 struct proc *p = td->td_proc;
473 sigset_t psig_omask;
474 sigset_t tempset;
475 int i;
476 #endif
477
478 /*
479 * Unlikely, but could be a race condition with a dying warden.
480 */
481 if (mntinfo == NULL)
482 return ENODEV;
483 vcp = &(mntinfo->mi_vcomm);
484 coda_clstat.ncalls++;
485 coda_clstat.reqs[((struct coda_in_hdr *)buffer)->opcode]++;
486 if (!VC_OPEN(vcp))
487 return (ENODEV);
488 CODA_ALLOC(vmp,struct vmsg *,sizeof(struct vmsg));
489
490 /*
491 * Format the request message.
492 */
493 vmp->vm_data = buffer;
494 vmp->vm_flags = 0;
495 vmp->vm_inSize = inSize;
496 vmp->vm_outSize
497 = *outSize ? *outSize : inSize; /* |buffer| >= inSize */
498 vmp->vm_opcode = ((struct coda_in_hdr *)buffer)->opcode;
499 vmp->vm_unique = ++vcp->vc_seq;
500 if (codadebug)
501 myprintf(("Doing a call for %d.%d\n", vmp->vm_opcode,
502 vmp->vm_unique));
503
504 /*
505 * Fill in the common input args.
506 */
507 ((struct coda_in_hdr *)buffer)->unique = vmp->vm_unique;
508
509 /*
510 * Append msg to request queue and poke Venus.
511 */
512 TAILQ_INSERT_TAIL(&vcp->vc_requests, vmp, vm_chain);
513 selwakeuppri(&(vcp->vc_selproc), coda_call_sleep);
514
515 /*
516 * We can be interrupted while we wait for Venus to process our
517 * request. If the interrupt occurs before Venus has read the
518 * request, we dequeue and return. If it occurs after the read but
519 * before the reply, we dequeue, send a signal message, and return.
520 * If it occurs after the reply we ignore it. In no case do we want
521 * to restart the syscall. If it was interrupted by a venus shutdown
522 * (vcclose), return ENODEV.
523 *
524 * Ignore return, we have to check anyway.
525 */
526 #ifdef CTL_C
527 /*
528 * This is work in progress. Setting coda_pcatch lets tsleep
529 * reawaken on a ^c or ^z. The problem is that emacs sets certain
530 * interrupts as SA_RESTART. This means that we should exit sleep
531 * handle the "signal" and then go to sleep again. Mostly this is
532 * done by letting the syscall complete and be restarted. We are not
533 * idempotent and can not do this. A better solution is necessary.
534 */
535 i = 0;
536 PROC_LOCK(p);
537 psig_omask = td->td_sigmask;
538 do {
539 error = msleep(&vmp->vm_sleep, &p->p_mtx,
540 (coda_call_sleep|coda_pcatch), "coda_call", hz*2);
541 if (error == 0)
542 break;
543 else if (error == EWOULDBLOCK) {
544 #ifdef CODA_VERBOSE
545 printf("coda_call: tsleep TIMEOUT %d sec\n", 2+2*i);
546 #endif
547 }
548 else {
549 SIGEMPTYSET(tempset);
550 SIGADDSET(tempset, SIGIO);
551 if (SIGSETEQ(td->td_siglist, tempset)) {
552 SIGADDSET(td->td_sigmask, SIGIO);
553 #ifdef CODA_VERBOSE
554 printf("coda_call: tsleep returns %d SIGIO, "
555 "cnt %d\n", error, i);
556 #endif
557 } else {
558 SIGDELSET(tempset, SIGIO);
559 SIGADDSET(tempset, SIGALRM);
560 if (SIGSETEQ(td->td_siglist, tempset)) {
561 SIGADDSET(td->td_sigmask, SIGALRM);
562 #ifdef CODA_VERBOSE
563 printf("coda_call: tsleep returns "
564 "%d SIGALRM, cnt %d\n", error, i);
565 #endif
566 } else {
567 #ifdef CODA_VERBOSE
568 printf("coda_call: tsleep returns "
569 "%d, cnt %d\n", error, i);
570 #endif
571
572 #ifdef notyet
573 tempset = td->td_siglist;
574 SIGSETNAND(tempset, td->td_sigmask);
575 printf("coda_call: siglist = %p, "
576 "sigmask = %p, mask %p\n",
577 td->td_siglist, td->td_sigmask,
578 tempset);
579 break;
580 SIGSETOR(td->td_sigmask, td->td_siglist);
581 tempset = td->td_siglist;
582 SIGSETNAND(tempset, td->td_sigmask);
583 printf("coda_call: new mask, "
584 "siglist = %p, sigmask = %p, "
585 "mask %p\n", td->td_siglist,
586 td->td_sigmask, tempset);
587 #endif
588 }
589 }
590 }
591 } while (error && i++ < 128 && VC_OPEN(vcp));
592 td->td_sigmask = psig_omask;
593 signotify(td);
594 PROC_UNLOCK(p);
595 #else
596 (void)tsleep(&vmp->vm_sleep, coda_call_sleep, "coda_call", 0);
597 #endif
598 if (VC_OPEN(vcp)) {
599 /*
600 * Venus is still alive.
601 *
602 * Op went through, interrupt or not...
603 */
604 if (vmp->vm_flags & VM_WRITE) {
605 error = 0;
606 *outSize = vmp->vm_outSize;
607 } else if (!(vmp->vm_flags & VM_READ)) {
608 /* Interrupted before venus read it. */
609 #ifdef CODA_VERBOSE
610 if (1)
611 #else
612 if (codadebug)
613 #endif
614 myprintf(("interrupted before read: op = "
615 "%d.%d, flags = %x\n", vmp->vm_opcode,
616 vmp->vm_unique, vmp->vm_flags));
617 TAILQ_REMOVE(&vcp->vc_requests, vmp, vm_chain);
618 error = EINTR;
619 } else {
620 /*
621 * (!(vmp->vm_flags & VM_WRITE)) means interrupted
622 * after upcall started.
623 *
624 * Interrupted after start of upcall, send venus a
625 * signal.
626 */
627 struct coda_in_hdr *dog;
628 struct vmsg *svmp;
629
630 #ifdef CODA_VERBOSE
631 if (1)
632 #else
633 if (codadebug)
634 #endif
635 myprintf(("Sending Venus a signal: op = "
636 "%d.%d, flags = %x\n", vmp->vm_opcode,
637 vmp->vm_unique, vmp->vm_flags));
638 TAILQ_REMOVE(&vcp->vc_requests, vmp, vm_chain);
639 error = EINTR;
640 CODA_ALLOC(svmp, struct vmsg *, sizeof(struct vmsg));
641 CODA_ALLOC((svmp->vm_data), char *,
642 sizeof(struct coda_in_hdr));
643 dog = (struct coda_in_hdr *)svmp->vm_data;
644 svmp->vm_flags = 0;
645 dog->opcode = svmp->vm_opcode = CODA_SIGNAL;
646 dog->unique = svmp->vm_unique = vmp->vm_unique;
647 svmp->vm_inSize = sizeof (struct coda_in_hdr);
648 /*??? rvb */ svmp->vm_outSize = sizeof (struct coda_in_hdr);
649 if (codadebug)
650 myprintf(("coda_call: enqueing signal msg "
651 "(%d, %d)\n", svmp->vm_opcode,
652 svmp->vm_unique));
653
654 /*
655 * Insert at head of queue!
656 *
657 * XXXRW: Actually, the tail.
658 */
659 TAILQ_INSERT_TAIL(&vcp->vc_requests, svmp, vm_chain);
660 selwakeuppri(&(vcp->vc_selproc), coda_call_sleep);
661 }
662 } else {
663 /* If venus died (!VC_OPEN(vcp)) */
664 if (codadebug)
665 myprintf(("vcclose woke op %d.%d flags %d\n",
666 vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags));
667 error = ENODEV;
668 }
669 CODA_FREE(vmp, sizeof(struct vmsg));
670 if (outstanding_upcalls > 0 && (--outstanding_upcalls == 0))
671 wakeup(&outstanding_upcalls);
672 if (!error)
673 error = ((struct coda_out_hdr *)buffer)->result;
674 return (error);
675 }
Cache object: 9d3c7d244cdcb0e25ae245babbbd5315
|