/*-
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * Socket operations for use by nfs
 */

#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/vnode.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfsserver/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfsserver/nfsm_subs.h>

#define	TRUE	1
#define	FALSE	0

static int nfs_realign_test;
static int nfs_realign_count;

SYSCTL_DECL(_vfs_nfsrv);

SYSCTL_INT(_vfs_nfsrv, OID_AUTO, realign_test, CTLFLAG_RW,
    &nfs_realign_test, 0, "Number of realign tests done");
SYSCTL_INT(_vfs_nfsrv, OID_AUTO, realign_count, CTLFLAG_RW,
    &nfs_realign_count, 0, "Number of mbuf realignments done");

/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point.  The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, "Congestion Avoidance and Control", in Proceedings of
 * SIGCOMM '88, ACM, August 1988,
 * describes for TCP.  The cwnd size is chopped in half on a retransmit
 * timeout and incremented by 1/cwnd when each rpc reply is received and a
 * full cwnd of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arithmetic.)
 * Variants of "slow start" were tried but found to be too much of a
 * performance hit (average rtt three times larger), I suspect due to the
 * large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
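
/*
 * Illustrative sketch only (not code from this file): the scaled-integer
 * cwnd arithmetic described above.  The struct and function names here are
 * hypothetical; only the update rules and the NFS_CWNDSCALE/NFS_MAXCWND
 * constants come from this file.
 */
#if 0
struct cwnd_state {
	int	cwnd;	/* congestion window, scaled by NFS_CWNDSCALE */
	int	sent;	/* outstanding rpcs, scaled by NFS_CWNDSCALE */
};

/* A retransmit timeout chops the window in half. */
static void
cwnd_on_timeout(struct cwnd_state *cw)
{

	cw->cwnd /= 2;
	if (cw->cwnd < NFS_CWNDSCALE)
		cw->cwnd = NFS_CWNDSCALE;
}

/*
 * Each reply received while a full window is outstanding grows the
 * window by roughly 1/cwnd (additive increase, in scaled units).
 */
static void
cwnd_on_reply(struct cwnd_state *cw)
{

	if (cw->sent >= cw->cwnd && cw->cwnd < NFS_MAXCWND)
		cw->cwnd += (NFS_CWNDSCALE * NFS_CWNDSCALE) / cw->cwnd;
}
#endif
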
struct callout	nfsrv_callout;

static void	nfs_realign(struct mbuf **pm, int hsiz);	/* XXX SHARED */
static int	nfsrv_getstream(struct nfssvc_sock *, int);

int32_t (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
				    struct nfssvc_sock *slp,
				    struct thread *td,
				    struct mbuf **mreqp) = {
	nfsrv_null,
	nfsrv_getattr,
	nfsrv_setattr,
	nfsrv_lookup,
	nfsrv3_access,
	nfsrv_readlink,
	nfsrv_read,
	nfsrv_write,
	nfsrv_create,
	nfsrv_mkdir,
	nfsrv_symlink,
	nfsrv_mknod,
	nfsrv_remove,
	nfsrv_rmdir,
	nfsrv_rename,
	nfsrv_link,
	nfsrv_readdir,
	nfsrv_readdirplus,
	nfsrv_statfs,
	nfsrv_fsinfo,
	nfsrv_pathconf,
	nfsrv_commit,
	nfsrv_noop
};
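
/*
 * Illustrative sketch (an assumption, not code from this file): the nfsd
 * service loop dispatches through the table above roughly as
 *
 *	error = (*(nfsrv3_procs[nd->nd_procnum]))(nd, slp, td, &mreq);
 *
 * once nfs_getreq() below has validated nd_procnum and mapped any v2
 * procedure number to its v3 equivalent.
 */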


/*
 * Generate the rpc reply header.
 * The siz arg is used to decide whether adding a cluster is worthwhile.
 */
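/*
 * For reference (summarized from RFC 1057, not from this file): the reply
 * built below is the standard ONC RPC reply header,
 *
 *	xid, REPLY,
 *	MSG_ACCEPTED:  verf flavor, verf length, accept status
 *	    (SUCCESS, PROG_UNAVAIL, PROG_MISMATCH + low/high,
 *	    PROC_UNAVAIL, GARBAGE_ARGS)
 *	MSG_DENIED:    RPC_MISMATCH + low/high, or AUTH_ERROR + auth status
 *
 * which is why six 32-bit words are reserved up front.
 */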
struct mbuf *
nfs_rephead(int siz, struct nfsrv_descript *nd, int err,
    struct mbuf **mbp, caddr_t *bposp)
{
	u_int32_t *tl;
	struct mbuf *mreq;
	caddr_t bpos;
	struct mbuf *mb;

	/* XXXRW: not 100% clear the lock is needed here. */
	NFSD_LOCK_ASSERT();

	nd->nd_repstat = err;
	if (err && (nd->nd_flag & ND_NFSV3) == 0)	/* XXX recheck */
		siz = 0;
	NFSD_UNLOCK();
	MGETHDR(mreq, M_TRYWAIT, MT_DATA);
	mb = mreq;
	/*
	 * If this is a big reply, use a cluster; otherwise try to leave
	 * leading space for the lower level headers.
	 */
	mreq->m_len = 6 * NFSX_UNSIGNED;
	siz += RPC_REPLYSIZ;
	if ((max_hdr + siz) >= MINCLSIZE) {
		MCLGET(mreq, M_TRYWAIT);
	} else
		mreq->m_data += min(max_hdr, M_TRAILINGSPACE(mreq));
	NFSD_LOCK();
	tl = mtod(mreq, u_int32_t *);
	bpos = ((caddr_t)tl) + mreq->m_len;
	*tl++ = txdr_unsigned(nd->nd_retxid);
	*tl++ = nfsrv_rpc_reply;
	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
		*tl++ = nfsrv_rpc_msgdenied;
		if (err & NFSERR_AUTHERR) {
			*tl++ = nfsrv_rpc_autherr;
			*tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
			mreq->m_len -= NFSX_UNSIGNED;
			bpos -= NFSX_UNSIGNED;
		} else {
			*tl++ = nfsrv_rpc_mismatch;
			*tl++ = txdr_unsigned(RPC_VER2);
			*tl = txdr_unsigned(RPC_VER2);
		}
	} else {
		*tl++ = nfsrv_rpc_msgaccepted;
		/*
		 * Send an RPCAUTH_NULL verifier - no Kerberos.
		 */
		*tl++ = 0;
		*tl++ = 0;
		switch (err) {
		case EPROGUNAVAIL:
			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			*tl = txdr_unsigned(RPC_PROGMISMATCH);
			tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(2);	/* lowest version supported */
			*tl = txdr_unsigned(3);		/* highest version supported */
			break;
		case EPROCUNAVAIL:
			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			*tl = txdr_unsigned(RPC_GARBAGE);
			break;
		default:
			*tl = 0;
			if (err != NFSERR_RETVOID) {
				tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
				if (err)
					*tl = txdr_unsigned(nfsrv_errmap(nd,
					    err));
				else
					*tl = 0;
			}
			break;
		}
	}
	*mbp = mb;
	*bposp = bpos;
	if (err != 0 && err != NFSERR_RETVOID)
		nfsrvstats.srvrpc_errs++;
	return (mreq);
}


/*
 * nfs_realign:
 *
 * Check for badly aligned mbuf data and realign by copying the unaligned
 * portion of the data into a new mbuf chain and freeing the portions
 * of the old chain that were replaced.
 *
 * We cannot simply realign the data within the existing mbuf chain
 * because the underlying buffers may contain other rpc commands and
 * we cannot afford to overwrite them.  Alignment matters because the
 * nfsm dissect macros cast mbuf data to u_int32_t *, which requires
 * 4-byte alignment on strict-alignment machines.
 *
 * We would prefer to avoid this situation entirely.  The situation does
 * not occur with NFS/UDP and is supposed to only occasionally occur
 * with TCP.  Use vfs.nfsrv.realign_count and vfs.nfsrv.realign_test to
 * check this.
 */
static void
nfs_realign(struct mbuf **pm, int hsiz)	/* XXX COMMON */
{
	struct mbuf *m;
	struct mbuf *n = NULL;
	int off = 0;

	/* XXXRW: may not need lock? */
	NFSD_LOCK_ASSERT();

	++nfs_realign_test;
	while ((m = *pm) != NULL) {
		if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
			NFSD_UNLOCK();
			MGET(n, M_TRYWAIT, MT_DATA);
			if (m->m_len >= MINCLSIZE) {
				MCLGET(n, M_TRYWAIT);
			}
			NFSD_LOCK();
			n->m_len = 0;
			break;
		}
		pm = &m->m_next;
	}

	/*
	 * If n is non-NULL, loop on m copying data, then replace the
	 * portion of the chain that had to be realigned.
	 */
	if (n != NULL) {
		++nfs_realign_count;
		while (m) {
			m_copyback(n, off, m->m_len, mtod(m, caddr_t));
			off += m->m_len;
			m = m->m_next;
		}
		m_freem(*pm);
		*pm = n;
	}
}


/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred struct.
 */
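/*
 * For reference (summarized from RFC 1057, not from this file): the call
 * header dissected below is the standard ONC RPC call,
 *
 *	xid, CALL, rpcvers (2), prog, vers, proc,
 *	cred flavor, cred length, cred body,
 *	verf flavor, verf length, verf body
 *
 * which is why ten 32-bit words are dissected when the xid and message
 * type are still present (has_header) and eight when they are not.
 */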
int
nfs_getreq(struct nfsrv_descript *nd, struct nfsd *nfsd, int has_header)
{
	int len, i;
	u_int32_t *tl;
	caddr_t dpos;
	u_int32_t nfsvers, auth_type;
	int error = 0;
	struct mbuf *mrep, *md;

	NFSD_LOCK_ASSERT();

	mrep = nd->nd_mrep;
	md = nd->nd_md;
	dpos = nd->nd_dpos;
	if (has_header) {
		tl = nfsm_dissect_nonblock(u_int32_t *, 10 * NFSX_UNSIGNED);
		nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++);
		if (*tl++ != nfsrv_rpc_call) {
			m_freem(mrep);
			return (EBADRPC);
		}
	} else
		tl = nfsm_dissect_nonblock(u_int32_t *, 8 * NFSX_UNSIGNED);
	nd->nd_repstat = 0;
	nd->nd_flag = 0;
	if (*tl++ != nfsrv_rpc_vers) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (*tl != nfsrv_nfs_prog) {
		nd->nd_repstat = EPROGUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	tl++;
	nfsvers = fxdr_unsigned(u_int32_t, *tl++);
	if (nfsvers < NFS_VER2 || nfsvers > NFS_VER3) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++);
	if (nd->nd_procnum == NFSPROC_NULL)
		return (0);
	if (nfsvers == NFS_VER3) {
		nd->nd_flag = ND_NFSV3;
		if (nd->nd_procnum >= NFS_NPROCS) {
			nd->nd_repstat = EPROCUNAVAIL;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	} else {
		if (nd->nd_procnum > NFSV2PROC_STATFS) {
			nd->nd_repstat = EPROCUNAVAIL;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
		/* Map the v2 procedure numbers into v3 ones */
		nd->nd_procnum = nfsrv_nfsv3_procid[nd->nd_procnum];
	}
	auth_type = *tl++;
	len = fxdr_unsigned(int, *tl++);
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		m_freem(mrep);
		return (EBADRPC);
	}

	/*
	 * Handle auth_unix.
	 */
	if (auth_type == nfsrv_rpc_auth_unix) {
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > NFS_MAXNAMLEN) {
			m_freem(mrep);
			return (EBADRPC);
		}
		nfsm_adv(nfsm_rndup(len));
		tl = nfsm_dissect_nonblock(u_int32_t *, 3 * NFSX_UNSIGNED);
		nd->nd_cr->cr_uid = nd->nd_cr->cr_ruid =
		    nd->nd_cr->cr_svuid = fxdr_unsigned(uid_t, *tl++);
		nd->nd_cr->cr_groups[0] = nd->nd_cr->cr_rgid =
		    nd->nd_cr->cr_svgid = fxdr_unsigned(gid_t, *tl++);
#ifdef MAC
		mac_associate_nfsd_label(nd->nd_cr);
#endif
		len = fxdr_unsigned(int, *tl);
		if (len < 0 || len > RPCAUTH_UNIXGIDS) {
			m_freem(mrep);
			return (EBADRPC);
		}
		tl = nfsm_dissect_nonblock(u_int32_t *,
		    (len + 2) * NFSX_UNSIGNED);
		for (i = 1; i <= len; i++)
			if (i < NGROUPS)
				nd->nd_cr->cr_groups[i] =
				    fxdr_unsigned(gid_t, *tl++);
			else
				tl++;
		nd->nd_cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
		if (nd->nd_cr->cr_ngroups > 1)
			nfsrvw_sort(nd->nd_cr->cr_groups, nd->nd_cr->cr_ngroups);
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > RPCAUTH_MAXSIZ) {
			m_freem(mrep);
			return (EBADRPC);
		}
		if (len > 0)
			nfsm_adv(nfsm_rndup(len));
	} else {
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}

	nd->nd_md = md;
	nd->nd_dpos = dpos;
	return (0);
nfsmout:
	return (error);
}

/*
 * Socket upcall routine for the nfsd sockets.
 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
 * Essentially do as much as possible non-blocking; otherwise punt and
 * it will be called with M_TRYWAIT from an nfsd.
 */
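/*
 * Illustrative sketch (an assumption, not code from this file): the socket
 * layer is pointed at this routine when a server socket is added, roughly
 * in the style of the nfssvc_addsock() path:
 *
 *	so->so_upcallarg = (caddr_t)slp;
 *	so->so_upcall = nfsrv_rcv;
 *	so->so_rcv.sb_flags |= SB_UPCALL;
 */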
void
nfsrv_rcv(struct socket *so, void *arg, int waitflag)
{
	struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
	struct mbuf *m;
	struct mbuf *mp;
	struct sockaddr *nam;
	struct uio auio;
	int flags, error;

	/*
	 * XXXRW: For now, assert Giant here since the NFS server upcall
	 * will perform socket operations requiring Giant in a non-mpsafe
	 * kernel.
	 */
	NET_ASSERT_GIANT();
	NFSD_UNLOCK_ASSERT();

	/* XXXRW: Unlocked read. */
	if ((slp->ns_flag & SLP_VALID) == 0)
		return;

	/*
	 * We can't do this in the context of a socket callback
	 * because we're called with locks held.
	 * XXX: SMP
	 */
	if (waitflag == M_DONTWAIT) {
		NFSD_LOCK();
		slp->ns_flag |= SLP_NEEDQ;
		goto dorecs;
	}

	NFSD_LOCK();
	auio.uio_td = NULL;
	if (so->so_type == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (STAILQ_FIRST(&slp->ns_rec) != NULL &&
		    waitflag == M_DONTWAIT) {
			slp->ns_flag |= SLP_NEEDQ;
			goto dorecs;
		}

		/*
		 * Do soreceive().
		 */
		auio.uio_resid = 1000000000;
		flags = MSG_DONTWAIT;
		NFSD_UNLOCK();
		error = so->so_proto->pr_usrreqs->pru_soreceive
		    (so, &nam, &auio, &mp, NULL, &flags);
		NFSD_LOCK();
		if (error || mp == NULL) {
			if (error == EWOULDBLOCK)
				slp->ns_flag |= SLP_NEEDQ;
			else
				slp->ns_flag |= SLP_DISCONN;
			goto dorecs;
		}
		m = mp;
		if (slp->ns_rawend) {
			slp->ns_rawend->m_next = m;
			slp->ns_cc += 1000000000 - auio.uio_resid;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = 1000000000 - auio.uio_resid;
		}
		while (m->m_next)
			m = m->m_next;
		slp->ns_rawend = m;

		/*
		 * Now try to parse record(s) out of the raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag);
		if (error) {
			if (error == EPERM)
				slp->ns_flag |= SLP_DISCONN;
			else
				slp->ns_flag |= SLP_NEEDQ;
		}
	} else {
		do {
			auio.uio_resid = 1000000000;
			flags = MSG_DONTWAIT;
			NFSD_UNLOCK();
			error = so->so_proto->pr_usrreqs->pru_soreceive
			    (so, &nam, &auio, &mp, NULL, &flags);
			if (mp) {
				struct nfsrv_rec *rec;
				rec = malloc(sizeof(struct nfsrv_rec),
				    M_NFSRVDESC,
				    waitflag == M_DONTWAIT ?
				    M_NOWAIT : M_WAITOK);
				if (!rec) {
					if (nam)
						FREE(nam, M_SONAME);
					m_freem(mp);
					NFSD_LOCK();
					continue;
				}
				NFSD_LOCK();
				nfs_realign(&mp, 10 * NFSX_UNSIGNED);
				rec->nr_address = nam;
				rec->nr_packet = mp;
				STAILQ_INSERT_TAIL(&slp->ns_rec, rec, nr_link);
			} else
				NFSD_LOCK();
			if (error) {
				if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
				    && error != EWOULDBLOCK) {
					slp->ns_flag |= SLP_DISCONN;
					goto dorecs;
				}
			}
		} while (mp);
	}

	/*
	 * Now try to process the request records, non-blocking.
	 */
dorecs:
	if (waitflag == M_DONTWAIT &&
	    (STAILQ_FIRST(&slp->ns_rec) != NULL ||
	    (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
		nfsrv_wakenfsd(slp);
	NFSD_UNLOCK();
}

/*
 * Try to extract an RPC request from the mbuf data list received on a
 * stream socket.  The "waitflag" argument indicates whether or not it
 * can sleep.
 */
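/*
 * For reference (summarized from RFC 1057 record marking, not from this
 * file): on a stream socket each RPC fragment is preceded by a four-byte
 * big-endian record mark; the high bit flags the last fragment of a record
 * and the low 31 bits give the fragment length:
 *
 *	lastfrag = (recmark & 0x80000000) != 0;
 *	reclen   =  recmark & 0x7fffffff;
 */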
static int
nfsrv_getstream(struct nfssvc_sock *slp, int waitflag)
{
	struct mbuf *m, **mpp;
	char *cp1, *cp2;
	int len;
	struct mbuf *om, *m2, *recm;
	u_int32_t recmark;

	NFSD_LOCK_ASSERT();

	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
		if (slp->ns_reclen == 0) {
			if (slp->ns_cc < NFSX_UNSIGNED) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (0);
			}
			m = slp->ns_raw;
			if (m->m_len >= NFSX_UNSIGNED) {
				bcopy(mtod(m, caddr_t), (caddr_t)&recmark,
				    NFSX_UNSIGNED);
				m->m_data += NFSX_UNSIGNED;
				m->m_len -= NFSX_UNSIGNED;
			} else {
				cp1 = (caddr_t)&recmark;
				cp2 = mtod(m, caddr_t);
				while (cp1 < ((caddr_t)&recmark) +
				    NFSX_UNSIGNED) {
					while (m->m_len == 0) {
						m = m->m_next;
						cp2 = mtod(m, caddr_t);
					}
					*cp1++ = *cp2++;
					m->m_data++;
					m->m_len--;
				}
			}
			slp->ns_cc -= NFSX_UNSIGNED;
			recmark = ntohl(recmark);
			slp->ns_reclen = recmark & ~0x80000000;
			if (recmark & 0x80000000)
				slp->ns_flag |= SLP_LASTFRAG;
			else
				slp->ns_flag &= ~SLP_LASTFRAG;
			if (slp->ns_reclen > NFS_MAXPACKET ||
			    slp->ns_reclen <= 0) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EPERM);
			}
		}

		/*
		 * Now get the record part.
		 *
		 * Note that slp->ns_reclen may be 0.  Linux sometimes
		 * generates 0-length RPCs.
		 */
		recm = NULL;
		if (slp->ns_cc == slp->ns_reclen) {
			recm = slp->ns_raw;
			slp->ns_raw = slp->ns_rawend = NULL;
			slp->ns_cc = slp->ns_reclen = 0;
		} else if (slp->ns_cc > slp->ns_reclen) {
			len = 0;
			m = slp->ns_raw;
			om = NULL;

			while (len < slp->ns_reclen) {
				if ((len + m->m_len) > slp->ns_reclen) {
					NFSD_UNLOCK();
					m2 = m_copym(m, 0,
					    slp->ns_reclen - len, waitflag);
					NFSD_LOCK();
					if (m2) {
						if (om) {
							om->m_next = m2;
							recm = slp->ns_raw;
						} else
							recm = m2;
						m->m_data += slp->ns_reclen - len;
						m->m_len -= slp->ns_reclen - len;
						len = slp->ns_reclen;
					} else {
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
				} else if ((len + m->m_len) == slp->ns_reclen) {
					om = m;
					len += m->m_len;
					m = m->m_next;
					recm = slp->ns_raw;
					om->m_next = NULL;
				} else {
					om = m;
					len += m->m_len;
					m = m->m_next;
				}
			}
			slp->ns_raw = m;
			slp->ns_cc -= len;
			slp->ns_reclen = 0;
		} else {
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}

		/*
		 * Accumulate the fragments into a record.
		 */
		mpp = &slp->ns_frag;
		while (*mpp)
			mpp = &((*mpp)->m_next);
		*mpp = recm;
		if (slp->ns_flag & SLP_LASTFRAG) {
			struct nfsrv_rec *rec;
			NFSD_UNLOCK();
			rec = malloc(sizeof(struct nfsrv_rec), M_NFSRVDESC,
			    waitflag == M_DONTWAIT ? M_NOWAIT : M_WAITOK);
			NFSD_LOCK();
			if (!rec) {
				m_freem(slp->ns_frag);
			} else {
				nfs_realign(&slp->ns_frag,
				    10 * NFSX_UNSIGNED);
				rec->nr_address = NULL;
				rec->nr_packet = slp->ns_frag;
				STAILQ_INSERT_TAIL(&slp->ns_rec, rec,
				    nr_link);
			}
			slp->ns_frag = NULL;
		}
	}
}

/*
 * Dequeue the next request record from a socket and parse its RPC header
 * into a freshly allocated nfsrv_descript.
 */
int
nfsrv_dorec(struct nfssvc_sock *slp, struct nfsd *nfsd,
    struct nfsrv_descript **ndp)
{
	struct nfsrv_rec *rec;
	struct mbuf *m;
	struct sockaddr *nam;
	struct nfsrv_descript *nd;
	int error;

	NFSD_LOCK_ASSERT();

	*ndp = NULL;
	if ((slp->ns_flag & SLP_VALID) == 0 ||
	    STAILQ_FIRST(&slp->ns_rec) == NULL)
		return (ENOBUFS);
	rec = STAILQ_FIRST(&slp->ns_rec);
	STAILQ_REMOVE_HEAD(&slp->ns_rec, nr_link);
	nam = rec->nr_address;
	m = rec->nr_packet;
	free(rec, M_NFSRVDESC);
	NFSD_UNLOCK();
	MALLOC(nd, struct nfsrv_descript *, sizeof (struct nfsrv_descript),
	    M_NFSRVDESC, M_WAITOK);
	nd->nd_cr = crget();
	NFSD_LOCK();
	nd->nd_md = nd->nd_mrep = m;
	nd->nd_nam2 = nam;
	nd->nd_dpos = mtod(m, caddr_t);
	error = nfs_getreq(nd, nfsd, TRUE);
	if (error) {
		if (nam) {
			FREE(nam, M_SONAME);
		}
		if (nd->nd_cr != NULL)
			crfree(nd->nd_cr);
		free((caddr_t)nd, M_NFSRVDESC);
		return (error);
	}
	*ndp = nd;
	nfsd->nfsd_nd = nd;
	return (0);
}

/*
 * Search for a sleeping nfsd and wake it up.
 * SIDE EFFECT: If none found, set the NFSD_CHECKSLP flag so that one of
 * the running nfsds will go look for the work in the nfssvc_sock list.
 */
void
nfsrv_wakenfsd(struct nfssvc_sock *slp)
{
	struct nfsd *nd;

	NFSD_LOCK_ASSERT();

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;
	TAILQ_FOREACH(nd, &nfsd_head, nfsd_chain) {
		if (nd->nfsd_flag & NFSD_WAITING) {
			nd->nfsd_flag &= ~NFSD_WAITING;
			if (nd->nfsd_slp)
				panic("nfsd wakeup");
			slp->ns_sref++;
			nd->nfsd_slp = slp;
			wakeup(nd);
			return;
		}
	}
	slp->ns_flag |= SLP_DOREC;
	nfsd_head_flag |= NFSD_CHECKSLP;
}

/*
 * This is the nfs send routine.
 * For the server side:
 * - return EINTR or ERESTART if interrupted by a signal
 * - return EPIPE if a connection is lost for connection-based sockets (TCP...)
 * - do any cleanup required by recoverable socket errors (?)
 */
int
nfsrv_send(struct socket *so, struct sockaddr *nam, struct mbuf *top)
{
	struct sockaddr *sendnam;
	int error, soflags, flags;

	NET_ASSERT_GIANT();
	NFSD_UNLOCK_ASSERT();

	soflags = so->so_proto->pr_flags;
	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
		sendnam = NULL;
	else
		sendnam = nam;
	if (so->so_type == SOCK_SEQPACKET)
		flags = MSG_EOR;
	else
		flags = 0;

	error = so->so_proto->pr_usrreqs->pru_sosend(so, sendnam, 0, top, 0,
	    flags, curthread/*XXX*/);
	if (error == ENOBUFS && so->so_type == SOCK_DGRAM)
		error = 0;

	if (error) {
		log(LOG_INFO, "nfsd send error %d\n", error);

		/*
		 * Handle any recoverable (soft) socket errors here. (?)
		 */
		if (error != EINTR && error != ERESTART &&
		    error != EWOULDBLOCK && error != EPIPE)
			error = 0;
	}
	return (error);
}

/*
 * NFS server timer routine.
 */
void
nfsrv_timer(void *arg)
{
	struct nfssvc_sock *slp;
	u_quad_t cur_usec;

	NFSD_LOCK();
	/*
	 * Scan the write gathering queues for writes that need to be
	 * completed now.
	 */
	cur_usec = nfs_curusec();
	TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
		if (LIST_FIRST(&slp->ns_tq) &&
		    LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec)
			nfsrv_wakenfsd(slp);
	}
	NFSD_UNLOCK();
	callout_reset(&nfsrv_callout, nfsrv_ticks, nfsrv_timer, NULL);
}
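
/*
 * Illustrative sketch (an assumption, not code from this file): the timer
 * is primed once at server initialization, roughly:
 *
 *	callout_init(&nfsrv_callout, 0);
 *	callout_reset(&nfsrv_callout, nfsrv_ticks, nfsrv_timer, NULL);
 *
 * after which each run reschedules itself via the callout_reset() above.
 */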