1 /*-
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD: releng/8.0/sys/nfsserver/nfs_srvkrpc.c 195202 2009-06-30 19:03:27Z dfr $");
37
38 #include "opt_inet6.h"
39 #include "opt_kgssapi.h"
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/sysproto.h>
44 #include <sys/kernel.h>
45 #include <sys/sysctl.h>
46 #include <sys/file.h>
47 #include <sys/filedesc.h>
48 #include <sys/jail.h>
49 #include <sys/vnode.h>
50 #include <sys/malloc.h>
51 #include <sys/mount.h>
52 #include <sys/priv.h>
53 #include <sys/proc.h>
54 #include <sys/bio.h>
55 #include <sys/buf.h>
56 #include <sys/mbuf.h>
57 #include <sys/socket.h>
58 #include <sys/socketvar.h>
59 #include <sys/domain.h>
60 #include <sys/protosw.h>
61 #include <sys/namei.h>
62 #include <sys/fcntl.h>
63 #include <sys/lockf.h>
64 #include <sys/eventhandler.h>
65
66 #include <netinet/in.h>
67 #include <netinet/tcp.h>
68 #ifdef INET6
69 #include <net/if.h>
70 #include <netinet6/in6_var.h>
71 #endif
72
73 #include <rpc/rpc.h>
74 #include <rpc/rpcsec_gss.h>
75 #include <rpc/replay.h>
76
77 #include <nfs/xdr_subs.h>
78 #include <nfs/nfsproto.h>
79 #include <nfsserver/nfs.h>
80 #include <nfsserver/nfsm_subs.h>
81 #include <nfsserver/nfsrvcache.h>
82 #include <nfsserver/nfs_fha.h>
83
84 #include <security/mac/mac_framework.h>
85
/* Malloc types declared by this file. */
static MALLOC_DEFINE(M_NFSSVC, "nfss_srvsock", "Nfs server structure");

MALLOC_DEFINE(M_NFSRVDESC, "nfss_srvdesc", "NFS server socket descriptor");
MALLOC_DEFINE(M_NFSD, "nfss_daemon", "Nfs server daemon structure");

#define TRUE 1
#define FALSE 0

SYSCTL_DECL(_vfs_nfsrv);

/* RPC service pool; (re)created by nfsrv_init() and run by nfssvc_nfsd(). */
SVCPOOL *nfsrv_pool;
int nfsd_waiting = 0;
/* Non-zero while an nfsd is inside svc_run(); see nfssvc_nfsd(). */
int nfsrv_numnfsd = 0;
/* Number of calls to nfs_realign(). */
static int nfs_realign_test;
/* Number of nfs_realign() calls that actually had to copy data. */
static int nfs_realign_count;
struct callout nfsrv_callout;
/* Registration handle for the nmbclusters_change event handler. */
static eventhandler_tag nfsrv_nmbclusters_tag;

/*
 * When non-zero, nfssvc_program() rejects every request other than the
 * NULL procedure whose source port is >= IPPORT_RESERVED.
 */
static int nfs_privport = 0;
SYSCTL_INT(_vfs_nfsrv, NFS_NFSPRIVPORT, nfs_privport, CTLFLAG_RW,
    &nfs_privport, 0,
    "Only allow clients using a privileged port");
SYSCTL_INT(_vfs_nfsrv, OID_AUTO, gatherdelay, CTLFLAG_RW,
    &nfsrvw_procrastinate, 0,
    "Delay value for write gathering");
SYSCTL_INT(_vfs_nfsrv, OID_AUTO, gatherdelay_v3, CTLFLAG_RW,
    &nfsrvw_procrastinate_v3, 0,
    "Delay in seconds for NFSv3 write gathering");
SYSCTL_INT(_vfs_nfsrv, OID_AUTO, realign_test, CTLFLAG_RW,
    &nfs_realign_test, 0, "");
SYSCTL_INT(_vfs_nfsrv, OID_AUTO, realign_count, CTLFLAG_RW,
    &nfs_realign_count, 0, "");

static int nfssvc_addsock(struct file *, struct thread *);
static int nfssvc_nfsd(struct thread *, struct nfsd_nfsd_args *);

/* Used by nfssvc_addsock() as the socket buffer reservation size. */
extern u_long sb_max_adj;
123
/*
 * Procedure dispatch table used by nfssvc_program().  It is indexed by
 * the NFSv3 procedure number; NFSv2 requests are first translated
 * through nfsrv_nfsv3_procid[] before indexing.  The order of the
 * entries therefore must not be changed.
 */
int32_t (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
    struct nfssvc_sock *slp, struct mbuf **mreqp) = {
	nfsrv_null,
	nfsrv_getattr,
	nfsrv_setattr,
	nfsrv_lookup,
	nfsrv3_access,
	nfsrv_readlink,
	nfsrv_read,
	nfsrv_write,
	nfsrv_create,
	nfsrv_mkdir,
	nfsrv_symlink,
	nfsrv_mknod,
	nfsrv_remove,
	nfsrv_rmdir,
	nfsrv_rename,
	nfsrv_link,
	nfsrv_readdir,
	nfsrv_readdirplus,
	nfsrv_statfs,
	nfsrv_fsinfo,
	nfsrv_pathconf,
	nfsrv_commit,
	nfsrv_noop
};
150
151 /*
152 * NFS server system calls
153 */
154 /*
155 * This is now called from nfssvc() in nfs/nfs_nfssvc.c.
156 */
157
158 /*
159 * Nfs server psuedo system call for the nfsd's
160 * Based on the flag value it either:
161 * - adds a socket to the selection list
162 * - remains in the kernel as an nfsd
163 * - remains in the kernel as an nfsiod
164 * For INET6 we suppose that nfsd provides only IN6P_IPV6_V6ONLY sockets
165 * and that mountd provides
166 * - sockaddr with no IPv4-mapped addresses
167 * - mask for both INET and INET6 families if there is IPv4-mapped overlap
168 */
169 int
170 nfssvc_nfsserver(struct thread *td, struct nfssvc_args *uap)
171 {
172 struct file *fp;
173 struct nfsd_addsock_args addsockarg;
174 struct nfsd_nfsd_args nfsdarg;
175 int error;
176
177 if (uap->flag & NFSSVC_ADDSOCK) {
178 error = copyin(uap->argp, (caddr_t)&addsockarg,
179 sizeof(addsockarg));
180 if (error)
181 return (error);
182 if ((error = fget(td, addsockarg.sock, &fp)) != 0)
183 return (error);
184 if (fp->f_type != DTYPE_SOCKET) {
185 fdrop(fp, td);
186 return (error); /* XXXRW: Should be EINVAL? */
187 }
188 error = nfssvc_addsock(fp, td);
189 fdrop(fp, td);
190 } else if (uap->flag & NFSSVC_OLDNFSD) {
191 error = nfssvc_nfsd(td, NULL);
192 } else if (uap->flag & NFSSVC_NFSD) {
193 if (!uap->argp)
194 return (EINVAL);
195 error = copyin(uap->argp, (caddr_t)&nfsdarg,
196 sizeof(nfsdarg));
197 if (error)
198 return (error);
199 error = nfssvc_nfsd(td, &nfsdarg);
200 } else {
201 error = ENXIO;
202 }
203 return (error);
204 }
205
206 /*
207 * Generate the rpc reply header
208 * siz arg. is used to decide if adding a cluster is worthwhile
209 */
210 struct mbuf *
211 nfs_rephead(int siz, struct nfsrv_descript *nd, int err,
212 struct mbuf **mbp, caddr_t *bposp)
213 {
214 u_int32_t *tl;
215 struct mbuf *mreq;
216 caddr_t bpos;
217 struct mbuf *mb;
218
219 if (err == EBADRPC)
220 return (NULL);
221
222 nd->nd_repstat = err;
223 if (err && (nd->nd_flag & ND_NFSV3) == 0) /* XXX recheck */
224 siz = 0;
225
226 MGET(mreq, M_WAIT, MT_DATA);
227
228 /*
229 * If this is a big reply, use a cluster
230 */
231 mreq->m_len = 0;
232 if (siz >= MINCLSIZE) {
233 MCLGET(mreq, M_WAIT);
234 }
235 mb = mreq;
236 bpos = mtod(mb, caddr_t);
237
238 if (err != NFSERR_RETVOID) {
239 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
240 if (err)
241 *tl = txdr_unsigned(nfsrv_errmap(nd, err));
242 else
243 *tl = 0;
244 }
245
246 *mbp = mb;
247 *bposp = bpos;
248 if (err != 0 && err != NFSERR_RETVOID)
249 nfsrvstats.srvrpc_errs++;
250
251 return (mreq);
252 }
253
254 /*
255 * nfs_realign:
256 *
257 * Check for badly aligned mbuf data and realign by copying the unaligned
258 * portion of the data into a new mbuf chain and freeing the portions
259 * of the old chain that were replaced.
260 *
261 * We cannot simply realign the data within the existing mbuf chain
262 * because the underlying buffers may contain other rpc commands and
263 * we cannot afford to overwrite them.
264 *
265 * We would prefer to avoid this situation entirely. The situation does
266 * not occur with NFS/UDP and is supposed to only occassionally occur
267 * with TCP. Use vfs.nfs.realign_count and realign_test to check this.
268 */
269 static void
270 nfs_realign(struct mbuf **pm) /* XXX COMMON */
271 {
272 struct mbuf *m;
273 struct mbuf *n = NULL;
274 int off = 0;
275
276 ++nfs_realign_test;
277 while ((m = *pm) != NULL) {
278 if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
279 MGET(n, M_WAIT, MT_DATA);
280 if (m->m_len >= MINCLSIZE) {
281 MCLGET(n, M_WAIT);
282 }
283 n->m_len = 0;
284 break;
285 }
286 pm = &m->m_next;
287 }
288
289 /*
290 * If n is non-NULL, loop on m copying data, then replace the
291 * portion of the chain that had to be realigned.
292 */
293 if (n != NULL) {
294 ++nfs_realign_count;
295 while (m) {
296 m_copyback(n, off, m->m_len, mtod(m, caddr_t));
297 off += m->m_len;
298 m = m->m_next;
299 }
300 m_freem(*pm);
301 *pm = n;
302 }
303 }
304
305 static void
306 nfssvc_program(struct svc_req *rqst, SVCXPRT *xprt)
307 {
308 rpcproc_t procnum;
309 int32_t (*proc)(struct nfsrv_descript *nd, struct nfssvc_sock *slp,
310 struct mbuf **mreqp);
311 int flag;
312 struct nfsrv_descript nd;
313 struct mbuf *mreq, *mrep;
314 int error;
315
316 if (rqst->rq_vers == NFS_VER2) {
317 if (rqst->rq_proc > NFSV2PROC_STATFS) {
318 svcerr_noproc(rqst);
319 svc_freereq(rqst);
320 return;
321 }
322 procnum = nfsrv_nfsv3_procid[rqst->rq_proc];
323 flag = 0;
324 } else {
325 if (rqst->rq_proc >= NFS_NPROCS) {
326 svcerr_noproc(rqst);
327 svc_freereq(rqst);
328 return;
329 }
330 procnum = rqst->rq_proc;
331 flag = ND_NFSV3;
332 }
333 proc = nfsrv3_procs[procnum];
334
335 mreq = mrep = NULL;
336 mreq = rqst->rq_args;
337 rqst->rq_args = NULL;
338 nfs_realign(&mreq);
339
340 /*
341 * Note: we want rq_addr, not svc_getrpccaller for nd_nam2 -
342 * NFS_SRVMAXDATA uses a NULL value for nd_nam2 to detect TCP
343 * mounts.
344 */
345 memset(&nd, 0, sizeof(nd));
346 nd.nd_md = nd.nd_mrep = mreq;
347 nd.nd_dpos = mtod(mreq, caddr_t);
348 nd.nd_nam = svc_getrpccaller(rqst);
349 nd.nd_nam2 = rqst->rq_addr;
350 nd.nd_procnum = procnum;
351 nd.nd_cr = NULL;
352 nd.nd_flag = flag;
353
354 if (nfs_privport) {
355 /* Check if source port is privileged */
356 u_short port;
357 struct sockaddr *nam = nd.nd_nam;
358 struct sockaddr_in *sin;
359
360 sin = (struct sockaddr_in *)nam;
361 /*
362 * INET/INET6 - same code:
363 * sin_port and sin6_port are at same offset
364 */
365 port = ntohs(sin->sin_port);
366 if (port >= IPPORT_RESERVED &&
367 nd.nd_procnum != NFSPROC_NULL) {
368 #ifdef INET6
369 char b6[INET6_ADDRSTRLEN];
370 #if defined(KLD_MODULE)
371 /* Do not use ip6_sprintf: the nfs module should work without INET6. */
372 #define ip6_sprintf(buf, a) \
373 (sprintf((buf), "%x:%x:%x:%x:%x:%x:%x:%x", \
374 (a)->s6_addr16[0], (a)->s6_addr16[1], \
375 (a)->s6_addr16[2], (a)->s6_addr16[3], \
376 (a)->s6_addr16[4], (a)->s6_addr16[5], \
377 (a)->s6_addr16[6], (a)->s6_addr16[7]), \
378 (buf))
379 #endif
380 #endif
381 printf("NFS request from unprivileged port (%s:%d)\n",
382 #ifdef INET6
383 sin->sin_family == AF_INET6 ?
384 ip6_sprintf(b6, &satosin6(sin)->sin6_addr) :
385 #if defined(KLD_MODULE)
386 #undef ip6_sprintf
387 #endif
388 #endif
389 inet_ntoa(sin->sin_addr), port);
390 m_freem(mreq);
391 svcerr_weakauth(rqst);
392 svc_freereq(rqst);
393 return;
394 }
395 }
396
397 if (proc != nfsrv_null) {
398 if (!svc_getcred(rqst, &nd.nd_cr, &nd.nd_credflavor)) {
399 m_freem(mreq);
400 svcerr_weakauth(rqst);
401 svc_freereq(rqst);
402 return;
403 }
404 #ifdef MAC
405 mac_cred_associate_nfsd(nd.nd_cr);
406 #endif
407 }
408 nfsrvstats.srvrpccnt[nd.nd_procnum]++;
409
410 error = proc(&nd, NULL, &mrep);
411
412 if (nd.nd_cr)
413 crfree(nd.nd_cr);
414
415 if (mrep == NULL) {
416 svcerr_decode(rqst);
417 svc_freereq(rqst);
418 return;
419 }
420 if (error && error != NFSERR_RETVOID) {
421 svcerr_systemerr(rqst);
422 svc_freereq(rqst);
423 return;
424 }
425 if (nd.nd_repstat & NFSERR_AUTHERR) {
426 svcerr_auth(rqst, nd.nd_repstat & ~NFSERR_AUTHERR);
427 m_freem(mrep);
428 } else {
429 if (!svc_sendreply_mbuf(rqst, mrep))
430 svcerr_systemerr(rqst);
431 }
432 svc_freereq(rqst);
433 }
434
435 /*
436 * Adds a socket to the list for servicing by nfsds.
437 */
438 static int
439 nfssvc_addsock(struct file *fp, struct thread *td)
440 {
441 int siz;
442 struct socket *so;
443 int error;
444 SVCXPRT *xprt;
445
446 so = fp->f_data;
447
448 siz = sb_max_adj;
449 error = soreserve(so, siz, siz);
450 if (error) {
451 return (error);
452 }
453
454 /*
455 * Steal the socket from userland so that it doesn't close
456 * unexpectedly.
457 */
458 if (so->so_type == SOCK_DGRAM)
459 xprt = svc_dg_create(nfsrv_pool, so, 0, 0);
460 else
461 xprt = svc_vc_create(nfsrv_pool, so, 0, 0);
462 if (xprt) {
463 fp->f_ops = &badfileops;
464 fp->f_data = NULL;
465 svc_reg(xprt, NFS_PROG, NFS_VER2, nfssvc_program, NULL);
466 svc_reg(xprt, NFS_PROG, NFS_VER3, nfssvc_program, NULL);
467 SVC_RELEASE(xprt);
468 }
469
470 return (0);
471 }
472
473 /*
474 * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
475 * until it is killed by a signal.
476 */
477 static int
478 nfssvc_nfsd(struct thread *td, struct nfsd_nfsd_args *args)
479 {
480 #ifdef KGSSAPI
481 char principal[128];
482 int error;
483 #endif
484
485 #ifdef KGSSAPI
486 if (args) {
487 error = copyinstr(args->principal, principal,
488 sizeof(principal), NULL);
489 if (error)
490 return (error);
491 } else {
492 memcpy(principal, "nfs@", 4);
493 getcredhostname(td->td_ucred, principal + 4,
494 sizeof(principal) - 4);
495 }
496 #endif
497
498 /*
499 * Only the first nfsd actually does any work. The RPC code
500 * adds threads to it as needed. Any extra processes offered
501 * by nfsd just exit. If nfsd is new enough, it will call us
502 * once with a structure that specifies how many threads to
503 * use.
504 */
505 NFSD_LOCK();
506 if (nfsrv_numnfsd == 0) {
507 nfsrv_numnfsd++;
508
509 NFSD_UNLOCK();
510
511 #ifdef KGSSAPI
512 rpc_gss_set_svc_name(principal, "kerberosv5",
513 GSS_C_INDEFINITE, NFS_PROG, NFS_VER2);
514 rpc_gss_set_svc_name(principal, "kerberosv5",
515 GSS_C_INDEFINITE, NFS_PROG, NFS_VER3);
516 #endif
517
518 if (args) {
519 nfsrv_pool->sp_minthreads = args->minthreads;
520 nfsrv_pool->sp_maxthreads = args->maxthreads;
521 } else {
522 nfsrv_pool->sp_minthreads = 4;
523 nfsrv_pool->sp_maxthreads = 4;
524 }
525
526 svc_run(nfsrv_pool);
527
528 #ifdef KGSSAPI
529 rpc_gss_clear_svc_name(NFS_PROG, NFS_VER2);
530 rpc_gss_clear_svc_name(NFS_PROG, NFS_VER3);
531 #endif
532
533 NFSD_LOCK();
534 nfsrv_numnfsd--;
535 nfsrv_init(TRUE);
536 }
537 NFSD_UNLOCK();
538
539 return (0);
540 }
541
542 /*
543 * Size the NFS server's duplicate request cache at 1/2 the
544 * nmbclusters, floating within a (64, 2048) range. This is to
545 * prevent all mbuf clusters being tied up in the NFS dupreq
546 * cache for small values of nmbclusters.
547 */
548 static size_t
549 nfsrv_replay_size(void)
550 {
551 size_t replaysiz;
552
553 replaysiz = nmbclusters / 2;
554 if (replaysiz > NFSRVCACHE_MAX_SIZE)
555 replaysiz = NFSRVCACHE_MAX_SIZE;
556 if (replaysiz < NFSRVCACHE_MIN_SIZE)
557 replaysiz = NFSRVCACHE_MIN_SIZE;
558 replaysiz *= MCLBYTES;
559
560 return (replaysiz);
561 }
562
563 /*
564 * Called when nmbclusters changes - we resize the replay cache
565 * accordingly.
566 */
567 static void
568 nfsrv_nmbclusters_change(void *tag)
569 {
570
571 if (nfsrv_pool)
572 replay_setsize(nfsrv_pool->sp_rcache, nfsrv_replay_size());
573 }
574
575 /*
576 * Initialize the data structures for the server.
577 * Handshake with any new nfsds starting up to avoid any chance of
578 * corruption.
579 */
580 void
581 nfsrv_init(int terminating)
582 {
583
584 NFSD_LOCK_ASSERT();
585
586 if (terminating) {
587 NFSD_UNLOCK();
588 EVENTHANDLER_DEREGISTER(nmbclusters_change,
589 nfsrv_nmbclusters_tag);
590 svcpool_destroy(nfsrv_pool);
591 nfsrv_pool = NULL;
592 NFSD_LOCK();
593 } else
594 nfs_pub.np_valid = 0;
595
596 NFSD_UNLOCK();
597
598 nfsrv_pool = svcpool_create("nfsd", SYSCTL_STATIC_CHILDREN(_vfs_nfsrv));
599 nfsrv_pool->sp_rcache = replay_newcache(nfsrv_replay_size());
600 nfsrv_pool->sp_assign = fha_assign;
601 nfsrv_pool->sp_done = fha_nd_complete;
602 nfsrv_nmbclusters_tag = EVENTHANDLER_REGISTER(nmbclusters_change,
603 nfsrv_nmbclusters_change, NULL, EVENTHANDLER_PRI_FIRST);
604
605 NFSD_LOCK();
606 }
Cache object: ea2fa0df7f009cc18d5860cb3c5b035a
|