1 /*-
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 */
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD: releng/9.0/sys/fs/nfsserver/nfs_nfsdcache.c 224086 2011-07-16 08:51:09Z zack $");
36
37 /*
38 * Here is the basic algorithm:
39 * First, some design criteria I used:
40 * - I think a false hit is more serious than a false miss
41 * - A false hit for an RPC that has Op(s) that order via seqid# must be
42 * avoided at all cost
43 * - A valid hit will probably happen a long time after the original reply
44 * and the TCP socket that the original request was received on will no
45 * longer be active
46 * (The long time delay implies to me that LRU is not appropriate.)
47 * - The mechanism will satisfy the requirements of ordering Ops with seqid#s
48 * in them as well as minimizing the risk of redoing retried non-idempotent
49 * Ops.
50 * Because it is biased towards avoiding false hits, multiple entries with
51 * the same xid are to be expected, especially for the case of the entry
52 * in the cache being related to a seqid# sequenced Op.
53 *
54 * The basic algorithm I'm about to code up:
55 * - Null RPCs bypass the cache and are just done
56 * For TCP
57 * - key on <xid, NFS version> (as noted above, there can be several
58 * entries with the same key)
59 * When a request arrives:
60 * For all that match key
61 * - if RPC# != OR request_size !=
62 * - not a match with this one
63 * - if NFSv4 and received on same TCP socket OR
64 * received on a TCP connection created before the
65 * entry was cached
66 * - not a match with this one
67 * (V2,3 clients might retry on same TCP socket)
68 * - calculate checksum on first N bytes of NFS XDR
69 * - if checksum !=
70 * - not a match for this one
71 * If any of the remaining ones that match has a
72 * seqid_refcnt > 0
73 * - not a match (go do RPC, using new cache entry)
74 * If one match left
75 * - a hit (reply from cache)
76 * else
77 * - miss (go do RPC, using new cache entry)
78 *
79 * During processing of NFSv4 request:
80 * - set a flag when a non-idempotent Op is processed
81 * - when an Op that uses a seqid# (Open,...) is processed
82 * - if same seqid# as referenced entry in cache
83 * - free new cache entry
84 * - reply from referenced cache entry
85 * else if next seqid# in order
86 * - free referenced cache entry
87 * - increment seqid_refcnt on new cache entry
88 * - set pointer from Openowner/Lockowner to
89 * new cache entry (aka reference it)
90 * else if first seqid# in sequence
91 * - increment seqid_refcnt on new cache entry
92 * - set pointer from Openowner/Lockowner to
93 * new cache entry (aka reference it)
94 *
95 * At end of RPC processing:
96 * - if seqid_refcnt > 0 OR flagged non-idempotent on new
97 * cache entry
98 * - save reply in cache entry
99 * - calculate checksum on first N bytes of NFS XDR
100 * request
101 * - note op and length of XDR request (in bytes)
102 * - timestamp it
103 * else
104 * - free new cache entry
105 * - Send reply (noting info for socket activity check, below)
106 *
107 * For cache entries saved above:
108 * - if saved since seqid_refcnt was > 0
109 * - free when seqid_refcnt decrements to 0
110 * (when next one in sequence is processed above, or
111 * when Openowner/Lockowner is discarded)
112 * else { non-idempotent Op(s) }
113 * - free when
114 * - some further activity observed on same
115 * socket
116 * (I'm not yet sure how I'm going to do
117 * this. Maybe look at the TCP connection
118 * to see if the send_tcp_sequence# is well
119 * past sent reply OR K additional RPCs
120 * replied on same socket OR?)
121 * OR
122 * - when very old (hours, days, weeks?)
123 *
124 * For UDP (v2, 3 only), pretty much the old way:
125 * - key on <xid, NFS version, RPC#, Client host ip#>
126 * (at most one entry for each key)
127 *
128 * When a Request arrives:
129 * - if a match with entry via key
130 * - if RPC marked In_progress
131 * - discard request (don't send reply)
132 * else
133 * - reply from cache
134 * - timestamp cache entry
135 * else
136 * - add entry to cache, marked In_progress
137 * - do RPC
138 * - when RPC done
139 * - if RPC# non-idempotent
140 * - mark entry Done (not In_progress)
141 * - save reply
142 * - timestamp cache entry
143 * else
144 * - free cache entry
145 * - send reply
146 *
147 * Later, entries with saved replies are free'd a short time (few minutes)
148 * after reply sent (timestamp).
149 * Reference: Chet Juszczak, "Improving the Performance and Correctness
150 * of an NFS Server", in Proc. Winter 1989 USENIX Conference,
151 * pages 53-63. San Diego, February 1989.
152 * for the UDP case.
153 * nfsrc_floodlevel is set to the allowable upper limit for saved replies
154 * for TCP. For V3, a reply won't be saved when the flood level is
155 * hit. For V4, the non-idempotent Op will return NFSERR_RESOURCE in
156 * that case. This level should be set high enough that this almost
157 * never happens.
158 */
159 #ifndef APPLEKEXT
160 #include <fs/nfs/nfsport.h>
161
162 extern struct nfsstats newnfsstats;
163 NFSCACHEMUTEX;
164 int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0;
165 #endif /* !APPLEKEXT */
166
167 static int nfsrc_tcpnonidempotent = 1;
168 static int nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER, nfsrc_udpcachesize = 0;
169 static TAILQ_HEAD(, nfsrvcache) nfsrvudplru;
170 static struct nfsrvhashhead nfsrvhashtbl[NFSRVCACHE_HASHSIZE],
171 nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];
172 /*
173 * and the reverse mapping from generic to Version 2 procedure numbers
174 */
175 static int newnfsv2_procid[NFS_V3NPROCS] = {
176 NFSV2PROC_NULL,
177 NFSV2PROC_GETATTR,
178 NFSV2PROC_SETATTR,
179 NFSV2PROC_LOOKUP,
180 NFSV2PROC_NOOP,
181 NFSV2PROC_READLINK,
182 NFSV2PROC_READ,
183 NFSV2PROC_WRITE,
184 NFSV2PROC_CREATE,
185 NFSV2PROC_MKDIR,
186 NFSV2PROC_SYMLINK,
187 NFSV2PROC_CREATE,
188 NFSV2PROC_REMOVE,
189 NFSV2PROC_RMDIR,
190 NFSV2PROC_RENAME,
191 NFSV2PROC_LINK,
192 NFSV2PROC_READDIR,
193 NFSV2PROC_NOOP,
194 NFSV2PROC_STATFS,
195 NFSV2PROC_NOOP,
196 NFSV2PROC_NOOP,
197 NFSV2PROC_NOOP,
198 };
199
/*
 * Hash an xid to a bucket index: fold the top byte of the xid into the
 * low bits before reducing modulo the table size.
 */
#define	NFSRC_HASHINDEX(xid)						\
	(((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE)

/* Bucket heads for an xid in the UDP and the TCP hash table. */
#define	NFSRCUDPHASH(xid)	(&nfsrvudphashtbl[NFSRC_HASHINDEX(xid)])
#define	NFSRCHASH(xid)		(&nfsrvhashtbl[NFSRC_HASHINDEX(xid)])

#define	TRUE	1
#define	FALSE	0

/* At most this many leading bytes of a request go into its checksum. */
#define	NFSRVCACHE_CHECKLEN	100
207
208 /* True iff the rpc reply is an nfs status ONLY! */
209 static int nfsv2_repstat[NFS_V3NPROCS] = {
210 FALSE,
211 FALSE,
212 FALSE,
213 FALSE,
214 FALSE,
215 FALSE,
216 FALSE,
217 FALSE,
218 FALSE,
219 FALSE,
220 TRUE,
221 TRUE,
222 TRUE,
223 TRUE,
224 FALSE,
225 TRUE,
226 FALSE,
227 FALSE,
228 FALSE,
229 FALSE,
230 FALSE,
231 FALSE,
232 };
233
/*
 * Address family of the client address stored in a cache entry:
 * AF_INET6 when the entry is flagged RC_INETIPV6, AF_INET otherwise.
 */
#define	NETFAMILY(rp)							\
	((((rp)->rc_flag & RC_INETIPV6) != 0) ? AF_INET6 : AF_INET)
239
240 /* local functions */
241 static int nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
242 static int nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
243 static void nfsrc_lock(struct nfsrvcache *rp);
244 static void nfsrc_unlock(struct nfsrvcache *rp);
245 static void nfsrc_wanted(struct nfsrvcache *rp);
246 static void nfsrc_freecache(struct nfsrvcache *rp);
247 static void nfsrc_trimcache(u_int64_t, struct socket *);
248 static int nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t,
249 struct socket *);
250 static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum);
251 static void nfsrc_marksametcpconn(u_int64_t);
252
253 /*
254 * Initialize the server request cache list
255 */
256 APPLESTATIC void
257 nfsrvd_initcache(void)
258 {
259 int i;
260 static int inited = 0;
261
262 if (inited)
263 return;
264 inited = 1;
265 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
266 LIST_INIT(&nfsrvudphashtbl[i]);
267 LIST_INIT(&nfsrvhashtbl[i]);
268 }
269 TAILQ_INIT(&nfsrvudplru);
270 nfsrc_tcpsavedreplies = 0;
271 nfsrc_udpcachesize = 0;
272 newnfsstats.srvcache_tcppeak = 0;
273 newnfsstats.srvcache_size = 0;
274 }
275
276 /*
277 * Get a cache entry for this request. Basically just malloc a new one
278 * and then call nfsrc_getudp() or nfsrc_gettcp() to do the rest.
279 * Call nfsrc_trimcache() to clean up the cache before returning.
280 */
281 APPLESTATIC int
282 nfsrvd_getcache(struct nfsrv_descript *nd, struct socket *so)
283 {
284 struct nfsrvcache *newrp;
285 int ret;
286
287 if (nd->nd_procnum == NFSPROC_NULL)
288 panic("nfsd cache null");
289 MALLOC(newrp, struct nfsrvcache *, sizeof (struct nfsrvcache),
290 M_NFSRVCACHE, M_WAITOK);
291 NFSBZERO((caddr_t)newrp, sizeof (struct nfsrvcache));
292 if (nd->nd_flag & ND_NFSV4)
293 newrp->rc_flag = RC_NFSV4;
294 else if (nd->nd_flag & ND_NFSV3)
295 newrp->rc_flag = RC_NFSV3;
296 else
297 newrp->rc_flag = RC_NFSV2;
298 newrp->rc_xid = nd->nd_retxid;
299 newrp->rc_proc = nd->nd_procnum;
300 newrp->rc_sockref = nd->nd_sockref;
301 newrp->rc_cachetime = nd->nd_tcpconntime;
302 if (nd->nd_flag & ND_SAMETCPCONN)
303 newrp->rc_flag |= RC_SAMETCPCONN;
304 if (nd->nd_nam2 != NULL) {
305 newrp->rc_flag |= RC_UDP;
306 ret = nfsrc_getudp(nd, newrp);
307 } else {
308 ret = nfsrc_gettcp(nd, newrp);
309 }
310 nfsrc_trimcache(nd->nd_sockref, so);
311 NFSEXITCODE2(0, nd);
312 return (ret);
313 }
314
315 /*
316 * For UDP (v2, v3):
317 * - key on <xid, NFS version, RPC#, Client host ip#>
318 * (at most one entry for each key)
319 */
320 static int
321 nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
322 {
323 struct nfsrvcache *rp;
324 struct sockaddr_in *saddr;
325 struct sockaddr_in6 *saddr6;
326 struct nfsrvhashhead *hp;
327 int ret = 0;
328
329 hp = NFSRCUDPHASH(newrp->rc_xid);
330 loop:
331 NFSLOCKCACHE();
332 LIST_FOREACH(rp, hp, rc_hash) {
333 if (newrp->rc_xid == rp->rc_xid &&
334 newrp->rc_proc == rp->rc_proc &&
335 (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
336 nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
337 if ((rp->rc_flag & RC_LOCKED) != 0) {
338 rp->rc_flag |= RC_WANTED;
339 NFSUNLOCKCACHE();
340 (void) tsleep((caddr_t)rp, PZERO - 1,
341 "nfsrc", 10 * hz);
342 goto loop;
343 }
344 if (rp->rc_flag == 0)
345 panic("nfs udp cache0");
346 rp->rc_flag |= RC_LOCKED;
347 TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
348 TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
349 if (rp->rc_flag & RC_INPROG) {
350 newnfsstats.srvcache_inproghits++;
351 NFSUNLOCKCACHE();
352 ret = RC_DROPIT;
353 } else if (rp->rc_flag & RC_REPSTATUS) {
354 /*
355 * V2 only.
356 */
357 newnfsstats.srvcache_nonidemdonehits++;
358 NFSUNLOCKCACHE();
359 nfsrvd_rephead(nd);
360 *(nd->nd_errp) = rp->rc_status;
361 ret = RC_REPLY;
362 rp->rc_timestamp = NFSD_MONOSEC +
363 NFSRVCACHE_UDPTIMEOUT;
364 } else if (rp->rc_flag & RC_REPMBUF) {
365 newnfsstats.srvcache_nonidemdonehits++;
366 NFSUNLOCKCACHE();
367 nd->nd_mreq = m_copym(rp->rc_reply, 0,
368 M_COPYALL, M_WAIT);
369 ret = RC_REPLY;
370 rp->rc_timestamp = NFSD_MONOSEC +
371 NFSRVCACHE_UDPTIMEOUT;
372 } else {
373 panic("nfs udp cache1");
374 }
375 nfsrc_unlock(rp);
376 free((caddr_t)newrp, M_NFSRVCACHE);
377 goto out;
378 }
379 }
380 newnfsstats.srvcache_misses++;
381 newnfsstats.srvcache_size++;
382 nfsrc_udpcachesize++;
383
384 newrp->rc_flag |= RC_INPROG;
385 saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
386 if (saddr->sin_family == AF_INET)
387 newrp->rc_inet = saddr->sin_addr.s_addr;
388 else if (saddr->sin_family == AF_INET6) {
389 saddr6 = (struct sockaddr_in6 *)saddr;
390 NFSBCOPY((caddr_t)&saddr6->sin6_addr, (caddr_t)&newrp->rc_inet6,
391 sizeof (struct in6_addr));
392 newrp->rc_flag |= RC_INETIPV6;
393 }
394 LIST_INSERT_HEAD(hp, newrp, rc_hash);
395 TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru);
396 NFSUNLOCKCACHE();
397 nd->nd_rp = newrp;
398 ret = RC_DOIT;
399
400 out:
401 NFSEXITCODE2(0, nd);
402 return (ret);
403 }
404
405 /*
406 * Update a request cache entry after the rpc has been done
407 */
408 APPLESTATIC struct nfsrvcache *
409 nfsrvd_updatecache(struct nfsrv_descript *nd, struct socket *so)
410 {
411 struct nfsrvcache *rp;
412 struct nfsrvcache *retrp = NULL;
413 mbuf_t m;
414
415 rp = nd->nd_rp;
416 if (!rp)
417 panic("nfsrvd_updatecache null rp");
418 nd->nd_rp = NULL;
419 NFSLOCKCACHE();
420 nfsrc_lock(rp);
421 if (!(rp->rc_flag & RC_INPROG))
422 panic("nfsrvd_updatecache not inprog");
423 rp->rc_flag &= ~RC_INPROG;
424 if (rp->rc_flag & RC_UDP) {
425 TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
426 TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
427 }
428
429 /*
430 * Reply from cache is a special case returned by nfsrv_checkseqid().
431 */
432 if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) {
433 newnfsstats.srvcache_nonidemdonehits++;
434 NFSUNLOCKCACHE();
435 nd->nd_repstat = 0;
436 if (nd->nd_mreq)
437 mbuf_freem(nd->nd_mreq);
438 if (!(rp->rc_flag & RC_REPMBUF))
439 panic("reply from cache");
440 nd->nd_mreq = m_copym(rp->rc_reply, 0,
441 M_COPYALL, M_WAIT);
442 rp->rc_timestamp = NFSD_MONOSEC + NFSRVCACHE_TCPTIMEOUT;
443 nfsrc_unlock(rp);
444 goto out;
445 }
446
447 /*
448 * If rc_refcnt > 0, save it
449 * For UDP, save it if ND_SAVEREPLY is set
450 * For TCP, save it if ND_SAVEREPLY and nfsrc_tcpnonidempotent is set
451 */
452 if (nd->nd_repstat != NFSERR_DONTREPLY &&
453 (rp->rc_refcnt > 0 ||
454 ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) ||
455 ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) &&
456 nfsrc_tcpsavedreplies <= nfsrc_floodlevel &&
457 nfsrc_tcpnonidempotent))) {
458 if (rp->rc_refcnt > 0) {
459 if (!(rp->rc_flag & RC_NFSV4))
460 panic("update_cache refcnt");
461 rp->rc_flag |= RC_REFCNT;
462 }
463 if ((nd->nd_flag & ND_NFSV2) &&
464 nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) {
465 rp->rc_status = nd->nd_repstat;
466 rp->rc_flag |= RC_REPSTATUS;
467 NFSUNLOCKCACHE();
468 } else {
469 if (!(rp->rc_flag & RC_UDP)) {
470 nfsrc_tcpsavedreplies++;
471 if (nfsrc_tcpsavedreplies >
472 newnfsstats.srvcache_tcppeak)
473 newnfsstats.srvcache_tcppeak =
474 nfsrc_tcpsavedreplies;
475 }
476 NFSUNLOCKCACHE();
477 m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAIT);
478 NFSLOCKCACHE();
479 rp->rc_reply = m;
480 rp->rc_flag |= RC_REPMBUF;
481 NFSUNLOCKCACHE();
482 }
483 if (rp->rc_flag & RC_UDP) {
484 rp->rc_timestamp = NFSD_MONOSEC +
485 NFSRVCACHE_UDPTIMEOUT;
486 nfsrc_unlock(rp);
487 } else {
488 rp->rc_timestamp = NFSD_MONOSEC +
489 NFSRVCACHE_TCPTIMEOUT;
490 if (rp->rc_refcnt > 0)
491 nfsrc_unlock(rp);
492 else
493 retrp = rp;
494 }
495 } else {
496 nfsrc_freecache(rp);
497 NFSUNLOCKCACHE();
498 }
499
500 out:
501 nfsrc_trimcache(nd->nd_sockref, so);
502 NFSEXITCODE2(0, nd);
503 return (retrp);
504 }
505
506 /*
507 * Invalidate and, if possible, free an in prog cache entry.
508 * Must not sleep.
509 */
510 APPLESTATIC void
511 nfsrvd_delcache(struct nfsrvcache *rp)
512 {
513
514 if (!(rp->rc_flag & RC_INPROG))
515 panic("nfsrvd_delcache not in prog");
516 NFSLOCKCACHE();
517 rp->rc_flag &= ~RC_INPROG;
518 if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED))
519 nfsrc_freecache(rp);
520 NFSUNLOCKCACHE();
521 }
522
523 /*
524 * Called after nfsrvd_updatecache() once the reply is sent, to update
525 * the entry for nfsrc_activesocket() and unlock it. The argument is
526 * the pointer returned by nfsrvd_updatecache().
527 */
528 APPLESTATIC void
529 nfsrvd_sentcache(struct nfsrvcache *rp, struct socket *so, int err)
530 {
531 tcp_seq tmp_seq;
532
533 if (!(rp->rc_flag & RC_LOCKED))
534 panic("nfsrvd_sentcache not locked");
535 if (!err) {
536 if ((so->so_proto->pr_domain->dom_family != AF_INET &&
537 so->so_proto->pr_domain->dom_family != AF_INET6) ||
538 so->so_proto->pr_protocol != IPPROTO_TCP)
539 panic("nfs sent cache");
540 if (nfsrv_getsockseqnum(so, &tmp_seq)) {
541 NFSLOCKCACHE();
542 rp->rc_tcpseq = tmp_seq;
543 rp->rc_flag |= RC_TCPSEQ;
544 NFSUNLOCKCACHE();
545 }
546 }
547 nfsrc_unlock(rp);
548 }
549
550 /*
551 * Get a cache entry for TCP
552 * - key on <xid, nfs version>
553 * (allow multiple entries for a given key)
554 */
555 static int
556 nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
557 {
558 struct nfsrvcache *rp, *nextrp;
559 int i;
560 struct nfsrvcache *hitrp;
561 struct nfsrvhashhead *hp, nfsrc_templist;
562 int hit, ret = 0;
563
564 hp = NFSRCHASH(newrp->rc_xid);
565 newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum);
566 tryagain:
567 NFSLOCKCACHE();
568 hit = 1;
569 LIST_INIT(&nfsrc_templist);
570 /*
571 * Get all the matches and put them on the temp list.
572 */
573 rp = LIST_FIRST(hp);
574 while (rp != LIST_END(hp)) {
575 nextrp = LIST_NEXT(rp, rc_hash);
576 if (newrp->rc_xid == rp->rc_xid &&
577 (!(rp->rc_flag & RC_INPROG) ||
578 ((newrp->rc_flag & RC_SAMETCPCONN) &&
579 newrp->rc_sockref == rp->rc_sockref)) &&
580 (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
581 newrp->rc_proc == rp->rc_proc &&
582 ((newrp->rc_flag & RC_NFSV4) &&
583 newrp->rc_sockref != rp->rc_sockref &&
584 newrp->rc_cachetime >= rp->rc_cachetime)
585 && newrp->rc_reqlen == rp->rc_reqlen &&
586 newrp->rc_cksum == rp->rc_cksum) {
587 LIST_REMOVE(rp, rc_hash);
588 LIST_INSERT_HEAD(&nfsrc_templist, rp, rc_hash);
589 }
590 rp = nextrp;
591 }
592
593 /*
594 * Now, use nfsrc_templist to decide if there is a match.
595 */
596 i = 0;
597 LIST_FOREACH(rp, &nfsrc_templist, rc_hash) {
598 i++;
599 if (rp->rc_refcnt > 0) {
600 hit = 0;
601 break;
602 }
603 }
604 /*
605 * Can be a hit only if one entry left.
606 * Note possible hit entry and put nfsrc_templist back on hash
607 * list.
608 */
609 if (i != 1)
610 hit = 0;
611 hitrp = rp = LIST_FIRST(&nfsrc_templist);
612 while (rp != LIST_END(&nfsrc_templist)) {
613 nextrp = LIST_NEXT(rp, rc_hash);
614 LIST_REMOVE(rp, rc_hash);
615 LIST_INSERT_HEAD(hp, rp, rc_hash);
616 rp = nextrp;
617 }
618 if (LIST_FIRST(&nfsrc_templist) != LIST_END(&nfsrc_templist))
619 panic("nfs gettcp cache templist");
620
621 if (hit) {
622 rp = hitrp;
623 if ((rp->rc_flag & RC_LOCKED) != 0) {
624 rp->rc_flag |= RC_WANTED;
625 NFSUNLOCKCACHE();
626 (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 10 * hz);
627 goto tryagain;
628 }
629 if (rp->rc_flag == 0)
630 panic("nfs tcp cache0");
631 rp->rc_flag |= RC_LOCKED;
632 if (rp->rc_flag & RC_INPROG) {
633 newnfsstats.srvcache_inproghits++;
634 NFSUNLOCKCACHE();
635 if (newrp->rc_sockref == rp->rc_sockref)
636 nfsrc_marksametcpconn(rp->rc_sockref);
637 ret = RC_DROPIT;
638 } else if (rp->rc_flag & RC_REPSTATUS) {
639 /*
640 * V2 only.
641 */
642 newnfsstats.srvcache_nonidemdonehits++;
643 NFSUNLOCKCACHE();
644 if (newrp->rc_sockref == rp->rc_sockref)
645 nfsrc_marksametcpconn(rp->rc_sockref);
646 ret = RC_REPLY;
647 nfsrvd_rephead(nd);
648 *(nd->nd_errp) = rp->rc_status;
649 rp->rc_timestamp = NFSD_MONOSEC +
650 NFSRVCACHE_TCPTIMEOUT;
651 } else if (rp->rc_flag & RC_REPMBUF) {
652 newnfsstats.srvcache_nonidemdonehits++;
653 NFSUNLOCKCACHE();
654 if (newrp->rc_sockref == rp->rc_sockref)
655 nfsrc_marksametcpconn(rp->rc_sockref);
656 ret = RC_REPLY;
657 nd->nd_mreq = m_copym(rp->rc_reply, 0,
658 M_COPYALL, M_WAIT);
659 rp->rc_timestamp = NFSD_MONOSEC +
660 NFSRVCACHE_TCPTIMEOUT;
661 } else {
662 panic("nfs tcp cache1");
663 }
664 nfsrc_unlock(rp);
665 free((caddr_t)newrp, M_NFSRVCACHE);
666 goto out;
667 }
668 newnfsstats.srvcache_misses++;
669 newnfsstats.srvcache_size++;
670
671 /*
672 * For TCP, multiple entries for a key are allowed, so don't
673 * chain it into the hash table until done.
674 */
675 newrp->rc_cachetime = NFSD_MONOSEC;
676 newrp->rc_flag |= RC_INPROG;
677 LIST_INSERT_HEAD(hp, newrp, rc_hash);
678 NFSUNLOCKCACHE();
679 nd->nd_rp = newrp;
680 ret = RC_DOIT;
681
682 out:
683 NFSEXITCODE2(0, nd);
684 return (ret);
685 }
686
687 /*
688 * Lock a cache entry.
689 * Also puts a mutex lock on the cache list.
690 */
691 static void
692 nfsrc_lock(struct nfsrvcache *rp)
693 {
694 NFSCACHELOCKREQUIRED();
695 while ((rp->rc_flag & RC_LOCKED) != 0) {
696 rp->rc_flag |= RC_WANTED;
697 (void) nfsmsleep((caddr_t)rp, NFSCACHEMUTEXPTR, PZERO - 1,
698 "nfsrc", 0);
699 }
700 rp->rc_flag |= RC_LOCKED;
701 }
702
703 /*
704 * Unlock a cache entry.
705 */
706 static void
707 nfsrc_unlock(struct nfsrvcache *rp)
708 {
709
710 NFSLOCKCACHE();
711 rp->rc_flag &= ~RC_LOCKED;
712 nfsrc_wanted(rp);
713 NFSUNLOCKCACHE();
714 }
715
716 /*
717 * Wakeup anyone wanting entry.
718 */
719 static void
720 nfsrc_wanted(struct nfsrvcache *rp)
721 {
722 if (rp->rc_flag & RC_WANTED) {
723 rp->rc_flag &= ~RC_WANTED;
724 wakeup((caddr_t)rp);
725 }
726 }
727
728 /*
729 * Free up the entry.
730 * Must not sleep.
731 */
732 static void
733 nfsrc_freecache(struct nfsrvcache *rp)
734 {
735
736 NFSCACHELOCKREQUIRED();
737 LIST_REMOVE(rp, rc_hash);
738 if (rp->rc_flag & RC_UDP) {
739 TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
740 nfsrc_udpcachesize--;
741 }
742 nfsrc_wanted(rp);
743 if (rp->rc_flag & RC_REPMBUF) {
744 mbuf_freem(rp->rc_reply);
745 if (!(rp->rc_flag & RC_UDP))
746 nfsrc_tcpsavedreplies--;
747 }
748 FREE((caddr_t)rp, M_NFSRVCACHE);
749 newnfsstats.srvcache_size--;
750 }
751
752 /*
753 * Clean out the cache. Called when nfsserver module is unloaded.
754 */
755 APPLESTATIC void
756 nfsrvd_cleancache(void)
757 {
758 struct nfsrvcache *rp, *nextrp;
759 int i;
760
761 NFSLOCKCACHE();
762 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
763 LIST_FOREACH_SAFE(rp, &nfsrvhashtbl[i], rc_hash, nextrp) {
764 nfsrc_freecache(rp);
765 }
766 }
767 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
768 LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) {
769 nfsrc_freecache(rp);
770 }
771 }
772 newnfsstats.srvcache_size = 0;
773 nfsrc_tcpsavedreplies = 0;
774 NFSUNLOCKCACHE();
775 }
776
777 /*
778 * The basic rule is to get rid of entries that are expired.
779 */
780 static void
781 nfsrc_trimcache(u_int64_t sockref, struct socket *so)
782 {
783 struct nfsrvcache *rp, *nextrp;
784 int i;
785
786 NFSLOCKCACHE();
787 TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
788 if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
789 && rp->rc_refcnt == 0
790 && ((rp->rc_flag & RC_REFCNT) ||
791 NFSD_MONOSEC > rp->rc_timestamp ||
792 nfsrc_udpcachesize > nfsrc_udphighwater))
793 nfsrc_freecache(rp);
794 }
795 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
796 LIST_FOREACH_SAFE(rp, &nfsrvhashtbl[i], rc_hash, nextrp) {
797 if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
798 && rp->rc_refcnt == 0
799 && ((rp->rc_flag & RC_REFCNT) ||
800 NFSD_MONOSEC > rp->rc_timestamp ||
801 nfsrc_activesocket(rp, sockref, so)))
802 nfsrc_freecache(rp);
803 }
804 }
805 NFSUNLOCKCACHE();
806 }
807
808 /*
809 * Add a seqid# reference to the cache entry.
810 */
811 APPLESTATIC void
812 nfsrvd_refcache(struct nfsrvcache *rp)
813 {
814
815 NFSLOCKCACHE();
816 if (rp->rc_refcnt < 0)
817 panic("nfs cache refcnt");
818 rp->rc_refcnt++;
819 NFSUNLOCKCACHE();
820 }
821
822 /*
823 * Dereference a seqid# cache entry.
824 */
825 APPLESTATIC void
826 nfsrvd_derefcache(struct nfsrvcache *rp)
827 {
828
829 NFSLOCKCACHE();
830 if (rp->rc_refcnt <= 0)
831 panic("nfs cache derefcnt");
832 rp->rc_refcnt--;
833 if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG)))
834 nfsrc_freecache(rp);
835 NFSUNLOCKCACHE();
836 }
837
838 /*
839 * Check to see if the socket is active.
840 * Return 1 if the reply has been received/acknowledged by the client,
841 * 0 otherwise.
842 * XXX - Uses tcp internals.
843 */
844 static int
845 nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t cur_sockref,
846 struct socket *cur_so)
847 {
848 int ret = 0;
849
850 if (!(rp->rc_flag & RC_TCPSEQ))
851 return (ret);
852 /*
853 * If the sockref is the same, it is the same TCP connection.
854 */
855 if (cur_sockref == rp->rc_sockref)
856 ret = nfsrv_checksockseqnum(cur_so, rp->rc_tcpseq);
857 return (ret);
858 }
859
860 /*
861 * Calculate the length of the mbuf list and a checksum on the first up to
862 * NFSRVCACHE_CHECKLEN bytes.
863 */
864 static int
865 nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum)
866 {
867 int len = 0, cklen;
868 mbuf_t m;
869
870 m = m1;
871 while (m) {
872 len += mbuf_len(m);
873 m = mbuf_next(m);
874 }
875 cklen = (len > NFSRVCACHE_CHECKLEN) ? NFSRVCACHE_CHECKLEN : len;
876 *cksum = in_cksum(m1, cklen);
877 return (len);
878 }
879
/*
 * Mark a TCP connection that is seeing retries. Should never happen for
 * NFSv4.
 * Currently a placeholder: the socket reference is accepted but nothing
 * is recorded.
 */
static void
nfsrc_marksametcpconn(u_int64_t sockref)
{

	(void)sockref;
}
888
Cache object: de0d7b2b3fb4f94afc74b8729f4a2bc0
|