/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/8.0/sys/fs/nfsserver/nfs_nfsdcache.c 191783 2009-05-04 15:23:58Z rmacklem $");

/*
 * Here is the basic algorithm:
 * First, some design criteria I used:
 * - I think a false hit is more serious than a false miss
 * - A false hit for an RPC that has Op(s) that order via seqid# must be
 *	avoided at all cost
 * - A valid hit will probably happen a long time after the original reply
 *	and the TCP socket that the original request was received on will
 *	no longer be active
 *	(The long time delay implies to me that LRU is not appropriate.)
 * - The mechanism will satisfy the requirements of ordering Ops with
 *	seqid#s in them as well as minimizing the risk of redoing retried
 *	non-idempotent Ops.
 * Because it is biased towards avoiding false hits, multiple entries with
 * the same xid are to be expected, especially for the case of the entry
 * in the cache being related to a seqid# sequenced Op.
 *
 * The basic algorithm I'm about to code up:
 * - Null RPCs bypass the cache and are just done
 * For TCP
 * - key on <xid, NFS version> (as noted above, there can be several
 *	entries with the same key)
 *	When a request arrives:
 *		For all that match key
 *		- if RPC# != OR request_size !=
 *			- not a match with this one
 *		- if NFSv4 and received on same TCP socket OR
 *			received on a TCP connection created before the
 *			entry was cached
 *			- not a match with this one
 *			(V2,3 clients might retry on same TCP socket)
 *		- calculate checksum on first N bytes of NFS XDR
 *		- if checksum !=
 *			- not a match for this one
 *		If any of the remaining ones that match has a
 *			seqid_refcnt > 0
 *			- not a match (go do RPC, using new cache entry)
 *		If one match left
 *			- a hit (reply from cache)
 *		else
 *			- miss (go do RPC, using new cache entry)
 *
 * During processing of NFSv4 request:
 *	- set a flag when a non-idempotent Op is processed
 *	- when an Op that uses a seqid# (Open,...) is processed
 *		- if same seqid# as referenced entry in cache
 *			- free new cache entry
 *			- reply from referenced cache entry
 *		  else if next seqid# in order
 *			- free referenced cache entry
 *			- increment seqid_refcnt on new cache entry
 *			- set pointer from Openowner/Lockowner to
 *			  new cache entry (aka reference it)
 *		  else if first seqid# in sequence
 *			- increment seqid_refcnt on new cache entry
 *			- set pointer from Openowner/Lockowner to
 *			  new cache entry (aka reference it)
 *
 * At end of RPC processing:
 *	- if seqid_refcnt > 0 OR flagged non-idempotent on new
 *		cache entry
 *		- save reply in cache entry
 *		- calculate checksum on first N bytes of NFS XDR
 *		  request
 *		- note op and length of XDR request (in bytes)
 *		- timestamp it
 *	  else
 *		- free new cache entry
 *	- Send reply (noting info for socket activity check, below)
 *
 * For cache entries saved above:
 *	- if saved since seqid_refcnt was > 0
 *		- free when seqid_refcnt decrements to 0
 *		  (when next one in sequence is processed above, or
 *		   when Openowner/Lockowner is discarded)
 *	  else { non-idempotent Op(s) }
 *		- free when
 *			- some further activity observed on same
 *			  socket
 *			  (I'm not yet sure how I'm going to do
 *			   this.  Maybe look at the TCP connection
 *			   to see if the send_tcp_sequence# is well
 *			   past sent reply OR K additional RPCs
 *			   replied on same socket OR?)
 *			OR
 *			- when very old (hours, days, weeks?)
 *
 * For UDP (v2, 3 only), pretty much the old way:
 * - key on <xid, NFS version, RPC#, Client host ip#>
 *	(at most one entry for each key)
 *
 * When a Request arrives:
 *	- if a match with entry via key
 *		- if RPC marked In_progress
 *			- discard request (don't send reply)
 *		  else
 *			- reply from cache
 *			- timestamp cache entry
 *	  else
 *		- add entry to cache, marked In_progress
 *		- do RPC
 *		- when RPC done
 *			- if RPC# non-idempotent
 *				- mark entry Done (not In_progress)
 *				- save reply
 *				- timestamp cache entry
 *			  else
 *				- free cache entry
 *			- send reply
 *
 * Later, entries with saved replies are free'd a short time (few minutes)
 *	after reply sent (timestamp).
 * Reference for the UDP case: Chet Juszczak, "Improving the Performance
 *	and Correctness of an NFS Server", in Proc. Winter 1989 USENIX
 *	Conference, pages 53-63, San Diego, February 1989.
 * nfsrc_floodlevel is set to the allowable upper limit for saved replies
 *	for TCP.  For V3, a reply won't be saved when the flood level is
 *	hit.  For V4, the non-idempotent Op will return NFSERR_RESOURCE in
 *	that case.  This level should be set high enough that this almost
 *	never happens.
 */
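
/*
 * A concrete example of the seqid# handling above: if an Openowner's
 * referenced cache entry holds seqid# 5, a retried Op arriving with
 * seqid# 5 is replied to from that entry, while one with seqid# 6
 * advances the reference to the new entry (freeing the old one).
 * Other values are expected to be rejected by the seqid# checking
 * itself (e.g. NFSERR_BADSEQID) rather than handled by this cache.
 */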
#ifndef APPLEKEXT
#include <fs/nfs/nfsport.h>

extern struct nfsstats newnfsstats;
NFSCACHEMUTEX;
int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0;
#endif	/* !APPLEKEXT */

static int nfsrc_tcpnonidempotent = 1;
static int nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER, nfsrc_udpcachesize = 0;
static TAILQ_HEAD(, nfsrvcache) nfsrvudplru;
static struct nfsrvhashhead nfsrvhashtbl[NFSRVCACHE_HASHSIZE],
    nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];
/*
 * The reverse mapping from generic to Version 2 procedure numbers,
 * indexed by the generic (Version 3) procedure number.
 */
static int newnfsv2_procid[NFS_V3NPROCS] = {
	NFSV2PROC_NULL,
	NFSV2PROC_GETATTR,
	NFSV2PROC_SETATTR,
	NFSV2PROC_LOOKUP,
	NFSV2PROC_NOOP,		/* ACCESS has no V2 equivalent */
	NFSV2PROC_READLINK,
	NFSV2PROC_READ,
	NFSV2PROC_WRITE,
	NFSV2PROC_CREATE,
	NFSV2PROC_MKDIR,
	NFSV2PROC_SYMLINK,
	NFSV2PROC_CREATE,	/* MKNOD is done via V2 CREATE */
	NFSV2PROC_REMOVE,
	NFSV2PROC_RMDIR,
	NFSV2PROC_RENAME,
	NFSV2PROC_LINK,
	NFSV2PROC_READDIR,
	NFSV2PROC_NOOP,		/* READDIRPLUS has no V2 equivalent */
	NFSV2PROC_STATFS,	/* FSSTAT */
	NFSV2PROC_NOOP,		/* FSINFO */
	NFSV2PROC_NOOP,		/* PATHCONF */
	NFSV2PROC_NOOP,		/* COMMIT */
};

#define	NFSRCUDPHASH(xid) \
	(&nfsrvudphashtbl[((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE])
#define	NFSRCHASH(xid) \
	(&nfsrvhashtbl[((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE])
#define	TRUE	1
#define	FALSE	0
#define	NFSRVCACHE_CHECKLEN	100
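
/*
 * Folding (xid >> 24) into the bucket index mixes the xid's high-order
 * byte into the hash, so xids that differ only in their top byte do not
 * all chain onto the same list.  NFSRVCACHE_CHECKLEN is the "N" in the
 * design notes above: only the first N bytes of the XDR request are
 * checksummed, by nfsrc_getlenandcksum() below.
 */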

/*
 * True iff the rpc reply is an nfs status ONLY!
 * (Indexed by Version 2 procedure number, via newnfsv2_procid above.)
 */
static int nfsv2_repstat[NFS_V3NPROCS] = {
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	TRUE,			/* REMOVE */
	TRUE,			/* RENAME */
	TRUE,			/* LINK */
	TRUE,			/* SYMLINK */
	FALSE,
	TRUE,			/* RMDIR */
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
};
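
/*
 * For the procedures marked TRUE, nfsrvd_updatecache() saves just the
 * status word in rc_status (RC_REPSTATUS) rather than copying the whole
 * reply mbuf list (RC_REPMBUF), since the status is the entire NFS-level
 * V2 reply.
 */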

/*
 * Will NFS want to work over IPv6 someday?
 */
#define	NETFAMILY(rp) \
	(((rp)->rc_flag & RC_INETIPV6) ? AF_INET6 : AF_INET)

/* local functions */
static int nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
static int nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
static void nfsrc_lock(struct nfsrvcache *rp);
static void nfsrc_unlock(struct nfsrvcache *rp);
static void nfsrc_wanted(struct nfsrvcache *rp);
static void nfsrc_freecache(struct nfsrvcache *rp);
static void nfsrc_trimcache(u_int64_t, struct socket *);
static int nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t,
    struct socket *);
static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum);
static void nfsrc_marksametcpconn(u_int64_t);

/*
 * Initialize the server request cache list
 */
APPLESTATIC void
nfsrvd_initcache(void)
{
	int i;
	static int inited = 0;

	if (inited)
		return;
	inited = 1;
	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
		LIST_INIT(&nfsrvudphashtbl[i]);
		LIST_INIT(&nfsrvhashtbl[i]);
	}
	TAILQ_INIT(&nfsrvudplru);
	nfsrc_tcpsavedreplies = 0;
	nfsrc_udpcachesize = 0;
	newnfsstats.srvcache_tcppeak = 0;
	newnfsstats.srvcache_size = 0;
}

/*
 * Get a cache entry for this request.  Basically just malloc a new one
 * and then call nfsrc_getudp() or nfsrc_gettcp() to do the rest.
 * Call nfsrc_trimcache() to clean up the cache before returning.
 */
APPLESTATIC int
nfsrvd_getcache(struct nfsrv_descript *nd, struct socket *so)
{
	struct nfsrvcache *newrp;
	int ret;

	if (nd->nd_procnum == NFSPROC_NULL)
		panic("nfsd cache null");
	MALLOC(newrp, struct nfsrvcache *, sizeof (struct nfsrvcache),
	    M_NFSRVCACHE, M_WAITOK);
	NFSBZERO((caddr_t)newrp, sizeof (struct nfsrvcache));
	if (nd->nd_flag & ND_NFSV4)
		newrp->rc_flag = RC_NFSV4;
	else if (nd->nd_flag & ND_NFSV3)
		newrp->rc_flag = RC_NFSV3;
	else
		newrp->rc_flag = RC_NFSV2;
	newrp->rc_xid = nd->nd_retxid;
	newrp->rc_proc = nd->nd_procnum;
	newrp->rc_sockref = nd->nd_sockref;
	newrp->rc_cachetime = nd->nd_tcpconntime;
	if (nd->nd_flag & ND_SAMETCPCONN)
		newrp->rc_flag |= RC_SAMETCPCONN;
	if (nd->nd_nam2 != NULL) {
		newrp->rc_flag |= RC_UDP;
		ret = nfsrc_getudp(nd, newrp);
	} else {
		ret = nfsrc_gettcp(nd, newrp);
	}
	nfsrc_trimcache(nd->nd_sockref, so);
	return (ret);
}
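
/*
 * The caller dispatches on the return value, much as in the Juszczak
 * scheme referenced above: RC_DOIT - execute the RPC (nd_rp points to
 * the new In_progress entry), RC_REPLY - send the reply now held in
 * nd_mreq, RC_DROPIT - discard the request without replying.
 */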

/*
 * For UDP (v2, v3):
 * - key on <xid, NFS version, RPC#, Client host ip#>
 *	(at most one entry for each key)
 */
static int
nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
{
	struct nfsrvcache *rp;
	struct sockaddr_in *saddr;
	struct sockaddr_in6 *saddr6;
	struct nfsrvhashhead *hp;
	int ret = 0;

	hp = NFSRCUDPHASH(newrp->rc_xid);
loop:
	NFSLOCKCACHE();
	LIST_FOREACH(rp, hp, rc_hash) {
		if (newrp->rc_xid == rp->rc_xid &&
		    newrp->rc_proc == rp->rc_proc &&
		    (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
		    nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
			if ((rp->rc_flag & RC_LOCKED) != 0) {
				rp->rc_flag |= RC_WANTED;
				NFSUNLOCKCACHE();
				(void) tsleep((caddr_t)rp, PZERO - 1,
				    "nfsrc", 10 * hz);
				goto loop;
			}
			if (rp->rc_flag == 0)
				panic("nfs udp cache0");
			rp->rc_flag |= RC_LOCKED;
			TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
			TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
			if (rp->rc_flag & RC_INPROG) {
				newnfsstats.srvcache_inproghits++;
				NFSUNLOCKCACHE();
				ret = RC_DROPIT;
			} else if (rp->rc_flag & RC_REPSTATUS) {
				/*
				 * V2 only.
				 */
				newnfsstats.srvcache_nonidemdonehits++;
				NFSUNLOCKCACHE();
				nfsrvd_rephead(nd);
				*(nd->nd_errp) = rp->rc_status;
				ret = RC_REPLY;
				rp->rc_timestamp = NFSD_MONOSEC +
				    NFSRVCACHE_UDPTIMEOUT;
			} else if (rp->rc_flag & RC_REPMBUF) {
				newnfsstats.srvcache_nonidemdonehits++;
				NFSUNLOCKCACHE();
				nd->nd_mreq = m_copym(rp->rc_reply, 0,
				    M_COPYALL, M_WAIT);
				ret = RC_REPLY;
				rp->rc_timestamp = NFSD_MONOSEC +
				    NFSRVCACHE_UDPTIMEOUT;
			} else {
				panic("nfs udp cache1");
			}
			nfsrc_unlock(rp);
			free((caddr_t)newrp, M_NFSRVCACHE);
			return (ret);
		}
	}
	newnfsstats.srvcache_misses++;
	newnfsstats.srvcache_size++;
	nfsrc_udpcachesize++;

	newrp->rc_flag |= RC_INPROG;
	saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
	if (saddr->sin_family == AF_INET)
		newrp->rc_inet = saddr->sin_addr.s_addr;
	else if (saddr->sin_family == AF_INET6) {
		saddr6 = (struct sockaddr_in6 *)saddr;
		NFSBCOPY((caddr_t)&saddr6->sin6_addr, (caddr_t)&newrp->rc_inet6,
		    sizeof (struct in6_addr));
		newrp->rc_flag |= RC_INETIPV6;
	}
	LIST_INSERT_HEAD(hp, newrp, rc_hash);
	TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru);
	NFSUNLOCKCACHE();
	nd->nd_rp = newrp;
	return (RC_DOIT);
}

/*
 * Update a request cache entry after the rpc has been done
 */
APPLESTATIC struct nfsrvcache *
nfsrvd_updatecache(struct nfsrv_descript *nd, struct socket *so)
{
	struct nfsrvcache *rp;
	struct nfsrvcache *retrp = NULL;

	rp = nd->nd_rp;
	if (!rp)
		panic("nfsrvd_updatecache null rp");
	nd->nd_rp = NULL;
	NFSLOCKCACHE();
	nfsrc_lock(rp);
	if (!(rp->rc_flag & RC_INPROG))
		panic("nfsrvd_updatecache not inprog");
	rp->rc_flag &= ~RC_INPROG;
	if (rp->rc_flag & RC_UDP) {
		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
		TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
	}

	/*
	 * Reply from cache is a special case returned by nfsrv_checkseqid().
	 */
	if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) {
		newnfsstats.srvcache_nonidemdonehits++;
		NFSUNLOCKCACHE();
		nd->nd_repstat = 0;
		if (nd->nd_mreq)
			mbuf_freem(nd->nd_mreq);
		if (!(rp->rc_flag & RC_REPMBUF))
			panic("reply from cache");
		nd->nd_mreq = m_copym(rp->rc_reply, 0,
		    M_COPYALL, M_WAIT);
		rp->rc_timestamp = NFSD_MONOSEC + NFSRVCACHE_TCPTIMEOUT;
		nfsrc_unlock(rp);
		nfsrc_trimcache(nd->nd_sockref, so);
		return (retrp);
	}

	/*
	 * If rc_refcnt > 0, save it
	 * For UDP, save it if ND_SAVEREPLY is set
	 * For TCP, save it if ND_SAVEREPLY and nfsrc_tcpnonidempotent is set
	 */
	if (nd->nd_repstat != NFSERR_DONTREPLY &&
	    (rp->rc_refcnt > 0 ||
	     ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) ||
	     ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) &&
	      nfsrc_tcpsavedreplies <= nfsrc_floodlevel &&
	      nfsrc_tcpnonidempotent))) {
		if (rp->rc_refcnt > 0) {
			if (!(rp->rc_flag & RC_NFSV4))
				panic("update_cache refcnt");
			rp->rc_flag |= RC_REFCNT;
		}
		if ((nd->nd_flag & ND_NFSV2) &&
		    nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) {
			NFSUNLOCKCACHE();
			rp->rc_status = nd->nd_repstat;
			rp->rc_flag |= RC_REPSTATUS;
		} else {
			if (!(rp->rc_flag & RC_UDP)) {
				nfsrc_tcpsavedreplies++;
				if (nfsrc_tcpsavedreplies >
				    newnfsstats.srvcache_tcppeak)
					newnfsstats.srvcache_tcppeak =
					    nfsrc_tcpsavedreplies;
			}
			NFSUNLOCKCACHE();
			rp->rc_reply = m_copym(nd->nd_mreq, 0, M_COPYALL,
			    M_WAIT);
			rp->rc_flag |= RC_REPMBUF;
		}
		if (rp->rc_flag & RC_UDP) {
			rp->rc_timestamp = NFSD_MONOSEC +
			    NFSRVCACHE_UDPTIMEOUT;
			nfsrc_unlock(rp);
		} else {
			rp->rc_timestamp = NFSD_MONOSEC +
			    NFSRVCACHE_TCPTIMEOUT;
			if (rp->rc_refcnt > 0)
				nfsrc_unlock(rp);
			else
				retrp = rp;
		}
	} else {
		nfsrc_freecache(rp);
		NFSUNLOCKCACHE();
	}
	nfsrc_trimcache(nd->nd_sockref, so);
	return (retrp);
}

/*
 * Invalidate and, if possible, free an in prog cache entry.
 * Must not sleep.
 */
APPLESTATIC void
nfsrvd_delcache(struct nfsrvcache *rp)
{

	if (!(rp->rc_flag & RC_INPROG))
		panic("nfsrvd_delcache not in prog");
	NFSLOCKCACHE();
	rp->rc_flag &= ~RC_INPROG;
	if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED))
		nfsrc_freecache(rp);
	NFSUNLOCKCACHE();
}

/*
 * Called after nfsrvd_updatecache() once the reply is sent, to update
 * the entry for nfsrc_activesocket() and unlock it.  The argument is
 * the pointer returned by nfsrvd_updatecache().
 */
APPLESTATIC void
nfsrvd_sentcache(struct nfsrvcache *rp, struct socket *so, int err)
{

	if (!(rp->rc_flag & RC_LOCKED))
		panic("nfsrvd_sentcache not locked");
	if (!err) {
		if (so->so_proto->pr_domain->dom_family != AF_INET ||
		    so->so_proto->pr_protocol != IPPROTO_TCP)
			panic("nfs sent cache");
		if (nfsrv_getsockseqnum(so, &rp->rc_tcpseq))
			rp->rc_flag |= RC_TCPSEQ;
	}
	nfsrc_unlock(rp);
}
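
/*
 * A rough, non-compiled sketch of how a server loop might drive the
 * entry points above for one request (nd, so).  nfsrvd_dorpc() and
 * nfsrvd_sendreply() are illustrative stand-ins for the real dispatch
 * and transmit steps, and all error handling is elided.
 */
#ifdef notdef
	switch (nfsrvd_getcache(nd, so)) {
	case RC_DOIT:
		nfsrvd_dorpc(nd);			/* execute the RPC */
		rp = nfsrvd_updatecache(nd, so);	/* maybe save reply */
		err = nfsrvd_sendreply(nd, so);
		if (rp != NULL)				/* saved TCP reply */
			nfsrvd_sentcache(rp, so, err);
		break;
	case RC_REPLY:
		(void) nfsrvd_sendreply(nd, so);	/* cached reply in nd_mreq */
		break;
	case RC_DROPIT:
		break;			/* retry of an RPC still in progress */
	}
#endif	/* notdef */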

/*
 * Get a cache entry for TCP
 * - key on <xid, nfs version>
 *	(allow multiple entries for a given key)
 */
static int
nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
{
	struct nfsrvcache *rp, *nextrp;
	int i;
	struct nfsrvcache *hitrp;
	struct nfsrvhashhead *hp, nfsrc_templist;
	int hit, ret = 0;

	hp = NFSRCHASH(newrp->rc_xid);
	newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum);
tryagain:
	NFSLOCKCACHE();
	hit = 1;
	LIST_INIT(&nfsrc_templist);
	/*
	 * Get all the matches and put them on the temp list.
	 */
	rp = LIST_FIRST(hp);
	while (rp != LIST_END(hp)) {
		nextrp = LIST_NEXT(rp, rc_hash);
		if (newrp->rc_xid == rp->rc_xid &&
		    (!(rp->rc_flag & RC_INPROG) ||
		     ((newrp->rc_flag & RC_SAMETCPCONN) &&
		      newrp->rc_sockref == rp->rc_sockref)) &&
		    (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
		    newrp->rc_proc == rp->rc_proc &&
		    ((newrp->rc_flag & RC_NFSV4) &&
		     newrp->rc_sockref != rp->rc_sockref &&
		     newrp->rc_cachetime >= rp->rc_cachetime)
		    && newrp->rc_reqlen == rp->rc_reqlen &&
		    newrp->rc_cksum == rp->rc_cksum) {
			LIST_REMOVE(rp, rc_hash);
			LIST_INSERT_HEAD(&nfsrc_templist, rp, rc_hash);
		}
		rp = nextrp;
	}

	/*
	 * Now, use nfsrc_templist to decide if there is a match.
	 */
	i = 0;
	LIST_FOREACH(rp, &nfsrc_templist, rc_hash) {
		i++;
		if (rp->rc_refcnt > 0) {
			hit = 0;
			break;
		}
	}
	/*
	 * Can be a hit only if one entry left.
	 * Note possible hit entry and put nfsrc_templist back on hash
	 * list.
	 */
	if (i != 1)
		hit = 0;
	hitrp = rp = LIST_FIRST(&nfsrc_templist);
	while (rp != LIST_END(&nfsrc_templist)) {
		nextrp = LIST_NEXT(rp, rc_hash);
		LIST_REMOVE(rp, rc_hash);
		LIST_INSERT_HEAD(hp, rp, rc_hash);
		rp = nextrp;
	}
	if (LIST_FIRST(&nfsrc_templist) != LIST_END(&nfsrc_templist))
		panic("nfs gettcp cache templist");

	if (hit) {
		rp = hitrp;
		if ((rp->rc_flag & RC_LOCKED) != 0) {
			rp->rc_flag |= RC_WANTED;
			NFSUNLOCKCACHE();
			(void) tsleep((caddr_t)rp, PZERO - 1, "nfsrc", 10 * hz);
			goto tryagain;
		}
		if (rp->rc_flag == 0)
			panic("nfs tcp cache0");
		rp->rc_flag |= RC_LOCKED;
		if (rp->rc_flag & RC_INPROG) {
			newnfsstats.srvcache_inproghits++;
			NFSUNLOCKCACHE();
			if (newrp->rc_sockref == rp->rc_sockref)
				nfsrc_marksametcpconn(rp->rc_sockref);
			ret = RC_DROPIT;
		} else if (rp->rc_flag & RC_REPSTATUS) {
			/*
			 * V2 only.
			 */
			newnfsstats.srvcache_nonidemdonehits++;
			NFSUNLOCKCACHE();
			if (newrp->rc_sockref == rp->rc_sockref)
				nfsrc_marksametcpconn(rp->rc_sockref);
			ret = RC_REPLY;
			nfsrvd_rephead(nd);
			*(nd->nd_errp) = rp->rc_status;
			rp->rc_timestamp = NFSD_MONOSEC +
			    NFSRVCACHE_TCPTIMEOUT;
		} else if (rp->rc_flag & RC_REPMBUF) {
			newnfsstats.srvcache_nonidemdonehits++;
			NFSUNLOCKCACHE();
			if (newrp->rc_sockref == rp->rc_sockref)
				nfsrc_marksametcpconn(rp->rc_sockref);
			ret = RC_REPLY;
			nd->nd_mreq = m_copym(rp->rc_reply, 0,
			    M_COPYALL, M_WAIT);
			rp->rc_timestamp = NFSD_MONOSEC +
			    NFSRVCACHE_TCPTIMEOUT;
		} else {
			panic("nfs tcp cache1");
		}
		nfsrc_unlock(rp);
		free((caddr_t)newrp, M_NFSRVCACHE);
		return (ret);
	}
	newnfsstats.srvcache_misses++;
	newnfsstats.srvcache_size++;

	/*
	 * For TCP, multiple entries for a key are allowed, so the new
	 * entry can be chained into the hash table right away, marked
	 * In_progress until the RPC is done.
	 */
	newrp->rc_cachetime = NFSD_MONOSEC;
	newrp->rc_flag |= RC_INPROG;
	LIST_INSERT_HEAD(hp, newrp, rc_hash);
	NFSUNLOCKCACHE();
	nd->nd_rp = newrp;
	return (RC_DOIT);
}

/*
 * Lock a cache entry.
 * Must be called with the cache mutex held; the mutex may be released
 * while sleeping for the entry lock, but is reacquired before return.
 */
static void
nfsrc_lock(struct nfsrvcache *rp)
{
	NFSCACHELOCKREQUIRED();
	while ((rp->rc_flag & RC_LOCKED) != 0) {
		rp->rc_flag |= RC_WANTED;
		(void) nfsmsleep((caddr_t)rp, NFSCACHEMUTEXPTR, PZERO - 1,
		    "nfsrc", 0);
	}
	rp->rc_flag |= RC_LOCKED;
}

/*
 * Unlock a cache entry.
 */
static void
nfsrc_unlock(struct nfsrvcache *rp)
{
	rp->rc_flag &= ~RC_LOCKED;
	nfsrc_wanted(rp);
}

/*
 * Wakeup anyone wanting entry.
 */
static void
nfsrc_wanted(struct nfsrvcache *rp)
{
	if (rp->rc_flag & RC_WANTED) {
		rp->rc_flag &= ~RC_WANTED;
		wakeup((caddr_t)rp);
	}
}

/*
 * Free up the entry.
 * Must not sleep.
 */
static void
nfsrc_freecache(struct nfsrvcache *rp)
{

	NFSCACHELOCKREQUIRED();
	LIST_REMOVE(rp, rc_hash);
	if (rp->rc_flag & RC_UDP) {
		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
		nfsrc_udpcachesize--;
	}
	nfsrc_wanted(rp);
	if (rp->rc_flag & RC_REPMBUF) {
		mbuf_freem(rp->rc_reply);
		if (!(rp->rc_flag & RC_UDP))
			nfsrc_tcpsavedreplies--;
	}
	FREE((caddr_t)rp, M_NFSRVCACHE);
	newnfsstats.srvcache_size--;
}

#ifdef notdef
/*
 * Clean out the cache.  Called when the last nfsd terminates.
 */
APPLESTATIC void
nfsrvd_cleancache(void)
{
	struct nfsrvcache *rp, *nextrp;
	int i;

	NFSLOCKCACHE();
	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
		LIST_FOREACH_SAFE(rp, &nfsrvhashtbl[i], rc_hash, nextrp) {
			nfsrc_freecache(rp);
		}
	}
	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
		LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) {
			nfsrc_freecache(rp);
		}
	}
	newnfsstats.srvcache_size = 0;
	nfsrc_tcpsavedreplies = 0;
	NFSUNLOCKCACHE();
}
#endif	/* notdef */

/*
 * The basic rule is to get rid of entries that are expired.
 */
static void
nfsrc_trimcache(u_int64_t sockref, struct socket *so)
{
	struct nfsrvcache *rp, *nextrp;
	int i;

	NFSLOCKCACHE();
	TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
		if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
		    && rp->rc_refcnt == 0
		    && ((rp->rc_flag & RC_REFCNT) ||
			NFSD_MONOSEC > rp->rc_timestamp ||
			nfsrc_udpcachesize > nfsrc_udphighwater))
			nfsrc_freecache(rp);
	}
	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
		LIST_FOREACH_SAFE(rp, &nfsrvhashtbl[i], rc_hash, nextrp) {
			if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
			    && rp->rc_refcnt == 0
			    && ((rp->rc_flag & RC_REFCNT) ||
				NFSD_MONOSEC > rp->rc_timestamp ||
				nfsrc_activesocket(rp, sockref, so)))
				nfsrc_freecache(rp);
		}
	}
	NFSUNLOCKCACHE();
}
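
/*
 * Roughly: UDP entries are freed when expired or when the cache has grown
 * past nfsrc_udphighwater; TCP entries when expired, when a seqid#
 * reference has come and gone (RC_REFCNT set with rc_refcnt now 0), or
 * when nfsrc_activesocket() indicates the client has seen the reply.
 */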

/*
 * Add a seqid# reference to the cache entry.
 */
APPLESTATIC void
nfsrvd_refcache(struct nfsrvcache *rp)
{

	NFSLOCKCACHE();
	if (rp->rc_refcnt < 0)
		panic("nfs cache refcnt");
	rp->rc_refcnt++;
	NFSUNLOCKCACHE();
}

/*
 * Dereference a seqid# cache entry.
 */
APPLESTATIC void
nfsrvd_derefcache(struct nfsrvcache *rp)
{

	NFSLOCKCACHE();
	if (rp->rc_refcnt <= 0)
		panic("nfs cache derefcnt");
	rp->rc_refcnt--;
	if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG)))
		nfsrc_freecache(rp);
	NFSUNLOCKCACHE();
}

/*
 * Check to see if the socket is active.
 * Return 1 if the reply has been received/acknowledged by the client,
 * 0 otherwise.
 * XXX - Uses tcp internals.
 */
static int
nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t cur_sockref,
    struct socket *cur_so)
{
	int ret = 0;

	if (!(rp->rc_flag & RC_TCPSEQ))
		return (ret);
	/*
	 * If the sockref is the same, it is the same TCP connection.
	 */
	if (cur_sockref == rp->rc_sockref)
		ret = nfsrv_checksockseqnum(cur_so, rp->rc_tcpseq);
	return (ret);
}

/*
 * Calculate the length of the mbuf list and a checksum on the first up to
 * NFSRVCACHE_CHECKLEN bytes.
 */
static int
nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum)
{
	int len = 0, cklen;
	mbuf_t m;

	m = m1;
	while (m) {
		len += mbuf_len(m);
		m = mbuf_next(m);
	}
	cklen = (len > NFSRVCACHE_CHECKLEN) ? NFSRVCACHE_CHECKLEN : len;
	*cksum = in_cksum(m1, cklen);
	return (len);
}
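
/*
 * Capping the checksum at the first NFSRVCACHE_CHECKLEN (100) bytes bounds
 * the per-request cost, presumably on the theory that the leading XDR is
 * where two different requests that happen to share an xid will differ.
 */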

/*
 * Mark a TCP connection that is seeing retries.  Should never happen for
 * NFSv4.
 */
static void
nfsrc_marksametcpconn(u_int64_t sockref)
{

	/* Currently a no-op. */
}
