On 15 May, Robert Watson wrote: > > On Thu, 15 May 2003, Don Lewis wrote: > >> Have you had a chance to exercise my NFS server vnode locking patch? If >> so, the original version, or the second version where I removed the >> checks on the vn_lock(..., LK_RETRY, ...) return value? >> >> I've had one report from someone who was having "locking against myself" >> panics that were fixed by my patch. > > I have been running it, but I haven't been testing it as extensively as I > like because I couldn't get it running well on my NFS clients at first. > Since I've now got that pretty much cleaned up, I'm about ready to go on > the NFS server testing, and was just in the process of setting up the > testing when you e-mailed me. I appear to have the old patch which checks > vn_lock() failures, which as I understand it aren't ever supposed to > happen. If you could drop me the updated patch, would be good. Attached. > BTW, we have conflicting fifofs patches -- mine avoids the VOP_CLOSE() > locking assertion in fifo_open(), I believe, and yours fixes real bugs. > Does your fifo patch run happily with DEBUG_VFS_LOCKS when in active use > (i.e., with the client rpc.lockd)? If so, I should just drop my patch and > use yours. I'm still dinking with my version of the patch. I'm splitting it into two pieces. A bug fix for 5.1, and a post 5.1 cleanup. It seems to work with DEBUG_VFS_LOCKS in the testing that I've done, but I haven't tried it with rpc.lockd. The main problem is that I only have one box running -current at the present time. I can exercise the client code using a -stable server, or test the server code from a -stable client. Since rpc.lockd is pretty non-functional in -stable, I can't easily test it in that configuration. I can test the -current server and client software together by NFS mounting the local file system to a subdirectory, but it's hard to say what rpc.lockd would do in that configuration ;-) I'll try to send out my latest version of the patch later tonight after I've had a chance to play with it some more ... and after The Matrix :-) Index: sys/nfsserver/nfs.h =================================================================== RCS file: /home/ncvs/src/sys/nfsserver/nfs.h,v retrieving revision 1.68 diff -u -r1.68 nfs.h --- sys/nfsserver/nfs.h 24 Jul 2002 22:27:35 -0000 1.68 +++ sys/nfsserver/nfs.h 5 May 2003 00:34:59 -0000 @@ -332,7 +332,8 @@ int netaddr_match(int, union nethostaddr *, struct sockaddr *); int nfs_namei(struct nameidata *, fhandle_t *, int, struct nfssvc_sock *, struct sockaddr *, struct mbuf **, - caddr_t *, struct vnode **, struct thread *, int); + caddr_t *, struct vnode **, int, struct vattr *, int *, + struct thread *, int); void nfsm_adj(struct mbuf *, int, int); int nfsm_mbuftouio(struct mbuf **, struct uio *, int, caddr_t *); void nfsrv_initcache(void); Index: sys/nfsserver/nfs_serv.c =================================================================== RCS file: /home/ncvs/src/sys/nfsserver/nfs_serv.c,v retrieving revision 1.133 diff -u -r1.133 nfs_serv.c --- sys/nfsserver/nfs_serv.c 24 Apr 2003 04:31:25 -0000 1.133 +++ sys/nfsserver/nfs_serv.c 11 May 2003 23:35:38 -0000 @@ -467,7 +467,7 @@ nd.ni_cnd.cn_nameiop = LOOKUP; nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, td, pubflag); + &dirp, v3, &dirattr, &dirattr_ret, td, pubflag); /* * namei failure, only dirp to cleanup. Clear out garbarge from @@ -476,9 +476,6 @@ if (error) { if (dirp) { - if (v3) - dirattr_ret = VOP_GETATTR(dirp, &dirattr, cred, - td); vrele(dirp); dirp = NULL; } @@ -551,9 +548,6 @@ } if (dirp) { - if (v3) - dirattr_ret = VOP_GETATTR(dirp, &dirattr, cred, - td); vrele(dirp); dirp = NULL; } @@ -1630,15 +1624,10 @@ * prior to calling nfsm_reply ( which might goto nfsmout ). */ error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, td, FALSE); - if (dirp) { - if (v3) { - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - td); - } else { - vrele(dirp); - dirp = NULL; - } + &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); + if (dirp && !v3) { + vrele(dirp); + dirp = NULL; } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); @@ -1809,9 +1798,25 @@ if (exclusive_flag && !error && bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF)) error = EEXIST; - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); - vrele(dirp); - dirp = NULL; + if (dirp == nd.ni_dvp) + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + else { + /* Drop the other locks to avoid deadlock. */ + if (nd.ni_dvp) { + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + } + if (nd.ni_vp) + vput(nd.ni_vp); + nd.ni_dvp = NULL; + nd.ni_vp = NULL; + + vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td); + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + VOP_UNLOCK(dirp, 0, td); + } } ereply: nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3)); @@ -1906,9 +1911,7 @@ */ error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, td, FALSE); - if (dirp) - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, td); + &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); if (error) { nfsm_reply(NFSX_WCCDATA(1)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); @@ -2006,10 +2009,10 @@ vp = NULL; nd.ni_vp = NULL; } - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); if (dirp) { - vrele(dirp); - dirp = NULL; + vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td); + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + VOP_UNLOCK(dirp, 0, td); } ereply: nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1)); @@ -2085,15 +2088,10 @@ nd.ni_cnd.cn_nameiop = DELETE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, td, FALSE); - if (dirp) { - if (v3) { - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - td); - } else { - vrele(dirp); - dirp = NULL; - } + &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); + if (dirp && !v3) { + vrele(dirp); + dirp = NULL; } if (error == 0) { if (nd.ni_vp->v_type == VDIR) { @@ -2114,9 +2112,25 @@ } } if (dirp && v3) { - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); - vrele(dirp); - dirp = NULL; + if (dirp == nd.ni_dvp) + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + else { + /* Drop the other locks to avoid deadlock. */ + if (nd.ni_dvp) { + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + } + if (nd.ni_vp) + vput(nd.ni_vp); + nd.ni_dvp = NULL; + nd.ni_vp = NULL; + + vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td); + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + VOP_UNLOCK(dirp, 0, td); + } } ereply: nfsm_reply(NFSX_WCCDATA(v3)); @@ -2200,15 +2214,10 @@ fromnd.ni_cnd.cn_nameiop = DELETE; fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART; error = nfs_namei(&fromnd, ffhp, len, slp, nam, &md, - &dpos, &fdirp, td, FALSE); - if (fdirp) { - if (v3) { - fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor, cred, - td); - } else { - vrele(fdirp); - fdirp = NULL; - } + &dpos, &fdirp, v3, &fdirfor, &fdirfor_ret, td, FALSE); + if (fdirp && !v3) { + vrele(fdirp); + fdirp = NULL; } if (error) { nfsm_reply(2 * NFSX_WCCDATA(v3)); @@ -2227,15 +2236,10 @@ tond.ni_cnd.cn_nameiop = RENAME; tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART; error = nfs_namei(&tond, tfhp, len2, slp, nam, &md, - &dpos, &tdirp, td, FALSE); - if (tdirp) { - if (v3) { - tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor, cred, - td); - } else { - vrele(tdirp); - tdirp = NULL; - } + &dpos, &tdirp, v3, &tdirfor, &tdirfor_ret, td, FALSE); + if (tdirp && !v3) { + vrele(tdirp); + tdirp = NULL; } if (error) goto out1; @@ -2318,12 +2322,30 @@ /* fall through */ out1: - if (fdirp) - fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred, td); - if (tdirp) - tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred, td); nfsm_reply(2 * NFSX_WCCDATA(v3)); if (v3) { + /* Release existing locks to prevent deadlock. */ + if (tond.ni_dvp) { + if (tond.ni_dvp == tond.ni_vp) + vrele(tond.ni_dvp); + else + vput(tond.ni_dvp); + } + if (tond.ni_vp) + vput(tond.ni_vp); + tond.ni_dvp = NULL; + tond.ni_vp = NULL; + + if (fdirp) { + vn_lock(fdirp, LK_EXCLUSIVE | LK_RETRY, td); + fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred, td); + VOP_UNLOCK(fdirp, 0, td); + } + if (tdirp) { + vn_lock(tdirp, LK_EXCLUSIVE | LK_RETRY, td); + tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred, td); + VOP_UNLOCK(tdirp, 0, td); + } nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } @@ -2407,7 +2429,7 @@ nfsm_srvmtofh(dfhp); nfsm_srvnamesiz(len); - error = nfsrv_fhtovp(fhp, FALSE, &vp, cred, slp, nam, &rdonly, TRUE); + error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, slp, nam, &rdonly, TRUE); if (error) { nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { @@ -2418,47 +2440,71 @@ error = 0; goto nfsmout; } + if (v3) + getret = VOP_GETATTR(vp, &at, cred, td); if (vp->v_type == VDIR) { error = EPERM; /* POSIX */ goto out1; } + VOP_UNLOCK(vp, 0, td); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT; error = nfs_namei(&nd, dfhp, len, slp, nam, &md, &dpos, - &dirp, td, FALSE); - if (dirp) { - if (v3) { - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - td); - } else { - vrele(dirp); - dirp = NULL; - } + &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); + if (dirp && !v3) { + vrele(dirp); + dirp = NULL; + } + if (error) { + vrele(vp); + vp = NULL; + goto out2; } - if (error) - goto out1; - xp = nd.ni_vp; if (xp != NULL) { error = EEXIST; - goto out; + vrele(vp); + vp = NULL; + goto out2; } xp = nd.ni_dvp; - if (vp->v_mount != xp->v_mount) + if (vp->v_mount != xp->v_mount) { error = EXDEV; -out: - if (!error) { - error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); - NDFREE(&nd, NDF_ONLY_PNBUF); + vrele(vp); + vp = NULL; + goto out2; } + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); + NDFREE(&nd, NDF_ONLY_PNBUF); /* fall through */ out1: if (v3) getret = VOP_GETATTR(vp, &at, cred, td); - if (dirp) - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); +out2: + if (dirp) { + if (dirp == nd.ni_dvp) + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + else { + /* Release existing locks to prevent deadlock. */ + if (nd.ni_dvp) { + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + } + if (nd.ni_vp) + vrele(nd.ni_vp); + nd.ni_dvp = NULL; + nd.ni_vp = NULL; + + vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td); + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + VOP_UNLOCK(dirp, 0, td); + } + } ereply: nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { @@ -2473,7 +2519,7 @@ if (dirp) vrele(dirp); if (vp) - vrele(vp); + vput(vp); if (nd.ni_dvp) { if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); @@ -2534,15 +2580,10 @@ nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, td, FALSE); - if (dirp) { - if (v3) { - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - td); - } else { - vrele(dirp); - dirp = NULL; - } + &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); + if (dirp && !v3) { + vrele(dirp); + dirp = NULL; } if (error) goto out; @@ -2630,9 +2671,9 @@ pathcp = NULL; } if (dirp) { + vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td); diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); - vrele(dirp); - dirp = NULL; + VOP_UNLOCK(dirp, 0, td); } if (nd.ni_startdir) { vrele(nd.ni_startdir); @@ -2719,15 +2760,10 @@ nd.ni_cnd.cn_flags = LOCKPARENT; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, td, FALSE); - if (dirp) { - if (v3) { - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - td); - } else { - vrele(dirp); - dirp = NULL; - } + &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); + if (dirp && !v3) { + vrele(dirp); + dirp = NULL; } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); @@ -2778,8 +2814,31 @@ error = VOP_GETATTR(nd.ni_vp, vap, cred, td); } out: - if (dirp) - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + if (dirp) { + if (dirp == nd.ni_dvp) { + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + } else { + /* Release existing locks to prevent deadlock. */ + if (nd.ni_dvp) { + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp == nd.ni_vp && vpexcl) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + } + if (nd.ni_vp) { + if (vpexcl) + vput(nd.ni_vp); + else + vrele(nd.ni_vp); + } + nd.ni_dvp = NULL; + nd.ni_vp = NULL; + vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td); + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + VOP_UNLOCK(dirp, 0, td); + } + } nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { if (!error) { @@ -2859,15 +2918,10 @@ nd.ni_cnd.cn_nameiop = DELETE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, td, FALSE); - if (dirp) { - if (v3) { - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - td); - } else { - vrele(dirp); - dirp = NULL; - } + &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); + if (dirp && !v3) { + vrele(dirp); + dirp = NULL; } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); @@ -2902,8 +2956,26 @@ error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); NDFREE(&nd, NDF_ONLY_PNBUF); - if (dirp) - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + if (dirp) { + if (dirp == nd.ni_dvp) + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + else { + /* Release existing locks to prevent deadlock. */ + if (nd.ni_dvp) { + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + } + if (nd.ni_vp) + vput(nd.ni_vp); + nd.ni_dvp = NULL; + nd.ni_vp = NULL; + vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td); + diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td); + VOP_UNLOCK(dirp, 0, td); + } + } nfsm_reply(NFSX_WCCDATA(v3)); error = 0; if (v3) Index: sys/nfsserver/nfs_srvsubs.c =================================================================== RCS file: /home/ncvs/src/sys/nfsserver/nfs_srvsubs.c,v retrieving revision 1.120 diff -u -r1.120 nfs_srvsubs.c --- sys/nfsserver/nfs_srvsubs.c 19 Feb 2003 05:47:39 -0000 1.120 +++ sys/nfsserver/nfs_srvsubs.c 6 May 2003 02:56:15 -0000 @@ -592,7 +592,8 @@ int nfs_namei(struct nameidata *ndp, fhandle_t *fhp, int len, struct nfssvc_sock *slp, struct sockaddr *nam, struct mbuf **mdp, - caddr_t *dposp, struct vnode **retdirp, struct thread *td, int pubflag) + caddr_t *dposp, struct vnode **retdirp, int v3, struct vattr *retdirattrp, + int *retdirattr_retp, struct thread *td, int pubflag) { int i, rem; struct mbuf *md; @@ -602,6 +603,7 @@ struct vnode *dp; int error, rdonly, linklen; struct componentname *cnp = &ndp->ni_cnd; + int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0; *retdirp = NULL; cnp->cn_flags |= NOMACCHECK; @@ -664,6 +666,12 @@ * to the returned pointer */ *retdirp = dp; + if (v3) { + vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, td); + *retdirattr_retp = VOP_GETATTR(dp, retdirattrp, + ndp->ni_cnd.cn_cred, td); + VOP_UNLOCK(dp, 0, td); + } if (pubflag) { /* @@ -736,6 +744,8 @@ VREF(dp); ndp->ni_startdir = dp; + if (!lockleaf) + cnp->cn_flags |= LOCKLEAF; for (;;) { cnp->cn_nameptr = cnp->cn_pnbuf; /* @@ -761,6 +771,8 @@ cnp->cn_flags |= HASBUF; else uma_zfree(namei_zone, cnp->cn_pnbuf); + if (ndp->ni_vp && !lockleaf) + VOP_UNLOCK(ndp->ni_vp, 0, td); break; } @@ -840,6 +852,8 @@ ndp->ni_startdir = ndp->ni_dvp; ndp->ni_dvp = NULL; } + if (!lockleaf) + cnp->cn_flags &= ~LOCKLEAF; /* * nfs_namei() guarentees that fields will not contain garbage