Index: kern/kern_vimage.c =================================================================== --- kern/kern_vimage.c (revision 195741) +++ kern/kern_vimage.c (working copy) @@ -58,23 +58,22 @@ static int vnet_mod_constructor(struct vnet_modlink *); static int vnet_mod_destructor(struct vnet_modlink *); -#define VNET_LIST_WLOCK() \ - mtx_lock(&vnet_list_refc_mtx); \ - while (vnet_list_refc != 0) \ - cv_wait(&vnet_list_condvar, &vnet_list_refc_mtx); +struct rwlock vnet_rwlock; +struct sx vnet_sxlock; -#define VNET_LIST_WUNLOCK() \ - mtx_unlock(&vnet_list_refc_mtx); +#define VNET_LIST_WLOCK() do { \ + sx_xlock(&vnet_sxlock); \ + rw_wlock(&vnet_rwlock); \ +} while (0) +#define VNET_LIST_WUNLOCK() do { \ + rw_wunlock(&vnet_rwlock); \ + sx_xunlock(&vnet_sxlock); \ +} while (0) + struct vnet_list_head vnet_head; - -struct cv vnet_list_condvar; -struct mtx vnet_list_refc_mtx; -int vnet_list_refc = 0; - struct vnet *vnet0; - /* * Move an ifnet to or from another vnet, specified by the jail id. */ @@ -373,17 +372,23 @@ } static void -vi_init(void *unused) +vnet_init_prelink(void *arg) { + rw_init(&vnet_rwlock, "vnet_rwlock"); + sx_init(&vnet_sxlock, "vnet_sxlock"); + LIST_INIT(&vnet_head); +} +SYSINIT(vnet_init_prelink, SI_SUB_VNET_PRELINK, SI_ORDER_FIRST, + vnet_init_prelink, NULL); + +static void +vnet0_init(void *arg) +{ + TAILQ_INIT(&vnet_modlink_head); TAILQ_INIT(&vnet_modpending_head); - LIST_INIT(&vnet_head); - - mtx_init(&vnet_list_refc_mtx, "vnet_list_refc_mtx", NULL, MTX_DEF); - cv_init(&vnet_list_condvar, "vnet_list_condvar"); - /* * We MUST clear curvnet in vi_init_done() before going SMP, * otherwise CURVNET_SET() macros would scream about unnecessary @@ -391,9 +396,10 @@ */ curvnet = prison0.pr_vnet = vnet0 = vnet_alloc(); } +SYSINIT(vnet0_init, SI_SUB_VNET, SI_ORDER_FIRST, vnet0_init, NULL); static void -vi_init_done(void *unused) +vnet_init_done(void *unused) { struct vnet_modlink *vml_iter; @@ -411,8 +417,8 @@ panic("going nowhere without my vnet modules!"); } -SYSINIT(vimage, SI_SUB_VIMAGE, SI_ORDER_FIRST, vi_init, NULL); -SYSINIT(vimage_done, SI_SUB_VIMAGE_DONE, SI_ORDER_FIRST, vi_init_done, NULL); +SYSINIT(vnet_init_done, SI_SUB_VNET_DONE, SI_ORDER_FIRST, vnet_init_done, + NULL); #ifdef DDB DB_SHOW_COMMAND(vnets, db_show_vnets) Index: netinet/tcp_subr.c =================================================================== --- netinet/tcp_subr.c (revision 195741) +++ netinet/tcp_subr.c (working copy) @@ -940,7 +940,7 @@ if (!do_tcpdrain) return; - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); struct inpcb *inpb; @@ -976,7 +976,7 @@ INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } /* @@ -1576,7 +1576,7 @@ VNET_ITERATOR_DECL(vnet_iter); u_int32_t projected_offset; - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); ISN_LOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); /* XXX appease INVARIANTS */ @@ -1590,7 +1590,7 @@ CURVNET_RESTORE(); } ISN_UNLOCK(); - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); callout_reset(&isn_callout, hz/100, tcp_isn_tick, NULL); } Index: netinet/tcp_timer.c =================================================================== --- netinet/tcp_timer.c (revision 195741) +++ netinet/tcp_timer.c (working copy) @@ -127,7 +127,7 @@ { VNET_ITERATOR_DECL(vnet_iter); - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); tcp_maxidle = tcp_keepcnt * tcp_keepintvl; @@ -136,7 +136,7 @@ INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = Index: netinet/in_pcb.c =================================================================== --- netinet/in_pcb.c (revision 195741) +++ netinet/in_pcb.c (working copy) @@ -1570,7 +1570,7 @@ { VNET_ITERATOR_DECL(vnet_iter); - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); /* XXX appease INVARIANTS here */ if (V_ipport_tcpallocs <= @@ -1582,7 +1582,7 @@ V_ipport_tcplastcount = V_ipport_tcpallocs; CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL); } Index: netinet/ip_input.c =================================================================== --- netinet/ip_input.c (revision 195741) +++ netinet/ip_input.c (working copy) @@ -1193,8 +1193,8 @@ struct ipq *fp; int i; + VNET_LIST_RLOCK_NOSLEEP(); IPQ_LOCK(); - VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); for (i = 0; i < IPREASS_NHASH; i++) { @@ -1228,8 +1228,8 @@ } CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); IPQ_UNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } /* @@ -1241,8 +1241,8 @@ VNET_ITERATOR_DECL(vnet_iter); int i; + VNET_LIST_RLOCK_NOSLEEP(); IPQ_LOCK(); - VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); for (i = 0; i < IPREASS_NHASH; i++) { @@ -1254,8 +1254,8 @@ } CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); IPQ_UNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); in_rtqdrain(); } Index: netinet/igmp.c =================================================================== --- netinet/igmp.c (revision 195741) +++ netinet/igmp.c (working copy) @@ -1616,13 +1616,13 @@ { VNET_ITERATOR_DECL(vnet_iter); - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); igmp_fasttimo_vnet(); CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } /* @@ -2159,13 +2159,13 @@ { VNET_ITERATOR_DECL(vnet_iter); - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); igmp_slowtimo_vnet(); CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } /* Index: netinet/in_rmx.c =================================================================== --- netinet/in_rmx.c (revision 195741) +++ netinet/in_rmx.c (working copy) @@ -319,7 +319,7 @@ struct rtqk_arg arg; int fibnum; - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); @@ -336,7 +336,7 @@ } CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } static int _in_rt_was_here; Index: netgraph/ng_gif.c =================================================================== --- netgraph/ng_gif.c (revision 195741) +++ netgraph/ng_gif.c (working copy) @@ -561,8 +561,8 @@ ng_gif_input_orphan_p = ng_gif_input_orphan; /* Create nodes for any already-existing gif interfaces */ + VNET_LIST_RLOCK_NOSLEEP(); IFNET_RLOCK(); - VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET_QUIET(vnet_iter); /* XXX revisit quiet */ TAILQ_FOREACH(ifp, &V_ifnet, if_link) { @@ -571,8 +571,8 @@ } CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); IFNET_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); break; case MOD_UNLOAD: Index: net/if.c =================================================================== --- net/if.c (revision 195741) +++ net/if.c (working copy) @@ -1793,8 +1793,8 @@ struct ifnet *ifp; int s = splimp(); + VNET_LIST_RLOCK_NOSLEEP(); IFNET_RLOCK(); - VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { @@ -1805,8 +1805,8 @@ } CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); IFNET_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); splx(s); timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ); } Index: netinet6/frag6.c =================================================================== --- netinet6/frag6.c (revision 195741) +++ netinet6/frag6.c (working copy) @@ -720,8 +720,8 @@ VNET_ITERATOR_DECL(vnet_iter); struct ip6q *q6; + VNET_LIST_RLOCK_NOSLEEP(); IP6Q_LOCK(); - VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); q6 = V_ip6q.ip6q_next; @@ -748,8 +748,8 @@ } CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); IP6Q_UNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } /* @@ -760,9 +760,11 @@ { VNET_ITERATOR_DECL(vnet_iter); - if (IP6Q_TRYLOCK() == 0) + VNET_LIST_RLOCK_NOSLEEP(); + if (IP6Q_TRYLOCK() == 0) { + VNET_LIST_RUNLOCK_NOSLEEP(); return; - VNET_LIST_RLOCK(); + } VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); while (V_ip6q.ip6q_next != &V_ip6q) { @@ -772,6 +774,6 @@ } CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); IP6Q_UNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } Index: netinet6/mld6.c =================================================================== --- netinet6/mld6.c (revision 195741) +++ netinet6/mld6.c (working copy) @@ -1306,13 +1306,13 @@ { VNET_ITERATOR_DECL(vnet_iter); - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); mld_fasttimo_vnet(); CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } /* @@ -1721,13 +1721,13 @@ { VNET_ITERATOR_DECL(vnet_iter); - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); mld_slowtimo_vnet(); CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } /* Index: netipsec/key.c =================================================================== --- netipsec/key.c (revision 195741) +++ netipsec/key.c (working copy) @@ -4537,7 +4537,7 @@ VNET_ITERATOR_DECL(vnet_iter); time_t now = time_second; - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); key_flush_spd(now); @@ -4546,7 +4546,7 @@ key_flush_spacq(now); CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); #ifndef IPSEC_DEBUG2 /* do exchange to tick time !! */ Index: sys/vimage.h =================================================================== --- sys/vimage.h (revision 195741) +++ sys/vimage.h (working copy) @@ -38,6 +38,9 @@ #ifdef _KERNEL +#include +#include + #ifdef INVARIANTS #define VNET_DEBUG #endif @@ -176,17 +179,42 @@ #endif /* !VIMAGE */ #ifdef VIMAGE +/* + * Global linked list of all virtual network stacks, along with read locks to + * access it. If a caller may sleep while accessing the list, it must use + * the sleepable lock macros. + */ LIST_HEAD(vnet_list_head, vnet); extern struct vnet_list_head vnet_head; -extern struct vnet *vnet0; -#define VNET_ITERATOR_DECL(arg) struct vnet *arg; -#define VNET_FOREACH(arg) LIST_FOREACH(arg, &vnet_head, vnet_le) -#else +extern struct rwlock vnet_rwlock; +extern struct sx vnet_sxlock; + +#define VNET_LIST_RLOCK() sx_slock(&vnet_sxlock) +#define VNET_LIST_RLOCK_NOSLEEP() rw_rlock(&vnet_rwlock) +#define VNET_LIST_RUNLOCK() sx_sunlock(&vnet_sxlock) +#define VNET_LIST_RUNLOCK_NOSLEEP() rw_runlock(&vnet_rwlock) + +/* + * Iteration macros to walk the global list of virtual network stacks. + */ +#define VNET_ITERATOR_DECL(arg) struct vnet *arg +#define VNET_FOREACH(arg) LIST_FOREACH((arg), &vnet_head, vnet_le) + +#else /* !VIMAGE */ +/* + * No-op macros for the !VIMAGE case. + */ +#define VNET_LIST_RLOCK() +#define VNET_LIST_RLOCK_NOSLEEP() +#define VNET_LIST_RUNLOCK() +#define VNET_LIST_RUNLOCK_NOSLEEP() #define VNET_ITERATOR_DECL(arg) #define VNET_FOREACH(arg) -#endif +#endif /* VIMAGE */ + #ifdef VIMAGE +extern struct vnet *vnet0; #define IS_DEFAULT_VNET(arg) ((arg) == vnet0) #else #define IS_DEFAULT_VNET(arg) 1 @@ -202,10 +230,6 @@ #define P_TO_VNET(p) NULL #endif /* VIMAGE */ -/* Non-VIMAGE null-macros */ -#define VNET_LIST_RLOCK() -#define VNET_LIST_RUNLOCK() - #endif /* _KERNEL */ #endif /* !_SYS_VIMAGE_H_ */ Index: sys/kernel.h =================================================================== --- sys/kernel.h (revision 195741) +++ sys/kernel.h (working copy) @@ -106,13 +106,14 @@ SI_SUB_MTX_POOL_DYNAMIC = 0x1AC0000, /* dynamic mutex pool */ SI_SUB_LOCK = 0x1B00000, /* various locks */ SI_SUB_EVENTHANDLER = 0x1C00000, /* eventhandler init */ + SI_SUB_VNET_PRELINK = 0x1E00000, /* vnet init before modules */ SI_SUB_KLD = 0x2000000, /* KLD and module setup */ SI_SUB_CPU = 0x2100000, /* CPU resource(s)*/ SI_SUB_KDTRACE = 0x2140000, /* Kernel dtrace hooks */ SI_SUB_MAC = 0x2180000, /* TrustedBSD MAC subsystem */ SI_SUB_MAC_POLICY = 0x21C0000, /* TrustedBSD MAC policies */ SI_SUB_MAC_LATE = 0x21D0000, /* TrustedBSD MAC subsystem */ - SI_SUB_VIMAGE = 0x21E0000, /* vimage infrastructure */ + SI_SUB_VNET = 0x21E0000, /* vnet 0 */ SI_SUB_INTRINSIC = 0x2200000, /* proc 0*/ SI_SUB_VM_CONF = 0x2300000, /* config VM, set limits*/ SI_SUB_DDB_SERVICES = 0x2380000, /* capture, scripting, etc. */ @@ -158,7 +159,7 @@ SI_SUB_SWAP = 0xc000000, /* swap */ SI_SUB_INTRINSIC_POST = 0xd000000, /* proc 0 cleanup*/ SI_SUB_SYSCALLS = 0xd800000, /* register system calls */ - SI_SUB_VIMAGE_DONE = 0xdc00000, /* vnet registration complete */ + SI_SUB_VNET_DONE = 0xdc00000, /* vnet registration complete */ SI_SUB_KTHREAD_INIT = 0xe000000, /* init process*/ SI_SUB_KTHREAD_PAGE = 0xe400000, /* pageout daemon*/ SI_SUB_KTHREAD_VM = 0xe800000, /* vm daemon*/