Index: conf/files =================================================================== --- conf/files (revision 195343) +++ conf/files (working copy) @@ -2225,6 +2225,7 @@ net/rtsock.c standard net/slcompress.c optional netgraph_vjc | sppp | \ netgraph_sppp +net/vnet.c optional vimage net/zlib.c optional crypto | geom_uzip | ipsec | \ mxge | netgraph_deflate | \ ddb_ctf Index: kern/link_elf_obj.c =================================================================== --- kern/link_elf_obj.c (revision 195343) +++ kern/link_elf_obj.c (working copy) @@ -45,6 +45,8 @@ #include +#include + #include #include @@ -346,6 +348,21 @@ ef->progtab[pb].size); dpcpu_copy(dpcpu, shdr[i].sh_size); ef->progtab[pb].addr = dpcpu; +#ifdef VIMAGE + } else if (ef->progtab[pb].name != NULL && + !strcmp(ef->progtab[pb].name, "set_vnet")) { + void *vnet_data; + + vnet_data = vnet_data_alloc(shdr[i].sh_size); + if (vnet_data == NULL) { + error = ENOSPC; + goto out; + } + memcpy(vnet_data, ef->progtab[pb].addr, + ef->progtab[pb].size); + vnet_data_copy(vnet_data, shdr[i].sh_size); + ef->progtab[pb].addr = vnet_data; +#endif } /* Update all symbol values with the offset. */ @@ -737,6 +754,12 @@ !strcmp(ef->progtab[pb].name, "set_pcpu")) ef->progtab[pb].addr = dpcpu_alloc(shdr[i].sh_size); +#ifdef VIMAGE + else if (ef->progtab[pb].name != NULL && + !strcmp(ef->progtab[pb].name, "set_vnet")) + ef->progtab[pb].addr = + vnet_data_alloc(shdr[i].sh_size); +#endif else ef->progtab[pb].addr = (void *)(uintptr_t)mapbase; @@ -758,10 +781,21 @@ error = EINVAL; goto out; } - /* Initialize the per-cpu area. */ - if (ef->progtab[pb].addr != (void *)mapbase) + /* Initialize the per-cpu or vnet area. 
*/ + if (ef->progtab[pb].addr != (void *)mapbase && + !strcmp(ef->progtab[pb].name, "set_pcpu")) dpcpu_copy(ef->progtab[pb].addr, shdr[i].sh_size); +#ifdef VIMAGE + else if (ef->progtab[pb].addr != + (void *)mapbase && + !strcmp(ef->progtab[pb].name, "set_vnet")) + vnet_data_copy(ef->progtab[pb].addr, + shdr[i].sh_size); +#endif + else + panic("link_elf_load_file: unexpected " + "progbits type"); } else bzero(ef->progtab[pb].addr, shdr[i].sh_size); @@ -877,6 +911,11 @@ if (!strcmp(ef->progtab[i].name, "set_pcpu")) dpcpu_free(ef->progtab[i].addr, ef->progtab[i].size); +#ifdef VIMAGE + else if (!strcmp(ef->progtab[i].name, "set_vnet")) + vnet_data_free(ef->progtab[i].addr, + ef->progtab[i].size); +#endif } } if (ef->preloaded) { Index: kern/link_elf.c =================================================================== --- kern/link_elf.c (revision 195343) +++ kern/link_elf.c (working copy) @@ -49,6 +49,8 @@ #include +#include + #include #include @@ -111,6 +113,11 @@ Elf_Addr pcpu_start; /* Pre-relocation pcpu set start. */ Elf_Addr pcpu_stop; /* Pre-relocation pcpu set stop. */ Elf_Addr pcpu_base; /* Relocated pcpu set address. */ +#ifdef VIMAGE + Elf_Addr vnet_start; /* Pre-relocation vnet set start. */ + Elf_Addr vnet_stop; /* Pre-relocation vnet set stop. */ + Elf_Addr vnet_base; /* Relocated vnet set address. */ +#endif #ifdef GDB struct link_map gdb; /* hooks for gdb */ #endif @@ -506,7 +513,37 @@ return (0); } +#ifdef VIMAGE static int +parse_vnet(elf_file_t ef) +{ + int count; + int error; + + ef->vnet_start = 0; + ef->vnet_stop = 0; + error = link_elf_lookup_set(&ef->lf, "vnet", (void ***)&ef->vnet_start, + (void ***)&ef->vnet_stop, &count); + /* Error just means there is no vnet data set to relocate. */ + if (error) + return (0); + count *= sizeof(void *); + /* + * Allocate space in the primary vnet area. Copy in our initialization + * from the data section and then initialize all per-vnet storage from + * that. 
+ */ + ef->vnet_base = (Elf_Addr)(uintptr_t)vnet_data_alloc(count); + if (ef->vnet_base == (Elf_Addr)NULL) + return (ENOSPC); + memcpy((void *)ef->vnet_base, (void *)ef->vnet_start, count); + vnet_data_copy((void *)ef->vnet_base, count); + + return (0); +} +#endif + +static int link_elf_link_preload(linker_class_t cls, const char* filename, linker_file_t *result) { @@ -553,6 +590,10 @@ error = parse_dynamic(ef); if (error == 0) error = parse_dpcpu(ef); +#ifdef VIMAGE + if (error == 0) + error = parse_vnet(ef); +#endif if (error) { linker_file_unload(lf, LINKER_UNLOAD_FORCE); return error; @@ -838,6 +879,11 @@ error = parse_dpcpu(ef); if (error) goto out; +#ifdef VIMAGE + error = parse_vnet(ef); + if (error) + goto out; +#endif link_elf_reloc_local(lf); VOP_UNLOCK(nd.ni_vp, 0); @@ -954,6 +1000,11 @@ if (ef->pcpu_base) { dpcpu_free((void *)ef->pcpu_base, ef->pcpu_stop - ef->pcpu_start); } +#ifdef VIMAGE + if (ef->vnet_base) { + vnet_data_free((void *)ef->vnet_base, ef->vnet_stop - ef->vnet_start); + } +#endif #ifdef GDB if (ef->gdb.l_ld) { GDB_STATE(RT_DELETE); Index: kern/kern_vimage.c =================================================================== --- kern/kern_vimage.c (revision 195343) +++ kern/kern_vimage.c (working copy) @@ -642,6 +642,7 @@ vnet = malloc(sizeof(struct vnet), M_VNET, M_WAITOK | M_ZERO); vnet->vnet_magic_n = VNET_MAGIC_N; + vnet_data_init(vnet); /* Initialize / attach vnet module instances. */ CURVNET_SET_QUIET(vnet); @@ -685,10 +686,22 @@ CURVNET_RESTORE(); /* Hopefully, we are OK to free the vnet container itself. 
*/ + vnet_data_destroy(vnet); vnet->vnet_magic_n = 0xdeadbeef; free(vnet, M_VNET); } +void +vnet_foreach(void (*vnet_foreach_fn)(struct vnet *, void *), void *arg) +{ + struct vnet *vnet; + + VNET_LIST_RLOCK(); + LIST_FOREACH(vnet, &vnet_head, vnet_le) + vnet_foreach_fn(vnet, arg); + VNET_LIST_RUNLOCK(); +} + static struct vimage * vi_alloc(struct vimage *parent, char *name) { Index: net/vnet.c =================================================================== --- net/vnet.c (revision 0) +++ net/vnet.c (revision 0) @@ -0,0 +1,234 @@ +/*- + * Copyright (c) 2001 Wind River Systems, Inc. + * All rights reserved. + * Written by: John Baldwin + * + * Copyright (c) 2009 Jeffrey Roberson + * Copyright (c) 2009 Robert N. M. Watson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * This is the per-vnet allocation facility, which provides storage for + * per-vnet global variables. A global allocator manages the layout of all + * malloc'd memory blocks associated with each vnet, keeping allocations in + * lock-step. Accessor macros, such as VNET_PTR(), accept a vnet (or use the + * current vnet) and map global names into local storage. + */ +MALLOC_DEFINE(M_VNET_DATA_FREE, "vnet_data_free", + "Per-VNET resource accounting"); +MALLOC_DEFINE(M_VNET_DATA, "vnet_data", "Per-VNET data"); + +struct vnet_data_free { + uintptr_t vnd_start; + int vnd_len; + TAILQ_ENTRY(vnet_data_free) vnd_link; +}; + +static VNET_DEFINE(char, modspace[VNET_MODMIN]); +static TAILQ_HEAD(, vnet_data_free) vnet_data_free_head = + TAILQ_HEAD_INITIALIZER(vnet_data_free_head); +static struct sx vnet_data_lock; + +void +vnet_data_init(struct vnet *vnet) +{ + + vnet->vnet_data_mem = malloc(VNET_SIZE, M_VNET_DATA, M_WAITOK); + memcpy(vnet->vnet_data_mem, (void *)VNET_START, VNET_BYTES); + + /* + * All use of vnet-specific data will immediately subtract VNET_START + * from the base memory pointer, so pre-calculate that now to avoid + * it on each use. 
+ */ + vnet->vnet_data_base = (uintptr_t)vnet->vnet_data_mem - VNET_START; +} + +void +vnet_data_destroy(struct vnet *vnet) +{ + + free(vnet->vnet_data_mem, M_VNET_DATA); + vnet->vnet_data_mem = NULL; + vnet->vnet_data_base = 0; +} + +static void +vnet_data_startup(void *dummy __unused) +{ + struct vnet_data_free *df; + + df = malloc(sizeof(*df), M_VNET_DATA_FREE, M_WAITOK | M_ZERO); + df->vnd_start = (uintptr_t)&VNET_NAME(modspace); + df->vnd_len = VNET_MODMIN; + TAILQ_INSERT_HEAD(&vnet_data_free_head, df, vnd_link); + sx_init(&vnet_data_lock, "vnet_data alloc lock"); +} +SYSINIT(vnet_data, SI_SUB_KLD, SI_ORDER_FIRST, vnet_data_startup, 0); + +/* + * First-fit extent based allocator for allocating space in the per-VNET + * region reserved for modules. This is only intended for use by the + * kernel linkers to place module linker sets. + */ +void * +vnet_data_alloc(int size) +{ + struct vnet_data_free *df; + void *s; + + s = NULL; + size = roundup2(size, sizeof(void *)); + sx_xlock(&vnet_data_lock); + TAILQ_FOREACH(df, &vnet_data_free_head, vnd_link) { + if (df->vnd_len < size) + continue; + if (df->vnd_len == size) { + s = (void *)df->vnd_start; + TAILQ_REMOVE(&vnet_data_free_head, df, vnd_link); + free(df, M_VNET_DATA_FREE); + break; + } + s = (void *)df->vnd_start; + df->vnd_len -= size; + df->vnd_start = df->vnd_start + size; + break; + } + sx_xunlock(&vnet_data_lock); + + return (s); +} + +/* + * Free dynamic per-VNET space at module unload time. + */ +void +vnet_data_free(void *s, int size) +{ + struct vnet_data_free *df; + struct vnet_data_free *dn; + uintptr_t start; + uintptr_t end; + + size = roundup2(size, sizeof(void *)); + start = (uintptr_t)s; + end = start + size; + /* + * Free a region of space and merge it with as many neighbors as + * possible. Keeping the list sorted simplifies this operation. 
+ */ + sx_xlock(&vnet_data_lock); + TAILQ_FOREACH(df, &vnet_data_free_head, vnd_link) { + if (df->vnd_start > end) + break; + /* + * If we expand at the end of an entry we may have to + * merge it with the one following it (if any) as well. + */ + if (df->vnd_start + df->vnd_len == start) { + df->vnd_len += size; + dn = TAILQ_NEXT(df, vnd_link); + if (dn != NULL && df->vnd_start + df->vnd_len == dn->vnd_start) { + df->vnd_len += dn->vnd_len; + TAILQ_REMOVE(&vnet_data_free_head, dn, vnd_link); + free(dn, M_VNET_DATA_FREE); + } + sx_xunlock(&vnet_data_lock); + return; + } + if (df->vnd_start == end) { + df->vnd_start = start; + df->vnd_len += size; + sx_xunlock(&vnet_data_lock); + return; + } + } + dn = malloc(sizeof(*df), M_VNET_DATA_FREE, M_WAITOK | M_ZERO); + dn->vnd_start = start; + dn->vnd_len = size; + if (df) + TAILQ_INSERT_BEFORE(df, dn, vnd_link); + else + TAILQ_INSERT_TAIL(&vnet_data_free_head, dn, vnd_link); + sx_xunlock(&vnet_data_lock); +} + +/* + * Initialize the per-VNET storage from an updated linker-set region. 
+ */ +struct vnet_data_copy_fn_arg { + void *s; + int size; +}; + +static void +vnet_data_copy_fn(struct vnet *vnet, void *arg) +{ + struct vnet_data_copy_fn_arg *varg = arg; + + memcpy((void *)((uintptr_t)vnet->vnet_data_base + (uintptr_t)varg->s), + varg->s, varg->size); +} + +void +vnet_data_copy(void *s, int size) +{ + struct vnet_data_copy_fn_arg varg; + + varg.s = s; + varg.size = size; + vnet_foreach(vnet_data_copy_fn, &varg); +} + +VNET_DEFINE(int, alocalval) = 10; + +static int +sysctl_alocalval(SYSCTL_HANDLER_ARGS) +{ + + return (sysctl_handle_int(oidp, VNET_PTR(alocalval), 0, req)); +} +SYSCTL_PROC(_debug, OID_AUTO, alocalval, + CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, sysctl_alocalval, "I", + "Test value"); Property changes on: net/vnet.c ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: net/vnet.h =================================================================== --- net/vnet.h (revision 195343) +++ net/vnet.h (working copy) @@ -1,6 +1,8 @@ /*- * Copyright (c) 2006-2008 University of Zagreb * Copyright (c) 2006-2008 FreeBSD Foundation + * Copyright (c) 2009 Jeffrey Roberson + * Copyright (c) 2009 Robert N. M. Watson * * This software was developed by the University of Zagreb and the * FreeBSD Foundation under sponsorship by the Stichting NLnet and the @@ -33,6 +35,8 @@ #ifndef _NET_VNET_H_ #define _NET_VNET_H_ +#include /* XXXRW: until vnet_net is gone. */ +#include #include struct ifindex_entry; @@ -57,9 +61,6 @@ int _ether_ipfw; }; -/* Size guard. See sys/vimage.h. 
*/ -VIMAGE_CTASSERT(SIZEOF_vnet_net, sizeof(struct vnet_net)); - #ifndef VIMAGE #ifndef VIMAGE_GLOBALS extern struct vnet_net vnet_net_0; @@ -88,4 +89,67 @@ #define V_loif VNET_NET(loif) #define V_rawcb_list VNET_NET(rawcb_list) +/* + * The per-vnet allocator allows global variables to be declared "per-vnet", + * which is implemented in conjunction with the ELF linker. A striking + * resemblance to the dynamic per-CPU allocator is not unexpected. + */ + +extern uintptr_t *__start_set_vnet; +extern uintptr_t *__stop_set_vnet; + +#if defined(__arm__) +__asm__(".section set_vnet, \"aw\", %progbits"); +#else +__asm__(".section set_vnet, \"aw\", @progbits"); +#endif +__asm__(".previous"); + +#define VNET_START (uintptr_t)&__start_set_vnet +#define VNET_STOP (uintptr_t)&__stop_set_vnet +#define VNET_BYTES (VNET_STOP - VNET_START) +#define VNET_MODMIN 8192 +#define VNET_SIZE roundup2(VNET_BYTES, PAGE_SIZE) +#define VNET_MODSIZE (VNET_SIZE - (VNET_BYTES - \ + VNET_MODMIN)) + +#ifdef VIMAGE +#define VNET_NAME(n) vnet_##n +#define VNET_DECLARE(t, n) extern t VNET_NAME(n) +#define VNET_DEFINE(t, n) t VNET_NAME(n) __section("set_vnet") __used +#define _VNET_PTR(b, n) (__typeof(VNET_NAME(n))*) \ + ((b) + (uintptr_t)&VNET_NAME(n)) +#else +#define VNET_NAME(n) n +#define VNET_DECLARE(t, n) extern t n +#define VNET_DEFINE(t, n) t n +#define _VNET_PTR(b, n) &VNET_NAME(n) +#endif + +#define _VNET_GET(b, n) (*_VNET_PTR(b, n)) +#define _VNET_SET(b, n, v) (*_VNET_PTR(b, n) = v) + +#define VNET_VNET_PTR(vnet, n) _VNET_PTR((vnet)->vnet_data_base, n) +#define VNET_VNET_GET(vnet, n) (*_VNET_PTR((vnet), n)) +#define VNET_VNET_SET(vnet, n, v) ((*_VNET_PTR((vnet), n)) = v) + +#define VNET_PTR(n) VNET_VNET_PTR(curvnet, n) +#define VNET_GET(n) VNET_VNET_GET(curvnet, n) +#define VNET_SET(n, v) VNET_VNET_SET(curvnet, n, v) + +struct vnet; + +/* + * Interfaces from the kernel linker. 
+ */ +void *vnet_data_alloc(int size); +void vnet_data_copy(void *s, int size); +void vnet_data_free(void *s, int size); + +/* + * Interfaces for vnet setup/teardown. + */ +void vnet_data_init(struct vnet *vnet); +void vnet_data_destroy(struct vnet *vnet); + #endif /* !_NET_VNET_H_ */ Index: sys/pcpu.h =================================================================== --- sys/pcpu.h (revision 195343) +++ sys/pcpu.h (working copy) @@ -160,6 +160,11 @@ uintptr_t pc_dynamic; /* + * Reference to current VIMAGE for the purposes of per-VIMAGE data. + */ + void *pc_vidata; + + /* * Keep MD fields last, so that CPU-specific variations on a * single architecture don't result in offset variations of * the machine-independent fields of the pcpu. Even though @@ -183,6 +188,7 @@ #ifndef curthread #define curthread PCPU_GET(curthread) #endif +#define curvidata PCPU_GET(vidata) /* * Machine dependent callouts. cpu_pcpu_init() is responsible for Index: sys/vimage.h =================================================================== --- sys/vimage.h (revision 195343) +++ sys/vimage.h (working copy) @@ -171,6 +171,8 @@ void vnet_mod_deregister_multi(const struct vnet_modinfo *, void *, char *); struct vnet *vnet_alloc(void); void vnet_destroy(struct vnet *); +void vnet_foreach(void (*vnet_foreach_fn)(struct vnet *, void *), + void *arg); #endif /* !VIMAGE_GLOBALS */ @@ -218,6 +220,8 @@ u_int vnet_magic_n; u_int ifcnt; u_int sockcnt; + void *vnet_data_mem; + uintptr_t vnet_data_base; }; struct vprocg {