1 /*
2 * Copyright (c) 2021 Klara Systems, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/types.h>
28 #include <sys/sysmacros.h>
29 #include <sys/kmem.h>
30 #include <linux/file.h>
31 #include <linux/magic.h>
32 #include <sys/zone.h>
33
34 #if defined(CONFIG_USER_NS)
35 #include <linux/statfs.h>
36 #include <linux/proc_ns.h>
37 #endif
38
39 #include <sys/mutex.h>
40
41 static kmutex_t zone_datasets_lock;
42 static struct list_head zone_datasets;
43
44 typedef struct zone_datasets {
45 struct list_head zds_list; /* zone_datasets linkage */
46 struct user_namespace *zds_userns; /* namespace reference */
47 struct list_head zds_datasets; /* datasets for the namespace */
48 } zone_datasets_t;
49
50 typedef struct zone_dataset {
51 struct list_head zd_list; /* zone_dataset linkage */
52 size_t zd_dsnamelen; /* length of name */
53 char zd_dsname[]; /* name of the member dataset */
54 } zone_dataset_t;
55
56 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
57 /*
58 * Returns:
59 * - 0 on success
60 * - EBADF if it cannot open the provided file descriptor
61 * - ENOTTY if the file itself is a not a user namespace file. We want to
62 * intercept this error in the ZFS layer. We cannot just return one of the
63 * ZFS_ERR_* errors here as we want to preserve the seperation of the ZFS
64 * and the SPL layers.
65 */
66 static int
67 user_ns_get(int fd, struct user_namespace **userns)
68 {
69 struct kstatfs st;
70 struct file *nsfile;
71 struct ns_common *ns;
72 int error;
73
74 if ((nsfile = fget(fd)) == NULL)
75 return (EBADF);
76 if (vfs_statfs(&nsfile->f_path, &st) != 0) {
77 error = ENOTTY;
78 goto done;
79 }
80 if (st.f_type != NSFS_MAGIC) {
81 error = ENOTTY;
82 goto done;
83 }
84 ns = get_proc_ns(file_inode(nsfile));
85 if (ns->ops->type != CLONE_NEWUSER) {
86 error = ENOTTY;
87 goto done;
88 }
89 *userns = container_of(ns, struct user_namespace, ns);
90
91 error = 0;
92 done:
93 fput(nsfile);
94
95 return (error);
96 }
97 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
98
99 static unsigned int
100 user_ns_zoneid(struct user_namespace *user_ns)
101 {
102 unsigned int r;
103
104 #if defined(HAVE_USER_NS_COMMON_INUM)
105 r = user_ns->ns.inum;
106 #else
107 r = user_ns->proc_inum;
108 #endif
109
110 return (r);
111 }
112
113 static struct zone_datasets *
114 zone_datasets_lookup(unsigned int nsinum)
115 {
116 zone_datasets_t *zds;
117
118 list_for_each_entry(zds, &zone_datasets, zds_list) {
119 if (user_ns_zoneid(zds->zds_userns) == nsinum)
120 return (zds);
121 }
122 return (NULL);
123 }
124
125 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
126 static struct zone_dataset *
127 zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen)
128 {
129 zone_dataset_t *zd;
130
131 list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
132 if (zd->zd_dsnamelen != dsnamelen)
133 continue;
134 if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0)
135 return (zd);
136 }
137
138 return (NULL);
139 }
140
141 static int
142 zone_dataset_cred_check(cred_t *cred)
143 {
144
145 if (!uid_eq(cred->uid, GLOBAL_ROOT_UID))
146 return (EPERM);
147
148 return (0);
149 }
150 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
151
/*
 * Validate a dataset name and compute the length to use for it.
 *
 * Returns ENOENT, leaving *dsnamelen untouched, if the name is empty or
 * starts with a slash.  Otherwise returns 0 and stores the length of the
 * name in *dsnamelen, not counting one trailing slash if there is one.
 */
static int
zone_dataset_name_check(const char *dataset, size_t *dsnamelen)
{
	size_t len;

	switch (dataset[0]) {
	case '\0':
	case '/':
		return (ENOENT);
	default:
		break;
	}

	/* The name is non-empty here, so len - 1 is a valid index. */
	len = strlen(dataset);
	if (dataset[len - 1] == '/')
		len--;
	*dsnamelen = len;

	return (0);
}
166
167 int
168 zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd)
169 {
170 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
171 struct user_namespace *userns;
172 zone_datasets_t *zds;
173 zone_dataset_t *zd;
174 int error;
175 size_t dsnamelen;
176
177 if ((error = zone_dataset_cred_check(cred)) != 0)
178 return (error);
179 if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
180 return (error);
181 if ((error = user_ns_get(userns_fd, &userns)) != 0)
182 return (error);
183
184 mutex_enter(&zone_datasets_lock);
185 zds = zone_datasets_lookup(user_ns_zoneid(userns));
186 if (zds == NULL) {
187 zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP);
188 INIT_LIST_HEAD(&zds->zds_list);
189 INIT_LIST_HEAD(&zds->zds_datasets);
190 zds->zds_userns = userns;
191 /*
192 * Lock the namespace by incresing its refcount to prevent
193 * the namespace ID from being reused.
194 */
195 get_user_ns(userns);
196 list_add_tail(&zds->zds_list, &zone_datasets);
197 } else {
198 zd = zone_dataset_lookup(zds, dataset, dsnamelen);
199 if (zd != NULL) {
200 mutex_exit(&zone_datasets_lock);
201 return (EEXIST);
202 }
203 }
204
205 zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP);
206 zd->zd_dsnamelen = dsnamelen;
207 strlcpy(zd->zd_dsname, dataset, dsnamelen + 1);
208 INIT_LIST_HEAD(&zd->zd_list);
209 list_add_tail(&zd->zd_list, &zds->zds_datasets);
210
211 mutex_exit(&zone_datasets_lock);
212 return (0);
213 #else
214 return (ENXIO);
215 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
216 }
217 EXPORT_SYMBOL(zone_dataset_attach);
218
219 int
220 zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd)
221 {
222 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
223 struct user_namespace *userns;
224 zone_datasets_t *zds;
225 zone_dataset_t *zd;
226 int error;
227 size_t dsnamelen;
228
229 if ((error = zone_dataset_cred_check(cred)) != 0)
230 return (error);
231 if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
232 return (error);
233 if ((error = user_ns_get(userns_fd, &userns)) != 0)
234 return (error);
235
236 mutex_enter(&zone_datasets_lock);
237 zds = zone_datasets_lookup(user_ns_zoneid(userns));
238 if (zds != NULL)
239 zd = zone_dataset_lookup(zds, dataset, dsnamelen);
240 if (zds == NULL || zd == NULL) {
241 mutex_exit(&zone_datasets_lock);
242 return (ENOENT);
243 }
244
245 list_del(&zd->zd_list);
246 kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
247
248 /* Prune the namespace entry if it has no more delegations. */
249 if (list_empty(&zds->zds_datasets)) {
250 /*
251 * Decrease the refcount now that the namespace is no longer
252 * used. It is no longer necessary to prevent the namespace ID
253 * from being reused.
254 */
255 put_user_ns(userns);
256 list_del(&zds->zds_list);
257 kmem_free(zds, sizeof (*zds));
258 }
259
260 mutex_exit(&zone_datasets_lock);
261 return (0);
262 #else
263 return (ENXIO);
264 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
265 }
266 EXPORT_SYMBOL(zone_dataset_detach);
267
268 /*
269 * A dataset is visible if:
270 * - It is a parent of a namespace entry.
271 * - It is one of the namespace entries.
272 * - It is a child of a namespace entry.
273 *
274 * A dataset is writable if:
275 * - It is one of the namespace entries.
276 * - It is a child of a namespace entry.
277 *
278 * The parent datasets of namespace entries are visible and
279 * read-only to provide a path back to the root of the pool.
280 */
281 int
282 zone_dataset_visible(const char *dataset, int *write)
283 {
284 zone_datasets_t *zds;
285 zone_dataset_t *zd;
286 size_t dsnamelen, zd_len;
287 int visible;
288
289 /* Default to read-only, in case visible is returned. */
290 if (write != NULL)
291 *write = 0;
292 if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
293 return (0);
294 if (INGLOBALZONE(curproc)) {
295 if (write != NULL)
296 *write = 1;
297 return (1);
298 }
299
300 mutex_enter(&zone_datasets_lock);
301 zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
302 if (zds == NULL) {
303 mutex_exit(&zone_datasets_lock);
304 return (0);
305 }
306
307 visible = 0;
308 list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
309 zd_len = strlen(zd->zd_dsname);
310 if (zd_len > dsnamelen) {
311 /*
312 * The name of the namespace entry is longer than that
313 * of the dataset, so it could be that the dataset is a
314 * parent of the namespace entry.
315 */
316 visible = memcmp(zd->zd_dsname, dataset,
317 dsnamelen) == 0 &&
318 zd->zd_dsname[dsnamelen] == '/';
319 if (visible)
320 break;
321 } else if (zd_len == dsnamelen) {
322 /*
323 * The name of the namespace entry is as long as that
324 * of the dataset, so perhaps the dataset itself is the
325 * namespace entry.
326 */
327 visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0;
328 if (visible) {
329 if (write != NULL)
330 *write = 1;
331 break;
332 }
333 } else {
334 /*
335 * The name of the namespace entry is shorter than that
336 * of the dataset, so perhaps the dataset is a child of
337 * the namespace entry.
338 */
339 visible = memcmp(zd->zd_dsname, dataset,
340 zd_len) == 0 && dataset[zd_len] == '/';
341 if (visible) {
342 if (write != NULL)
343 *write = 1;
344 break;
345 }
346 }
347 }
348
349 mutex_exit(&zone_datasets_lock);
350 return (visible);
351 }
352 EXPORT_SYMBOL(zone_dataset_visible);
353
/*
 * Return the zone ID of the global zone: the ID of init_user_ns when user
 * namespaces are configured, 0 otherwise.
 */
unsigned int
global_zoneid(void)
{
#if defined(CONFIG_USER_NS)
	return (user_ns_zoneid(&init_user_ns));
#else
	return (0);
#endif
}
EXPORT_SYMBOL(global_zoneid);
366
367 unsigned int
368 crgetzoneid(const cred_t *cr)
369 {
370 unsigned int r = 0;
371
372 #if defined(CONFIG_USER_NS)
373 r = user_ns_zoneid(cr->user_ns);
374 #endif
375
376 return (r);
377 }
378 EXPORT_SYMBOL(crgetzoneid);
379
380 boolean_t
381 inglobalzone(proc_t *proc)
382 {
383 #if defined(CONFIG_USER_NS)
384 return (proc->cred->user_ns == &init_user_ns);
385 #else
386 return (B_TRUE);
387 #endif
388 }
389 EXPORT_SYMBOL(inglobalzone);
390
391 int
392 spl_zone_init(void)
393 {
394 mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL);
395 INIT_LIST_HEAD(&zone_datasets);
396 return (0);
397 }
398
399 void
400 spl_zone_fini(void)
401 {
402 zone_datasets_t *zds;
403 zone_dataset_t *zd;
404
405 /*
406 * It would be better to assert an empty zone_datasets, but since
407 * there's no automatic mechanism for cleaning them up if the user
408 * namespace is destroyed, just do it here, since spl is about to go
409 * out of context.
410 */
411 while (!list_empty(&zone_datasets)) {
412 zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list);
413 while (!list_empty(&zds->zds_datasets)) {
414 zd = list_entry(zds->zds_datasets.next,
415 zone_dataset_t, zd_list);
416 list_del(&zd->zd_list);
417 kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
418 }
419 put_user_ns(zds->zds_userns);
420 list_del(&zds->zds_list);
421 kmem_free(zds, sizeof (*zds));
422 }
423 mutex_destroy(&zone_datasets_lock);
424 }
Cache object: 8de7da83f9e2451bb4c780953e30f284
|