4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
25 #include <sys/zfs_context.h>
29 #include <sys/refcount.h>
30 #include <sys/nvpair.h>
32 #include <sys/kidmap.h>
34 #include <sys/zfs_vfsops.h>
35 #include <sys/zfs_znode.h>
37 #include <sys/zfs_fuid.h>
40 * FUID Domain table(s).
42 * The FUID table is stored as a packed nvlist of an array
43 * of nvlists which contain an index, domain string and offset
45 * During file system initialization the nvlist(s) are read and
46 * two AVL trees are created. One tree is keyed by the index number
47 * and the other by the domain string. Nodes are never removed from
48 * trees, but new entries may be added. If a new entry is added then
49 * the zsb->z_fuid_dirty flag is set to true and the caller will then
50 * be responsible for calling zfs_fuid_sync() to sync the changes to disk.
/*
 * Names used inside the packed FUID table nvlist.  FUID_NVP_ARRAY names
 * the array of per-domain nvlists; FUID_IDX, FUID_DOMAIN and FUID_OFFSET
 * name the values stored in each element of that array.
 */
54 #define FUID_IDX "fuid_idx"
55 #define FUID_DOMAIN "fuid_domain"
56 #define FUID_OFFSET "fuid_offset"
57 #define FUID_NVP_ARRAY "fuid_nvlist"
59 typedef struct fuid_domain {
/*
 * Empty domain string.  zfs_fuid_idx_domain() returns it when an index is
 * not present in the tree, and zfs_fuid_find_by_domain() hands it back for
 * an empty input domain.
 */
66 static char *nulldomain = "";
69 * Compare two indexes.
/*
 * Comparator installed on the index tree by zfs_fuid_avl_tree_create().
 * Both arguments are fuid_domain_t nodes and the ordering is decided by
 * their f_idx fields.
 *
 * NOTE(review): the return statements for each branch are not visible in
 * this excerpt -- confirm the exact values against the full file.
 */
72 idx_compare(const void *arg1, const void *arg2)
74 const fuid_domain_t *node1 = arg1;
75 const fuid_domain_t *node2 = arg2;
77 if (node1->f_idx < node2->f_idx)
79 else if (node1->f_idx > node2->f_idx)
85 * Compare two domain strings.
/*
 * Comparator installed on the domain tree by zfs_fuid_avl_tree_create().
 * Both arguments are fuid_domain_t nodes; they are ordered by strcmp() of
 * the kd_name strings reached through f_ksid.
 *
 * NOTE(review): the handling of the "equal" case is not visible in this
 * excerpt; presumably it is returned before the final statement.
 */
88 domain_compare(const void *arg1, const void *arg2)
90 const fuid_domain_t *node1 = arg1;
91 const fuid_domain_t *node2 = arg2;
94 val = strcmp(node1->f_ksid->kd_name, node2->f_ksid->kd_name);
/* Normalize strcmp()'s result: positive becomes 1, otherwise -1. */
97 return (val > 0 ? 1 : -1);
/*
 * Initialize the two AVL trees that index the FUID domain table.
 *
 * idx_tree is ordered by idx_compare() and links nodes through f_idxnode;
 * domain_tree is ordered by domain_compare() and links nodes through
 * f_domnode.  Both trees hold fuid_domain_t nodes, so a single node can be
 * a member of both at once.
 */
101 zfs_fuid_avl_tree_create(avl_tree_t *idx_tree, avl_tree_t *domain_tree)
103 avl_create(idx_tree, idx_compare,
104 sizeof (fuid_domain_t), offsetof(fuid_domain_t, f_idxnode));
105 avl_create(domain_tree, domain_compare,
106 sizeof (fuid_domain_t), offsetof(fuid_domain_t, f_domnode));
110 * Load the initial FUID domain and index trees. This function is used by
111 * both the kernel and zdb.
/*
 * Read the FUID table stored in object fuid_obj of objset os and insert
 * one node per stored domain into both idx_tree and domain_tree.
 *
 * fuid_obj must be non-zero (asserted).  Every DMU and nvlist call made
 * here is wrapped in VERIFY.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 * zfs_fuid_init() stores the result in z_fuid_size, so presumably this
 * returns fuid_size -- confirm.  Some declarations and the condition that
 * guards the unpack section are also omitted here.
 */
114 zfs_fuid_table_load(objset_t *os, uint64_t fuid_obj, avl_tree_t *idx_tree,
115 avl_tree_t *domain_tree)
120 ASSERT(fuid_obj != 0);
/* The size of the packed table is kept in the object's bonus buffer. */
121 VERIFY(0 == dmu_bonus_hold(os, fuid_obj,
123 fuid_size = *(uint64_t *)db->db_data;
124 dmu_buf_rele(db, FTAG);
128 nvlist_t *nvp = NULL;
/* Read fuid_size bytes from offset 0 of the object and unpack them. */
133 packed = kmem_alloc(fuid_size, KM_SLEEP);
134 VERIFY(dmu_read(os, fuid_obj, 0,
135 fuid_size, packed, DMU_READ_PREFETCH) == 0);
136 VERIFY(nvlist_unpack(packed, fuid_size,
138 VERIFY(nvlist_lookup_nvlist_array(nvp, FUID_NVP_ARRAY,
139 &fuidnvp, &count) == 0);
/* One array element per domain: fetch its string and index. */
141 for (i = 0; i != count; i++) {
142 fuid_domain_t *domnode;
146 VERIFY(nvlist_lookup_string(fuidnvp[i], FUID_DOMAIN,
148 VERIFY(nvlist_lookup_uint64(fuidnvp[i], FUID_IDX,
151 domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
153 domnode->f_idx = idx;
154 domnode->f_ksid = ksid_lookupdomain(domain);
/* The same node is linked into both trees. */
155 avl_add(idx_tree, domnode);
156 avl_add(domain_tree, domnode);
159 kmem_free(packed, fuid_size);
/*
 * Tear down both FUID trees and free their nodes.
 *
 * Each node is linked into both trees, so the work is split in two passes:
 * the pass over domain_tree only drops the ksid domain reference, and the
 * pass over idx_tree frees the node memory itself.
 *
 * NOTE(review): the declaration and (re)initialization of `cookie` are not
 * visible in this excerpt -- confirm it is reset between the two passes.
 */
165 zfs_fuid_table_destroy(avl_tree_t *idx_tree, avl_tree_t *domain_tree)
167 fuid_domain_t *domnode;
/* Pass 1: release the ksid reference held by each node. */
171 while ((domnode = avl_destroy_nodes(domain_tree, &cookie)))
172 ksiddomain_rele(domnode->f_ksid);
174 avl_destroy(domain_tree);
/* Pass 2: free the nodes. */
176 while ((domnode = avl_destroy_nodes(idx_tree, &cookie)))
177 kmem_free(domnode, sizeof (fuid_domain_t));
178 avl_destroy(idx_tree);
/*
 * Look up the domain string for index idx in idx_tree.
 *
 * Returns the kd_name of the matching node, or nulldomain (the empty
 * string) when no node has that index.  The returned pointer refers to
 * storage reached through the tree node; it is not a copy.
 */
182 zfs_fuid_idx_domain(avl_tree_t *idx_tree, uint32_t idx)
184 fuid_domain_t searchnode, *findnode;
/* Only f_idx of the stack search key is filled in before the lookup. */
187 searchnode.f_idx = idx;
189 findnode = avl_find(idx_tree, &searchnode, &loc);
191 return (findnode ? findnode->f_ksid->kd_name : nulldomain);
196 * Load the fuid table(s) into memory.
/*
 * Build the in-memory FUID trees for zsb.  The whole operation runs with
 * z_fuid_lock held as writer, and z_fuid_loaded is set once it is done.
 *
 * NOTE(review): the statement following the rw_exit() in the
 * "already loaded" branch is not visible in this excerpt; presumably the
 * function returns there.
 */
199 zfs_fuid_init(zfs_sb_t *zsb)
201 rw_enter(&zsb->z_fuid_lock, RW_WRITER);
203 if (zsb->z_fuid_loaded) {
204 rw_exit(&zsb->z_fuid_lock);
208 zfs_fuid_avl_tree_create(&zsb->z_fuid_idx, &zsb->z_fuid_domain);
/*
 * Look up the FUID table object number in the master node.  The result
 * of the lookup is deliberately ignored; only z_fuid_obj is tested below.
 */
210 (void) zap_lookup(zsb->z_os, MASTER_NODE_OBJ,
211 ZFS_FUID_TABLES, 8, 1, &zsb->z_fuid_obj);
/* A table object exists: load it and remember its size. */
212 if (zsb->z_fuid_obj != 0) {
213 zsb->z_fuid_size = zfs_fuid_table_load(zsb->z_os,
214 zsb->z_fuid_obj, &zsb->z_fuid_idx,
215 &zsb->z_fuid_domain);
218 zsb->z_fuid_loaded = B_TRUE;
219 rw_exit(&zsb->z_fuid_lock);
223 * Sync the AVL trees out to persistent storage.
/*
 * Write the in-memory FUID table of zsb to disk as part of transaction tx.
 *
 * The table is rebuilt from scratch as an nvlist holding an array of
 * per-domain nvlists, packed with XDR encoding, and written at offset 0 of
 * the FUID object.  The packed size is recorded in z_fuid_size and in the
 * object's bonus buffer.  z_fuid_dirty is cleared at the end.
 *
 * NOTE(review): several lines are omitted from this excerpt, including the
 * local declarations, the body of the "not dirty" branch, and any release
 * of nvp -- confirm those against the full file.
 */
226 zfs_fuid_sync(zfs_sb_t *zsb, dmu_tx_t *tx)
233 fuid_domain_t *domnode;
/* The dirty flag is tested before the lock is taken. */
237 if (!zsb->z_fuid_dirty) {
241 rw_enter(&zsb->z_fuid_lock, RW_WRITER);
244 * First see if table needs to be created?
/* No table object yet: allocate one and record it in the master node. */
246 if (zsb->z_fuid_obj == 0) {
247 zsb->z_fuid_obj = dmu_object_alloc(zsb->z_os,
248 DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE,
249 sizeof (uint64_t), tx);
250 VERIFY(zap_add(zsb->z_os, MASTER_NODE_OBJ,
251 ZFS_FUID_TABLES, sizeof (uint64_t), 1,
252 &zsb->z_fuid_obj, tx) == 0);
255 VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
/*
 * The array is sized from the index tree but filled by walking the
 * domain tree; nodes are always added to both trees together, so the
 * two counts are expected to match.
 */
257 numnodes = avl_numnodes(&zsb->z_fuid_idx);
258 fuids = kmem_alloc(numnodes * sizeof (void *), KM_SLEEP);
259 for (i = 0, domnode = avl_first(&zsb->z_fuid_domain); domnode; i++,
260 domnode = AVL_NEXT(&zsb->z_fuid_domain, domnode)) {
261 VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0);
262 VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
263 domnode->f_idx) == 0);
/* The offset field is always written as 0. */
264 VERIFY(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0) == 0);
265 VERIFY(nvlist_add_string(fuids[i], FUID_DOMAIN,
266 domnode->f_ksid->kd_name) == 0);
268 VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
269 fuids, numnodes) == 0);
/* The per-domain nvlists are freed once they have been added to nvp. */
270 for (i = 0; i != numnodes; i++)
271 nvlist_free(fuids[i]);
272 kmem_free(fuids, numnodes * sizeof (void *));
/* Size the buffer first, then pack into the caller-allocated buffer. */
273 VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
274 packed = kmem_alloc(nvsize, KM_SLEEP);
275 VERIFY(nvlist_pack(nvp, &packed, &nvsize,
276 NV_ENCODE_XDR, KM_SLEEP) == 0);
278 zsb->z_fuid_size = nvsize;
279 dmu_write(zsb->z_os, zsb->z_fuid_obj, 0, zsb->z_fuid_size, packed, tx);
280 kmem_free(packed, zsb->z_fuid_size);
/* Store the packed size in the bonus buffer, where the loader reads it. */
281 VERIFY(0 == dmu_bonus_hold(zsb->z_os, zsb->z_fuid_obj,
283 dmu_buf_will_dirty(db, tx);
284 *(uint64_t *)db->db_data = zsb->z_fuid_size;
285 dmu_buf_rele(db, FTAG);
287 zsb->z_fuid_dirty = B_FALSE;
288 rw_exit(&zsb->z_fuid_lock);
292 * Query domain table for a given domain.
294 * If domain isn't found and addok is set, it is added to AVL trees and
295 * the zsb->z_fuid_dirty flag will be set to TRUE. It will then be
296 * necessary for the caller or another thread to detect the dirty table
297 * and sync out the changes.
/*
 * Return the table index for `domain`, adding a new entry when the domain
 * is unknown and addok permits it.
 *
 * *retdomain, when set, receives either nulldomain (empty input) or the
 * kd_name of the ksid domain looked up for `domain`.
 *
 * Locking: the lookup starts with z_fuid_lock held as reader.  Inserting a
 * new node needs the writer lock, so the reader lock is upgraded in place
 * when possible.
 *
 * NOTE(review): several lines are omitted from this excerpt -- the
 * conditions guarding the retdomain stores, the action taken when the
 * table is not loaded, the retry after a failed upgrade, and the returns
 * on the "empty domain", "added" and "not added" paths.  The comments
 * below cover only the visible statements.
 */
300 zfs_fuid_find_by_domain(zfs_sb_t *zsb, const char *domain,
301 char **retdomain, boolean_t addok)
303 fuid_domain_t searchnode, *findnode;
305 krw_t rw = RW_READER;
308 * If the dummy "nobody" domain then return an index of 0
309 * to cause the created FUID to be a standard POSIX id
310 * for the user nobody.
312 if (domain[0] == '\0') {
314 *retdomain = nulldomain;
/* Takes a reference on the ksid domain; released or handed off below. */
318 searchnode.f_ksid = ksid_lookupdomain(domain);
320 *retdomain = searchnode.f_ksid->kd_name;
321 if (!zsb->z_fuid_loaded)
325 rw_enter(&zsb->z_fuid_lock, rw);
326 findnode = avl_find(&zsb->z_fuid_domain, &searchnode, &loc);
/* Found: drop the lock and the lookup reference, return the index. */
329 rw_exit(&zsb->z_fuid_lock);
330 ksiddomain_rele(searchnode.f_ksid);
331 return (findnode->f_idx);
333 fuid_domain_t *domnode;
/* Holding only the reader lock and the in-place upgrade failed. */
336 if (rw == RW_READER && !rw_tryupgrade(&zsb->z_fuid_lock)) {
337 rw_exit(&zsb->z_fuid_lock);
/* The new node takes over the ksid reference obtained above. */
342 domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
343 domnode->f_ksid = searchnode.f_ksid;
/* New indexes are allocated sequentially: current node count plus one. */
345 retidx = domnode->f_idx = avl_numnodes(&zsb->z_fuid_idx) + 1;
347 avl_add(&zsb->z_fuid_domain, domnode);
348 avl_add(&zsb->z_fuid_idx, domnode);
/* Mark the table dirty so that zfs_fuid_sync() will write it out. */
349 zsb->z_fuid_dirty = B_TRUE;
350 rw_exit(&zsb->z_fuid_lock);
353 rw_exit(&zsb->z_fuid_lock);
359 * Query domain table by index, returning domain string
361 * Returns a pointer from an avl node of the domain string.
/*
 * Return the domain string stored for table index idx.
 *
 * The tree lookup happens under z_fuid_lock held as reader, and only when
 * a table object exists or the in-memory table is dirty.
 *
 * NOTE(review): the statements executed for idx == 0 / !z_use_fuids, for
 * the "not loaded" case, and for the "no table" case, as well as the final
 * return, are not visible in this excerpt.
 */
365 zfs_fuid_find_by_idx(zfs_sb_t *zsb, uint32_t idx)
369 if (idx == 0 || !zsb->z_use_fuids)
372 if (!zsb->z_fuid_loaded)
375 rw_enter(&zsb->z_fuid_lock, RW_READER);
377 if (zsb->z_fuid_obj || zsb->z_fuid_dirty)
378 domain = zfs_fuid_idx_domain(&zsb->z_fuid_idx, idx);
381 rw_exit(&zsb->z_fuid_lock);
/*
 * Map the owner and group stored in znode zp through zfs_fuid_map_id().
 * zp->z_uid is mapped as ZFS_OWNER into *uidp and zp->z_gid as ZFS_GROUP
 * into *gidp.
 */
388 zfs_fuid_map_ids(znode_t *zp, cred_t *cr, uid_t *uidp, uid_t *gidp)
390 *uidp = zfs_fuid_map_id(ZTOZSB(zp), zp->z_uid, cr, ZFS_OWNER);
391 *gidp = zfs_fuid_map_id(ZTOZSB(zp), zp->z_gid, cr, ZFS_GROUP);
/*
 * Map a FUID to a uid or gid.
 *
 * Two groups of statements are visible: one that resolves the FUID's
 * domain and asks kidmap for the id, and one that returns the uid or gid
 * of the caller's credential.
 *
 * NOTE(review): the closing "#endif (HAVE_KSID)" line shows that part of
 * this function is conditionally compiled, but the matching #ifdef/#else
 * lines and several returns are omitted from this excerpt.  Confirm which
 * group belongs to which configuration against the full file.
 */
395 zfs_fuid_map_id(zfs_sb_t *zsb, uint64_t fuid,
396 cred_t *cr, zfs_fuid_type_t type)
399 uint32_t index = FUID_INDEX(fuid);
406 domain = zfs_fuid_find_by_idx(zsb, index);
407 ASSERT(domain != NULL);
/* Owner/user types go through the uid mapping, all others through gid. */
409 if (type == ZFS_OWNER || type == ZFS_ACE_USER) {
410 (void) kidmap_getuidbysid(crgetzone(cr), domain,
411 FUID_RID(fuid), &id);
413 (void) kidmap_getgidbysid(crgetzone(cr), domain,
414 FUID_RID(fuid), &id);
/* This group ignores `fuid` and answers from the credential instead. */
418 if(type == ZFS_OWNER || type == ZFS_ACE_USER)
419 return (crgetuid(cr));
421 return (crgetgid(cr));
422 #endif /* HAVE_KSID */
426 * Add a FUID node to the list of fuid's being created for this
429 * If ACL has multiple domains, then keep only one copy of each unique
/*
 * Record a newly created FUID in *fuidpp.
 *
 * The domain list of the info structure is searched for an entry whose
 * z_domidx equals idx; fuididx counts list positions starting at 1.  When
 * a new domain entry is created it is appended to z_domains, and
 * z_domain_str_sz and z_domain_cnt are updated.  The FUID is then
 * encoded from fuididx and rid and stored according to `type`:
 * ACE user/group types get a zfs_fuid_t appended to z_fuids, ZFS_OWNER
 * sets z_fuid_owner, and the remaining visible assignment sets
 * z_fuid_group.
 *
 * NOTE(review): the conditions around the info allocation, the "found"
 * handling inside the loop, and the else branches are omitted from this
 * excerpt.
 */
433 zfs_fuid_node_add(zfs_fuid_info_t **fuidpp, const char *domain, uint32_t rid,
434 uint64_t idx, uint64_t id, zfs_fuid_type_t type)
437 zfs_fuid_domain_t *fuid_domain;
438 zfs_fuid_info_t *fuidp;
440 boolean_t found = B_FALSE;
443 *fuidpp = zfs_fuid_info_alloc();
447 * First find fuid domain index in linked list
449 * If one isn't found then create an entry.
452 for (fuididx = 1, fuid_domain = list_head(&fuidp->z_domains);
453 fuid_domain; fuid_domain = list_next(&fuidp->z_domains,
454 fuid_domain), fuididx++) {
455 if (idx == fuid_domain->z_domidx) {
/* The entry stores the caller's domain pointer; the string is not copied. */
462 fuid_domain = kmem_alloc(sizeof (zfs_fuid_domain_t), KM_SLEEP);
463 fuid_domain->z_domain = domain;
464 fuid_domain->z_domidx = idx;
465 list_insert_tail(&fuidp->z_domains, fuid_domain);
/* Account for the string plus its terminating NUL. */
466 fuidp->z_domain_str_sz += strlen(domain) + 1;
467 fuidp->z_domain_cnt++;
470 if (type == ZFS_ACE_USER || type == ZFS_ACE_GROUP) {
473 * Now allocate fuid entry and add it on the end of the list
476 fuid = kmem_alloc(sizeof (zfs_fuid_t), KM_SLEEP);
478 fuid->z_domidx = idx;
479 fuid->z_logfuid = FUID_ENCODE(fuididx, rid);
481 list_insert_tail(&fuidp->z_fuids, fuid);
484 if (type == ZFS_OWNER)
485 fuidp->z_fuid_owner = FUID_ENCODE(fuididx, rid);
487 fuidp->z_fuid_group = FUID_ENCODE(fuididx, rid);
493 * Create a file system FUID, based on information in the users cred
495 * If cred contains KSID_OWNER then it should be used to determine
496 * the uid otherwise cred's uid will be used. By default cred's gid
497 * is used unless it's an ephemeral ID in which case KSID_GROUP will
498 * be used if it exists.
/*
 * Produce the owner or group id to store for a new object, derived from
 * credential cr.  `type` must be ZFS_OWNER or ZFS_GROUP (enforced with
 * VERIFY).
 *
 * Visible outcomes:
 *  - FUIDs not in use, or no SID in the credential: the credential's
 *    uid/gid is returned, or UID_NOBODY/GID_NOBODY when it is ephemeral.
 *  - SID present and the chosen id is not ephemeral: that id is returned.
 *  - Otherwise: the SID's domain is entered into the domain table, the
 *    FUID is recorded in *fuidp, and FUID_ENCODE(idx, rid) is returned.
 *
 * NOTE(review): the trailing "#endif (HAVE_KSID)" line shows that at
 * least part of this body is conditionally compiled; the matching #ifdef
 * and any alternative branch are not visible in this excerpt.
 */
501 zfs_fuid_create_cred(zfs_sb_t *zsb, zfs_fuid_type_t type,
502 cred_t *cr, zfs_fuid_info_t **fuidp)
511 VERIFY(type == ZFS_OWNER || type == ZFS_GROUP);
513 ksid = crgetsid(cr, (type == ZFS_OWNER) ? KSID_OWNER : KSID_GROUP);
515 if (!zsb->z_use_fuids || (ksid == NULL)) {
516 id = (type == ZFS_OWNER) ? crgetuid(cr) : crgetgid(cr);
518 if (IS_EPHEMERAL(id))
519 return ((type == ZFS_OWNER) ? UID_NOBODY : GID_NOBODY);
521 return ((uint64_t)id);
525 * ksid is present and FUID is supported
/* Owner id comes from the SID; group id from the credential's gid. */
527 id = (type == ZFS_OWNER) ? ksid_getid(ksid) : crgetgid(cr);
529 if (!IS_EPHEMERAL(id))
530 return ((uint64_t)id);
/* Ephemeral gid: switch to the id carried by the group SID. */
532 if (type == ZFS_GROUP)
533 id = ksid_getid(ksid);
535 rid = ksid_getrid(ksid);
536 domain = ksid_getdomain(ksid);
/* addok is B_TRUE, so an unknown domain is added to the table. */
538 idx = zfs_fuid_find_by_domain(zsb, domain, &kdomain, B_TRUE);
540 zfs_fuid_node_add(fuidp, kdomain, rid, idx, id, type);
542 return (FUID_ENCODE(idx, rid));
544 #endif /* HAVE_KSID */
547 * Create a file system FUID for an ACL ace
548 * or a chown/chgrp of the file.
549 * This is similar to zfs_fuid_create_cred, except that
550 * we can't find the domain + rid information in the
551 * cred. Instead we have to query Winchester for the
554 * During replay operations the domain+rid information is
555 * found in the zfs_fuid_info_t that the replay code has
556 * attached to the zsb of the file system.
/*
 * Produce a FUID for `id` on behalf of an ACE or an ownership change.
 *
 * The visible statements show two sources for the domain and rid:
 *  - replay information attached to the file system (zsb->z_fuid_replay),
 *    where rid/idx are decoded from the head of z_fuids, from
 *    z_fuid_owner, or from z_fuid_group, and the domain is taken from
 *    z_domain_table[idx - 1];
 *  - a kidmap query by uid (owner/user types) or by gid (other types).
 * The domain is then entered into the domain table and
 * FUID_ENCODE(idx, rid) is returned.
 *
 * NOTE(review): this excerpt omits many lines of the function: local
 * declarations, the early-return statement, the conditions selecting
 * between the replay and kidmap paths, the switch over `type`, the
 * handling of a failed kidmap status, and any conditional compilation.
 * Treat the comments below as describing only the statements shown.
 */
559 zfs_fuid_create(zfs_sb_t *zsb, uint64_t id, cred_t *cr,
560 zfs_fuid_type_t type, zfs_fuid_info_t **fuidpp)
565 uint32_t fuid_idx = FUID_INDEX(id);
569 zfs_fuid_t *zfuid = NULL;
570 zfs_fuid_info_t *fuidp;
573 * If POSIX ID, or entry is already a FUID then
576 * We may also be handed an already FUID'ized id via
580 if (!zsb->z_use_fuids || !IS_EPHEMERAL(id) || fuid_idx != 0)
584 fuidp = zsb->z_fuid_replay;
587 * If we are passed an ephemeral id, but no
588 * fuid_info was logged then return NOBODY.
589 * This is most likely a result of idmap service
590 * not being available.
598 zfuid = list_head(&fuidp->z_fuids);
599 rid = FUID_RID(zfuid->z_logfuid);
600 idx = FUID_INDEX(zfuid->z_logfuid);
603 rid = FUID_RID(fuidp->z_fuid_owner);
604 idx = FUID_INDEX(fuidp->z_fuid_owner);
607 rid = FUID_RID(fuidp->z_fuid_group);
608 idx = FUID_INDEX(fuidp->z_fuid_group);
/* Logged indexes are 1-based, hence the subtraction. */
611 domain = fuidp->z_domain_table[idx -1];
613 if (type == ZFS_OWNER || type == ZFS_ACE_USER)
614 status = kidmap_getsidbyuid(crgetzone(cr), id,
617 status = kidmap_getsidbygid(crgetzone(cr), id,
622 * When returning nobody we will need to
623 * make a dummy fuid table entry for logging
631 idx = zfs_fuid_find_by_domain(zsb, domain, &kdomain, B_TRUE);
634 zfs_fuid_node_add(fuidpp, kdomain,
/* Otherwise a replayed entry taken from z_fuids is unlinked and freed. */
636 else if (zfuid != NULL) {
637 list_remove(&fuidp->z_fuids, zfuid);
638 kmem_free(zfuid, sizeof (zfs_fuid_t));
640 return (FUID_ENCODE(idx, rid));
643 * The Linux port only supports POSIX IDs, use the passed id.
/*
 * Destroy the in-memory FUID trees of zsb.  Runs with z_fuid_lock held as
 * writer.
 *
 * NOTE(review): the statement after the rw_exit() in the "not loaded"
 * branch is not visible in this excerpt; presumably the function returns
 * there.  No reset of z_fuid_loaded is visible either -- confirm.
 */
650 zfs_fuid_destroy(zfs_sb_t *zsb)
652 rw_enter(&zsb->z_fuid_lock, RW_WRITER);
653 if (!zsb->z_fuid_loaded) {
654 rw_exit(&zsb->z_fuid_lock);
657 zfs_fuid_table_destroy(&zsb->z_fuid_idx, &zsb->z_fuid_domain);
658 rw_exit(&zsb->z_fuid_lock);
662 * Allocate zfs_fuid_info for tracking FUIDs created during
663 * zfs_mknode, VOP_SETATTR() or VOP_SETSECATTR()
/*
 * Allocate a zero-filled zfs_fuid_info_t and initialize its two lists:
 * z_domains (of zfs_fuid_domain_t) and z_fuids (of zfs_fuid_t), both
 * linked through their z_next members.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * zfs_fuid_node_add() assigns the result to *fuidpp, so presumably fuidp
 * is returned.
 */
666 zfs_fuid_info_alloc(void)
668 zfs_fuid_info_t *fuidp;
670 fuidp = kmem_zalloc(sizeof (zfs_fuid_info_t), KM_SLEEP);
671 list_create(&fuidp->z_domains, sizeof (zfs_fuid_domain_t),
672 offsetof(zfs_fuid_domain_t, z_next));
673 list_create(&fuidp->z_fuids, sizeof (zfs_fuid_t),
674 offsetof(zfs_fuid_t, z_next));
679 * Release all memory associated with zfs_fuid_info_t
/*
 * Free a zfs_fuid_info_t: every entry on z_fuids, the z_domain_table
 * array when present, every entry on z_domains, and finally the structure
 * itself.  The domain strings referenced by the entries are not freed
 * here.
 */
682 zfs_fuid_info_free(zfs_fuid_info_t *fuidp)
685 zfs_fuid_domain_t *zdomain;
687 while ((zfuid = list_head(&fuidp->z_fuids)) != NULL) {
688 list_remove(&fuidp->z_fuids, zfuid);
689 kmem_free(zfuid, sizeof (zfs_fuid_t));
/*
 * NOTE(review): the element size is spelled sizeof (char **); if the
 * table holds char * elements, sizeof (char *) would state the intent
 * more directly.  Confirm against the allocation site.
 */
692 if (fuidp->z_domain_table != NULL)
693 kmem_free(fuidp->z_domain_table,
694 (sizeof (char **)) * fuidp->z_domain_cnt);
696 while ((zdomain = list_head(&fuidp->z_domains)) != NULL) {
697 list_remove(&fuidp->z_domains, zdomain);
698 kmem_free(zdomain, sizeof (zfs_fuid_domain_t));
701 kmem_free(fuidp, sizeof (zfs_fuid_info_t));
705 * Check to see if id is a groupmember. If cred
706 * has ksid info then sidlist is checked first
707 * and if still not found then POSIX groups are checked
709 * Will use a straight FUID compare when possible.
/*
 * Decide whether credential cr is a member of group `id`.
 *
 * When the credential carries both a group SID and a SID list, each entry
 * of the list is compared with `id`: first by plain id, then by domain
 * string plus rid after resolving the FUID's index to a domain.  If
 * nothing matches there, `id` is mapped to a POSIX gid and the result of
 * groupmember() is returned.
 *
 * NOTE(review): several lines are omitted from this excerpt, including the
 * return statements inside the loop, the strcmp() call heads, and any
 * conditional compilation around the SID handling.
 */
712 zfs_groupmember(zfs_sb_t *zsb, uint64_t id, cred_t *cr)
715 ksid_t *ksid = crgetsid(cr, KSID_GROUP);
716 ksidlist_t *ksidlist = crgetsidlist(cr);
719 if (ksid && ksidlist) {
722 uint32_t idx = FUID_INDEX(id);
723 uint32_t rid = FUID_RID(id);
725 ksid_groups = ksidlist->ksl_sids;
727 for (i = 0; i != ksidlist->ksl_nsid; i++) {
/* Direct id match; the creator-group well-known gid is excluded. */
729 if (id != IDMAP_WK_CREATOR_GROUP_GID &&
730 id == ksid_groups[i].ks_id) {
736 domain = zfs_fuid_find_by_idx(zsb, idx);
737 ASSERT(domain != NULL);
740 IDMAP_WK_CREATOR_SID_AUTHORITY) == 0)
/* Match on domain string and rid together. */
744 ksid_groups[i].ks_domain->kd_name) == 0) &&
745 rid == ksid_groups[i].ks_rid)
752 * Not found in ksidlist, check posix groups
754 gid = zfs_fuid_map_id(zsb, id, cr, ZFS_GROUP);
755 return (groupmember(gid, cr));
/*
 * Add to tx the holds needed for a later FUID table sync.
 *
 * When no FUID object exists yet, holds are taken for a new object's bonus
 * buffer and data, plus the master node ZAP.  The remaining two holds
 * cover the bonus buffer and data of the existing object.  The write size
 * comes from FUID_SIZE_ESTIMATE(zsb) in both cases.
 *
 * NOTE(review): the line separating the two groups of holds is omitted
 * from this excerpt; presumably it is an else.
 */
762 zfs_fuid_txhold(zfs_sb_t *zsb, dmu_tx_t *tx)
764 if (zsb->z_fuid_obj == 0) {
765 dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
766 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
767 FUID_SIZE_ESTIMATE(zsb));
768 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
770 dmu_tx_hold_bonus(tx, zsb->z_fuid_obj);
771 dmu_tx_hold_write(tx, zsb->z_fuid_obj, 0,
772 FUID_SIZE_ESTIMATE(zsb));