4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
26 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
27 * Copyright (c) 2018 Datto Inc.
28 * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
43 #include <sys/efi_partition.h>
44 #include <sys/systeminfo.h>
46 #include <sys/zfs_ioctl.h>
47 #include <sys/vdev_disk.h>
50 #include "zfs_namecheck.h"
52 #include "libzfs_impl.h"
53 #include "zfs_comutil.h"
54 #include "zfeature_common.h"
56 static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
57 static boolean_t zpool_vdev_is_interior(const char *name);
59 typedef struct prop_flags {
60 int create:1; /* Validate property on creation */
61 int import:1; /* Validate property on import */
65 * ====================================================================
66 * zpool property functions
67 * ====================================================================
71 zpool_get_all_props(zpool_handle_t *zhp)
73 zfs_cmd_t zc = {"\0"};
74 libzfs_handle_t *hdl = zhp->zpool_hdl;
76 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
78 if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
81 while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
82 if (errno == ENOMEM) {
83 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
84 zcmd_free_nvlists(&zc);
88 zcmd_free_nvlists(&zc);
93 if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
94 zcmd_free_nvlists(&zc);
98 zcmd_free_nvlists(&zc);
104 zpool_props_refresh(zpool_handle_t *zhp)
108 old_props = zhp->zpool_props;
110 if (zpool_get_all_props(zhp) != 0)
113 nvlist_free(old_props);
118 zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
124 zprop_source_t source;
126 nvl = zhp->zpool_props;
127 if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
128 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
130 verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
132 source = ZPROP_SRC_DEFAULT;
133 if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
144 zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
148 zprop_source_t source;
150 if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
152 * zpool_get_all_props() has most likely failed because
153 * the pool is faulted, but if all we need is the top level
154 * vdev's guid then get it from the zhp config nvlist.
156 if ((prop == ZPOOL_PROP_GUID) &&
157 (nvlist_lookup_nvlist(zhp->zpool_config,
158 ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
159 (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
163 return (zpool_prop_default_numeric(prop));
166 nvl = zhp->zpool_props;
167 if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
168 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
170 verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
172 source = ZPROP_SRC_DEFAULT;
173 value = zpool_prop_default_numeric(prop);
183 * Map VDEV STATE to printed strings.
186 zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
189 case VDEV_STATE_CLOSED:
190 case VDEV_STATE_OFFLINE:
191 return (gettext("OFFLINE"));
192 case VDEV_STATE_REMOVED:
193 return (gettext("REMOVED"));
194 case VDEV_STATE_CANT_OPEN:
195 if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
196 return (gettext("FAULTED"));
197 else if (aux == VDEV_AUX_SPLIT_POOL)
198 return (gettext("SPLIT"));
200 return (gettext("UNAVAIL"));
201 case VDEV_STATE_FAULTED:
202 return (gettext("FAULTED"));
203 case VDEV_STATE_DEGRADED:
204 return (gettext("DEGRADED"));
205 case VDEV_STATE_HEALTHY:
206 return (gettext("ONLINE"));
212 return (gettext("UNKNOWN"));
216 * Map POOL STATE to printed strings.
219 zpool_pool_state_to_name(pool_state_t state)
224 case POOL_STATE_ACTIVE:
225 return (gettext("ACTIVE"));
226 case POOL_STATE_EXPORTED:
227 return (gettext("EXPORTED"));
228 case POOL_STATE_DESTROYED:
229 return (gettext("DESTROYED"));
230 case POOL_STATE_SPARE:
231 return (gettext("SPARE"));
232 case POOL_STATE_L2CACHE:
233 return (gettext("L2CACHE"));
234 case POOL_STATE_UNINITIALIZED:
235 return (gettext("UNINITIALIZED"));
236 case POOL_STATE_UNAVAIL:
237 return (gettext("UNAVAIL"));
238 case POOL_STATE_POTENTIALLY_ACTIVE:
239 return (gettext("POTENTIALLY_ACTIVE"));
242 return (gettext("UNKNOWN"));
246 * Given a pool handle, return the pool health string ("ONLINE", "DEGRADED",
250 zpool_get_state_str(zpool_handle_t *zhp)
252 zpool_errata_t errata;
253 zpool_status_t status;
259 status = zpool_get_status(zhp, NULL, &errata);
261 if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
262 str = gettext("FAULTED");
263 } else if (status == ZPOOL_STATUS_IO_FAILURE_WAIT ||
264 status == ZPOOL_STATUS_IO_FAILURE_MMP) {
265 str = gettext("SUSPENDED");
267 verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
268 ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
269 verify(nvlist_lookup_uint64_array(nvroot,
270 ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
272 str = zpool_state_to_name(vs->vs_state, vs->vs_aux);
278 * Get a zpool property value for 'prop' and return the value in
279 * a pre-allocated buffer.
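 *
 * Illustrative caller sketch (not part of the library itself; assumes an
 * open handle 'zhp', error handling elided):
 *
 *	char buf[ZFS_MAXPROPLEN];
 *	if (zpool_get_prop(zhp, ZPOOL_PROP_HEALTH, buf, sizeof (buf),
 *	    NULL, B_FALSE) == 0)
 *		(void) printf("%s\n", buf);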
282 zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
283 size_t len, zprop_source_t *srctype, boolean_t literal)
287 zprop_source_t src = ZPROP_SRC_NONE;
289 if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
291 case ZPOOL_PROP_NAME:
292 (void) strlcpy(buf, zpool_get_name(zhp), len);
295 case ZPOOL_PROP_HEALTH:
296 (void) strlcpy(buf, zpool_get_state_str(zhp), len);
299 case ZPOOL_PROP_GUID:
300 intval = zpool_get_prop_int(zhp, prop, &src);
301 (void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
304 case ZPOOL_PROP_ALTROOT:
305 case ZPOOL_PROP_CACHEFILE:
306 case ZPOOL_PROP_COMMENT:
307 if (zhp->zpool_props != NULL ||
308 zpool_get_all_props(zhp) == 0) {
310 zpool_get_prop_string(zhp, prop, &src),
316 (void) strlcpy(buf, "-", len);
325 if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
326 prop != ZPOOL_PROP_NAME)
329 switch (zpool_prop_get_type(prop)) {
330 case PROP_TYPE_STRING:
331 (void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
335 case PROP_TYPE_NUMBER:
336 intval = zpool_get_prop_int(zhp, prop, &src);
339 case ZPOOL_PROP_SIZE:
340 case ZPOOL_PROP_ALLOCATED:
341 case ZPOOL_PROP_FREE:
342 case ZPOOL_PROP_FREEING:
343 case ZPOOL_PROP_LEAKED:
344 case ZPOOL_PROP_ASHIFT:
346 (void) snprintf(buf, len, "%llu",
347 (u_longlong_t)intval);
349 (void) zfs_nicenum(intval, buf, len);
352 case ZPOOL_PROP_EXPANDSZ:
354 (void) strlcpy(buf, "-", len);
355 } else if (literal) {
356 (void) snprintf(buf, len, "%llu",
357 (u_longlong_t)intval);
359 (void) zfs_nicebytes(intval, buf, len);
363 case ZPOOL_PROP_CAPACITY:
365 (void) snprintf(buf, len, "%llu",
366 (u_longlong_t)intval);
368 (void) snprintf(buf, len, "%llu%%",
369 (u_longlong_t)intval);
373 case ZPOOL_PROP_FRAGMENTATION:
374 if (intval == UINT64_MAX) {
375 (void) strlcpy(buf, "-", len);
376 } else if (literal) {
377 (void) snprintf(buf, len, "%llu",
378 (u_longlong_t)intval);
380 (void) snprintf(buf, len, "%llu%%",
381 (u_longlong_t)intval);
385 case ZPOOL_PROP_DEDUPRATIO:
387 (void) snprintf(buf, len, "%llu.%02llu",
388 (u_longlong_t)(intval / 100),
389 (u_longlong_t)(intval % 100));
391 (void) snprintf(buf, len, "%llu.%02llux",
392 (u_longlong_t)(intval / 100),
393 (u_longlong_t)(intval % 100));
396 case ZPOOL_PROP_HEALTH:
397 (void) strlcpy(buf, zpool_get_state_str(zhp), len);
399 case ZPOOL_PROP_VERSION:
400 if (intval >= SPA_VERSION_FEATURES) {
401 (void) snprintf(buf, len, "-");
406 (void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
410 case PROP_TYPE_INDEX:
411 intval = zpool_get_prop_int(zhp, prop, &src);
412 if (zpool_prop_index_to_string(prop, intval, &strval)
415 (void) strlcpy(buf, strval, len);
429 * Check that the bootfs name uses the same pool name as the pool it is
430 * being set on.  Assumes bootfs is a valid dataset name.
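 * For example (per the prefix check below), with pool "tank" the values
 * "tank" and "tank/ROOT/fs" are accepted, while "tankX/fs" or a dataset
 * from another pool is rejected.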
433 bootfs_name_valid(const char *pool, char *bootfs)
435 int len = strlen(pool);
436 if (bootfs[0] == '\0')
439 if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
442 if (strncmp(pool, bootfs, len) == 0 &&
443 (bootfs[len] == '/' || bootfs[len] == '\0'))
450 zpool_is_bootable(zpool_handle_t *zhp)
452 char bootfs[ZFS_MAX_DATASET_NAME_LEN];
454 return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
455 sizeof (bootfs), NULL, B_FALSE) == 0 && strncmp(bootfs, "-",
456 sizeof (bootfs)) != 0);
461 * Given an nvlist of zpool properties to be set, validate that they are
462 * correct, and parse any numeric properties (index, boolean, etc.) if they are
463 * specified as strings.
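 *
 * Illustrative caller sketch ('hdl' and 'errbuf' assumed, error handling
 * elided).  A numeric property passed as the string "12" comes back as a
 * parsed uint64 in the returned nvlist:
 *
 *	nvlist_t *props = NULL;
 *	prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };
 *
 *	verify(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
 *	verify(nvlist_add_string(props, "ashift", "12") == 0);
 *	nvlist_t *ret = zpool_valid_proplist(hdl, "tank", props,
 *	    SPA_VERSION_1, flags, errbuf);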
466 zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
467 nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
475 struct stat64 statbuf;
478 if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
479 (void) no_memory(hdl);
484 while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
485 const char *propname = nvpair_name(elem);
487 prop = zpool_name_to_prop(propname);
488 if (prop == ZPOOL_PROP_INVAL && zpool_prop_feature(propname)) {
490 char *fname = strchr(propname, '@') + 1;
492 err = zfeature_lookup_name(fname, NULL);
494 ASSERT3U(err, ==, ENOENT);
495 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
496 "invalid feature '%s'"), fname);
497 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
501 if (nvpair_type(elem) != DATA_TYPE_STRING) {
502 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
503 "'%s' must be a string"), propname);
504 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
508 (void) nvpair_value_string(elem, &strval);
509 if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0 &&
510 strcmp(strval, ZFS_FEATURE_DISABLED) != 0) {
511 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
512 "property '%s' can only be set to "
513 "'enabled' or 'disabled'"), propname);
514 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
519 strcmp(strval, ZFS_FEATURE_DISABLED) == 0) {
520 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
521 "property '%s' can only be set to "
522 "'disabled' at creation time"), propname);
523 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
527 if (nvlist_add_uint64(retprops, propname, 0) != 0) {
528 (void) no_memory(hdl);
535 * Make sure this property is valid and applies to this type.
537 if (prop == ZPOOL_PROP_INVAL) {
538 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
539 "invalid property '%s'"), propname);
540 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
544 if (zpool_prop_readonly(prop)) {
545 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
546 "is readonly"), propname);
547 (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
551 if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
552 &strval, &intval, errbuf) != 0)
556 * Perform additional checking for specific properties.
559 case ZPOOL_PROP_VERSION:
560 if (intval < version ||
561 !SPA_VERSION_IS_SUPPORTED(intval)) {
562 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
563 "property '%s' number %d is invalid."),
565 (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
570 case ZPOOL_PROP_ASHIFT:
572 (intval < ASHIFT_MIN || intval > ASHIFT_MAX)) {
573 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
574 "invalid '%s=%d' property: only values "
575 "between %" PRId32 " and %" PRId32 " "
577 propname, intval, ASHIFT_MIN, ASHIFT_MAX);
578 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
583 case ZPOOL_PROP_BOOTFS:
584 if (flags.create || flags.import) {
585 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
586 "property '%s' cannot be set at creation "
587 "or import time"), propname);
588 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
592 if (version < SPA_VERSION_BOOTFS) {
593 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
594 "pool must be upgraded to support "
595 "'%s' property"), propname);
596 (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
601 * The bootfs property value has to be a dataset name, and
602 * the dataset has to be in the same pool it is being set on.
604 if (!bootfs_name_valid(poolname, strval)) {
605 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
606 "is an invalid name"), strval);
607 (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
611 if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
612 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
613 "could not open pool '%s'"), poolname);
614 (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
620 case ZPOOL_PROP_ALTROOT:
621 if (!flags.create && !flags.import) {
622 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
623 "property '%s' can only be set during pool "
624 "creation or import"), propname);
625 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
629 if (strval[0] != '/') {
630 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
631 "bad alternate root '%s'"), strval);
632 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
637 case ZPOOL_PROP_CACHEFILE:
638 if (strval[0] == '\0')
641 if (strcmp(strval, "none") == 0)
644 if (strval[0] != '/') {
645 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
646 "property '%s' must be empty, an "
647 "absolute path, or 'none'"), propname);
648 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
652 slash = strrchr(strval, '/');
654 if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
655 strcmp(slash, "/..") == 0) {
656 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
657 "'%s' is not a valid file"), strval);
658 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
664 if (strval[0] != '\0' &&
665 (stat64(strval, &statbuf) != 0 ||
666 !S_ISDIR(statbuf.st_mode))) {
667 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
668 "'%s' is not a valid directory"),
670 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
677 case ZPOOL_PROP_COMMENT:
678 for (check = strval; *check != '\0'; check++) {
679 if (!isprint(*check)) {
681 dgettext(TEXT_DOMAIN,
682 "comment may only have printable "
684 (void) zfs_error(hdl, EZFS_BADPROP,
689 if (strlen(strval) > ZPROP_MAX_COMMENT) {
690 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
691 "comment must not exceed %d characters"),
693 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
697 case ZPOOL_PROP_READONLY:
699 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
700 "property '%s' can only be set at "
701 "import time"), propname);
702 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
706 case ZPOOL_PROP_TNAME:
708 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
709 "property '%s' can only be set at "
710 "creation time"), propname);
711 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
715 case ZPOOL_PROP_MULTIHOST:
716 if (get_system_hostid() == 0) {
717 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
718 "requires a non-zero system hostid"));
719 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
724 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
725 "property '%s'(%d) not defined"), propname, prop);
732 nvlist_free(retprops);
737 * Set zpool property: propname=propval.
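 *
 * Illustrative use (assumes an open handle 'zhp'):
 *
 *	if (zpool_set_prop(zhp, "comment", "production pool") != 0)
 *		(void) fprintf(stderr, "failed to set comment\n");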
740 zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
742 zfs_cmd_t zc = {"\0"};
745 nvlist_t *nvl = NULL;
748 prop_flags_t flags = { 0 };
750 (void) snprintf(errbuf, sizeof (errbuf),
751 dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
754 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
755 return (no_memory(zhp->zpool_hdl));
757 if (nvlist_add_string(nvl, propname, propval) != 0) {
759 return (no_memory(zhp->zpool_hdl));
762 version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
763 if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
764 zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
773 * Execute the corresponding ioctl() to set this property.
775 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
777 if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
782 ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
784 zcmd_free_nvlists(&zc);
788 (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
790 (void) zpool_props_refresh(zhp);
796 zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
798 libzfs_handle_t *hdl = zhp->zpool_hdl;
800 char buf[ZFS_MAXPROPLEN];
801 nvlist_t *features = NULL;
804 boolean_t firstexpand = (NULL == *plp);
807 if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
811 while (*last != NULL)
812 last = &(*last)->pl_next;
815 features = zpool_get_features(zhp);
817 if ((*plp)->pl_all && firstexpand) {
818 for (i = 0; i < SPA_FEATURES; i++) {
819 zprop_list_t *entry = zfs_alloc(hdl,
820 sizeof (zprop_list_t));
821 entry->pl_prop = ZPROP_INVAL;
822 entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s",
823 spa_feature_table[i].fi_uname);
824 entry->pl_width = strlen(entry->pl_user_prop);
825 entry->pl_all = B_TRUE;
828 last = &entry->pl_next;
832 /* add any unsupported features */
833 for (nvp = nvlist_next_nvpair(features, NULL);
834 nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) {
839 if (zfeature_is_supported(nvpair_name(nvp)))
842 propname = zfs_asprintf(hdl, "unsupported@%s",
846 * Before adding the property to the list make sure that no
847 * other pool already added the same property.
851 while (entry != NULL) {
852 if (entry->pl_user_prop != NULL &&
853 strcmp(propname, entry->pl_user_prop) == 0) {
857 entry = entry->pl_next;
864 entry = zfs_alloc(hdl, sizeof (zprop_list_t));
865 entry->pl_prop = ZPROP_INVAL;
866 entry->pl_user_prop = propname;
867 entry->pl_width = strlen(entry->pl_user_prop);
868 entry->pl_all = B_TRUE;
871 last = &entry->pl_next;
874 for (entry = *plp; entry != NULL; entry = entry->pl_next) {
879 if (entry->pl_prop != ZPROP_INVAL &&
880 zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
881 NULL, B_FALSE) == 0) {
882 if (strlen(buf) > entry->pl_width)
883 entry->pl_width = strlen(buf);
891 * Get the state for the given feature on the given ZFS pool.
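 *
 * Illustrative use (assumes an open handle 'zhp'); for supported features
 * 'buf' receives "disabled", "enabled", or "active":
 *
 *	char buf[ZFS_MAXPROPLEN];
 *	if (zpool_prop_get_feature(zhp, "feature@async_destroy", buf,
 *	    sizeof (buf)) == 0)
 *		(void) printf("%s\n", buf);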
894 zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
898 boolean_t found = B_FALSE;
899 nvlist_t *features = zpool_get_features(zhp);
901 const char *feature = strchr(propname, '@') + 1;
903 supported = zpool_prop_feature(propname);
904 ASSERT(supported || zpool_prop_unsupported(propname));
907 * Convert from feature name to feature guid. This conversion is
908 * unnecessary for unsupported@... properties because they already
915 ret = zfeature_lookup_name(feature, &fid);
917 (void) strlcpy(buf, "-", len);
920 feature = spa_feature_table[fid].fi_guid;
923 if (nvlist_lookup_uint64(features, feature, &refcount) == 0)
928 (void) strlcpy(buf, ZFS_FEATURE_DISABLED, len);
931 (void) strlcpy(buf, ZFS_FEATURE_ENABLED, len);
933 (void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len);
938 (void) strcpy(buf, ZFS_UNSUPPORTED_INACTIVE);
940 (void) strcpy(buf, ZFS_UNSUPPORTED_READONLY);
943 (void) strlcpy(buf, "-", len);
952 * Validate the given pool name, optionally putting an extended error message in
956 zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
962 ret = pool_namecheck(pool, &why, &what);
965 * The rules for reserved pool names were extended at a later point.
966 * But we need to support users with existing pools that may now be
967 * invalid. So we only check for this expanded set of names during a
968 * create (or import), and only in userland.
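 * For example, at create or import time names such as "mirror2", "raidz3",
 * "sparepool", and "log" are refused by this check, while a pre-existing
 * pool with such a name can still be opened (isopen == B_TRUE skips it).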
970 if (ret == 0 && !isopen &&
971 (strncmp(pool, "mirror", 6) == 0 ||
972 strncmp(pool, "raidz", 5) == 0 ||
973 strncmp(pool, "spare", 5) == 0 ||
974 strcmp(pool, "log") == 0)) {
977 dgettext(TEXT_DOMAIN, "name is reserved"));
985 case NAME_ERR_TOOLONG:
987 dgettext(TEXT_DOMAIN, "name is too long"));
990 case NAME_ERR_INVALCHAR:
992 dgettext(TEXT_DOMAIN, "invalid character "
993 "'%c' in pool name"), what);
996 case NAME_ERR_NOLETTER:
997 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
998 "name must begin with a letter"));
1001 case NAME_ERR_RESERVED:
1002 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1003 "name is reserved"));
1006 case NAME_ERR_DISKLIKE:
1007 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1008 "pool name is reserved"));
1011 case NAME_ERR_LEADING_SLASH:
1012 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1013 "leading slash in name"));
1016 case NAME_ERR_EMPTY_COMPONENT:
1017 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1018 "empty component in name"));
1021 case NAME_ERR_TRAILING_SLASH:
1022 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1023 "trailing slash in name"));
1026 case NAME_ERR_MULTIPLE_DELIMITERS:
1027 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1028 "multiple '@' and/or '#' delimiters in "
1032 case NAME_ERR_NO_AT:
1033 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1034 "permission set is missing '@'"));
1038 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1039 "(%d) not defined"), why);
1050 * Open a handle to the given pool, even if the pool is currently in the FAULTED
1054 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
1056 zpool_handle_t *zhp;
1060 * Make sure the pool name is valid.
1062 if (!zpool_name_valid(hdl, B_TRUE, pool)) {
1063 (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1064 dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1069 if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1072 zhp->zpool_hdl = hdl;
1073 (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1075 if (zpool_refresh_stats(zhp, &missing) != 0) {
1081 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
1082 (void) zfs_error_fmt(hdl, EZFS_NOENT,
1083 dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
1092 * Like the above, but silent on error. Used when iterating over pools (because
1093 * the configuration cache may be out of date).
1096 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
1098 zpool_handle_t *zhp;
1101 if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1104 zhp->zpool_hdl = hdl;
1105 (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1107 if (zpool_refresh_stats(zhp, &missing) != 0) {
1123 * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
1127 zpool_open(libzfs_handle_t *hdl, const char *pool)
1129 zpool_handle_t *zhp;
1131 if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
1134 if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
1135 (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
1136 dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
1145 * Close the handle. Simply frees the memory associated with the handle.
1148 zpool_close(zpool_handle_t *zhp)
1150 nvlist_free(zhp->zpool_config);
1151 nvlist_free(zhp->zpool_old_config);
1152 nvlist_free(zhp->zpool_props);
1157 * Return the name of the pool.
1160 zpool_get_name(zpool_handle_t *zhp)
1162 return (zhp->zpool_name);
1167 * Return the state of the pool (ACTIVE or UNAVAILABLE)
1170 zpool_get_state(zpool_handle_t *zhp)
1172 return (zhp->zpool_state);
1176 * Create the named pool, using the provided vdev list. It is assumed
1177 * that the consumer has already validated the contents of the nvlist, so we
1178 * don't have to worry about error semantics.
1181 zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
1182 nvlist_t *props, nvlist_t *fsprops)
1184 zfs_cmd_t zc = {"\0"};
1185 nvlist_t *zc_fsprops = NULL;
1186 nvlist_t *zc_props = NULL;
1187 nvlist_t *hidden_args = NULL;
1188 uint8_t *wkeydata = NULL;
1193 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1194 "cannot create '%s'"), pool);
1196 if (!zpool_name_valid(hdl, B_FALSE, pool))
1197 return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
1199 if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1203 prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };
1205 if ((zc_props = zpool_valid_proplist(hdl, pool, props,
1206 SPA_VERSION_1, flags, msg)) == NULL) {
1215 zoned = ((nvlist_lookup_string(fsprops,
1216 zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
1217 strcmp(zonestr, "on") == 0);
1219 if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM,
1220 fsprops, zoned, NULL, NULL, B_TRUE, msg)) == NULL) {
1224 (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
1227 if (zfs_crypto_create(hdl, NULL, zc_fsprops, props,
1228 &wkeydata, &wkeylen) != 0) {
1229 zfs_error(hdl, EZFS_CRYPTOFAILED, msg);
1232 if (nvlist_add_nvlist(zc_props,
1233 ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
1236 if (wkeydata != NULL) {
1237 if (nvlist_alloc(&hidden_args, NV_UNIQUE_NAME, 0) != 0)
1240 if (nvlist_add_uint8_array(hidden_args, "wkeydata",
1241 wkeydata, wkeylen) != 0)
1244 if (nvlist_add_nvlist(zc_props, ZPOOL_HIDDEN_ARGS,
1250 if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
1253 (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
1255 if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {
1257 zcmd_free_nvlists(&zc);
1258 nvlist_free(zc_props);
1259 nvlist_free(zc_fsprops);
1260 nvlist_free(hidden_args);
1261 if (wkeydata != NULL)
1267 * This can happen if the user has specified the same
1268 * device multiple times. We can't reliably detect this
1269 * until we try to add it and see we already have a
1270 * label. This can also happen if the device is
1271 * part of an active md or lvm device.
1273 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1274 "one or more vdevs refer to the same device, or "
1275 "one of\nthe devices is part of an active md or "
1277 return (zfs_error(hdl, EZFS_BADDEV, msg));
1281 * This happens if the record size is smaller or larger
1282 * than the allowed size range, or not a power of 2.
1284 * NOTE: although zfs_valid_proplist is called earlier,
1285 * this case may have slipped through since the
1286 * pool does not exist yet and it is therefore
1287 * impossible to read properties e.g. max blocksize
1290 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1291 "record size invalid"));
1292 return (zfs_error(hdl, EZFS_BADPROP, msg));
1296 * This occurs when one of the devices is below
1297 * SPA_MINDEVSIZE. Unfortunately, we can't detect which
1298 * device was the problem device since there's no
1299 * reliable way to determine device size from userland.
1304 zfs_nicebytes(SPA_MINDEVSIZE, buf,
1307 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1308 "one or more devices is less than the "
1309 "minimum size (%s)"), buf);
1311 return (zfs_error(hdl, EZFS_BADDEV, msg));
1314 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1315 "one or more devices is out of space"));
1316 return (zfs_error(hdl, EZFS_BADDEV, msg));
1319 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1320 "cache device must be a disk or disk slice"));
1321 return (zfs_error(hdl, EZFS_BADDEV, msg));
1324 return (zpool_standard_error(hdl, errno, msg));
1329 zcmd_free_nvlists(&zc);
1330 nvlist_free(zc_props);
1331 nvlist_free(zc_fsprops);
1332 nvlist_free(hidden_args);
1333 if (wkeydata != NULL)
1339 * Destroy the given pool. It is up to the caller to ensure that there are no
1340 * datasets left in the pool.
1343 zpool_destroy(zpool_handle_t *zhp, const char *log_str)
1345 zfs_cmd_t zc = {"\0"};
1346 zfs_handle_t *zfp = NULL;
1347 libzfs_handle_t *hdl = zhp->zpool_hdl;
1350 if (zhp->zpool_state == POOL_STATE_ACTIVE &&
1351 (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
1354 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1355 zc.zc_history = (uint64_t)(uintptr_t)log_str;
1357 if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
1358 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1359 "cannot destroy '%s'"), zhp->zpool_name);
1361 if (errno == EROFS) {
1362 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1363 "one or more devices is read only"));
1364 (void) zfs_error(hdl, EZFS_BADDEV, msg);
1366 (void) zpool_standard_error(hdl, errno, msg);
1375 remove_mountpoint(zfp);
1383 * Add the given vdevs to the pool. The caller must have already performed the
1384 * necessary verification to ensure that the vdev specification is well-formed.
1387 zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
1389 zfs_cmd_t zc = {"\0"};
1391 libzfs_handle_t *hdl = zhp->zpool_hdl;
1393 nvlist_t **spares, **l2cache;
1394 uint_t nspares, nl2cache;
1396 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1397 "cannot add to '%s'"), zhp->zpool_name);
1399 if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1400 SPA_VERSION_SPARES &&
1401 nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1402 &spares, &nspares) == 0) {
1403 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1404 "upgraded to add hot spares"));
1405 return (zfs_error(hdl, EZFS_BADVERSION, msg));
1408 if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1409 SPA_VERSION_L2CACHE &&
1410 nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
1411 &l2cache, &nl2cache) == 0) {
1412 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1413 "upgraded to add cache devices"));
1414 return (zfs_error(hdl, EZFS_BADVERSION, msg));
1417 if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1419 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1421 if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
1425 * This can happen if the user has specified the same
1426 * device multiple times. We can't reliably detect this
1427 * until we try to add it and see we already have a
1430 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1431 "one or more vdevs refer to the same device"));
1432 (void) zfs_error(hdl, EZFS_BADDEV, msg);
1436 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1437 "invalid config; a pool with removing/removed "
1438 "vdevs does not support adding raidz vdevs"));
1439 (void) zfs_error(hdl, EZFS_BADDEV, msg);
1444 * This occurs when one of the devices is below
1445 * SPA_MINDEVSIZE. Unfortunately, we can't detect which
1446 * device was the problem device since there's no
1447 * reliable way to determine device size from userland.
1452 zfs_nicebytes(SPA_MINDEVSIZE, buf,
1455 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1456 "device is less than the minimum "
1459 (void) zfs_error(hdl, EZFS_BADDEV, msg);
1463 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1464 "pool must be upgraded to add these vdevs"));
1465 (void) zfs_error(hdl, EZFS_BADVERSION, msg);
1469 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1470 "cache device must be a disk or disk slice"));
1471 (void) zfs_error(hdl, EZFS_BADDEV, msg);
1475 (void) zpool_standard_error(hdl, errno, msg);
1483 zcmd_free_nvlists(&zc);
1489 * Exports the pool from the system. The caller must ensure that there are no
1490 * mounted datasets in the pool.
1493 zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce,
1494 const char *log_str)
1496 zfs_cmd_t zc = {"\0"};
1499 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1500 "cannot export '%s'"), zhp->zpool_name);
1502 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1503 zc.zc_cookie = force;
1504 zc.zc_guid = hardforce;
1505 zc.zc_history = (uint64_t)(uintptr_t)log_str;
1507 if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
1510 zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
1511 "use '-f' to override the following errors:\n"
1512 "'%s' has an active shared spare which could be"
1513 " used by other pools once '%s' is exported."),
1514 zhp->zpool_name, zhp->zpool_name);
1515 return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
1518 return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
1527 zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str)
1529 return (zpool_export_common(zhp, force, B_FALSE, log_str));
1533 zpool_export_force(zpool_handle_t *zhp, const char *log_str)
1535 return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str));
1539 zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
1542 nvlist_t *nv = NULL;
1548 if (!hdl->libzfs_printerr || config == NULL)
1551 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
1552 nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) {
1556 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1558 (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1560 if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1561 strftime(timestr, 128, "%c", &t) != 0) {
1563 (void) printf(dgettext(TEXT_DOMAIN,
1564 "Would be able to return %s "
1565 "to its state as of %s.\n"),
1568 (void) printf(dgettext(TEXT_DOMAIN,
1569 "Pool %s returned to its state as of %s.\n"),
1573 (void) printf(dgettext(TEXT_DOMAIN,
1574 "%s approximately %lld "),
1575 dryrun ? "Would discard" : "Discarded",
1576 ((longlong_t)loss + 30) / 60);
1577 (void) printf(dgettext(TEXT_DOMAIN,
1578 "minutes of transactions.\n"));
1579 } else if (loss > 0) {
1580 (void) printf(dgettext(TEXT_DOMAIN,
1581 "%s approximately %lld "),
1582 dryrun ? "Would discard" : "Discarded",
1584 (void) printf(dgettext(TEXT_DOMAIN,
1585 "seconds of transactions.\n"));
1591 zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
1594 nvlist_t *nv = NULL;
1596 uint64_t edata = UINT64_MAX;
1601 if (!hdl->libzfs_printerr)
1605 (void) printf(dgettext(TEXT_DOMAIN, "action: "));
1607 (void) printf(dgettext(TEXT_DOMAIN, "\t"));
1609 /* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
1610 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
1611 nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 ||
1612 nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1615 (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1616 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
1619 (void) printf(dgettext(TEXT_DOMAIN,
1620 "Recovery is possible, but will result in some data loss.\n"));
1622 if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1623 strftime(timestr, 128, "%c", &t) != 0) {
1624 (void) printf(dgettext(TEXT_DOMAIN,
1625 "\tReturning the pool to its state as of %s\n"
1626 "\tshould correct the problem. "),
1629 (void) printf(dgettext(TEXT_DOMAIN,
1630 "\tReverting the pool to an earlier state "
1631 "should correct the problem.\n\t"));
1635 (void) printf(dgettext(TEXT_DOMAIN,
1636 "Approximately %lld minutes of data\n"
1637 "\tmust be discarded, irreversibly. "),
1638 ((longlong_t)loss + 30) / 60);
1639 } else if (loss > 0) {
1640 (void) printf(dgettext(TEXT_DOMAIN,
1641 "Approximately %lld seconds of data\n"
1642 "\tmust be discarded, irreversibly. "),
1645 if (edata != 0 && edata != UINT64_MAX) {
1647 (void) printf(dgettext(TEXT_DOMAIN,
1648 "After rewind, at least\n"
1649 "\tone persistent user-data error will remain. "));
1651 (void) printf(dgettext(TEXT_DOMAIN,
1652 "After rewind, several\n"
1653 "\tpersistent user-data errors will remain. "));
1656 (void) printf(dgettext(TEXT_DOMAIN,
1657 "Recovery can be attempted\n\tby executing 'zpool %s -F %s'. "),
1658 reason >= 0 ? "clear" : "import", name);
1660 (void) printf(dgettext(TEXT_DOMAIN,
1661 "A scrub of the pool\n"
1662 "\tis strongly recommended after recovery.\n"));
1666 (void) printf(dgettext(TEXT_DOMAIN,
1667 "Destroy and re-create the pool from\n\ta backup source.\n"));
1671 * zpool_import() is a contracted interface. Should be kept the same
1674 * Applications should use zpool_import_props() to import a pool with
1675 * new property values to be set.
1678 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1681 nvlist_t *props = NULL;
1684 if (altroot != NULL) {
1685 if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1686 return (zfs_error_fmt(hdl, EZFS_NOMEM,
1687 dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1691 if (nvlist_add_string(props,
1692 zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
1693 nvlist_add_string(props,
1694 zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
1696 return (zfs_error_fmt(hdl, EZFS_NOMEM,
1697 dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1702 ret = zpool_import_props(hdl, config, newname, props,
1709 print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
1715 uint64_t is_log = 0;
1717 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
1721 (void) printf("\t%*s%s%s\n", indent, "", name,
1722 is_log ? " [log]" : "");
1724 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1725 &child, &children) != 0)
1728 for (c = 0; c < children; c++) {
1729 vname = zpool_vdev_name(hdl, NULL, child[c], VDEV_NAME_TYPE_ID);
1730 print_vdev_tree(hdl, vname, child[c], indent + 2);
1736 zpool_print_unsup_feat(nvlist_t *config)
1738 nvlist_t *nvinfo, *unsup_feat;
1741 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) ==
1743 verify(nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT,
1746 for (nvp = nvlist_next_nvpair(unsup_feat, NULL); nvp != NULL;
1747 nvp = nvlist_next_nvpair(unsup_feat, nvp)) {
1750 verify(nvpair_type(nvp) == DATA_TYPE_STRING);
1751 verify(nvpair_value_string(nvp, &desc) == 0);
1753 if (strlen(desc) > 0)
1754 (void) printf("\t%s (%s)\n", nvpair_name(nvp), desc);
1756 (void) printf("\t%s\n", nvpair_name(nvp));
1761 * Import the given pool using the known configuration and a list of
1762 * properties to be set. The configuration should have come from
1763 * zpool_find_import(). The 'newname' parameter controls whether the pool
1764 * is imported with a different name.
1767 zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1768 nvlist_t *props, int flags)
1770 zfs_cmd_t zc = {"\0"};
1771 zpool_load_policy_t policy;
1772 nvlist_t *nv = NULL;
1773 nvlist_t *nvinfo = NULL;
1774 nvlist_t *missing = NULL;
1781 verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1784 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1785 "cannot import pool '%s'"), origname);
1787 if (newname != NULL) {
1788 if (!zpool_name_valid(hdl, B_FALSE, newname))
1789 return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1790 dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1792 thename = (char *)newname;
1797 if (props != NULL) {
1799 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
1801 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1804 if ((props = zpool_valid_proplist(hdl, origname,
1805 props, version, flags, errbuf)) == NULL)
1807 if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1814 (void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1816 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1819 if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1820 zcmd_free_nvlists(&zc);
1823 if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
1824 zcmd_free_nvlists(&zc);
1828 zc.zc_cookie = flags;
1829 while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
1831 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
1832 zcmd_free_nvlists(&zc);
1839 (void) zcmd_read_dst_nvlist(hdl, &zc, &nv);
1841 zcmd_free_nvlists(&zc);
1843 zpool_get_load_policy(config, &policy);
1850 * Dry-run failed, but we print out what success
1851 * looks like if we found a best txg
1853 if (policy.zlp_rewind & ZPOOL_TRY_REWIND) {
1854 zpool_rewind_exclaim(hdl, newname ? origname : thename,
1860 if (newname == NULL)
1861 (void) snprintf(desc, sizeof (desc),
1862 dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1865 (void) snprintf(desc, sizeof (desc),
1866 dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1871 if (nv != NULL && nvlist_lookup_nvlist(nv,
1872 ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1873 nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) {
1874 (void) printf(dgettext(TEXT_DOMAIN, "This "
1875 "pool uses the following feature(s) not "
1876 "supported by this system:\n"));
1877 zpool_print_unsup_feat(nv);
1878 if (nvlist_exists(nvinfo,
1879 ZPOOL_CONFIG_CAN_RDONLY)) {
1880 (void) printf(dgettext(TEXT_DOMAIN,
1881 "All unsupported features are only "
1882 "required for writing to the pool."
1883 "\nThe pool can be imported using "
1884 "'-o readonly=on'.\n"));
1888 * Unsupported version.
1890 (void) zfs_error(hdl, EZFS_BADVERSION, desc);
1894 if (nv != NULL && nvlist_lookup_nvlist(nv,
1895 ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0) {
1896 char *hostname = "<unknown>";
1897 uint64_t hostid = 0;
1898 mmp_state_t mmp_state;
1900 mmp_state = fnvlist_lookup_uint64(nvinfo,
1901 ZPOOL_CONFIG_MMP_STATE);
1903 if (nvlist_exists(nvinfo,
1904 ZPOOL_CONFIG_MMP_HOSTNAME))
1905 hostname = fnvlist_lookup_string(nvinfo,
1906 ZPOOL_CONFIG_MMP_HOSTNAME);
1908 if (nvlist_exists(nvinfo,
1909 ZPOOL_CONFIG_MMP_HOSTID))
1910 hostid = fnvlist_lookup_uint64(nvinfo,
1911 ZPOOL_CONFIG_MMP_HOSTID);
1913 if (mmp_state == MMP_STATE_ACTIVE) {
1914 (void) snprintf(aux, sizeof (aux),
1915 dgettext(TEXT_DOMAIN, "pool is imp"
1916 "orted on host '%s' (hostid=%lx).\n"
1917 "Export the pool on the other "
1918 "system, then run 'zpool import'."),
1919 hostname, (unsigned long) hostid);
1920 } else if (mmp_state == MMP_STATE_NO_HOSTID) {
1921 (void) snprintf(aux, sizeof (aux),
1922 dgettext(TEXT_DOMAIN, "pool has "
1923 "the multihost property on and "
1924 "the\nsystem's hostid is not set. "
1925 "Set a unique system hostid with "
1926 "the zgenhostid(8) command.\n"));
1929 (void) zfs_error_aux(hdl, aux);
1931 (void) zfs_error(hdl, EZFS_ACTIVE_POOL, desc);
1935 (void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
1939 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1940 "one or more devices is read only"));
1941 (void) zfs_error(hdl, EZFS_BADDEV, desc);
1945 if (nv && nvlist_lookup_nvlist(nv,
1946 ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1947 nvlist_lookup_nvlist(nvinfo,
1948 ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
1949 (void) printf(dgettext(TEXT_DOMAIN,
1950 "The devices below are missing or "
1951 "corrupted, use '-m' to import the pool "
1953 print_vdev_tree(hdl, NULL, missing, 2);
1954 (void) printf("\n");
1956 (void) zpool_standard_error(hdl, error, desc);
1960 (void) zpool_standard_error(hdl, error, desc);
1964 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1965 "one or more devices are already in use\n"));
1966 (void) zfs_error(hdl, EZFS_BADDEV, desc);
1969 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1970 "new name of at least one dataset is longer than "
1971 "the maximum allowable length"));
1972 (void) zfs_error(hdl, EZFS_NAMETOOLONG, desc);
1975 (void) zpool_standard_error(hdl, error, desc);
1976 zpool_explain_recover(hdl,
1977 newname ? origname : thename, -error, nv);
1984 zpool_handle_t *zhp;
1987 * This should never fail, but play it safe anyway.
1989 if (zpool_open_silent(hdl, thename, &zhp) != 0)
1991 else if (zhp != NULL)
1993 if (policy.zlp_rewind &
1994 (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
1995 zpool_rewind_exclaim(hdl, newname ? origname : thename,
1996 ((policy.zlp_rewind & ZPOOL_TRY_REWIND) != 0), nv);
2009 zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
2011 zfs_cmd_t zc = {"\0"};
2014 libzfs_handle_t *hdl = zhp->zpool_hdl;
2016 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2017 zc.zc_cookie = func;
2020 if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0)
2025 /* ECANCELED on a scrub means we resumed a paused scrub */
2026 if (err == ECANCELED && func == POOL_SCAN_SCRUB &&
2027 cmd == POOL_SCRUB_NORMAL)
2030 if (err == ENOENT && func != POOL_SCAN_NONE && cmd == POOL_SCRUB_NORMAL)
2033 if (func == POOL_SCAN_SCRUB) {
2034 if (cmd == POOL_SCRUB_PAUSE) {
2035 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2036 "cannot pause scrubbing %s"), zc.zc_name);
2038 assert(cmd == POOL_SCRUB_NORMAL);
2039 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2040 "cannot scrub %s"), zc.zc_name);
2042 } else if (func == POOL_SCAN_NONE) {
2043 (void) snprintf(msg, sizeof (msg),
2044 dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
2047 assert(!"unexpected result");
2052 pool_scan_stat_t *ps = NULL;
2055 verify(nvlist_lookup_nvlist(zhp->zpool_config,
2056 ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
2057 (void) nvlist_lookup_uint64_array(nvroot,
2058 ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
2059 if (ps && ps->pss_func == POOL_SCAN_SCRUB) {
2060 if (cmd == POOL_SCRUB_PAUSE)
2061 return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
2063 return (zfs_error(hdl, EZFS_SCRUBBING, msg));
2065 return (zfs_error(hdl, EZFS_RESILVERING, msg));
2067 } else if (err == ENOENT) {
2068 return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
2070 return (zpool_standard_error(hdl, err, msg));
2075 * Find a vdev that matches the search criteria specified. We use the
2076 * nvpair name to determine how we should look for the device.
2077 * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
2078 * spare, but FALSE if it's an INUSE spare.
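 *
 * The 'search' nvlist holds a single key/value pair, e.g.
 * { ZPOOL_CONFIG_GUID: 0x1234 }, { ZPOOL_CONFIG_PATH: "/dev/sda1" }, or
 * { ZPOOL_CONFIG_TYPE: "mirror-0" }; see zpool_find_vdev() below for how
 * it is built.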
2081 vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
2082 boolean_t *l2cache, boolean_t *log)
2089 nvpair_t *pair = nvlist_next_nvpair(search, NULL);
2091 /* Nothing to look for */
2092 if (search == NULL || pair == NULL)
2095 /* Obtain the key we will use to search */
2096 srchkey = nvpair_name(pair);
2098 switch (nvpair_type(pair)) {
2099 case DATA_TYPE_UINT64:
2100 if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
2101 uint64_t srchval, theguid;
2103 verify(nvpair_value_uint64(pair, &srchval) == 0);
2104 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2106 if (theguid == srchval)
2111 case DATA_TYPE_STRING: {
2112 char *srchval, *val;
2114 verify(nvpair_value_string(pair, &srchval) == 0);
2115 if (nvlist_lookup_string(nv, srchkey, &val) != 0)
2119 * Search for the requested value. Special cases:
2121 * - ZPOOL_CONFIG_PATH for whole disk entries. These end in
2122 * "-part1", or "p1". The suffix is hidden from the user,
2123 * but included in the string, so this matches around it.
2124 * - ZPOOL_CONFIG_PATH for short names zfs_strcmp_shortname()
2125 * is used to check all possible expanded paths.
2126 * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
2128 * Otherwise, all other searches are simple string compares.
2130 if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0) {
2131 uint64_t wholedisk = 0;
2133 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
2135 if (zfs_strcmp_pathname(srchval, val, wholedisk) == 0)
2138 } else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
2139 char *type, *idx, *end, *p;
2140 uint64_t id, vdev_id;
2143 * Determine our vdev type, keeping in mind
2144 * that the srchval is composed of a type and
2145 * vdev id pair (i.e. mirror-4).
2147 if ((type = strdup(srchval)) == NULL)
2150 if ((p = strrchr(type, '-')) == NULL) {
2158 * If the types don't match then keep looking.
2160 if (strncmp(val, type, strlen(val)) != 0) {
2165 verify(zpool_vdev_is_interior(type));
2166 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
2170 vdev_id = strtoull(idx, &end, 10);
2177 * Now verify that we have the correct vdev id.
2186 if (strcmp(srchval, val) == 0)
2195 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
2196 &child, &children) != 0)
2199 for (c = 0; c < children; c++) {
2200 if ((ret = vdev_to_nvlist_iter(child[c], search,
2201 avail_spare, l2cache, NULL)) != NULL) {
2203 * The 'is_log' value is only set for the top-level
2204 * vdev, not the leaf vdevs. So we always look up the
2205 * log device from the root of the vdev tree (where
2206 * 'log' is non-NULL).
2209 nvlist_lookup_uint64(child[c],
2210 ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
2218 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
2219 &child, &children) == 0) {
2220 for (c = 0; c < children; c++) {
2221 if ((ret = vdev_to_nvlist_iter(child[c], search,
2222 avail_spare, l2cache, NULL)) != NULL) {
2223 *avail_spare = B_TRUE;
2229 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
2230 &child, &children) == 0) {
2231 for (c = 0; c < children; c++) {
2232 if ((ret = vdev_to_nvlist_iter(child[c], search,
2233 avail_spare, l2cache, NULL)) != NULL) {
2244 * Given a physical path (minus the "/devices" prefix), find the
2248 zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
2249 boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
2251 nvlist_t *search, *nvroot, *ret;
2253 verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2254 verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
2256 verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2259 *avail_spare = B_FALSE;
2263 ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2264 nvlist_free(search);
2270 * Determine if we have an "interior" top-level vdev (i.e. mirror/raidz).
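 * Examples of interior vdev names, per the prefix checks below:
 * "mirror-0", "raidz1-2", "spare-1", "replacing-0".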
2273 zpool_vdev_is_interior(const char *name)
2275 if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
2276 strncmp(name, VDEV_TYPE_SPARE, strlen(VDEV_TYPE_SPARE)) == 0 ||
2278 VDEV_TYPE_REPLACING, strlen(VDEV_TYPE_REPLACING)) == 0 ||
2279 strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
2285 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
2286 boolean_t *l2cache, boolean_t *log)
2289 nvlist_t *nvroot, *search, *ret;
2292 verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2294 guid = strtoull(path, &end, 0);
2295 if (guid != 0 && *end == '\0') {
2296 verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
2297 } else if (zpool_vdev_is_interior(path)) {
2298 verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
2300 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
2303 verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2306 *avail_spare = B_FALSE;
2310 ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2311 nvlist_free(search);
2317 vdev_is_online(nvlist_t *nv)
2321 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
2322 nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
2323 nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
2330 * Helper function for zpool_get_physpaths().
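 * Appends this vdev's ZPOOL_CONFIG_PHYS_PATH to 'physpath', separating
 * multiple paths with a single space and tracking the running length in
 * '*bytes_written'.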
2333 vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
2334 size_t *bytes_written)
2336 size_t bytes_left, pos, rsz;
2340 if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
2342 return (EZFS_NODEVICE);
2344 pos = *bytes_written;
2345 bytes_left = physpath_size - pos;
2346 format = (pos == 0) ? "%s" : " %s";
2348 rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
2349 *bytes_written += rsz;
2351 if (rsz >= bytes_left) {
2352 /* if physpath was not copied properly, clear it */
2353 if (bytes_left != 0) {
2356 return (EZFS_NOSPC);
2362 vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
2363 size_t *rsz, boolean_t is_spare)
2368 if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
2369 return (EZFS_INVALCONFIG);
2371 if (strcmp(type, VDEV_TYPE_DISK) == 0) {
2373 * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
2374 * For a spare vdev, we only want to boot from the active
2379 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
2382 return (EZFS_INVALCONFIG);
2385 if (vdev_is_online(nv)) {
2386 if ((ret = vdev_get_one_physpath(nv, physpath,
2387 phypath_size, rsz)) != 0)
2390 } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
2391 strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
2392 strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
2393 (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
2398 if (nvlist_lookup_nvlist_array(nv,
2399 ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
2400 return (EZFS_INVALCONFIG);
2402 for (i = 0; i < count; i++) {
2403 ret = vdev_get_physpaths(child[i], physpath,
2404 phypath_size, rsz, is_spare);
2405 if (ret == EZFS_NOSPC)
2410 return (EZFS_POOL_INVALARG);
2414 * Get phys_path for a root pool config.
2415 * Return 0 on success; non-zero on failure.
2418 zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
2421 nvlist_t *vdev_root;
2428 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2430 return (EZFS_INVALCONFIG);
2432 if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
2433 nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
2434 &child, &count) != 0)
2435 return (EZFS_INVALCONFIG);
2438 * root pool can only have a single top-level vdev.
2440 if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1)
2441 return (EZFS_POOL_INVALARG);
2443 (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
2446 /* No online devices */
2448 return (EZFS_NODEVICE);
2454 * Get phys_path for a root pool
2455 * Return 0 on success; non-zero on failure.
2458 zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
2460 return (zpool_get_config_physpath(zhp->zpool_config, physpath,
2465 * If the device has been dynamically expanded, then we need to relabel
2466 * the disk to use the new unallocated space.
2469 zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg)
2473 if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
2474 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2475 "relabel '%s': unable to open device: %d"), path, errno);
2476 return (zfs_error(hdl, EZFS_OPENFAILED, msg));
2480 * It's possible that we might encounter an error if the device
2481 * does not have any unallocated space left. If so, we simply
2482 * ignore that error and continue on.
2484 * Also, we don't call efi_rescan() - that would just return EBUSY.
2485 * The module will do it for us in vdev_disk_open().
2487 error = efi_use_whole_disk(fd);
2489 /* Flush the buffers to disk and invalidate the page cache. */
2491 (void) ioctl(fd, BLKFLSBUF);
2494 if (error && error != VT_ENOSPC) {
2495 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2496 "relabel '%s': unable to read disk capacity"), path);
2497 return (zfs_error(hdl, EZFS_NOCAP, msg));
2504 * Convert a vdev path to a GUID. Returns GUID or 0 on error.
2506 * If is_spare, is_l2cache, or is_log is non-NULL, then store within it
2507 * if the VDEV is a spare, l2cache, or log device. If they're NULL then
2511 zpool_vdev_path_to_guid_impl(zpool_handle_t *zhp, const char *path,
2512 boolean_t *is_spare, boolean_t *is_l2cache, boolean_t *is_log)
2515 boolean_t spare = B_FALSE, l2cache = B_FALSE, log = B_FALSE;
2518 if ((tgt = zpool_find_vdev(zhp, path, &spare, &l2cache,
2522 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &guid) == 0);
2523 if (is_spare != NULL)
2525 if (is_l2cache != NULL)
2526 *is_l2cache = l2cache;
2533 /* Convert a vdev path to a GUID. Returns GUID or 0 on error. */
2535 zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path)
2537 return (zpool_vdev_path_to_guid_impl(zhp, path, NULL, NULL, NULL));
2541 * Bring the specified vdev online. The 'flags' parameter is a set of the
2542 * ZFS_ONLINE_* flags.
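 *
 * Illustrative use (assumes an open handle 'zhp'):
 *
 *	vdev_state_t newstate;
 *	if (zpool_vdev_online(zhp, "/dev/sda1", ZFS_ONLINE_EXPAND,
 *	    &newstate) == 0 && newstate == VDEV_STATE_HEALTHY)
 *		(void) printf("device expanded and healthy\n");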
2545 zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
2546 vdev_state_t *newstate)
2548 zfs_cmd_t zc = {"\0"};
2552 boolean_t avail_spare, l2cache, islog;
2553 libzfs_handle_t *hdl = zhp->zpool_hdl;
2556 if (flags & ZFS_ONLINE_EXPAND) {
2557 (void) snprintf(msg, sizeof (msg),
2558 dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
2560 (void) snprintf(msg, sizeof (msg),
2561 dgettext(TEXT_DOMAIN, "cannot online %s"), path);
2564 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2565 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2567 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2569 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2572 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2574 if ((flags & ZFS_ONLINE_EXPAND ||
2575 zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) &&
2576 nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH, &pathname) == 0) {
2577 uint64_t wholedisk = 0;
2579 (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
2583 * XXX - L2ARC 1.0 devices can't support expansion.
2586 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2587 "cannot expand cache devices"));
2588 return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
2592 const char *fullpath = path;
2593 char buf[MAXPATHLEN];
2595 if (path[0] != '/') {
2596 error = zfs_resolve_shortname(path, buf,
2599 return (zfs_error(hdl, EZFS_NODEVICE,
2605 error = zpool_relabel_disk(hdl, fullpath, msg);
2611 zc.zc_cookie = VDEV_STATE_ONLINE;
2614 if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
2615 if (errno == EINVAL) {
2616 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
2617 "from this pool into a new one. Use '%s' "
2618 "instead"), "zpool detach");
2619 return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
2621 return (zpool_standard_error(hdl, errno, msg));
2624 *newstate = zc.zc_cookie;
2629 * Take the specified vdev offline.
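 *
 * Illustrative use; passing istmp == B_TRUE makes the offline
 * non-persistent (ZFS_OFFLINE_TEMPORARY):
 *
 *	(void) zpool_vdev_offline(zhp, "/dev/sda1", B_FALSE);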
2632 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
2634 zfs_cmd_t zc = {"\0"};
2637 boolean_t avail_spare, l2cache;
2638 libzfs_handle_t *hdl = zhp->zpool_hdl;
2640 (void) snprintf(msg, sizeof (msg),
2641 dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
2643 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2644 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2646 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2648 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2651 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2653 zc.zc_cookie = VDEV_STATE_OFFLINE;
2654 zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
2656 if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2663 * There are no other replicas of this device.
2665 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2669 * The log device has unplayed logs
2671 return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
2674 return (zpool_standard_error(hdl, errno, msg));
2679 * Mark the given vdev faulted.
2682 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2684 zfs_cmd_t zc = {"\0"};
2686 libzfs_handle_t *hdl = zhp->zpool_hdl;
2688 (void) snprintf(msg, sizeof (msg),
2689 dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
2691 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2693 zc.zc_cookie = VDEV_STATE_FAULTED;
2696 if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2703 * There are no other replicas of this device.
2705 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2708 return (zpool_standard_error(hdl, errno, msg));
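/*
 * Illustrative sketch (hypothetical helper, not a library API): fault a
 * device by path, combining the path-to-GUID lookup above with
 * zpool_vdev_fault().  VDEV_AUX_ERR_EXCEEDED is used here only as a
 * representative aux reason.
 */
static int
example_fault_by_path(zpool_handle_t *zhp, const char *devpath)
{
	uint64_t guid = zpool_vdev_path_to_guid(zhp, devpath);

	if (guid == 0)
		return (-1);
	return (zpool_vdev_fault(zhp, guid, VDEV_AUX_ERR_EXCEEDED));
}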
2714 * Mark the given vdev degraded.
2717 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2719 zfs_cmd_t zc = {"\0"};
2721 libzfs_handle_t *hdl = zhp->zpool_hdl;
2723 (void) snprintf(msg, sizeof (msg),
2724 dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
2726 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2728 zc.zc_cookie = VDEV_STATE_DEGRADED;
2731 if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2734 return (zpool_standard_error(hdl, errno, msg));
2738 * Returns TRUE if the given nvlist is a vdev that was originally swapped in as a hot spare.
2742 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
2748 if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
2750 verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
2753 if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
2754 children == 2 && child[which] == tgt)
2757 for (c = 0; c < children; c++)
2758 if (is_replacing_spare(child[c], tgt, which))
2766 * Attach new_disk (fully described by nvroot) to old_disk.
2767 * If 'replacing' is specified, the new disk will replace the old one.
2770 zpool_vdev_attach(zpool_handle_t *zhp,
2771 const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
2773 zfs_cmd_t zc = {"\0"};
2777 boolean_t avail_spare, l2cache, islog;
2782 nvlist_t *config_root;
2783 libzfs_handle_t *hdl = zhp->zpool_hdl;
2784 boolean_t rootpool = zpool_is_bootable(zhp);
2787 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2788 "cannot replace %s with %s"), old_disk, new_disk);
2790 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2791 "cannot attach %s to %s"), new_disk, old_disk);
2793 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2794 if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
2796 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2799 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2802 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2804 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2805 zc.zc_cookie = replacing;
2807 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
2808 &child, &children) != 0 || children != 1) {
2809 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2810 "new device must be a single disk"));
2811 return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
2814 verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
2815 ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
2817 if ((newname = zpool_vdev_name(NULL, NULL, child[0], 0)) == NULL)
2821 * If the target is a hot spare that has been swapped in, we can only
2822 * replace it with another hot spare.
2825 nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
2826 (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
2827 NULL) == NULL || !avail_spare) &&
2828 is_replacing_spare(config_root, tgt, 1)) {
2829 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2830 "can only be replaced by another hot spare"));
2832 return (zfs_error(hdl, EZFS_BADTARGET, msg));
2837 if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
2840 ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);
2842 zcmd_free_nvlists(&zc);
2847 * XXX need a better way to prevent user from
2848 * booting up a half-baked vdev.
2850 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
2851 "sure to wait until resilver is done "
2852 "before rebooting.\n"));
2860 * Can't attach to or replace this type of vdev.
2863 uint64_t version = zpool_get_prop_int(zhp,
2864 ZPOOL_PROP_VERSION, NULL);
2867 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2868 "cannot replace a log with a spare"));
2869 else if (version >= SPA_VERSION_MULTI_REPLACE)
2870 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2871 "already in replacing/spare config; wait "
2872 "for completion or use 'zpool detach'"));
2874 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2875 "cannot replace a replacing device"));
2877 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2878 "can only attach to mirrors and top-level "
2881 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
2886 * The new device must be a single disk.
2888 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2889 "new device must be a single disk"));
2890 (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
2894 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy, "
2895 "or device removal is in progress"),
2897 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2902 * The new device is too small.
2904 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2905 "device is too small"));
2906 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2911 * The new device has a different optimal sector size.
2913 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2914 "new device has a different optimal sector size; use the "
2915 "option '-o ashift=N' to override the optimal size"));
2916 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2921 * The resulting top-level vdev spec won't fit in the label.
2923 (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
2927 (void) zpool_standard_error(hdl, errno, msg);
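/*
 * Illustrative sketch (hypothetical helper, not a library API): build a
 * minimal single-disk nvroot and attach it as a mirror of an existing
 * device.  A real consumer (e.g. the zpool command) labels the new disk
 * and fills in more vdev properties first; the device paths here are
 * placeholders.
 */
static int
example_attach_mirror(zpool_handle_t *zhp, const char *old_dev,
    const char *new_dev)
{
	nvlist_t *child = fnvlist_alloc();
	nvlist_t *nvroot = fnvlist_alloc();
	int ret;

	fnvlist_add_string(child, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK);
	fnvlist_add_string(child, ZPOOL_CONFIG_PATH, new_dev);
	fnvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT);
	fnvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &child, 1);

	/* replacing == B_FALSE means attach (mirror), not replace */
	ret = zpool_vdev_attach(zhp, old_dev, new_dev, nvroot, B_FALSE);

	fnvlist_free(child);
	fnvlist_free(nvroot);
	return (ret);
}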
2934 * Detach the specified device.
2937 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
2939 zfs_cmd_t zc = {"\0"};
2942 boolean_t avail_spare, l2cache;
2943 libzfs_handle_t *hdl = zhp->zpool_hdl;
2945 (void) snprintf(msg, sizeof (msg),
2946 dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
2948 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2949 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2951 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2954 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2957 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2959 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2961 if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
2968 * Can't detach from this type of vdev.
2970 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
2971 "applicable to mirror and replacing vdevs"));
2972 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
2977 * There are no other replicas of this device.
2979 (void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
2983 (void) zpool_standard_error(hdl, errno, msg);
2990 * Find a mirror vdev in the source nvlist.
2992 * The mchild array contains a list of disks in one of the top-level mirrors
2993 * of the source pool. The schild array contains a list of disks that the
2994 * user specified on the command line. We loop over the mchild array to
2995 * see if any entry in the schild array matches.
2997 * If a disk in the mchild array is found in the schild array, we return
2998 * the index of that entry. Otherwise we return -1.
3001 find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
3002 nvlist_t **schild, uint_t schildren)
3006 for (mc = 0; mc < mchildren; mc++) {
3008 char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
3011 for (sc = 0; sc < schildren; sc++) {
3012 char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
3014 boolean_t result = (strcmp(mpath, spath) == 0);
3030 * Split a mirror pool. If newroot points to null, then a new nvlist
3031 * is generated and it is the responsibility of the caller to free it.
3034 zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
3035 nvlist_t *props, splitflags_t flags)
3037 zfs_cmd_t zc = {"\0"};
3039 nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
3040 nvlist_t **varray = NULL, *zc_props = NULL;
3041 uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
3042 libzfs_handle_t *hdl = zhp->zpool_hdl;
3043 uint64_t vers, readonly = B_FALSE;
3044 boolean_t freelist = B_FALSE, memory_err = B_TRUE;
3047 (void) snprintf(msg, sizeof (msg),
3048 dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
3050 if (!zpool_name_valid(hdl, B_FALSE, newname))
3051 return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
3053 if ((config = zpool_get_config(zhp, NULL)) == NULL) {
3054 (void) fprintf(stderr, gettext("Internal error: unable to "
3055 "retrieve pool configuration\n"));
3059 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
3061 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);
3064 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
3065 if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
3066 props, vers, flags, msg)) == NULL)
3068 (void) nvlist_lookup_uint64(zc_props,
3069 zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly);
3071 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3072 "property %s can only be set at import time"),
3073 zpool_prop_to_name(ZPOOL_PROP_READONLY));
3078 if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
3080 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3081 "Source pool is missing vdev tree"));
3082 nvlist_free(zc_props);
3086 varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
3089 if (*newroot == NULL ||
3090 nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
3091 &newchild, &newchildren) != 0)
3094 for (c = 0; c < children; c++) {
3095 uint64_t is_log = B_FALSE, is_hole = B_FALSE;
3097 nvlist_t **mchild, *vdev;
3102 * Unlike cache & spares, slogs are stored in the
3103 * ZPOOL_CONFIG_CHILDREN array. We filter them out here.
3105 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
3107 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
3109 if (is_log || is_hole) {
3111 * Create a hole vdev and put it in the config.
3113 if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
3115 if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
3116 VDEV_TYPE_HOLE) != 0)
3118 if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
3123 varray[vcount++] = vdev;
3127 verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
3129 if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
3130 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3131 "Source pool must be composed only of mirrors\n"));
3132 retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
3136 verify(nvlist_lookup_nvlist_array(child[c],
3137 ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
3139 /* find or add an entry for this top-level vdev */
3140 if (newchildren > 0 &&
3141 (entry = find_vdev_entry(zhp, mchild, mchildren,
3142 newchild, newchildren)) >= 0) {
3143 /* We found a disk that the user specified. */
3144 vdev = mchild[entry];
3147 /* User didn't specify a disk for this vdev. */
3148 vdev = mchild[mchildren - 1];
3151 if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
3155 /* did we find every disk the user specified? */
3156 if (found != newchildren) {
3157 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
3158 "include at most one disk from each mirror"));
3159 retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
3163 /* Prepare the nvlist for populating. */
3164 if (*newroot == NULL) {
3165 if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
3168 if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
3169 VDEV_TYPE_ROOT) != 0)
3172 verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
3175 /* Add all the children we found */
3176 if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
3177 lastlog == 0 ? vcount : lastlog) != 0)
3181 * If we're just doing a dry run, exit now with success.
3184 memory_err = B_FALSE;
3189 /* now build up the config list & call the ioctl */
3190 if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
3193 if (nvlist_add_nvlist(newconfig,
3194 ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
3195 nvlist_add_string(newconfig,
3196 ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
3197 nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
3201 * The new pool is automatically part of the namespace unless we
3202 * explicitly export it.
3205 zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
3206 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3207 (void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
3208 if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
3210 if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
3213 if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
3214 retval = zpool_standard_error(hdl, errno, msg);
3219 memory_err = B_FALSE;
3222 if (varray != NULL) {
3225 for (v = 0; v < vcount; v++)
3226 nvlist_free(varray[v]);
3229 zcmd_free_nvlists(&zc);
3230 nvlist_free(zc_props);
3231 nvlist_free(newconfig);
3233 nvlist_free(*newroot);
3241 return (no_memory(hdl));
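/*
 * Illustrative sketch (hypothetical helper, not a library API): a dry-run
 * split of a mirrored pool, letting the library pick the last disk of each
 * mirror.  This assumes the dryrun bit in splitflags_t; the new pool name
 * is a placeholder and the generated nvroot is freed by the caller.
 */
static int
example_split_dryrun(zpool_handle_t *zhp, char *newname)
{
	splitflags_t flags = { 0 };
	nvlist_t *newroot = NULL;
	int ret;

	flags.dryrun = 1;
	ret = zpool_vdev_split(zhp, newname, &newroot, NULL, flags);
	nvlist_free(newroot);
	return (ret);
}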
3247 * Remove the given device.
3250 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
3252 zfs_cmd_t zc = {"\0"};
3255 boolean_t avail_spare, l2cache, islog;
3256 libzfs_handle_t *hdl = zhp->zpool_hdl;
3259 (void) snprintf(msg, sizeof (msg),
3260 dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
3262 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3263 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3265 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3267 version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
3268 if (islog && version < SPA_VERSION_HOLES) {
3269 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3270 "pool must be upgraded to support log removal"));
3271 return (zfs_error(hdl, EZFS_BADVERSION, msg));
3274 if (!islog && !avail_spare && !l2cache && zpool_is_bootable(zhp)) {
3275 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3276 "root pool can not have removed devices, "
3277 "because GRUB does not understand them"));
3278 return (zfs_error(hdl, EINVAL, msg));
3281 zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
3283 if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
3289 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3290 "invalid config; all top-level vdevs must "
3291 "have the same sector size and not be raidz."));
3292 (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
3297 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3298 "Mount encrypted datasets to replay logs."));
3300 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3301 "Pool busy; removal may already be in progress"));
3303 (void) zfs_error(hdl, EZFS_BUSY, msg);
3308 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3309 "Mount encrypted datasets to replay logs."));
3310 (void) zfs_error(hdl, EZFS_BUSY, msg);
3312 (void) zpool_standard_error(hdl, errno, msg);
3317 (void) zpool_standard_error(hdl, errno, msg);
3323 zpool_vdev_remove_cancel(zpool_handle_t *zhp)
3327 libzfs_handle_t *hdl = zhp->zpool_hdl;
3329 (void) snprintf(msg, sizeof (msg),
3330 dgettext(TEXT_DOMAIN, "cannot cancel removal"));
3332 bzero(&zc, sizeof (zc));
3333 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3336 if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
3339 return (zpool_standard_error(hdl, errno, msg));
3343 zpool_vdev_indirect_size(zpool_handle_t *zhp, const char *path,
3348 boolean_t avail_spare, l2cache, islog;
3349 libzfs_handle_t *hdl = zhp->zpool_hdl;
3351 (void) snprintf(msg, sizeof (msg),
3352 dgettext(TEXT_DOMAIN, "cannot determine indirect size of %s"),
3355 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3357 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3359 if (avail_spare || l2cache || islog) {
3364 if (nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_INDIRECT_SIZE, sizep) != 0) {
3365 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3366 "indirect size not available"));
3367 return (zfs_error(hdl, EINVAL, msg));
3373 * Clear the errors for the pool, or the particular device if specified.
3376 zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
3378 zfs_cmd_t zc = {"\0"};
3381 zpool_load_policy_t policy;
3382 boolean_t avail_spare, l2cache;
3383 libzfs_handle_t *hdl = zhp->zpool_hdl;
3384 nvlist_t *nvi = NULL;
3388 (void) snprintf(msg, sizeof (msg),
3389 dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3392 (void) snprintf(msg, sizeof (msg),
3393 dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3396 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3398 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
3399 &l2cache, NULL)) == NULL)
3400 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3403 * Don't allow error clearing for hot spares. Do allow
3404 * error clearing for l2cache devices.
3407 return (zfs_error(hdl, EZFS_ISSPARE, msg));
3409 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
3413 zpool_get_load_policy(rewindnvl, &policy);
3414 zc.zc_cookie = policy.zlp_rewind;
3416 if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
3419 if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
3422 while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
3424 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3425 zcmd_free_nvlists(&zc);
3430 if (!error || ((policy.zlp_rewind & ZPOOL_TRY_REWIND) &&
3431 errno != EPERM && errno != EACCES)) {
3432 if (policy.zlp_rewind &
3433 (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
3434 (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
3435 zpool_rewind_exclaim(hdl, zc.zc_name,
3436 ((policy.zlp_rewind & ZPOOL_TRY_REWIND) != 0),
3440 zcmd_free_nvlists(&zc);
3444 zcmd_free_nvlists(&zc);
3445 return (zpool_standard_error(hdl, errno, msg));
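/*
 * Illustrative sketch (hypothetical helper, not a library API): clear
 * errors on an entire pool with no rewind, similar in shape to
 * 'zpool clear <pool>'.  It assumes the ZPOOL_LOAD_REWIND_POLICY nvpair
 * name used by the load-policy code above.
 */
static int
example_clear_pool(zpool_handle_t *zhp)
{
	nvlist_t *policy = fnvlist_alloc();
	int ret;

	fnvlist_add_uint64(policy, ZPOOL_LOAD_REWIND_POLICY, ZPOOL_NO_REWIND);
	ret = zpool_clear(zhp, NULL, policy);
	fnvlist_free(policy);
	return (ret);
}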
3449 * Similar to zpool_clear(), but takes a GUID (used by fmd).
3452 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
3454 zfs_cmd_t zc = {"\0"};
3456 libzfs_handle_t *hdl = zhp->zpool_hdl;
3458 (void) snprintf(msg, sizeof (msg),
3459 dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
3460 (u_longlong_t)guid);
3462 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3464 zc.zc_cookie = ZPOOL_NO_REWIND;
3466 if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
3469 return (zpool_standard_error(hdl, errno, msg));
3473 * Change the GUID for a pool.
3476 zpool_reguid(zpool_handle_t *zhp)
3479 libzfs_handle_t *hdl = zhp->zpool_hdl;
3480 zfs_cmd_t zc = {"\0"};
3482 (void) snprintf(msg, sizeof (msg),
3483 dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name);
3485 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3486 if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0)
3489 return (zpool_standard_error(hdl, errno, msg));
3496 zpool_reopen_one(zpool_handle_t *zhp, void *data)
3498 libzfs_handle_t *hdl = zpool_get_handle(zhp);
3499 const char *pool_name = zpool_get_name(zhp);
3500 boolean_t *scrub_restart = data;
3503 error = lzc_reopen(pool_name, *scrub_restart);
3505 return (zpool_standard_error_fmt(hdl, error,
3506 dgettext(TEXT_DOMAIN, "cannot reopen '%s'"), pool_name));
3512 /* call into libzfs_core to execute the sync IOCTL per pool */
3514 zpool_sync_one(zpool_handle_t *zhp, void *data)
3517 libzfs_handle_t *hdl = zpool_get_handle(zhp);
3518 const char *pool_name = zpool_get_name(zhp);
3519 boolean_t *force = data;
3520 nvlist_t *innvl = fnvlist_alloc();
3522 fnvlist_add_boolean_value(innvl, "force", *force);
3523 if ((ret = lzc_sync(pool_name, innvl, NULL)) != 0) {
3525 return (zpool_standard_error_fmt(hdl, ret,
3526 dgettext(TEXT_DOMAIN, "sync '%s' failed"), pool_name));
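/*
 * Illustrative sketch (hypothetical helper, not a library API): force a
 * txg sync on every imported pool by iterating zpool_sync_one() with
 * zpool_iter(), the same shape 'zpool sync' uses.
 */
static int
example_sync_all_pools(libzfs_handle_t *hdl, boolean_t force)
{
	return (zpool_iter(hdl, zpool_sync_one, &force));
}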
3533 #if defined(__sun__) || defined(__sun)
3535 * Convert from a devid string to a path.
3538 devid_to_path(char *devid_str)
3543 devid_nmlist_t *list = NULL;
3546 if (devid_str_decode(devid_str, &devid, &minor) != 0)
3549 ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
3551 devid_str_free(minor);
3558 * If the strdup() fails, we will just return NULL below.
3560 path = strdup(list[0].devname);
3562 devid_free_nmlist(list);
3568 * Convert from a path to a devid string.
3571 path_to_devid(const char *path)
3577 if ((fd = open(path, O_RDONLY)) < 0)
3582 if (devid_get(fd, &devid) == 0) {
3583 if (devid_get_minor_name(fd, &minor) == 0)
3584 ret = devid_str_encode(devid, minor);
3586 devid_str_free(minor);
3595 * Issue the necessary ioctl() to update the stored path value for the vdev. We
3596 * ignore any failure here, since a common case is for an unprivileged user to
3597 * type 'zpool status', and we'll display the correct information anyway.
3600 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
3602 zfs_cmd_t zc = {"\0"};
3604 (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3605 (void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
3606 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3609 (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
3614 * Remove partition suffix from a vdev path. Partition suffixes may take three
3615 * forms: "-partX", "pX", or "X", where X is a string of digits. The second
3616 * case only occurs when the suffix is preceded by a digit, i.e. "md0p0". The
3617 * third case only occurs when preceded by a string matching the regular
3618 * expression "^([hsv]|xv)d[a-z]+", i.e. a scsi, ide, virtio or xen disk.
3620 * caller must free the returned string
3623 zfs_strip_partition(char *path)
3625 char *tmp = strdup(path);
3626 char *part = NULL, *d = NULL;
3630 if ((part = strstr(tmp, "-part")) && part != tmp) {
3632 } else if ((part = strrchr(tmp, 'p')) &&
3633 part > tmp + 1 && isdigit(*(part-1))) {
3635 } else if ((tmp[0] == 'h' || tmp[0] == 's' || tmp[0] == 'v') &&
3637 for (d = &tmp[2]; isalpha(*d); part = ++d) { }
3638 } else if (strncmp("xvd", tmp, 3) == 0) {
3639 for (d = &tmp[3]; isalpha(*d); part = ++d) { }
3641 if (part && d && *d != '\0') {
3642 for (; isdigit(*d); d++) { }
3651 * Same as zfs_strip_partition, but allows "/dev/" to be in the pathname
3656 * Returned string must be freed.
3659 zfs_strip_partition_path(char *path)
3661 char *newpath = strdup(path);
3668 /* Point to "sda1" part of "/dev/sda1" */
3669 sd_offset = strrchr(newpath, '/') + 1;
3671 /* Get our new name "sda" */
3672 new_sd = zfs_strip_partition(sd_offset);
3678 /* Paste the "sda" where "sda1" was */
3679 strlcpy(sd_offset, new_sd, strlen(sd_offset) + 1);
3681 /* Free temporary "sda" */
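/*
 * Illustrative sketch (hypothetical helper, not a library API): strip the
 * partition suffix from a full device path.  The input path is a
 * placeholder and the result must be freed by the caller.
 */
static void
example_strip_partition(void)
{
	char *disk = zfs_strip_partition_path("/dev/sda1");

	if (disk != NULL) {
		(void) printf("%s\n", disk);	/* expected: "/dev/sda" */
		free(disk);
	}
}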
3687 #define PATH_BUF_LEN 64
3690 * Given a vdev, return the name to display in iostat. If the vdev has a path,
3691 * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
3692 * We also check if this is a whole disk, in which case we strip off the
3693 * trailing 's0' slice name.
3695 * This routine is also responsible for identifying when disks have been
3696 * reconfigured in a new location. The kernel will have opened the device by
3697 * devid, but the path will still refer to the old location. To catch this, we
3698 * first do a path -> devid translation (which is fast for the common case). If
3699 * the devid matches, we're done. If not, we do a reverse devid -> path
3700 * translation and issue the appropriate ioctl() to update the path of the vdev.
3701 * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any of these checks.
3705 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
3708 char *path, *type, *env;
3710 char buf[PATH_BUF_LEN];
3711 char tmpbuf[PATH_BUF_LEN];
3714 * vdev_name will be "root"/"root-0" for the root vdev, but it is the
3715 * zpool name that will be displayed to the user.
3717 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
3718 if (zhp != NULL && strcmp(type, "root") == 0)
3719 return (zfs_strdup(hdl, zpool_get_name(zhp)));
3721 env = getenv("ZPOOL_VDEV_NAME_PATH");
3722 if (env && (strtoul(env, NULL, 0) > 0 ||
3723 !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3724 name_flags |= VDEV_NAME_PATH;
3726 env = getenv("ZPOOL_VDEV_NAME_GUID");
3727 if (env && (strtoul(env, NULL, 0) > 0 ||
3728 !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3729 name_flags |= VDEV_NAME_GUID;
3731 env = getenv("ZPOOL_VDEV_NAME_FOLLOW_LINKS");
3732 if (env && (strtoul(env, NULL, 0) > 0 ||
3733 !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3734 name_flags |= VDEV_NAME_FOLLOW_LINKS;
3736 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &value) == 0 ||
3737 name_flags & VDEV_NAME_GUID) {
3738 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value);
3739 (void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)value);
3741 } else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
3742 #if defined(__sun__) || defined(__sun)
3744 * Live VDEV path updates to a kernel VDEV during a
3745 * zpool_vdev_name lookup are not supported on Linux.
3752 * If the device is dead (faulted, offline, etc) then don't
3753 * bother opening it. Otherwise we may be forcing the user to
3754 * open a misbehaving device, which can have undesirable effects.
3757 if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
3758 (uint64_t **)&vs, &vsc) != 0 ||
3759 vs->vs_state >= VDEV_STATE_DEGRADED) &&
3761 nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
3763 * Determine if the current path is correct.
3765 char *newdevid = path_to_devid(path);
3767 if (newdevid == NULL ||
3768 strcmp(devid, newdevid) != 0) {
3771 if ((newpath = devid_to_path(devid)) != NULL) {
3773 * Update the path appropriately.
3775 set_path(zhp, nv, newpath);
3776 if (nvlist_add_string(nv,
3777 ZPOOL_CONFIG_PATH, newpath) == 0)
3778 verify(nvlist_lookup_string(nv,
3786 devid_str_free(newdevid);
3790 if (name_flags & VDEV_NAME_FOLLOW_LINKS) {
3791 char *rp = realpath(path, NULL);
3793 strlcpy(buf, rp, sizeof (buf));
3800 * For a block device only use the name.
3802 if ((strcmp(type, VDEV_TYPE_DISK) == 0) &&
3803 !(name_flags & VDEV_NAME_PATH)) {
3804 path = strrchr(path, '/');
3809 * Remove the partition from the path if this is a whole disk.
3811 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value)
3812 == 0 && value && !(name_flags & VDEV_NAME_PATH)) {
3813 return (zfs_strip_partition(path));
3819 * If it's a raidz device, we need to stick in the parity level.
3821 if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
3822 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
3824 (void) snprintf(buf, sizeof (buf), "%s%llu", path,
3825 (u_longlong_t)value);
3830 * We identify each top-level vdev by using a <type-id>
3831 * naming convention.
3833 if (name_flags & VDEV_NAME_TYPE_ID) {
3835 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
3837 (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s-%llu",
3838 path, (u_longlong_t)id);
3843 return (zfs_strdup(hdl, path));
3847 zbookmark_mem_compare(const void *a, const void *b)
3849 return (memcmp(a, b, sizeof (zbookmark_phys_t)));
3853 * Retrieve the persistent error log, uniquify the members, and return to the caller.
3857 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
3859 zfs_cmd_t zc = {"\0"};
3860 libzfs_handle_t *hdl = zhp->zpool_hdl;
3862 zbookmark_phys_t *zb = NULL;
3866 * Retrieve the raw error list from the kernel. If the number of errors
3867 * has increased, allocate more space and continue until we get the entire list.
3870 verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
3874 zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
3875 count * sizeof (zbookmark_phys_t));
3876 zc.zc_nvlist_dst_size = count;
3877 (void) strcpy(zc.zc_name, zhp->zpool_name);
3879 if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
3881 free((void *)(uintptr_t)zc.zc_nvlist_dst);
3882 if (errno == ENOMEM) {
3885 count = zc.zc_nvlist_dst_size;
3886 dst = zfs_alloc(zhp->zpool_hdl, count *
3887 sizeof (zbookmark_phys_t));
3888 zc.zc_nvlist_dst = (uintptr_t)dst;
3890 return (zpool_standard_error_fmt(hdl, errno,
3891 dgettext(TEXT_DOMAIN, "errors: List of "
3892 "errors unavailable")));
3900 * Sort the resulting bookmarks. This is a little confusing due to the
3901 * implementation of ZFS_IOC_ERROR_LOG. The bookmarks are copied last
3902 * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks
3903 * _not_ copied as part of the process. So we point the start of our
3904 * array appropriately and decrement the total number of elements.
3906 zb = ((zbookmark_phys_t *)(uintptr_t)zc.zc_nvlist_dst) +
3907 zc.zc_nvlist_dst_size;
3908 count -= zc.zc_nvlist_dst_size;
3910 qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare);
3912 verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
3915 * Fill in the nverrlistp with nvlists of dataset and object numbers.
3917 for (i = 0; i < count; i++) {
3920 /* ignoring zb_blkid and zb_level for now */
3921 if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
3922 zb[i-1].zb_object == zb[i].zb_object)
3925 if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
3927 if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
3928 zb[i].zb_objset) != 0) {
3932 if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
3933 zb[i].zb_object) != 0) {
3937 if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
3944 free((void *)(uintptr_t)zc.zc_nvlist_dst);
3948 free((void *)(uintptr_t)zc.zc_nvlist_dst);
3949 return (no_memory(zhp->zpool_hdl));
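/*
 * Illustrative sketch (hypothetical helper, not a library API): walk the
 * error list returned above and translate each <dataset, object> pair into
 * a pathname, similar in shape to what 'zpool status -v' prints.
 */
static int
example_print_errlog(zpool_handle_t *zhp)
{
	nvlist_t *nverrlist = NULL;
	nvpair_t *elem = NULL;
	char pathname[MAXPATHLEN * 2];

	if (zpool_get_errlog(zhp, &nverrlist) != 0)
		return (-1);

	while ((elem = nvlist_next_nvpair(nverrlist, elem)) != NULL) {
		nvlist_t *nv;
		uint64_t dsobj, obj;

		verify(nvpair_value_nvlist(elem, &nv) == 0);
		dsobj = fnvlist_lookup_uint64(nv, ZPOOL_ERR_DATASET);
		obj = fnvlist_lookup_uint64(nv, ZPOOL_ERR_OBJECT);
		zpool_obj_to_path(zhp, dsobj, obj, pathname,
		    sizeof (pathname));
		(void) printf("%s\n", pathname);
	}
	nvlist_free(nverrlist);
	return (0);
}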
3953 * Upgrade a ZFS pool to the latest on-disk version.
3956 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
3958 zfs_cmd_t zc = {"\0"};
3959 libzfs_handle_t *hdl = zhp->zpool_hdl;
3961 (void) strcpy(zc.zc_name, zhp->zpool_name);
3962 zc.zc_cookie = new_version;
3964 if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
3965 return (zpool_standard_error_fmt(hdl, errno,
3966 dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
3972 zfs_save_arguments(int argc, char **argv, char *string, int len)
3976 (void) strlcpy(string, basename(argv[0]), len);
3977 for (i = 1; i < argc; i++) {
3978 (void) strlcat(string, " ", len);
3979 (void) strlcat(string, argv[i], len);
3984 zpool_log_history(libzfs_handle_t *hdl, const char *message)
3986 zfs_cmd_t zc = {"\0"};
3990 args = fnvlist_alloc();
3991 fnvlist_add_string(args, "message", message);
3992 err = zcmd_write_src_nvlist(hdl, &zc, args);
3994 err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc);
3996 zcmd_free_nvlists(&zc);
4001 * Perform ioctl to get some command history of a pool.
4003 * 'buf' is the buffer to fill up to 'len' bytes. 'off' is the
4004 * logical offset of the history buffer to start reading from.
4006 * Upon return, 'off' is the next logical offset to read from and
4007 * 'len' is the actual amount of bytes read into 'buf'.
4010 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
4012 zfs_cmd_t zc = {"\0"};
4013 libzfs_handle_t *hdl = zhp->zpool_hdl;
4015 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
4017 zc.zc_history = (uint64_t)(uintptr_t)buf;
4018 zc.zc_history_len = *len;
4019 zc.zc_history_offset = *off;
4021 if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
4024 return (zfs_error_fmt(hdl, EZFS_PERM,
4025 dgettext(TEXT_DOMAIN,
4026 "cannot show history for pool '%s'"),
4029 return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
4030 dgettext(TEXT_DOMAIN, "cannot get history for pool "
4031 "'%s'"), zhp->zpool_name));
4033 return (zfs_error_fmt(hdl, EZFS_BADVERSION,
4034 dgettext(TEXT_DOMAIN, "cannot get history for pool "
4035 "'%s', pool must be upgraded"), zhp->zpool_name));
4037 return (zpool_standard_error_fmt(hdl, errno,
4038 dgettext(TEXT_DOMAIN,
4039 "cannot get history for '%s'"), zhp->zpool_name));
4043 *len = zc.zc_history_len;
4044 *off = zc.zc_history_offset;
4050 * Process the buffer of nvlists, unpacking and storing each nvlist record
4051 * into 'records'. 'leftover' is set to the number of bytes that weren't
4052 * processed as there wasn't a complete record.
4055 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
4056 nvlist_t ***records, uint_t *numrecords)
4063 while (bytes_read > sizeof (reclen)) {
4065 /* get length of packed record (stored as little endian) */
4066 for (i = 0, reclen = 0; i < sizeof (reclen); i++)
4067 reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
4069 if (bytes_read < sizeof (reclen) + reclen)
4073 if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
4075 bytes_read -= sizeof (reclen) + reclen;
4076 buf += sizeof (reclen) + reclen;
4078 /* add record to nvlist array */
4080 if (ISP2(*numrecords + 1)) {
4081 tmp = realloc(*records,
4082 *numrecords * 2 * sizeof (nvlist_t *));
4090 (*records)[*numrecords - 1] = nv;
4093 *leftover = bytes_read;
4098 * Retrieve the command history of a pool.
4101 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
4104 int buflen = 128 * 1024;
4106 nvlist_t **records = NULL;
4107 uint_t numrecords = 0;
4110 buf = malloc(buflen);
4114 uint64_t bytes_read = buflen;
4117 if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
4120 /* if nothing else was read in, we're at EOF, just return */
4124 if ((err = zpool_history_unpack(buf, bytes_read,
4125 &leftover, &records, &numrecords)) != 0)
4128 if (leftover == bytes_read) {
4130 * no progress made, because buffer is not big enough
4131 * to hold this record; resize and retry.
4135 buf = malloc(buflen);
4146 verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
4147 verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
4148 records, numrecords) == 0);
4150 for (i = 0; i < numrecords; i++)
4151 nvlist_free(records[i]);
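/*
 * Illustrative sketch (hypothetical helper, not a library API): dump the
 * command strings from the history records returned above.  ZPOOL_HIST_CMD
 * is only present for user-initiated commands, so records without it are
 * simply skipped.
 */
static int
example_print_history(zpool_handle_t *zhp)
{
	nvlist_t *nvhis = NULL;
	nvlist_t **records;
	uint_t numrecords, i;
	char *cmd;

	if (zpool_get_history(zhp, &nvhis) != 0)
		return (-1);

	verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD,
	    &records, &numrecords) == 0);
	for (i = 0; i < numrecords; i++) {
		if (nvlist_lookup_string(records[i], ZPOOL_HIST_CMD,
		    &cmd) == 0)
			(void) printf("%s\n", cmd);
	}
	nvlist_free(nvhis);
	return (0);
}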
4158 * Retrieve the next event given the passed 'zevent_fd' file descriptor.
4159 * If there is a new event available 'nvp' will contain a newly allocated
4160 * nvlist and 'dropped' will be set to the number of missed events since
4161 * the last call to this function. When 'nvp' is set to NULL it indicates
4162 * no new events are available. In either case the function returns 0 and
4163 * it is up to the caller to free 'nvp'. In the case of a fatal error the
4164 * function will return a non-zero value. When the function is called in
4165 * blocking mode (the default, unless the ZEVENT_NONBLOCK flag is passed),
4166 * it will not return until a new event is available.
4169 zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp,
4170 int *dropped, unsigned flags, int zevent_fd)
4172 zfs_cmd_t zc = {"\0"};
4177 zc.zc_cleanup_fd = zevent_fd;
4179 if (flags & ZEVENT_NONBLOCK)
4180 zc.zc_guid = ZEVENT_NONBLOCK;
4182 if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0)
4186 if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) != 0) {
4189 error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
4190 dgettext(TEXT_DOMAIN, "zfs shutdown"));
4193 /* Blocking error case should not occur */
4194 if (!(flags & ZEVENT_NONBLOCK))
4195 error = zpool_standard_error_fmt(hdl, errno,
4196 dgettext(TEXT_DOMAIN, "cannot get event"));
4200 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
4201 error = zfs_error_fmt(hdl, EZFS_NOMEM,
4202 dgettext(TEXT_DOMAIN, "cannot get event"));
4208 error = zpool_standard_error_fmt(hdl, errno,
4209 dgettext(TEXT_DOMAIN, "cannot get event"));
4214 error = zcmd_read_dst_nvlist(hdl, &zc, nvp);
4218 *dropped = (int)zc.zc_cookie;
4220 zcmd_free_nvlists(&zc);
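/*
 * Illustrative sketch (hypothetical helper, not a library API): drain all
 * queued events without blocking.  It assumes the zevent file descriptor
 * comes from opening /dev/zfs, as the zpool and zed consumers do; each
 * returned nvlist is freed by the caller.
 */
static int
example_drain_events(libzfs_handle_t *hdl)
{
	int zevent_fd, dropped, total = 0;
	nvlist_t *nvl;

	if ((zevent_fd = open("/dev/zfs", O_RDWR)) < 0)
		return (-1);

	for (;;) {
		nvl = NULL;
		if (zpool_events_next(hdl, &nvl, &dropped, ZEVENT_NONBLOCK,
		    zevent_fd) != 0 || nvl == NULL)
			break;
		total++;
		nvlist_free(nvl);
	}
	(void) close(zevent_fd);
	return (total);
}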
4229 zpool_events_clear(libzfs_handle_t *hdl, int *count)
4231 zfs_cmd_t zc = {"\0"};
4234 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
4235 "cannot clear events"));
4237 if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0)
4238 return (zpool_standard_error_fmt(hdl, errno, msg));
4241 *count = (int)zc.zc_cookie; /* # of events cleared */
4247 * Seek to a specific EID, ZEVENT_SEEK_START, or ZEVENT_SEEK_END for
4248 * the passed zevent_fd file handle. On success zero is returned,
4249 * otherwise -1 is returned and hdl->libzfs_error is set to the errno.
4252 zpool_events_seek(libzfs_handle_t *hdl, uint64_t eid, int zevent_fd)
4254 zfs_cmd_t zc = {"\0"};
4258 zc.zc_cleanup_fd = zevent_fd;
4260 if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_SEEK, &zc) != 0) {
4263 error = zfs_error_fmt(hdl, EZFS_NOENT,
4264 dgettext(TEXT_DOMAIN, "cannot get event"));
4268 error = zfs_error_fmt(hdl, EZFS_NOMEM,
4269 dgettext(TEXT_DOMAIN, "cannot get event"));
4273 error = zpool_standard_error_fmt(hdl, errno,
4274 dgettext(TEXT_DOMAIN, "cannot get event"));
4283 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
4284 char *pathname, size_t len)
4286 zfs_cmd_t zc = {"\0"};
4287 boolean_t mounted = B_FALSE;
4288 char *mntpnt = NULL;
4289 char dsname[ZFS_MAX_DATASET_NAME_LEN];
4292 /* special case for the MOS */
4293 (void) snprintf(pathname, len, "<metadata>:<0x%llx>",
4298 /* get the dataset's name */
4299 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
4301 if (ioctl(zhp->zpool_hdl->libzfs_fd,
4302 ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
4303 /* just write out a path of two object numbers */
4304 (void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
4305 (longlong_t)dsobj, (longlong_t)obj);
4308 (void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
4310 /* find out if the dataset is mounted */
4311 mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
4313 /* get the corrupted object's path */
4314 (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
4316 if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
4319 (void) snprintf(pathname, len, "%s%s", mntpnt,
4322 (void) snprintf(pathname, len, "%s:%s",
4323 dsname, zc.zc_value);
4326 (void) snprintf(pathname, len, "%s:<0x%llx>", dsname,
4333 * Read the EFI label from the config, if a label does not exist then
4334 * pass back the error to the caller. If the caller has passed a non-NULL
4335 * diskaddr argument then we set it to the starting address of the EFI partition.
4339 read_efi_label(nvlist_t *config, diskaddr_t *sb)
4343 char diskname[MAXPATHLEN];
4346 if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
4349 (void) snprintf(diskname, sizeof (diskname), "%s%s", DISK_ROOT,
4350 strrchr(path, '/'));
4351 if ((fd = open(diskname, O_RDONLY|O_DIRECT)) >= 0) {
4352 struct dk_gpt *vtoc;
4354 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
4356 *sb = vtoc->efi_parts[0].p_start;
4365 * determine where a partition starts on a disk in the current configuration
4369 find_start_block(nvlist_t *config)
4373 diskaddr_t sb = MAXOFFSET_T;
4376 if (nvlist_lookup_nvlist_array(config,
4377 ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
4378 if (nvlist_lookup_uint64(config,
4379 ZPOOL_CONFIG_WHOLE_DISK,
4380 &wholedisk) != 0 || !wholedisk) {
4381 return (MAXOFFSET_T);
4383 if (read_efi_label(config, &sb) < 0)
4388 for (c = 0; c < children; c++) {
4389 sb = find_start_block(child[c]);
4390 if (sb != MAXOFFSET_T) {
4394 return (MAXOFFSET_T);
4398 zpool_label_disk_check(char *path)
4400 struct dk_gpt *vtoc;
4403 if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
4406 if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
4411 if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
4423 * Generate a unique partition name for the ZFS member. Partitions must
4424 * have unique names to ensure udev will be able to create symlinks under
4425 * /dev/disk/by-partlabel/ for all pool members. The partition names are
4426 * of the form <pool>-<unique-id>.
4429 zpool_label_name(char *label_name, int label_size)
4434 fd = open("/dev/urandom", O_RDONLY);
4436 if (read(fd, &id, sizeof (id)) != sizeof (id))
4443 id = (((uint64_t)rand()) << 32) | (uint64_t)rand();
4445 snprintf(label_name, label_size, "zfs-%016llx", (u_longlong_t)id);
4449 * Label an individual disk. The name provided is the short name,
4450 * stripped of any leading /dev path.
4453 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
4455 char path[MAXPATHLEN];
4456 struct dk_gpt *vtoc;
4458 size_t resv = EFI_MIN_RESV_SIZE;
4459 uint64_t slice_size;
4460 diskaddr_t start_block;
4463 /* prepare an error message just in case */
4464 (void) snprintf(errbuf, sizeof (errbuf),
4465 dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
4470 verify(nvlist_lookup_nvlist(zhp->zpool_config,
4471 ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
4473 if (zhp->zpool_start_block == 0)
4474 start_block = find_start_block(nvroot);
4476 start_block = zhp->zpool_start_block;
4477 zhp->zpool_start_block = start_block;
4480 start_block = NEW_START_BLOCK;
4483 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4485 if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) {
4487 * This shouldn't happen. We've long since verified that this
4488 * is a valid device.
4490 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
4491 "label '%s': unable to open device: %d"), path, errno);
4492 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
4495 if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
4497 * The only way this can fail is if we run out of memory, or we
4498 * were unable to read the disk's capacity
4500 if (errno == ENOMEM)
4501 (void) no_memory(hdl);
4504 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
4505 "label '%s': unable to read disk capacity"), path);
4507 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
4510 slice_size = vtoc->efi_last_u_lba + 1;
4511 slice_size -= EFI_MIN_RESV_SIZE;
4512 if (start_block == MAXOFFSET_T)
4513 start_block = NEW_START_BLOCK;
4514 slice_size -= start_block;
4515 slice_size = P2ALIGN(slice_size, PARTITION_END_ALIGNMENT);
4517 vtoc->efi_parts[0].p_start = start_block;
4518 vtoc->efi_parts[0].p_size = slice_size;
4521 * Why we use V_USR: V_BACKUP confuses users, and is considered
4522 * disposable by some EFI utilities (since EFI doesn't have a backup
4523 * slice). V_UNASSIGNED is supposed to be used only for zero size
4524 * partitions, and efi_write() will fail if we use it. V_ROOT, V_BOOT,
4525 * etc. were all pretty specific. V_USR is as close to reality as we
4526 * can get, in the absence of V_OTHER.
4528 vtoc->efi_parts[0].p_tag = V_USR;
4529 zpool_label_name(vtoc->efi_parts[0].p_name, EFI_PART_NAME_LEN);
4531 vtoc->efi_parts[8].p_start = slice_size + start_block;
4532 vtoc->efi_parts[8].p_size = resv;
4533 vtoc->efi_parts[8].p_tag = V_RESERVED;
4535 rval = efi_write(fd, vtoc);
4537 /* Flush the buffers to disk and invalidate the page cache. */
4539 (void) ioctl(fd, BLKFLSBUF);
4542 rval = efi_rescan(fd);
4545 * Some block drivers (like pcata) may not support EFI GPT labels.
4546 * Print out a helpful error message directing the user to manually
4547 * label the disk and give a specific slice.
4553 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
4554 "parted(8) and then provide a specific slice: %d"), rval);
4555 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4561 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4562 (void) zfs_append_partition(path, MAXPATHLEN);
4564 /* Wait for udev to signal that the device has settled. */
4565 rval = zpool_label_disk_wait(path, DISK_LABEL_WAIT);
4567 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
4568 "detect device partitions on '%s': %d"), path, rval);
4569 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4572 /* We can't be too paranoid. Read the label back and verify it. */
4573 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4574 rval = zpool_label_disk_check(path);
4576 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
4577 "EFI label on '%s' is damaged. Ensure\nthis device "
4578 "is not in in use, and is functioning properly: %d"),
4580 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4587 * Allocate and return the underlying device name for a device mapper device.
4588 * If a device mapper device maps to multiple devices, return the first device.
4590 * For example, dm_name = "/dev/dm-0" could return "/dev/sda". Symlinks to a
4591 * DM device (like /dev/disk/by-vdev/A0) are also allowed.
4593 * Returns device name, or NULL on error or no match. If dm_name is not a DM
4594 * device then return NULL.
4596 * NOTE: The returned name string must be *freed*.
4599 dm_get_underlying_path(char *dm_name)
4609 if (dm_name == NULL)
4612 /* dm name may be a symlink (like /dev/disk/by-vdev/A0) */
4613 realp = realpath(dm_name, NULL);
4618 * If they preface 'dev' with a path (like "/dev") then strip it off.
4619 * We just want the 'dm-N' part.
4621 tmp = strrchr(realp, '/');
4623 dev_str = tmp + 1; /* +1 since we want the chr after '/' */
4627 size = asprintf(&tmp, "/sys/block/%s/slaves/", dev_str);
4628 if (size == -1 || !tmp)
4635 /* Return first sd* entry in /sys/block/dm-N/slaves/ */
4636 while ((ep = readdir(dp))) {
4637 if (ep->d_type != DT_DIR) { /* skip "." and ".." dirs */
4638 size = asprintf(&path, "/dev/%s", ep->d_name);
4652 * Return 1 if device is a device mapper or multipath device.
4656 zfs_dev_is_dm(char *dev_name)
4660 tmp = dm_get_underlying_path(dev_name);
4669 * By "whole disk" we mean an entire physical disk (something we can
4670 * label, toggle the write cache on, etc.) as opposed to the full
4671 * capacity of a pseudo-device such as lofi or did. We act as if we
4672 * are labeling the disk, which should be a pretty good test of whether
4673 * it's a viable device or not. Returns B_TRUE if it is and B_FALSE if
4677 zfs_dev_is_whole_disk(char *dev_name)
4679 struct dk_gpt *label;
4682 if ((fd = open(dev_name, O_RDONLY | O_DIRECT)) < 0)
4685 if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
4697 * Lookup the underlying device for a device name
4699 * Often you'll have a symlink to a device, a partition device,
4700 * or a multipath device, and want to look up the underlying device.
4701 * This function returns the underlying device name. If the device
4702 * name is already the underlying device, then just return the same
4703 * name. If the device is a DM device with multiple underlying devices
4704 * then return the first one.
4708 * 1. /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 -> ../../sda
4709 * dev_name: /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001
4712 * 2. /dev/mapper/mpatha (made up of /dev/sda and /dev/sdb)
4713 * dev_name: /dev/mapper/mpatha
4714 * returns: /dev/sda (first device)
4716 * 3. /dev/sda (already the underlying device)
4717 * dev_name: /dev/sda
4720 * 4. /dev/dm-3 (mapped to /dev/sda)
4721 * dev_name: /dev/dm-3
4724 * 5. /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 -> ../../sdb9
4725 * dev_name: /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9
4728 * 6. /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a -> ../dev/sda2
4729 * dev_name: /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a
4732 * Returns underlying device name, or NULL on error or no match.
4734 * NOTE: The returned name string must be *freed*.
4737 zfs_get_underlying_path(char *dev_name)
4742 if (dev_name == NULL)
4745 tmp = dm_get_underlying_path(dev_name);
4747 /* dev_name not a DM device, so just un-symlinkize it */
4749 tmp = realpath(dev_name, NULL);
4752 name = zfs_strip_partition_path(tmp);
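/*
 * Illustrative sketch (hypothetical helper, not a library API): resolve a
 * symlink or partition name down to its underlying whole-disk device,
 * reusing the by-id example path from the comment above.  The result must
 * be freed.
 */
static void
example_underlying_path(void)
{
	char *dev = zfs_get_underlying_path(
	    "/dev/disk/by-id/ata-QEMU_HARDDISK_QM00001");

	if (dev != NULL) {
		(void) printf("%s\n", dev);	/* e.g. "/dev/sda" */
		free(dev);
	}
}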
4760 * Given a dev name like "sda", return the full enclosure sysfs path to
4761 * the disk. You can also pass in the name with "/dev" prepended
4762 * to it (like /dev/sda).
4764 * For example, disk "sda" in enclosure slot 1:
4766 * returns: "/sys/class/enclosure/1:0:3:0/Slot 1"
4768 * 'dev' must be a non-devicemapper device.
4770 * Returned string must be freed.
4773 zfs_get_enclosure_sysfs_path(char *dev_name)
4777 char buf[MAXPATHLEN];
4785 if (dev_name == NULL)
4788 /* If they preface 'dev' with a path (like "/dev") then strip it off */
4789 tmp1 = strrchr(dev_name, '/');
4791 dev_name = tmp1 + 1; /* +1 since we want the chr after '/' */
4793 tmpsize = asprintf(&tmp1, "/sys/block/%s/device", dev_name);
4794 if (tmpsize == -1 || tmp1 == NULL) {
4801 tmp1 = NULL; /* To make free() at the end a NOP */
4806 * Look through all sysfs entries in /sys/block/<dev>/device for
4807 * the enclosure symlink.
4809 while ((ep = readdir(dp))) {
4810 /* Ignore everything that's not our enclosure_device link */
4811 if (strstr(ep->d_name, "enclosure_device") == NULL)
4814 if (asprintf(&tmp2, "%s/%s", tmp1, ep->d_name) == -1 ||
4818 size = readlink(tmp2, buf, sizeof (buf));
4820 /* Did readlink fail or crop the link name? */
4821 if (size == -1 || size >= sizeof (buf)) {
4823 tmp2 = NULL; /* To make free() at the end a NOP */
4828 * We got a valid link. readlink() doesn't terminate strings
4829 * so we have to do it.
4834 * Our link will look like:
4836 * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1"
4838 * We want to grab the "enclosure/1:0:3:0/SLOT 1" part
4840 tmp3 = strstr(buf, "enclosure");
4844 if (asprintf(&path, "/sys/class/%s", tmp3) == -1) {
4845 /* If asprintf() fails, 'path' is undefined */