]> granicus.if.org Git - zfs/blob - lib/libzfs/libzfs_pool.c
OpenZFS restructuring - libspl
[zfs] / lib / libzfs / libzfs_pool.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
26  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
27  * Copyright (c) 2018 Datto Inc.
28  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
29  * Copyright (c) 2017, Intel Corporation.
30  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
31  */
32
33 #include <errno.h>
34 #include <libintl.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <strings.h>
38 #include <unistd.h>
39 #include <libgen.h>
40 #include <zone.h>
41 #include <sys/stat.h>
42 #include <sys/efi_partition.h>
43 #include <sys/systeminfo.h>
44 #include <sys/vtoc.h>
45 #include <sys/zfs_ioctl.h>
46 #include <sys/vdev_disk.h>
47 #include <dlfcn.h>
48 #include <libzutil.h>
49
50 #include "zfs_namecheck.h"
51 #include "zfs_prop.h"
52 #include "libzfs_impl.h"
53 #include "zfs_comutil.h"
54 #include "zfeature_common.h"
55
56 static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
57 static boolean_t zpool_vdev_is_interior(const char *name);
58
/*
 * Context flags passed to zpool_valid_proplist(): some pool properties are
 * only settable at pool creation and/or import time, and validation differs
 * accordingly.
 */
typedef struct prop_flags {
	int create:1;	/* Validate property on creation */
	int import:1;	/* Validate property on import */
} prop_flags_t;
63
64 /*
65  * ====================================================================
66  *   zpool property functions
67  * ====================================================================
68  */
69
70 static int
71 zpool_get_all_props(zpool_handle_t *zhp)
72 {
73         zfs_cmd_t zc = {"\0"};
74         libzfs_handle_t *hdl = zhp->zpool_hdl;
75
76         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
77
78         if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
79                 return (-1);
80
81         while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
82                 if (errno == ENOMEM) {
83                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
84                                 zcmd_free_nvlists(&zc);
85                                 return (-1);
86                         }
87                 } else {
88                         zcmd_free_nvlists(&zc);
89                         return (-1);
90                 }
91         }
92
93         if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
94                 zcmd_free_nvlists(&zc);
95                 return (-1);
96         }
97
98         zcmd_free_nvlists(&zc);
99
100         return (0);
101 }
102
103 int
104 zpool_props_refresh(zpool_handle_t *zhp)
105 {
106         nvlist_t *old_props;
107
108         old_props = zhp->zpool_props;
109
110         if (zpool_get_all_props(zhp) != 0)
111                 return (-1);
112
113         nvlist_free(old_props);
114         return (0);
115 }
116
117 static const char *
118 zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
119     zprop_source_t *src)
120 {
121         nvlist_t *nv, *nvl;
122         uint64_t ival;
123         char *value;
124         zprop_source_t source;
125
126         nvl = zhp->zpool_props;
127         if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
128                 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
129                 source = ival;
130                 verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
131         } else {
132                 source = ZPROP_SRC_DEFAULT;
133                 if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
134                         value = "-";
135         }
136
137         if (src)
138                 *src = source;
139
140         return (value);
141 }
142
143 uint64_t
144 zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
145 {
146         nvlist_t *nv, *nvl;
147         uint64_t value;
148         zprop_source_t source;
149
150         if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
151                 /*
152                  * zpool_get_all_props() has most likely failed because
153                  * the pool is faulted, but if all we need is the top level
154                  * vdev's guid then get it from the zhp config nvlist.
155                  */
156                 if ((prop == ZPOOL_PROP_GUID) &&
157                     (nvlist_lookup_nvlist(zhp->zpool_config,
158                     ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
159                     (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
160                     == 0)) {
161                         return (value);
162                 }
163                 return (zpool_prop_default_numeric(prop));
164         }
165
166         nvl = zhp->zpool_props;
167         if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
168                 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
169                 source = value;
170                 verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
171         } else {
172                 source = ZPROP_SRC_DEFAULT;
173                 value = zpool_prop_default_numeric(prop);
174         }
175
176         if (src)
177                 *src = source;
178
179         return (value);
180 }
181
182 /*
183  * Map VDEV STATE to printed strings.
184  */
185 const char *
186 zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
187 {
188         switch (state) {
189         case VDEV_STATE_CLOSED:
190         case VDEV_STATE_OFFLINE:
191                 return (gettext("OFFLINE"));
192         case VDEV_STATE_REMOVED:
193                 return (gettext("REMOVED"));
194         case VDEV_STATE_CANT_OPEN:
195                 if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
196                         return (gettext("FAULTED"));
197                 else if (aux == VDEV_AUX_SPLIT_POOL)
198                         return (gettext("SPLIT"));
199                 else
200                         return (gettext("UNAVAIL"));
201         case VDEV_STATE_FAULTED:
202                 return (gettext("FAULTED"));
203         case VDEV_STATE_DEGRADED:
204                 return (gettext("DEGRADED"));
205         case VDEV_STATE_HEALTHY:
206                 return (gettext("ONLINE"));
207
208         default:
209                 break;
210         }
211
212         return (gettext("UNKNOWN"));
213 }
214
215 /*
216  * Map POOL STATE to printed strings.
217  */
218 const char *
219 zpool_pool_state_to_name(pool_state_t state)
220 {
221         switch (state) {
222         default:
223                 break;
224         case POOL_STATE_ACTIVE:
225                 return (gettext("ACTIVE"));
226         case POOL_STATE_EXPORTED:
227                 return (gettext("EXPORTED"));
228         case POOL_STATE_DESTROYED:
229                 return (gettext("DESTROYED"));
230         case POOL_STATE_SPARE:
231                 return (gettext("SPARE"));
232         case POOL_STATE_L2CACHE:
233                 return (gettext("L2CACHE"));
234         case POOL_STATE_UNINITIALIZED:
235                 return (gettext("UNINITIALIZED"));
236         case POOL_STATE_UNAVAIL:
237                 return (gettext("UNAVAIL"));
238         case POOL_STATE_POTENTIALLY_ACTIVE:
239                 return (gettext("POTENTIALLY_ACTIVE"));
240         }
241
242         return (gettext("UNKNOWN"));
243 }
244
245 /*
246  * Given a pool handle, return the pool health string ("ONLINE", "DEGRADED",
247  * "SUSPENDED", etc).
248  */
249 const char *
250 zpool_get_state_str(zpool_handle_t *zhp)
251 {
252         zpool_errata_t errata;
253         zpool_status_t status;
254         nvlist_t *nvroot;
255         vdev_stat_t *vs;
256         uint_t vsc;
257         const char *str;
258
259         status = zpool_get_status(zhp, NULL, &errata);
260
261         if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
262                 str = gettext("FAULTED");
263         } else if (status == ZPOOL_STATUS_IO_FAILURE_WAIT ||
264             status == ZPOOL_STATUS_IO_FAILURE_MMP) {
265                 str = gettext("SUSPENDED");
266         } else {
267                 verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
268                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
269                 verify(nvlist_lookup_uint64_array(nvroot,
270                     ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
271                     == 0);
272                 str = zpool_state_to_name(vs->vs_state, vs->vs_aux);
273         }
274         return (str);
275 }
276
/*
 * Get a zpool property value for 'prop' and return the value in
 * a pre-allocated buffer.
 *
 * The formatted value is written to 'buf' (capacity 'len').  If 'srctype'
 * is non-NULL it receives the source of the value.  When 'literal' is
 * B_TRUE, numeric values are rendered as exact integers rather than in
 * human-readable form.  Returns 0 on success, -1 on failure.
 */
int
zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
    size_t len, zprop_source_t *srctype, boolean_t literal)
{
	uint64_t intval;
	const char *strval;
	zprop_source_t src = ZPROP_SRC_NONE;

	/*
	 * For an unavailable pool the property nvlist may be unobtainable,
	 * so only a handful of properties can be reported; everything else
	 * is shown as "-".
	 */
	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
		switch (prop) {
		case ZPOOL_PROP_NAME:
			(void) strlcpy(buf, zpool_get_name(zhp), len);
			break;

		case ZPOOL_PROP_HEALTH:
			(void) strlcpy(buf, zpool_get_state_str(zhp), len);
			break;

		case ZPOOL_PROP_GUID:
			/* May fall back to the config nvlist; see
			 * zpool_get_prop_int(). */
			intval = zpool_get_prop_int(zhp, prop, &src);
			(void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
			break;

		case ZPOOL_PROP_ALTROOT:
		case ZPOOL_PROP_CACHEFILE:
		case ZPOOL_PROP_COMMENT:
			/* These are available if the props can be fetched. */
			if (zhp->zpool_props != NULL ||
			    zpool_get_all_props(zhp) == 0) {
				(void) strlcpy(buf,
				    zpool_get_prop_string(zhp, prop, &src),
				    len);
				break;
			}
			/* FALLTHROUGH */
		default:
			(void) strlcpy(buf, "-", len);
			break;
		}

		if (srctype != NULL)
			*srctype = src;
		return (0);
	}

	/*
	 * NAME can always be answered; anything else requires the cached
	 * (or freshly fetched) property nvlist.
	 */
	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
	    prop != ZPOOL_PROP_NAME)
		return (-1);

	switch (zpool_prop_get_type(prop)) {
	case PROP_TYPE_STRING:
		(void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
		    len);
		break;

	case PROP_TYPE_NUMBER:
		intval = zpool_get_prop_int(zhp, prop, &src);

		switch (prop) {
		/* Byte counts: exact or nicenum'ed. */
		case ZPOOL_PROP_SIZE:
		case ZPOOL_PROP_ALLOCATED:
		case ZPOOL_PROP_FREE:
		case ZPOOL_PROP_FREEING:
		case ZPOOL_PROP_LEAKED:
		case ZPOOL_PROP_ASHIFT:
			if (literal)
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			else
				(void) zfs_nicenum(intval, buf, len);
			break;

		/* Zero means "none"; shown as "-". */
		case ZPOOL_PROP_EXPANDSZ:
		case ZPOOL_PROP_CHECKPOINT:
			if (intval == 0) {
				(void) strlcpy(buf, "-", len);
			} else if (literal) {
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			} else {
				(void) zfs_nicebytes(intval, buf, len);
			}
			break;

		/* Percentage; '%' suffix unless literal. */
		case ZPOOL_PROP_CAPACITY:
			if (literal) {
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			} else {
				(void) snprintf(buf, len, "%llu%%",
				    (u_longlong_t)intval);
			}
			break;

		/* UINT64_MAX marks "not computed". */
		case ZPOOL_PROP_FRAGMENTATION:
			if (intval == UINT64_MAX) {
				(void) strlcpy(buf, "-", len);
			} else if (literal) {
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			} else {
				(void) snprintf(buf, len, "%llu%%",
				    (u_longlong_t)intval);
			}
			break;

		/* Fixed-point ratio stored as hundredths (e.g. 150 -> 1.50x). */
		case ZPOOL_PROP_DEDUPRATIO:
			if (literal)
				(void) snprintf(buf, len, "%llu.%02llu",
				    (u_longlong_t)(intval / 100),
				    (u_longlong_t)(intval % 100));
			else
				(void) snprintf(buf, len, "%llu.%02llux",
				    (u_longlong_t)(intval / 100),
				    (u_longlong_t)(intval % 100));
			break;

		case ZPOOL_PROP_HEALTH:
			(void) strlcpy(buf, zpool_get_state_str(zhp), len);
			break;
		case ZPOOL_PROP_VERSION:
			/* Feature-flag pools have no meaningful version. */
			if (intval >= SPA_VERSION_FEATURES) {
				(void) snprintf(buf, len, "-");
				break;
			}
			/* FALLTHROUGH */
		default:
			(void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
		}
		break;

	case PROP_TYPE_INDEX:
		/* Index properties map a numeric value to a name. */
		intval = zpool_get_prop_int(zhp, prop, &src);
		if (zpool_prop_index_to_string(prop, intval, &strval)
		    != 0)
			return (-1);
		(void) strlcpy(buf, strval, len);
		break;

	default:
		abort();
	}

	if (srctype)
		*srctype = src;

	return (0);
}
428
429 /*
430  * Check if the bootfs name has the same pool name as it is set to.
431  * Assuming bootfs is a valid dataset name.
432  */
433 static boolean_t
434 bootfs_name_valid(const char *pool, char *bootfs)
435 {
436         int len = strlen(pool);
437         if (bootfs[0] == '\0')
438                 return (B_TRUE);
439
440         if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
441                 return (B_FALSE);
442
443         if (strncmp(pool, bootfs, len) == 0 &&
444             (bootfs[len] == '/' || bootfs[len] == '\0'))
445                 return (B_TRUE);
446
447         return (B_FALSE);
448 }
449
450 boolean_t
451 zpool_is_bootable(zpool_handle_t *zhp)
452 {
453         char bootfs[ZFS_MAX_DATASET_NAME_LEN];
454
455         return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
456             sizeof (bootfs), NULL, B_FALSE) == 0 && strncmp(bootfs, "-",
457             sizeof (bootfs)) != 0);
458 }
459
460
461 /*
462  * Given an nvlist of zpool properties to be set, validate that they are
463  * correct, and parse any numeric properties (index, boolean, etc) if they are
464  * specified as strings.
465  */
466 static nvlist_t *
467 zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
468     nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
469 {
470         nvpair_t *elem;
471         nvlist_t *retprops;
472         zpool_prop_t prop;
473         char *strval;
474         uint64_t intval;
475         char *slash, *check;
476         struct stat64 statbuf;
477         zpool_handle_t *zhp;
478
479         if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
480                 (void) no_memory(hdl);
481                 return (NULL);
482         }
483
484         elem = NULL;
485         while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
486                 const char *propname = nvpair_name(elem);
487
488                 prop = zpool_name_to_prop(propname);
489                 if (prop == ZPOOL_PROP_INVAL && zpool_prop_feature(propname)) {
490                         int err;
491                         char *fname = strchr(propname, '@') + 1;
492
493                         err = zfeature_lookup_name(fname, NULL);
494                         if (err != 0) {
495                                 ASSERT3U(err, ==, ENOENT);
496                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
497                                     "invalid feature '%s'"), fname);
498                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
499                                 goto error;
500                         }
501
502                         if (nvpair_type(elem) != DATA_TYPE_STRING) {
503                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
504                                     "'%s' must be a string"), propname);
505                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
506                                 goto error;
507                         }
508
509                         (void) nvpair_value_string(elem, &strval);
510                         if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0 &&
511                             strcmp(strval, ZFS_FEATURE_DISABLED) != 0) {
512                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
513                                     "property '%s' can only be set to "
514                                     "'enabled' or 'disabled'"), propname);
515                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
516                                 goto error;
517                         }
518
519                         if (!flags.create &&
520                             strcmp(strval, ZFS_FEATURE_DISABLED) == 0) {
521                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
522                                     "property '%s' can only be set to "
523                                     "'disabled' at creation time"), propname);
524                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
525                                 goto error;
526                         }
527
528                         if (nvlist_add_uint64(retprops, propname, 0) != 0) {
529                                 (void) no_memory(hdl);
530                                 goto error;
531                         }
532                         continue;
533                 }
534
535                 /*
536                  * Make sure this property is valid and applies to this type.
537                  */
538                 if (prop == ZPOOL_PROP_INVAL) {
539                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
540                             "invalid property '%s'"), propname);
541                         (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
542                         goto error;
543                 }
544
545                 if (zpool_prop_readonly(prop)) {
546                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
547                             "is readonly"), propname);
548                         (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
549                         goto error;
550                 }
551
552                 if (!flags.create && zpool_prop_setonce(prop)) {
553                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
554                             "property '%s' can only be set at "
555                             "creation time"), propname);
556                         (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
557                         goto error;
558                 }
559
560                 if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
561                     &strval, &intval, errbuf) != 0)
562                         goto error;
563
564                 /*
565                  * Perform additional checking for specific properties.
566                  */
567                 switch (prop) {
568                 case ZPOOL_PROP_VERSION:
569                         if (intval < version ||
570                             !SPA_VERSION_IS_SUPPORTED(intval)) {
571                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
572                                     "property '%s' number %d is invalid."),
573                                     propname, intval);
574                                 (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
575                                 goto error;
576                         }
577                         break;
578
579                 case ZPOOL_PROP_ASHIFT:
580                         if (intval != 0 &&
581                             (intval < ASHIFT_MIN || intval > ASHIFT_MAX)) {
582                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
583                                     "property '%s' number %d is invalid, only "
584                                     "values between %" PRId32 " and "
585                                     "%" PRId32 " are allowed."),
586                                     propname, intval, ASHIFT_MIN, ASHIFT_MAX);
587                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
588                                 goto error;
589                         }
590                         break;
591
592                 case ZPOOL_PROP_BOOTFS:
593                         if (flags.create || flags.import) {
594                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
595                                     "property '%s' cannot be set at creation "
596                                     "or import time"), propname);
597                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
598                                 goto error;
599                         }
600
601                         if (version < SPA_VERSION_BOOTFS) {
602                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
603                                     "pool must be upgraded to support "
604                                     "'%s' property"), propname);
605                                 (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
606                                 goto error;
607                         }
608
609                         /*
610                          * bootfs property value has to be a dataset name and
611                          * the dataset has to be in the same pool as it sets to.
612                          */
613                         if (!bootfs_name_valid(poolname, strval)) {
614                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
615                                     "is an invalid name"), strval);
616                                 (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
617                                 goto error;
618                         }
619
620                         if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
621                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
622                                     "could not open pool '%s'"), poolname);
623                                 (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
624                                 goto error;
625                         }
626                         zpool_close(zhp);
627                         break;
628
629                 case ZPOOL_PROP_ALTROOT:
630                         if (!flags.create && !flags.import) {
631                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
632                                     "property '%s' can only be set during pool "
633                                     "creation or import"), propname);
634                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
635                                 goto error;
636                         }
637
638                         if (strval[0] != '/') {
639                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
640                                     "bad alternate root '%s'"), strval);
641                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
642                                 goto error;
643                         }
644                         break;
645
646                 case ZPOOL_PROP_CACHEFILE:
647                         if (strval[0] == '\0')
648                                 break;
649
650                         if (strcmp(strval, "none") == 0)
651                                 break;
652
653                         if (strval[0] != '/') {
654                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
655                                     "property '%s' must be empty, an "
656                                     "absolute path, or 'none'"), propname);
657                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
658                                 goto error;
659                         }
660
661                         slash = strrchr(strval, '/');
662
663                         if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
664                             strcmp(slash, "/..") == 0) {
665                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
666                                     "'%s' is not a valid file"), strval);
667                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
668                                 goto error;
669                         }
670
671                         *slash = '\0';
672
673                         if (strval[0] != '\0' &&
674                             (stat64(strval, &statbuf) != 0 ||
675                             !S_ISDIR(statbuf.st_mode))) {
676                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
677                                     "'%s' is not a valid directory"),
678                                     strval);
679                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
680                                 goto error;
681                         }
682
683                         *slash = '/';
684                         break;
685
686                 case ZPOOL_PROP_COMMENT:
687                         for (check = strval; *check != '\0'; check++) {
688                                 if (!isprint(*check)) {
689                                         zfs_error_aux(hdl,
690                                             dgettext(TEXT_DOMAIN,
691                                             "comment may only have printable "
692                                             "characters"));
693                                         (void) zfs_error(hdl, EZFS_BADPROP,
694                                             errbuf);
695                                         goto error;
696                                 }
697                         }
698                         if (strlen(strval) > ZPROP_MAX_COMMENT) {
699                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
700                                     "comment must not exceed %d characters"),
701                                     ZPROP_MAX_COMMENT);
702                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
703                                 goto error;
704                         }
705                         break;
706                 case ZPOOL_PROP_READONLY:
707                         if (!flags.import) {
708                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
709                                     "property '%s' can only be set at "
710                                     "import time"), propname);
711                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
712                                 goto error;
713                         }
714                         break;
715                 case ZPOOL_PROP_MULTIHOST:
716                         if (get_system_hostid() == 0) {
717                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
718                                     "requires a non-zero system hostid"));
719                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
720                                 goto error;
721                         }
722                         break;
723                 case ZPOOL_PROP_DEDUPDITTO:
724                         printf("Note: property '%s' no longer has "
725                             "any effect\n", propname);
726                         break;
727
728                 default:
729                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
730                             "property '%s'(%d) not defined"), propname, prop);
731                         break;
732                 }
733         }
734
735         return (retprops);
736 error:
737         nvlist_free(retprops);
738         return (NULL);
739 }
740
741 /*
742  * Set zpool property : propname=propval.
743  */
744 int
745 zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
746 {
747         zfs_cmd_t zc = {"\0"};
748         int ret = -1;
749         char errbuf[1024];
750         nvlist_t *nvl = NULL;
751         nvlist_t *realprops;
752         uint64_t version;
753         prop_flags_t flags = { 0 };
754
755         (void) snprintf(errbuf, sizeof (errbuf),
756             dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
757             zhp->zpool_name);
758
759         if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
760                 return (no_memory(zhp->zpool_hdl));
761
762         if (nvlist_add_string(nvl, propname, propval) != 0) {
763                 nvlist_free(nvl);
764                 return (no_memory(zhp->zpool_hdl));
765         }
766
767         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
768         if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
769             zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
770                 nvlist_free(nvl);
771                 return (-1);
772         }
773
774         nvlist_free(nvl);
775         nvl = realprops;
776
777         /*
778          * Execute the corresponding ioctl() to set this property.
779          */
780         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
781
782         if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
783                 nvlist_free(nvl);
784                 return (-1);
785         }
786
787         ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
788
789         zcmd_free_nvlists(&zc);
790         nvlist_free(nvl);
791
792         if (ret)
793                 (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
794         else
795                 (void) zpool_props_refresh(zhp);
796
797         return (ret);
798 }
799
800 int
801 zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
802 {
803         libzfs_handle_t *hdl = zhp->zpool_hdl;
804         zprop_list_t *entry;
805         char buf[ZFS_MAXPROPLEN];
806         nvlist_t *features = NULL;
807         nvpair_t *nvp;
808         zprop_list_t **last;
809         boolean_t firstexpand = (NULL == *plp);
810         int i;
811
812         if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
813                 return (-1);
814
815         last = plp;
816         while (*last != NULL)
817                 last = &(*last)->pl_next;
818
819         if ((*plp)->pl_all)
820                 features = zpool_get_features(zhp);
821
822         if ((*plp)->pl_all && firstexpand) {
823                 for (i = 0; i < SPA_FEATURES; i++) {
824                         zprop_list_t *entry = zfs_alloc(hdl,
825                             sizeof (zprop_list_t));
826                         entry->pl_prop = ZPROP_INVAL;
827                         entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s",
828                             spa_feature_table[i].fi_uname);
829                         entry->pl_width = strlen(entry->pl_user_prop);
830                         entry->pl_all = B_TRUE;
831
832                         *last = entry;
833                         last = &entry->pl_next;
834                 }
835         }
836
837         /* add any unsupported features */
838         for (nvp = nvlist_next_nvpair(features, NULL);
839             nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) {
840                 char *propname;
841                 boolean_t found;
842                 zprop_list_t *entry;
843
844                 if (zfeature_is_supported(nvpair_name(nvp)))
845                         continue;
846
847                 propname = zfs_asprintf(hdl, "unsupported@%s",
848                     nvpair_name(nvp));
849
850                 /*
851                  * Before adding the property to the list make sure that no
852                  * other pool already added the same property.
853                  */
854                 found = B_FALSE;
855                 entry = *plp;
856                 while (entry != NULL) {
857                         if (entry->pl_user_prop != NULL &&
858                             strcmp(propname, entry->pl_user_prop) == 0) {
859                                 found = B_TRUE;
860                                 break;
861                         }
862                         entry = entry->pl_next;
863                 }
864                 if (found) {
865                         free(propname);
866                         continue;
867                 }
868
869                 entry = zfs_alloc(hdl, sizeof (zprop_list_t));
870                 entry->pl_prop = ZPROP_INVAL;
871                 entry->pl_user_prop = propname;
872                 entry->pl_width = strlen(entry->pl_user_prop);
873                 entry->pl_all = B_TRUE;
874
875                 *last = entry;
876                 last = &entry->pl_next;
877         }
878
879         for (entry = *plp; entry != NULL; entry = entry->pl_next) {
880
881                 if (entry->pl_fixed)
882                         continue;
883
884                 if (entry->pl_prop != ZPROP_INVAL &&
885                     zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
886                     NULL, B_FALSE) == 0) {
887                         if (strlen(buf) > entry->pl_width)
888                                 entry->pl_width = strlen(buf);
889                 }
890         }
891
892         return (0);
893 }
894
895 /*
896  * Get the state for the given feature on the given ZFS pool.
897  */
898 int
899 zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
900     size_t len)
901 {
902         uint64_t refcount;
903         boolean_t found = B_FALSE;
904         nvlist_t *features = zpool_get_features(zhp);
905         boolean_t supported;
906         const char *feature = strchr(propname, '@') + 1;
907
908         supported = zpool_prop_feature(propname);
909         ASSERT(supported || zpool_prop_unsupported(propname));
910
911         /*
912          * Convert from feature name to feature guid. This conversion is
913          * unnecessary for unsupported@... properties because they already
914          * use guids.
915          */
916         if (supported) {
917                 int ret;
918                 spa_feature_t fid;
919
920                 ret = zfeature_lookup_name(feature, &fid);
921                 if (ret != 0) {
922                         (void) strlcpy(buf, "-", len);
923                         return (ENOTSUP);
924                 }
925                 feature = spa_feature_table[fid].fi_guid;
926         }
927
928         if (nvlist_lookup_uint64(features, feature, &refcount) == 0)
929                 found = B_TRUE;
930
931         if (supported) {
932                 if (!found) {
933                         (void) strlcpy(buf, ZFS_FEATURE_DISABLED, len);
934                 } else  {
935                         if (refcount == 0)
936                                 (void) strlcpy(buf, ZFS_FEATURE_ENABLED, len);
937                         else
938                                 (void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len);
939                 }
940         } else {
941                 if (found) {
942                         if (refcount == 0) {
943                                 (void) strcpy(buf, ZFS_UNSUPPORTED_INACTIVE);
944                         } else {
945                                 (void) strcpy(buf, ZFS_UNSUPPORTED_READONLY);
946                         }
947                 } else {
948                         (void) strlcpy(buf, "-", len);
949                         return (ENOTSUP);
950                 }
951         }
952
953         return (0);
954 }
955
956 /*
957  * Validate the given pool name, optionally putting an extended error message in
958  * 'buf'.
959  */
960 boolean_t
961 zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
962 {
963         namecheck_err_t why;
964         char what;
965         int ret;
966
967         ret = pool_namecheck(pool, &why, &what);
968
969         /*
970          * The rules for reserved pool names were extended at a later point.
971          * But we need to support users with existing pools that may now be
972          * invalid.  So we only check for this expanded set of names during a
973          * create (or import), and only in userland.
974          */
975         if (ret == 0 && !isopen &&
976             (strncmp(pool, "mirror", 6) == 0 ||
977             strncmp(pool, "raidz", 5) == 0 ||
978             strncmp(pool, "spare", 5) == 0 ||
979             strcmp(pool, "log") == 0)) {
980                 if (hdl != NULL)
981                         zfs_error_aux(hdl,
982                             dgettext(TEXT_DOMAIN, "name is reserved"));
983                 return (B_FALSE);
984         }
985
986
987         if (ret != 0) {
988                 if (hdl != NULL) {
989                         switch (why) {
990                         case NAME_ERR_TOOLONG:
991                                 zfs_error_aux(hdl,
992                                     dgettext(TEXT_DOMAIN, "name is too long"));
993                                 break;
994
995                         case NAME_ERR_INVALCHAR:
996                                 zfs_error_aux(hdl,
997                                     dgettext(TEXT_DOMAIN, "invalid character "
998                                     "'%c' in pool name"), what);
999                                 break;
1000
1001                         case NAME_ERR_NOLETTER:
1002                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1003                                     "name must begin with a letter"));
1004                                 break;
1005
1006                         case NAME_ERR_RESERVED:
1007                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1008                                     "name is reserved"));
1009                                 break;
1010
1011                         case NAME_ERR_DISKLIKE:
1012                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1013                                     "pool name is reserved"));
1014                                 break;
1015
1016                         case NAME_ERR_LEADING_SLASH:
1017                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1018                                     "leading slash in name"));
1019                                 break;
1020
1021                         case NAME_ERR_EMPTY_COMPONENT:
1022                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1023                                     "empty component in name"));
1024                                 break;
1025
1026                         case NAME_ERR_TRAILING_SLASH:
1027                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1028                                     "trailing slash in name"));
1029                                 break;
1030
1031                         case NAME_ERR_MULTIPLE_DELIMITERS:
1032                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1033                                     "multiple '@' and/or '#' delimiters in "
1034                                     "name"));
1035                                 break;
1036
1037                         case NAME_ERR_NO_AT:
1038                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1039                                     "permission set is missing '@'"));
1040                                 break;
1041
1042                         default:
1043                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1044                                     "(%d) not defined"), why);
1045                                 break;
1046                         }
1047                 }
1048                 return (B_FALSE);
1049         }
1050
1051         return (B_TRUE);
1052 }
1053
1054 /*
1055  * Open a handle to the given pool, even if the pool is currently in the FAULTED
1056  * state.
1057  */
1058 zpool_handle_t *
1059 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
1060 {
1061         zpool_handle_t *zhp;
1062         boolean_t missing;
1063
1064         /*
1065          * Make sure the pool name is valid.
1066          */
1067         if (!zpool_name_valid(hdl, B_TRUE, pool)) {
1068                 (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1069                     dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1070                     pool);
1071                 return (NULL);
1072         }
1073
1074         if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1075                 return (NULL);
1076
1077         zhp->zpool_hdl = hdl;
1078         (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1079
1080         if (zpool_refresh_stats(zhp, &missing) != 0) {
1081                 zpool_close(zhp);
1082                 return (NULL);
1083         }
1084
1085         if (missing) {
1086                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
1087                 (void) zfs_error_fmt(hdl, EZFS_NOENT,
1088                     dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
1089                 zpool_close(zhp);
1090                 return (NULL);
1091         }
1092
1093         return (zhp);
1094 }
1095
1096 /*
1097  * Like the above, but silent on error.  Used when iterating over pools (because
1098  * the configuration cache may be out of date).
1099  */
1100 int
1101 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
1102 {
1103         zpool_handle_t *zhp;
1104         boolean_t missing;
1105
1106         if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1107                 return (-1);
1108
1109         zhp->zpool_hdl = hdl;
1110         (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1111
1112         if (zpool_refresh_stats(zhp, &missing) != 0) {
1113                 zpool_close(zhp);
1114                 return (-1);
1115         }
1116
1117         if (missing) {
1118                 zpool_close(zhp);
1119                 *ret = NULL;
1120                 return (0);
1121         }
1122
1123         *ret = zhp;
1124         return (0);
1125 }
1126
1127 /*
1128  * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
1129  * state.
1130  */
1131 zpool_handle_t *
1132 zpool_open(libzfs_handle_t *hdl, const char *pool)
1133 {
1134         zpool_handle_t *zhp;
1135
1136         if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
1137                 return (NULL);
1138
1139         if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
1140                 (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
1141                     dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
1142                 zpool_close(zhp);
1143                 return (NULL);
1144         }
1145
1146         return (zhp);
1147 }
1148
/*
 * Close the handle.  Simply frees the memory associated with the handle.
 * Releases the cached config, old config, and property nvlists
 * (nvlist_free() tolerates NULL), then the handle itself.  The handle
 * must not be used afterwards.
 */
void
zpool_close(zpool_handle_t *zhp)
{
	nvlist_free(zhp->zpool_config);
	nvlist_free(zhp->zpool_old_config);
	nvlist_free(zhp->zpool_props);
	free(zhp);
}
1160
/*
 * Return the name of the pool.
 * The returned string is owned by the handle and remains valid until
 * zpool_close() is called; the caller must not free it.
 */
const char *
zpool_get_name(zpool_handle_t *zhp)
{
	return (zhp->zpool_name);
}
1169
1170
/*
 * Return the state of the pool (ACTIVE or UNAVAILABLE)
 * This is the state cached in the handle at open/refresh time, not a
 * fresh query of the kernel.
 */
int
zpool_get_state(zpool_handle_t *zhp)
{
	return (zhp->zpool_state);
}
1179
1180 /*
1181  * Check if vdev list contains a special vdev
1182  */
1183 static boolean_t
1184 zpool_has_special_vdev(nvlist_t *nvroot)
1185 {
1186         nvlist_t **child;
1187         uint_t children;
1188
1189         if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &child,
1190             &children) == 0) {
1191                 for (uint_t c = 0; c < children; c++) {
1192                         char *bias;
1193
1194                         if (nvlist_lookup_string(child[c],
1195                             ZPOOL_CONFIG_ALLOCATION_BIAS, &bias) == 0 &&
1196                             strcmp(bias, VDEV_ALLOC_BIAS_SPECIAL) == 0) {
1197                                 return (B_TRUE);
1198                         }
1199                 }
1200         }
1201         return (B_FALSE);
1202 }
1203
/*
 * Create the named pool, using the provided vdev list.  It is assumed
 * that the consumer has already validated the contents of the nvlist, so we
 * don't have to worry about error semantics.
 *
 * 'props' (optional) holds pool properties and 'fsprops' (optional) holds
 * properties for the pool's root filesystem.  Returns 0 on success; on
 * failure, returns -1 or the zfs_error() result, with a message set on
 * 'hdl'.  All intermediate nvlists and key material are released on every
 * path through the shared cleanup label below.
 */
int
zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
    nvlist_t *props, nvlist_t *fsprops)
{
	zfs_cmd_t zc = {"\0"};
	nvlist_t *zc_fsprops = NULL;
	nvlist_t *zc_props = NULL;
	nvlist_t *hidden_args = NULL;
	uint8_t *wkeydata = NULL;
	uint_t wkeylen = 0;
	char msg[1024];
	int ret = -1;

	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
	    "cannot create '%s'"), pool);

	if (!zpool_name_valid(hdl, B_FALSE, pool))
		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));

	/* Pack the vdev configuration into the ioctl command. */
	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);

	if (props) {
		prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };

		/*
		 * Validate against SPA_VERSION_1: the pool does not exist
		 * yet, so there is no on-disk version to check against.
		 */
		if ((zc_props = zpool_valid_proplist(hdl, pool, props,
		    SPA_VERSION_1, flags, msg)) == NULL) {
			goto create_failed;
		}
	}

	if (fsprops) {
		uint64_t zoned;
		char *zonestr;

		/* Root filesystem properties are validated as zoned=on/off. */
		zoned = ((nvlist_lookup_string(fsprops,
		    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
		    strcmp(zonestr, "on") == 0);

		if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM,
		    fsprops, zoned, NULL, NULL, B_TRUE, msg)) == NULL) {
			goto create_failed;
		}

		/*
		 * special_small_blocks is meaningless without a special
		 * vdev in the new pool's configuration.
		 */
		if (nvlist_exists(zc_fsprops,
		    zfs_prop_to_name(ZFS_PROP_SPECIAL_SMALL_BLOCKS)) &&
		    !zpool_has_special_vdev(nvroot)) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "%s property requires a special vdev"),
			    zfs_prop_to_name(ZFS_PROP_SPECIAL_SMALL_BLOCKS));
			(void) zfs_error(hdl, EZFS_BADPROP, msg);
			goto create_failed;
		}

		if (!zc_props &&
		    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
			goto create_failed;
		}
		/* May produce raw wrapping-key material in 'wkeydata'. */
		if (zfs_crypto_create(hdl, NULL, zc_fsprops, props, B_TRUE,
		    &wkeydata, &wkeylen) != 0) {
			zfs_error(hdl, EZFS_CRYPTOFAILED, msg);
			goto create_failed;
		}
		if (nvlist_add_nvlist(zc_props,
		    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
			goto create_failed;
		}
		if (wkeydata != NULL) {
			/*
			 * Pass key material to the kernel via the separate
			 * hidden-args nvlist so it stays out of logs.
			 */
			if (nvlist_alloc(&hidden_args, NV_UNIQUE_NAME, 0) != 0)
				goto create_failed;

			if (nvlist_add_uint8_array(hidden_args, "wkeydata",
			    wkeydata, wkeylen) != 0)
				goto create_failed;

			if (nvlist_add_nvlist(zc_props, ZPOOL_HIDDEN_ARGS,
			    hidden_args) != 0)
				goto create_failed;
		}
	}

	if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
		goto create_failed;

	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));

	if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {

		zcmd_free_nvlists(&zc);
		nvlist_free(zc_props);
		nvlist_free(zc_fsprops);
		nvlist_free(hidden_args);
		if (wkeydata != NULL)
			free(wkeydata);

		switch (errno) {
		case EBUSY:
			/*
			 * This can happen if the user has specified the same
			 * device multiple times.  We can't reliably detect this
			 * until we try to add it and see we already have a
			 * label.  This can also happen if the device is
			 * part of an active md or lvm device.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more vdevs refer to the same device, or "
			    "one of\nthe devices is part of an active md or "
			    "lvm device"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ERANGE:
			/*
			 * This happens if the record size is smaller or larger
			 * than the allowed size range, or not a power of 2.
			 *
			 * NOTE: although zfs_valid_proplist is called earlier,
			 * this case may have slipped through since the
			 * pool does not exist yet and it is therefore
			 * impossible to read properties e.g. max blocksize
			 * from the pool.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "record size invalid"));
			return (zfs_error(hdl, EZFS_BADPROP, msg));

		case EOVERFLOW:
			/*
			 * This occurs when one of the devices is below
			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
			 * device was the problem device since there's no
			 * reliable way to determine device size from userland.
			 */
			{
				char buf[64];

				zfs_nicebytes(SPA_MINDEVSIZE, buf,
				    sizeof (buf));

				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "one or more devices is less than the "
				    "minimum size (%s)"), buf);
			}
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ENOSPC:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more devices is out of space"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ENOTBLK:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cache device must be a disk or disk slice"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		default:
			return (zpool_standard_error(hdl, errno, msg));
		}
	}

	/*
	 * On success (ret == 0) execution intentionally falls through to
	 * the shared cleanup below and returns 0.
	 */
create_failed:
	zcmd_free_nvlists(&zc);
	nvlist_free(zc_props);
	nvlist_free(zc_fsprops);
	nvlist_free(hidden_args);
	if (wkeydata != NULL)
		free(wkeydata);
	return (ret);
}
1377
1378 /*
1379  * Destroy the given pool.  It is up to the caller to ensure that there are no
1380  * datasets left in the pool.
1381  */
1382 int
1383 zpool_destroy(zpool_handle_t *zhp, const char *log_str)
1384 {
1385         zfs_cmd_t zc = {"\0"};
1386         zfs_handle_t *zfp = NULL;
1387         libzfs_handle_t *hdl = zhp->zpool_hdl;
1388         char msg[1024];
1389
1390         if (zhp->zpool_state == POOL_STATE_ACTIVE &&
1391             (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
1392                 return (-1);
1393
1394         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1395         zc.zc_history = (uint64_t)(uintptr_t)log_str;
1396
1397         if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
1398                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1399                     "cannot destroy '%s'"), zhp->zpool_name);
1400
1401                 if (errno == EROFS) {
1402                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1403                             "one or more devices is read only"));
1404                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1405                 } else {
1406                         (void) zpool_standard_error(hdl, errno, msg);
1407                 }
1408
1409                 if (zfp)
1410                         zfs_close(zfp);
1411                 return (-1);
1412         }
1413
1414         if (zfp) {
1415                 remove_mountpoint(zfp);
1416                 zfs_close(zfp);
1417         }
1418
1419         return (0);
1420 }
1421
1422 /*
1423  * Create a checkpoint in the given pool.
1424  */
1425 int
1426 zpool_checkpoint(zpool_handle_t *zhp)
1427 {
1428         libzfs_handle_t *hdl = zhp->zpool_hdl;
1429         char msg[1024];
1430         int error;
1431
1432         error = lzc_pool_checkpoint(zhp->zpool_name);
1433         if (error != 0) {
1434                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1435                     "cannot checkpoint '%s'"), zhp->zpool_name);
1436                 (void) zpool_standard_error(hdl, error, msg);
1437                 return (-1);
1438         }
1439
1440         return (0);
1441 }
1442
1443 /*
1444  * Discard the checkpoint from the given pool.
1445  */
1446 int
1447 zpool_discard_checkpoint(zpool_handle_t *zhp)
1448 {
1449         libzfs_handle_t *hdl = zhp->zpool_hdl;
1450         char msg[1024];
1451         int error;
1452
1453         error = lzc_pool_checkpoint_discard(zhp->zpool_name);
1454         if (error != 0) {
1455                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1456                     "cannot discard checkpoint in '%s'"), zhp->zpool_name);
1457                 (void) zpool_standard_error(hdl, error, msg);
1458                 return (-1);
1459         }
1460
1461         return (0);
1462 }
1463
1464 /*
1465  * Add the given vdevs to the pool.  The caller must have already performed the
1466  * necessary verification to ensure that the vdev specification is well-formed.
1467  */
1468 int
1469 zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
1470 {
1471         zfs_cmd_t zc = {"\0"};
1472         int ret;
1473         libzfs_handle_t *hdl = zhp->zpool_hdl;
1474         char msg[1024];
1475         nvlist_t **spares, **l2cache;
1476         uint_t nspares, nl2cache;
1477
1478         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1479             "cannot add to '%s'"), zhp->zpool_name);
1480
1481         if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1482             SPA_VERSION_SPARES &&
1483             nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1484             &spares, &nspares) == 0) {
1485                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1486                     "upgraded to add hot spares"));
1487                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
1488         }
1489
1490         if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1491             SPA_VERSION_L2CACHE &&
1492             nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
1493             &l2cache, &nl2cache) == 0) {
1494                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1495                     "upgraded to add cache devices"));
1496                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
1497         }
1498
1499         if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1500                 return (-1);
1501         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1502
1503         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
1504                 switch (errno) {
1505                 case EBUSY:
1506                         /*
1507                          * This can happen if the user has specified the same
1508                          * device multiple times.  We can't reliably detect this
1509                          * until we try to add it and see we already have a
1510                          * label.
1511                          */
1512                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1513                             "one or more vdevs refer to the same device"));
1514                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1515                         break;
1516
1517                 case EINVAL:
1518                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1519                             "invalid config; a pool with removing/removed "
1520                             "vdevs does not support adding raidz vdevs"));
1521                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1522                         break;
1523
1524                 case EOVERFLOW:
1525                         /*
1526                          * This occurs when one of the devices is below
1527                          * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
1528                          * device was the problem device since there's no
1529                          * reliable way to determine device size from userland.
1530                          */
1531                         {
1532                                 char buf[64];
1533
1534                                 zfs_nicebytes(SPA_MINDEVSIZE, buf,
1535                                     sizeof (buf));
1536
1537                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1538                                     "device is less than the minimum "
1539                                     "size (%s)"), buf);
1540                         }
1541                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1542                         break;
1543
1544                 case ENOTSUP:
1545                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1546                             "pool must be upgraded to add these vdevs"));
1547                         (void) zfs_error(hdl, EZFS_BADVERSION, msg);
1548                         break;
1549
1550                 case ENOTBLK:
1551                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1552                             "cache device must be a disk or disk slice"));
1553                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1554                         break;
1555
1556                 default:
1557                         (void) zpool_standard_error(hdl, errno, msg);
1558                 }
1559
1560                 ret = -1;
1561         } else {
1562                 ret = 0;
1563         }
1564
1565         zcmd_free_nvlists(&zc);
1566
1567         return (ret);
1568 }
1569
1570 /*
1571  * Exports the pool from the system.  The caller must ensure that there are no
1572  * mounted datasets in the pool.
1573  */
1574 static int
1575 zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce,
1576     const char *log_str)
1577 {
1578         zfs_cmd_t zc = {"\0"};
1579         char msg[1024];
1580
1581         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1582             "cannot export '%s'"), zhp->zpool_name);
1583
1584         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1585         zc.zc_cookie = force;
1586         zc.zc_guid = hardforce;
1587         zc.zc_history = (uint64_t)(uintptr_t)log_str;
1588
1589         if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
1590                 switch (errno) {
1591                 case EXDEV:
1592                         zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
1593                             "use '-f' to override the following errors:\n"
1594                             "'%s' has an active shared spare which could be"
1595                             " used by other pools once '%s' is exported."),
1596                             zhp->zpool_name, zhp->zpool_name);
1597                         return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
1598                             msg));
1599                 default:
1600                         return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
1601                             msg));
1602                 }
1603         }
1604
1605         return (0);
1606 }
1607
/*
 * Export the pool from the system (non-hardforce variant).  'force' is
 * passed to the kernel via zc_cookie and 'log_str' is recorded in the
 * pool history; the caller must ensure there are no mounted datasets in
 * the pool (see zpool_export_common()).
 */
int
zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str)
{
	return (zpool_export_common(zhp, force, B_FALSE, log_str));
}
1613
/*
 * Export the pool with both 'force' and 'hardforce' enabled; otherwise
 * identical to zpool_export().
 */
int
zpool_export_force(zpool_handle_t *zhp, const char *log_str)
{
	return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str));
}
1619
1620 static void
1621 zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
1622     nvlist_t *config)
1623 {
1624         nvlist_t *nv = NULL;
1625         uint64_t rewindto;
1626         int64_t loss = -1;
1627         struct tm t;
1628         char timestr[128];
1629
1630         if (!hdl->libzfs_printerr || config == NULL)
1631                 return;
1632
1633         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
1634             nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) {
1635                 return;
1636         }
1637
1638         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1639                 return;
1640         (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1641
1642         if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1643             strftime(timestr, 128, "%c", &t) != 0) {
1644                 if (dryrun) {
1645                         (void) printf(dgettext(TEXT_DOMAIN,
1646                             "Would be able to return %s "
1647                             "to its state as of %s.\n"),
1648                             name, timestr);
1649                 } else {
1650                         (void) printf(dgettext(TEXT_DOMAIN,
1651                             "Pool %s returned to its state as of %s.\n"),
1652                             name, timestr);
1653                 }
1654                 if (loss > 120) {
1655                         (void) printf(dgettext(TEXT_DOMAIN,
1656                             "%s approximately %lld "),
1657                             dryrun ? "Would discard" : "Discarded",
1658                             ((longlong_t)loss + 30) / 60);
1659                         (void) printf(dgettext(TEXT_DOMAIN,
1660                             "minutes of transactions.\n"));
1661                 } else if (loss > 0) {
1662                         (void) printf(dgettext(TEXT_DOMAIN,
1663                             "%s approximately %lld "),
1664                             dryrun ? "Would discard" : "Discarded",
1665                             (longlong_t)loss);
1666                         (void) printf(dgettext(TEXT_DOMAIN,
1667                             "seconds of transactions.\n"));
1668                 }
1669         }
1670 }
1671
1672 void
1673 zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
1674     nvlist_t *config)
1675 {
1676         nvlist_t *nv = NULL;
1677         int64_t loss = -1;
1678         uint64_t edata = UINT64_MAX;
1679         uint64_t rewindto;
1680         struct tm t;
1681         char timestr[128];
1682
1683         if (!hdl->libzfs_printerr)
1684                 return;
1685
1686         if (reason >= 0)
1687                 (void) printf(dgettext(TEXT_DOMAIN, "action: "));
1688         else
1689                 (void) printf(dgettext(TEXT_DOMAIN, "\t"));
1690
1691         /* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
1692         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
1693             nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 ||
1694             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1695                 goto no_info;
1696
1697         (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1698         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
1699             &edata);
1700
1701         (void) printf(dgettext(TEXT_DOMAIN,
1702             "Recovery is possible, but will result in some data loss.\n"));
1703
1704         if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1705             strftime(timestr, 128, "%c", &t) != 0) {
1706                 (void) printf(dgettext(TEXT_DOMAIN,
1707                     "\tReturning the pool to its state as of %s\n"
1708                     "\tshould correct the problem.  "),
1709                     timestr);
1710         } else {
1711                 (void) printf(dgettext(TEXT_DOMAIN,
1712                     "\tReverting the pool to an earlier state "
1713                     "should correct the problem.\n\t"));
1714         }
1715
1716         if (loss > 120) {
1717                 (void) printf(dgettext(TEXT_DOMAIN,
1718                     "Approximately %lld minutes of data\n"
1719                     "\tmust be discarded, irreversibly.  "),
1720                     ((longlong_t)loss + 30) / 60);
1721         } else if (loss > 0) {
1722                 (void) printf(dgettext(TEXT_DOMAIN,
1723                     "Approximately %lld seconds of data\n"
1724                     "\tmust be discarded, irreversibly.  "),
1725                     (longlong_t)loss);
1726         }
1727         if (edata != 0 && edata != UINT64_MAX) {
1728                 if (edata == 1) {
1729                         (void) printf(dgettext(TEXT_DOMAIN,
1730                             "After rewind, at least\n"
1731                             "\tone persistent user-data error will remain.  "));
1732                 } else {
1733                         (void) printf(dgettext(TEXT_DOMAIN,
1734                             "After rewind, several\n"
1735                             "\tpersistent user-data errors will remain.  "));
1736                 }
1737         }
1738         (void) printf(dgettext(TEXT_DOMAIN,
1739             "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
1740             reason >= 0 ? "clear" : "import", name);
1741
1742         (void) printf(dgettext(TEXT_DOMAIN,
1743             "A scrub of the pool\n"
1744             "\tis strongly recommended after recovery.\n"));
1745         return;
1746
1747 no_info:
1748         (void) printf(dgettext(TEXT_DOMAIN,
1749             "Destroy and re-create the pool from\n\ta backup source.\n"));
1750 }
1751
1752 /*
1753  * zpool_import() is a contracted interface. Should be kept the same
1754  * if possible.
1755  *
1756  * Applications should use zpool_import_props() to import a pool with
1757  * new properties value to be set.
1758  */
1759 int
1760 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1761     char *altroot)
1762 {
1763         nvlist_t *props = NULL;
1764         int ret;
1765
1766         if (altroot != NULL) {
1767                 if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1768                         return (zfs_error_fmt(hdl, EZFS_NOMEM,
1769                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1770                             newname));
1771                 }
1772
1773                 if (nvlist_add_string(props,
1774                     zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
1775                     nvlist_add_string(props,
1776                     zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
1777                         nvlist_free(props);
1778                         return (zfs_error_fmt(hdl, EZFS_NOMEM,
1779                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1780                             newname));
1781                 }
1782         }
1783
1784         ret = zpool_import_props(hdl, config, newname, props,
1785             ZFS_IMPORT_NORMAL);
1786         nvlist_free(props);
1787         return (ret);
1788 }
1789
1790 static void
1791 print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
1792     int indent)
1793 {
1794         nvlist_t **child;
1795         uint_t c, children;
1796         char *vname;
1797         uint64_t is_log = 0;
1798
1799         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
1800             &is_log);
1801
1802         if (name != NULL)
1803                 (void) printf("\t%*s%s%s\n", indent, "", name,
1804                     is_log ? " [log]" : "");
1805
1806         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1807             &child, &children) != 0)
1808                 return;
1809
1810         for (c = 0; c < children; c++) {
1811                 vname = zpool_vdev_name(hdl, NULL, child[c], VDEV_NAME_TYPE_ID);
1812                 print_vdev_tree(hdl, vname, child[c], indent + 2);
1813                 free(vname);
1814         }
1815 }
1816
1817 void
1818 zpool_print_unsup_feat(nvlist_t *config)
1819 {
1820         nvlist_t *nvinfo, *unsup_feat;
1821         nvpair_t *nvp;
1822
1823         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) ==
1824             0);
1825         verify(nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT,
1826             &unsup_feat) == 0);
1827
1828         for (nvp = nvlist_next_nvpair(unsup_feat, NULL); nvp != NULL;
1829             nvp = nvlist_next_nvpair(unsup_feat, nvp)) {
1830                 char *desc;
1831
1832                 verify(nvpair_type(nvp) == DATA_TYPE_STRING);
1833                 verify(nvpair_value_string(nvp, &desc) == 0);
1834
1835                 if (strlen(desc) > 0)
1836                         (void) printf("\t%s (%s)\n", nvpair_name(nvp), desc);
1837                 else
1838                         (void) printf("\t%s\n", nvpair_name(nvp));
1839         }
1840 }
1841
1842 /*
1843  * Import the given pool using the known configuration and a list of
1844  * properties to be set. The configuration should have come from
1845  * zpool_find_import(). The 'newname' parameters control whether the pool
1846  * is imported with a different name.
1847  */
1848 int
1849 zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1850     nvlist_t *props, int flags)
1851 {
1852         zfs_cmd_t zc = {"\0"};
1853         zpool_load_policy_t policy;
1854         nvlist_t *nv = NULL;
1855         nvlist_t *nvinfo = NULL;
1856         nvlist_t *missing = NULL;
1857         char *thename;
1858         char *origname;
1859         int ret;
1860         int error = 0;
1861         char errbuf[1024];
1862
1863         verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1864             &origname) == 0);
1865
1866         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1867             "cannot import pool '%s'"), origname);
1868
1869         if (newname != NULL) {
1870                 if (!zpool_name_valid(hdl, B_FALSE, newname))
1871                         return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1872                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1873                             newname));
1874                 thename = (char *)newname;
1875         } else {
1876                 thename = origname;
1877         }
1878
1879         if (props != NULL) {
1880                 uint64_t version;
1881                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
1882
1883                 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1884                     &version) == 0);
1885
1886                 if ((props = zpool_valid_proplist(hdl, origname,
1887                     props, version, flags, errbuf)) == NULL)
1888                         return (-1);
1889                 if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1890                         nvlist_free(props);
1891                         return (-1);
1892                 }
1893                 nvlist_free(props);
1894         }
1895
1896         (void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1897
1898         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1899             &zc.zc_guid) == 0);
1900
1901         if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1902                 zcmd_free_nvlists(&zc);
1903                 return (-1);
1904         }
1905         if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
1906                 zcmd_free_nvlists(&zc);
1907                 return (-1);
1908         }
1909
1910         zc.zc_cookie = flags;
1911         while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
1912             errno == ENOMEM) {
1913                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
1914                         zcmd_free_nvlists(&zc);
1915                         return (-1);
1916                 }
1917         }
1918         if (ret != 0)
1919                 error = errno;
1920
1921         (void) zcmd_read_dst_nvlist(hdl, &zc, &nv);
1922
1923         zcmd_free_nvlists(&zc);
1924
1925         zpool_get_load_policy(config, &policy);
1926
1927         if (error) {
1928                 char desc[1024];
1929                 char aux[256];
1930
1931                 /*
1932                  * Dry-run failed, but we print out what success
1933                  * looks like if we found a best txg
1934                  */
1935                 if (policy.zlp_rewind & ZPOOL_TRY_REWIND) {
1936                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
1937                             B_TRUE, nv);
1938                         nvlist_free(nv);
1939                         return (-1);
1940                 }
1941
1942                 if (newname == NULL)
1943                         (void) snprintf(desc, sizeof (desc),
1944                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1945                             thename);
1946                 else
1947                         (void) snprintf(desc, sizeof (desc),
1948                             dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1949                             origname, thename);
1950
1951                 switch (error) {
1952                 case ENOTSUP:
1953                         if (nv != NULL && nvlist_lookup_nvlist(nv,
1954                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1955                             nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) {
1956                                 (void) printf(dgettext(TEXT_DOMAIN, "This "
1957                                     "pool uses the following feature(s) not "
1958                                     "supported by this system:\n"));
1959                                 zpool_print_unsup_feat(nv);
1960                                 if (nvlist_exists(nvinfo,
1961                                     ZPOOL_CONFIG_CAN_RDONLY)) {
1962                                         (void) printf(dgettext(TEXT_DOMAIN,
1963                                             "All unsupported features are only "
1964                                             "required for writing to the pool."
1965                                             "\nThe pool can be imported using "
1966                                             "'-o readonly=on'.\n"));
1967                                 }
1968                         }
1969                         /*
1970                          * Unsupported version.
1971                          */
1972                         (void) zfs_error(hdl, EZFS_BADVERSION, desc);
1973                         break;
1974
1975                 case EREMOTEIO:
1976                         if (nv != NULL && nvlist_lookup_nvlist(nv,
1977                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0) {
1978                                 char *hostname = "<unknown>";
1979                                 uint64_t hostid = 0;
1980                                 mmp_state_t mmp_state;
1981
1982                                 mmp_state = fnvlist_lookup_uint64(nvinfo,
1983                                     ZPOOL_CONFIG_MMP_STATE);
1984
1985                                 if (nvlist_exists(nvinfo,
1986                                     ZPOOL_CONFIG_MMP_HOSTNAME))
1987                                         hostname = fnvlist_lookup_string(nvinfo,
1988                                             ZPOOL_CONFIG_MMP_HOSTNAME);
1989
1990                                 if (nvlist_exists(nvinfo,
1991                                     ZPOOL_CONFIG_MMP_HOSTID))
1992                                         hostid = fnvlist_lookup_uint64(nvinfo,
1993                                             ZPOOL_CONFIG_MMP_HOSTID);
1994
1995                                 if (mmp_state == MMP_STATE_ACTIVE) {
1996                                         (void) snprintf(aux, sizeof (aux),
1997                                             dgettext(TEXT_DOMAIN, "pool is imp"
1998                                             "orted on host '%s' (hostid=%lx).\n"
1999                                             "Export the pool on the other "
2000                                             "system, then run 'zpool import'."),
2001                                             hostname, (unsigned long) hostid);
2002                                 } else if (mmp_state == MMP_STATE_NO_HOSTID) {
2003                                         (void) snprintf(aux, sizeof (aux),
2004                                             dgettext(TEXT_DOMAIN, "pool has "
2005                                             "the multihost property on and "
2006                                             "the\nsystem's hostid is not set. "
2007                                             "Set a unique system hostid with "
2008                                             "the zgenhostid(8) command.\n"));
2009                                 }
2010
2011                                 (void) zfs_error_aux(hdl, aux);
2012                         }
2013                         (void) zfs_error(hdl, EZFS_ACTIVE_POOL, desc);
2014                         break;
2015
2016                 case EINVAL:
2017                         (void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
2018                         break;
2019
2020                 case EROFS:
2021                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2022                             "one or more devices is read only"));
2023                         (void) zfs_error(hdl, EZFS_BADDEV, desc);
2024                         break;
2025
2026                 case ENXIO:
2027                         if (nv && nvlist_lookup_nvlist(nv,
2028                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
2029                             nvlist_lookup_nvlist(nvinfo,
2030                             ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
2031                                 (void) printf(dgettext(TEXT_DOMAIN,
2032                                     "The devices below are missing or "
2033                                     "corrupted, use '-m' to import the pool "
2034                                     "anyway:\n"));
2035                                 print_vdev_tree(hdl, NULL, missing, 2);
2036                                 (void) printf("\n");
2037                         }
2038                         (void) zpool_standard_error(hdl, error, desc);
2039                         break;
2040
2041                 case EEXIST:
2042                         (void) zpool_standard_error(hdl, error, desc);
2043                         break;
2044
2045                 case EBUSY:
2046                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2047                             "one or more devices are already in use\n"));
2048                         (void) zfs_error(hdl, EZFS_BADDEV, desc);
2049                         break;
2050                 case ENAMETOOLONG:
2051                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2052                             "new name of at least one dataset is longer than "
2053                             "the maximum allowable length"));
2054                         (void) zfs_error(hdl, EZFS_NAMETOOLONG, desc);
2055                         break;
2056                 default:
2057                         (void) zpool_standard_error(hdl, error, desc);
2058                         zpool_explain_recover(hdl,
2059                             newname ? origname : thename, -error, nv);
2060                         break;
2061                 }
2062
2063                 nvlist_free(nv);
2064                 ret = -1;
2065         } else {
2066                 zpool_handle_t *zhp;
2067
2068                 /*
2069                  * This should never fail, but play it safe anyway.
2070                  */
2071                 if (zpool_open_silent(hdl, thename, &zhp) != 0)
2072                         ret = -1;
2073                 else if (zhp != NULL)
2074                         zpool_close(zhp);
2075                 if (policy.zlp_rewind &
2076                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
2077                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
2078                             ((policy.zlp_rewind & ZPOOL_TRY_REWIND) != 0), nv);
2079                 }
2080                 nvlist_free(nv);
2081                 return (0);
2082         }
2083
2084         return (ret);
2085 }
2086
2087 /*
2088  * Translate vdev names to guids.  If a vdev_path is determined to be
2089  * unsuitable then a vd_errlist is allocated and the vdev path and errno
2090  * are added to it.
2091  */
2092 static int
2093 zpool_translate_vdev_guids(zpool_handle_t *zhp, nvlist_t *vds,
2094     nvlist_t *vdev_guids, nvlist_t *guids_to_paths, nvlist_t **vd_errlist)
2095 {
2096         nvlist_t *errlist = NULL;
2097         int error = 0;
2098
2099         for (nvpair_t *elem = nvlist_next_nvpair(vds, NULL); elem != NULL;
2100             elem = nvlist_next_nvpair(vds, elem)) {
2101                 boolean_t spare, cache;
2102
2103                 char *vd_path = nvpair_name(elem);
2104                 nvlist_t *tgt = zpool_find_vdev(zhp, vd_path, &spare, &cache,
2105                     NULL);
2106
2107                 if ((tgt == NULL) || cache || spare) {
2108                         if (errlist == NULL) {
2109                                 errlist = fnvlist_alloc();
2110                                 error = EINVAL;
2111                         }
2112
2113                         uint64_t err = (tgt == NULL) ? EZFS_NODEVICE :
2114                             (spare ? EZFS_ISSPARE : EZFS_ISL2CACHE);
2115                         fnvlist_add_int64(errlist, vd_path, err);
2116                         continue;
2117                 }
2118
2119                 uint64_t guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
2120                 fnvlist_add_uint64(vdev_guids, vd_path, guid);
2121
2122                 char msg[MAXNAMELEN];
2123                 (void) snprintf(msg, sizeof (msg), "%llu", (u_longlong_t)guid);
2124                 fnvlist_add_string(guids_to_paths, msg, vd_path);
2125         }
2126
2127         if (error != 0) {
2128                 verify(errlist != NULL);
2129                 if (vd_errlist != NULL)
2130                         *vd_errlist = errlist;
2131                 else
2132                         fnvlist_free(errlist);
2133         }
2134
2135         return (error);
2136 }
2137
2138 static int
2139 xlate_init_err(int err)
2140 {
2141         switch (err) {
2142         case ENODEV:
2143                 return (EZFS_NODEVICE);
2144         case EINVAL:
2145         case EROFS:
2146                 return (EZFS_BADDEV);
2147         case EBUSY:
2148                 return (EZFS_INITIALIZING);
2149         case ESRCH:
2150                 return (EZFS_NO_INITIALIZE);
2151         }
2152         return (err);
2153 }
2154
2155 /*
2156  * Begin, suspend, or cancel the initialization (initializing of all free
2157  * blocks) for the given vdevs in the given pool.
2158  */
2159 int
2160 zpool_initialize_impl(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
2161     nvlist_t *vds, boolean_t wait)
2162 {
2163         int err;
2164
2165         nvlist_t *vdev_guids = fnvlist_alloc();
2166         nvlist_t *guids_to_paths = fnvlist_alloc();
2167         nvlist_t *vd_errlist = NULL;
2168         nvlist_t *errlist;
2169         nvpair_t *elem;
2170
2171         err = zpool_translate_vdev_guids(zhp, vds, vdev_guids,
2172             guids_to_paths, &vd_errlist);
2173
2174         if (err != 0) {
2175                 verify(vd_errlist != NULL);
2176                 goto list_errors;
2177         }
2178
2179         err = lzc_initialize(zhp->zpool_name, cmd_type,
2180             vdev_guids, &errlist);
2181
2182         if (err != 0) {
2183                 if (errlist != NULL) {
2184                         vd_errlist = fnvlist_lookup_nvlist(errlist,
2185                             ZPOOL_INITIALIZE_VDEVS);
2186                         goto list_errors;
2187                 }
2188                 (void) zpool_standard_error(zhp->zpool_hdl, err,
2189                     dgettext(TEXT_DOMAIN, "operation failed"));
2190                 goto out;
2191         }
2192
2193         if (wait) {
2194                 for (elem = nvlist_next_nvpair(vdev_guids, NULL); elem != NULL;
2195                     elem = nvlist_next_nvpair(vdev_guids, elem)) {
2196
2197                         uint64_t guid = fnvpair_value_uint64(elem);
2198
2199                         err = lzc_wait_tag(zhp->zpool_name,
2200                             ZPOOL_WAIT_INITIALIZE, guid, NULL);
2201                         if (err != 0) {
2202                                 (void) zpool_standard_error_fmt(zhp->zpool_hdl,
2203                                     err, dgettext(TEXT_DOMAIN, "error "
2204                                     "waiting for '%s' to initialize"),
2205                                     nvpair_name(elem));
2206
2207                                 goto out;
2208                         }
2209                 }
2210         }
2211         goto out;
2212
2213 list_errors:
2214         for (elem = nvlist_next_nvpair(vd_errlist, NULL); elem != NULL;
2215             elem = nvlist_next_nvpair(vd_errlist, elem)) {
2216                 int64_t vd_error = xlate_init_err(fnvpair_value_int64(elem));
2217                 char *path;
2218
2219                 if (nvlist_lookup_string(guids_to_paths, nvpair_name(elem),
2220                     &path) != 0)
2221                         path = nvpair_name(elem);
2222
2223                 (void) zfs_error_fmt(zhp->zpool_hdl, vd_error,
2224                     "cannot initialize '%s'", path);
2225         }
2226
2227 out:
2228         fnvlist_free(vdev_guids);
2229         fnvlist_free(guids_to_paths);
2230
2231         if (vd_errlist != NULL)
2232                 fnvlist_free(vd_errlist);
2233
2234         return (err == 0 ? 0 : -1);
2235 }
2236
/*
 * Begin, suspend, or cancel initialization for the given vdevs without
 * waiting for the operation to complete (wait = B_FALSE); see
 * zpool_initialize_impl().
 */
int
zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
    nvlist_t *vds)
{
	return (zpool_initialize_impl(zhp, cmd_type, vds, B_FALSE));
}
2243
/*
 * Like zpool_initialize(), but block until the operation has completed
 * on every requested vdev (wait = B_TRUE).
 */
int
zpool_initialize_wait(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
    nvlist_t *vds)
{
	return (zpool_initialize_impl(zhp, cmd_type, vds, B_TRUE));
}
2250
2251 static int
2252 xlate_trim_err(int err)
2253 {
2254         switch (err) {
2255         case ENODEV:
2256                 return (EZFS_NODEVICE);
2257         case EINVAL:
2258         case EROFS:
2259                 return (EZFS_BADDEV);
2260         case EBUSY:
2261                 return (EZFS_TRIMMING);
2262         case ESRCH:
2263                 return (EZFS_NO_TRIM);
2264         case EOPNOTSUPP:
2265                 return (EZFS_TRIM_NOTSUP);
2266         }
2267         return (err);
2268 }
2269
2270 /*
2271  * Begin, suspend, or cancel the TRIM (discarding of all free blocks) for
2272  * the given vdevs in the given pool.
2273  */
2274 int
2275 zpool_trim(zpool_handle_t *zhp, pool_trim_func_t cmd_type, nvlist_t *vds,
2276     trimflags_t *trim_flags)
2277 {
2278         char msg[1024];
2279         int err;
2280
2281         nvlist_t *vdev_guids = fnvlist_alloc();
2282         nvlist_t *guids_to_paths = fnvlist_alloc();
2283         nvlist_t *vd_errlist = NULL;
2284         nvlist_t *errlist;
2285         nvpair_t *elem;
2286
2287         err = zpool_translate_vdev_guids(zhp, vds, vdev_guids,
2288             guids_to_paths, &vd_errlist);
2289         if (err == 0) {
2290                 err = lzc_trim(zhp->zpool_name, cmd_type, trim_flags->rate,
2291                     trim_flags->secure, vdev_guids, &errlist);
2292                 if (err == 0) {
2293                         fnvlist_free(vdev_guids);
2294                         fnvlist_free(guids_to_paths);
2295                         return (0);
2296                 }
2297
2298                 if (errlist != NULL) {
2299                         vd_errlist = fnvlist_lookup_nvlist(errlist,
2300                             ZPOOL_TRIM_VDEVS);
2301                 }
2302
2303                 (void) snprintf(msg, sizeof (msg),
2304                     dgettext(TEXT_DOMAIN, "operation failed"));
2305         } else {
2306                 verify(vd_errlist != NULL);
2307         }
2308
2309         for (elem = nvlist_next_nvpair(vd_errlist, NULL);
2310             elem != NULL; elem = nvlist_next_nvpair(vd_errlist, elem)) {
2311                 int64_t vd_error = xlate_trim_err(fnvpair_value_int64(elem));
2312                 char *path;
2313
2314                 /*
2315                  * If only the pool was specified, and it was not a secure
2316                  * trim then suppress warnings for individual vdevs which
2317                  * do not support trimming.
2318                  */
2319                 if (vd_error == EZFS_TRIM_NOTSUP &&
2320                     trim_flags->fullpool &&
2321                     !trim_flags->secure) {
2322                         continue;
2323                 }
2324
2325                 if (nvlist_lookup_string(guids_to_paths, nvpair_name(elem),
2326                     &path) != 0)
2327                         path = nvpair_name(elem);
2328
2329                 (void) zfs_error_fmt(zhp->zpool_hdl, vd_error,
2330                     "cannot trim '%s'", path);
2331         }
2332
2333         fnvlist_free(vdev_guids);
2334         fnvlist_free(guids_to_paths);
2335
2336         if (vd_errlist != NULL) {
2337                 fnvlist_free(vd_errlist);
2338                 return (-1);
2339         }
2340
2341         return (zpool_standard_error(zhp->zpool_hdl, err, msg));
2342 }
2343
/*
 * Scan the pool.
 */
/*
 * Start, pause, or cancel a scan (scrub or resilver) on the pool via
 * ZFS_IOC_POOL_SCAN.
 *
 * Parameters:
 *   zhp  - open pool handle
 *   func - POOL_SCAN_SCRUB, POOL_SCAN_RESILVER, or POOL_SCAN_NONE (cancel)
 *   cmd  - POOL_SCRUB_NORMAL, or POOL_SCRUB_PAUSE (scrub only)
 *
 * Returns 0 on success, or a libzfs error set via zfs_error()/
 * zpool_standard_error() on failure.
 */
int
zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
{
	zfs_cmd_t zc = {"\0"};
	char msg[1024];
	int err;
	libzfs_handle_t *hdl = zhp->zpool_hdl;

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	zc.zc_cookie = func;
	zc.zc_flags = cmd;

	if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0)
		return (0);

	/* Save errno right away; the calls below may clobber it. */
	err = errno;

	/* ECANCELED on a scrub means we resumed a paused scrub */
	if (err == ECANCELED && func == POOL_SCAN_SCRUB &&
	    cmd == POOL_SCRUB_NORMAL)
		return (0);

	/*
	 * ENOENT when starting (not cancelling) a scan is also treated as
	 * success here.  NOTE(review): presumably this covers a deferred
	 * resilver with no outstanding work — confirm against the kernel's
	 * dsl_scan error returns.
	 */
	if (err == ENOENT && func != POOL_SCAN_NONE && cmd == POOL_SCRUB_NORMAL)
		return (0);

	/* Build the operation-specific error-message prefix. */
	if (func == POOL_SCAN_SCRUB) {
		if (cmd == POOL_SCRUB_PAUSE) {
			(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
			    "cannot pause scrubbing %s"), zc.zc_name);
		} else {
			assert(cmd == POOL_SCRUB_NORMAL);
			(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
			    "cannot scrub %s"), zc.zc_name);
		}
	} else if (func == POOL_SCAN_RESILVER) {
		assert(cmd == POOL_SCRUB_NORMAL);
		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
		    "cannot restart resilver on %s"), zc.zc_name);
	} else if (func == POOL_SCAN_NONE) {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
		    zc.zc_name);
	} else {
		assert(!"unexpected result");
	}

	if (err == EBUSY) {
		/*
		 * A scan is already running.  Consult the cached scan stats
		 * to tell the user whether it is a scrub (possibly paused)
		 * or a resilver.
		 */
		nvlist_t *nvroot;
		pool_scan_stat_t *ps = NULL;
		uint_t psc;

		verify(nvlist_lookup_nvlist(zhp->zpool_config,
		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
		(void) nvlist_lookup_uint64_array(nvroot,
		    ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
		if (ps && ps->pss_func == POOL_SCAN_SCRUB) {
			if (cmd == POOL_SCRUB_PAUSE)
				return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
			else
				return (zfs_error(hdl, EZFS_SCRUBBING, msg));
		} else {
			return (zfs_error(hdl, EZFS_RESILVERING, msg));
		}
	} else if (err == ENOENT) {
		/* Nothing to cancel. */
		return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
	} else if (err == ENOTSUP && func == POOL_SCAN_RESILVER) {
		/* Pool lacks the resilver_defer feature. */
		return (zfs_error(hdl, EZFS_NO_RESILVER_DEFER, msg));
	} else {
		return (zpool_standard_error(hdl, err, msg));
	}
}
2418
/*
 * Find a vdev that matches the search criteria specified. We use the
 * nvpair name to determine how we should look for the device.
 * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
 * spare; but FALSE if it's an INUSE spare.
 *
 * 'search' holds exactly one nvpair describing the criterion: a UINT64
 * pair keyed ZPOOL_CONFIG_GUID, or a STRING pair keyed by the config
 * field to compare (path, type, ...).  Recurses depth-first through
 * ZPOOL_CONFIG_CHILDREN, then ZPOOL_CONFIG_SPARES (setting *avail_spare),
 * then ZPOOL_CONFIG_L2CACHE (setting *l2cache).  Returns the matching
 * vdev nvlist or NULL.
 */
static nvlist_t *
vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
    boolean_t *l2cache, boolean_t *log)
{
	uint_t c, children;
	nvlist_t **child;
	nvlist_t *ret;
	uint64_t is_log;
	char *srchkey;
	nvpair_t *pair = nvlist_next_nvpair(search, NULL);

	/* Nothing to look for */
	if (search == NULL || pair == NULL)
		return (NULL);

	/* Obtain the key we will use to search */
	srchkey = nvpair_name(pair);

	switch (nvpair_type(pair)) {
	case DATA_TYPE_UINT64:
		/* A uint64 criterion is only meaningful for guid lookups. */
		if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
			uint64_t srchval, theguid;

			verify(nvpair_value_uint64(pair, &srchval) == 0);
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
			    &theguid) == 0);
			if (theguid == srchval)
				return (nv);
		}
		break;

	case DATA_TYPE_STRING: {
		char *srchval, *val;

		verify(nvpair_value_string(pair, &srchval) == 0);
		/* This vdev doesn't carry the field at all; no match here. */
		if (nvlist_lookup_string(nv, srchkey, &val) != 0)
			break;

		/*
		 * Search for the requested value. Special cases:
		 *
		 * - ZPOOL_CONFIG_PATH for whole disk entries.  These end in
		 *   "-part1", or "p1".  The suffix is hidden from the user,
		 *   but included in the string, so this matches around it.
		 * - ZPOOL_CONFIG_PATH for short names zfs_strcmp_shortname()
		 *   is used to check all possible expanded paths.
		 * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
		 *
		 * Otherwise, all other searches are simple string compares.
		 */
		if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0) {
			uint64_t wholedisk = 0;

			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
			    &wholedisk);
			if (zfs_strcmp_pathname(srchval, val, wholedisk) == 0)
				return (nv);

		} else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
			char *type, *idx, *end, *p;
			uint64_t id, vdev_id;

			/*
			 * Determine our vdev type, keeping in mind
			 * that the srchval is composed of a type and
			 * vdev id pair (i.e. mirror-4).
			 */
			if ((type = strdup(srchval)) == NULL)
				return (NULL);

			if ((p = strrchr(type, '-')) == NULL) {
				free(type);
				break;
			}
			/* Split "mirror-4" into type ("mirror") and idx ("4"). */
			idx = p + 1;
			*p = '\0';

			/*
			 * If the types don't match then keep looking.
			 */
			if (strncmp(val, type, strlen(val)) != 0) {
				free(type);
				break;
			}

			verify(zpool_vdev_is_interior(type));
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
			    &id) == 0);

			/* Clear errno so strtoull() overflow is detectable. */
			errno = 0;
			vdev_id = strtoull(idx, &end, 10);

			free(type);
			if (errno != 0)
				return (NULL);

			/*
			 * Now verify that we have the correct vdev id.
			 */
			if (vdev_id == id)
				return (nv);
		}

		/*
		 * Common case
		 */
		if (strcmp(srchval, val) == 0)
			return (nv);
		break;
	}

	default:
		break;
	}

	/* No match on this vdev; recurse into its children, if any. */
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		return (NULL);

	for (c = 0; c < children; c++) {
		if ((ret = vdev_to_nvlist_iter(child[c], search,
		    avail_spare, l2cache, NULL)) != NULL) {
			/*
			 * The 'is_log' value is only set for the toplevel
			 * vdev, not the leaf vdevs.  So we always lookup the
			 * log device from the root of the vdev tree (where
			 * 'log' is non-NULL).
			 */
			if (log != NULL &&
			    nvlist_lookup_uint64(child[c],
			    ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
			    is_log) {
				*log = B_TRUE;
			}
			return (ret);
		}
	}

	/* Also search the spare devices, flagging any match as a spare. */
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if ((ret = vdev_to_nvlist_iter(child[c], search,
			    avail_spare, l2cache, NULL)) != NULL) {
				*avail_spare = B_TRUE;
				return (ret);
			}
		}
	}

	/* And the L2ARC cache devices, flagging any match as l2cache. */
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if ((ret = vdev_to_nvlist_iter(child[c], search,
			    avail_spare, l2cache, NULL)) != NULL) {
				*l2cache = B_TRUE;
				return (ret);
			}
		}
	}

	return (NULL);
}
2587
2588 /*
2589  * Given a physical path or guid, find the associated vdev.
2590  */
2591 nvlist_t *
2592 zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
2593     boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
2594 {
2595         nvlist_t *search, *nvroot, *ret;
2596         uint64_t guid;
2597         char *end;
2598
2599         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2600
2601         guid = strtoull(ppath, &end, 0);
2602         if (guid != 0 && *end == '\0') {
2603                 verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
2604         } else {
2605                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH,
2606                     ppath) == 0);
2607         }
2608
2609         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2610             &nvroot) == 0);
2611
2612         *avail_spare = B_FALSE;
2613         *l2cache = B_FALSE;
2614         if (log != NULL)
2615                 *log = B_FALSE;
2616         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2617         nvlist_free(search);
2618
2619         return (ret);
2620 }
2621
2622 /*
2623  * Determine if we have an "interior" top-level vdev (i.e mirror/raidz).
2624  */
2625 static boolean_t
2626 zpool_vdev_is_interior(const char *name)
2627 {
2628         if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
2629             strncmp(name, VDEV_TYPE_SPARE, strlen(VDEV_TYPE_SPARE)) == 0 ||
2630             strncmp(name,
2631             VDEV_TYPE_REPLACING, strlen(VDEV_TYPE_REPLACING)) == 0 ||
2632             strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
2633                 return (B_TRUE);
2634         return (B_FALSE);
2635 }
2636
2637 nvlist_t *
2638 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
2639     boolean_t *l2cache, boolean_t *log)
2640 {
2641         char *end;
2642         nvlist_t *nvroot, *search, *ret;
2643         uint64_t guid;
2644
2645         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2646
2647         guid = strtoull(path, &end, 0);
2648         if (guid != 0 && *end == '\0') {
2649                 verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
2650         } else if (zpool_vdev_is_interior(path)) {
2651                 verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
2652         } else {
2653                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
2654         }
2655
2656         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2657             &nvroot) == 0);
2658
2659         *avail_spare = B_FALSE;
2660         *l2cache = B_FALSE;
2661         if (log != NULL)
2662                 *log = B_FALSE;
2663         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2664         nvlist_free(search);
2665
2666         return (ret);
2667 }
2668
2669 static int
2670 vdev_is_online(nvlist_t *nv)
2671 {
2672         uint64_t ival;
2673
2674         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
2675             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
2676             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
2677                 return (0);
2678
2679         return (1);
2680 }
2681
2682 /*
2683  * Helper function for zpool_get_physpaths().
2684  */
2685 static int
2686 vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
2687     size_t *bytes_written)
2688 {
2689         size_t bytes_left, pos, rsz;
2690         char *tmppath;
2691         const char *format;
2692
2693         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
2694             &tmppath) != 0)
2695                 return (EZFS_NODEVICE);
2696
2697         pos = *bytes_written;
2698         bytes_left = physpath_size - pos;
2699         format = (pos == 0) ? "%s" : " %s";
2700
2701         rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
2702         *bytes_written += rsz;
2703
2704         if (rsz >= bytes_left) {
2705                 /* if physpath was not copied properly, clear it */
2706                 if (bytes_left != 0) {
2707                         physpath[pos] = 0;
2708                 }
2709                 return (EZFS_NOSPC);
2710         }
2711         return (0);
2712 }
2713
/*
 * Recursively collect the physical paths of all online disk vdevs under
 * 'nv' into 'physpath' (space-separated), advancing '*rsz' as entries
 * are appended.
 *
 * NOTE(review): the function falls through to EZFS_POOL_INVALARG even
 * after a successful traversal; the caller (zpool_get_config_physpath)
 * ignores the return value and judges success by '*rsz' instead — keep
 * that in mind before "fixing" the tail return.
 */
static int
vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
    size_t *rsz, boolean_t is_spare)
{
	char *type;
	int ret;

	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
		return (EZFS_INVALCONFIG);

	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
		/*
		 * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
		 * For a spare vdev, we only want to boot from the active
		 * spare device.
		 */
		if (is_spare) {
			uint64_t spare = 0;
			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
			    &spare);
			if (!spare)
				return (EZFS_INVALCONFIG);
		}

		/* Only online disks contribute a physical path. */
		if (vdev_is_online(nv)) {
			if ((ret = vdev_get_one_physpath(nv, physpath,
			    phypath_size, rsz)) != 0)
				return (ret);
		}
	} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
	    strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
	    strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
	    /*
	     * Deliberate assignment: entering a spare vdev turns on
	     * is_spare for the recursion into its children.
	     */
	    (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
		nvlist_t **child;
		uint_t count;
		int i, ret;	/* inner 'ret' shadows the outer one */

		if (nvlist_lookup_nvlist_array(nv,
		    ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
			return (EZFS_INVALCONFIG);

		/* Only a hard out-of-space error aborts the traversal. */
		for (i = 0; i < count; i++) {
			ret = vdev_get_physpaths(child[i], physpath,
			    phypath_size, rsz, is_spare);
			if (ret == EZFS_NOSPC)
				return (ret);
		}
	}

	return (EZFS_POOL_INVALARG);
}
2765
2766 /*
2767  * Get phys_path for a root pool config.
2768  * Return 0 on success; non-zero on failure.
2769  */
2770 static int
2771 zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
2772 {
2773         size_t rsz;
2774         nvlist_t *vdev_root;
2775         nvlist_t **child;
2776         uint_t count;
2777         char *type;
2778
2779         rsz = 0;
2780
2781         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2782             &vdev_root) != 0)
2783                 return (EZFS_INVALCONFIG);
2784
2785         if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
2786             nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
2787             &child, &count) != 0)
2788                 return (EZFS_INVALCONFIG);
2789
2790         /*
2791          * root pool can only have a single top-level vdev.
2792          */
2793         if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1)
2794                 return (EZFS_POOL_INVALARG);
2795
2796         (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
2797             B_FALSE);
2798
2799         /* No online devices */
2800         if (rsz == 0)
2801                 return (EZFS_NODEVICE);
2802
2803         return (0);
2804 }
2805
2806 /*
2807  * Get phys_path for a root pool
2808  * Return 0 on success; non-zero on failure.
2809  */
2810 int
2811 zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
2812 {
2813         return (zpool_get_config_physpath(zhp->zpool_config, physpath,
2814             phypath_size));
2815 }
2816
/*
 * If the device has been dynamically expanded then we need to relabel
 * the disk to use the new unallocated space.
 *
 * 'msg' is the caller-built error prefix passed through to zfs_error().
 * Returns 0 on success (including the benign out-of-space case), or a
 * libzfs error on open/relabel failure.
 */
static int
zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg)
{
	int fd, error;

	/* O_DIRECT so label writes bypass the page cache. */
	if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
		    "relabel '%s': unable to open device: %d"), path, errno);
		return (zfs_error(hdl, EZFS_OPENFAILED, msg));
	}

	/*
	 * It's possible that we might encounter an error if the device
	 * does not have any unallocated space left. If so, we simply
	 * ignore that error and continue on.
	 *
	 * Also, we don't call efi_rescan() - that would just return EBUSY.
	 * The module will do it for us in vdev_disk_open().
	 */
	error = efi_use_whole_disk(fd);

	/* Flush the buffers to disk and invalidate the page cache. */
	(void) fsync(fd);
	(void) ioctl(fd, BLKFLSBUF);

	(void) close(fd);
	/* VT_ENOSPC (no unallocated space) is the benign case noted above. */
	if (error && error != VT_ENOSPC) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
		    "relabel '%s': unable to read disk capacity"), path);
		return (zfs_error(hdl, EZFS_NOCAP, msg));
	}

	return (0);
}
2855
2856 /*
2857  * Convert a vdev path to a GUID.  Returns GUID or 0 on error.
2858  *
2859  * If is_spare, is_l2cache, or is_log is non-NULL, then store within it
2860  * if the VDEV is a spare, l2cache, or log device.  If they're NULL then
2861  * ignore them.
2862  */
2863 static uint64_t
2864 zpool_vdev_path_to_guid_impl(zpool_handle_t *zhp, const char *path,
2865     boolean_t *is_spare, boolean_t *is_l2cache, boolean_t *is_log)
2866 {
2867         uint64_t guid;
2868         boolean_t spare = B_FALSE, l2cache = B_FALSE, log = B_FALSE;
2869         nvlist_t *tgt;
2870
2871         if ((tgt = zpool_find_vdev(zhp, path, &spare, &l2cache,
2872             &log)) == NULL)
2873                 return (0);
2874
2875         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &guid) == 0);
2876         if (is_spare != NULL)
2877                 *is_spare = spare;
2878         if (is_l2cache != NULL)
2879                 *is_l2cache = l2cache;
2880         if (is_log != NULL)
2881                 *is_log = log;
2882
2883         return (guid);
2884 }
2885
2886 /* Convert a vdev path to a GUID.  Returns GUID or 0 on error. */
2887 uint64_t
2888 zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path)
2889 {
2890         return (zpool_vdev_path_to_guid_impl(zhp, path, NULL, NULL, NULL));
2891 }
2892
/*
 * Bring the specified vdev online.   The 'flags' parameter is a set of the
 * ZFS_ONLINE_* flags.
 *
 * On success, stores the resulting vdev state in '*newstate' and returns
 * 0; otherwise returns a libzfs error.  When expansion is requested (or
 * autoexpand is on) and the target is a whole disk, the disk is
 * relabeled first to claim any newly unallocated space.
 */
int
zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
    vdev_state_t *newstate)
{
	zfs_cmd_t zc = {"\0"};
	char msg[1024];
	char *pathname;
	nvlist_t *tgt;
	boolean_t avail_spare, l2cache, islog;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	int error;

	if (flags & ZFS_ONLINE_EXPAND) {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
	} else {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
	}

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
	    &islog)) == NULL)
		return (zfs_error(hdl, EZFS_NODEVICE, msg));

	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);

	/* Available (unclaimed) spares can't be onlined. */
	if (avail_spare)
		return (zfs_error(hdl, EZFS_ISSPARE, msg));

	/*
	 * When expansion was requested explicitly, or autoexpand is set on
	 * the pool, handle the relabel step for whole-disk vdevs first.
	 */
	if ((flags & ZFS_ONLINE_EXPAND ||
	    zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) &&
	    nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH, &pathname) == 0) {
		uint64_t wholedisk = 0;

		(void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
		    &wholedisk);

		/*
		 * XXX - L2ARC 1.0 devices can't support expansion.
		 */
		if (l2cache) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cannot expand cache devices"));
			return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
		}

		if (wholedisk) {
			const char *fullpath = path;
			char buf[MAXPATHLEN];

			/* Expand a short name (e.g. "sda") to a full path. */
			if (path[0] != '/') {
				error = zfs_resolve_shortname(path, buf,
				    sizeof (buf));
				if (error != 0)
					return (zfs_error(hdl, EZFS_NODEVICE,
					    msg));

				fullpath = buf;
			}

			error = zpool_relabel_disk(hdl, fullpath, msg);
			if (error != 0)
				return (error);
		}
	}

	zc.zc_cookie = VDEV_STATE_ONLINE;
	zc.zc_obj = flags;

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
		/* EINVAL here means the device now belongs to a split pool. */
		if (errno == EINVAL) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
			    "from this pool into a new one.  Use '%s' "
			    "instead"), "zpool detach");
			return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
		}
		return (zpool_standard_error(hdl, errno, msg));
	}

	/* The ioctl echoes the new state back through zc_cookie. */
	*newstate = zc.zc_cookie;
	return (0);
}
2980
2981 /*
2982  * Take the specified vdev offline
2983  */
2984 int
2985 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
2986 {
2987         zfs_cmd_t zc = {"\0"};
2988         char msg[1024];
2989         nvlist_t *tgt;
2990         boolean_t avail_spare, l2cache;
2991         libzfs_handle_t *hdl = zhp->zpool_hdl;
2992
2993         (void) snprintf(msg, sizeof (msg),
2994             dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
2995
2996         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2997         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2998             NULL)) == NULL)
2999                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3000
3001         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
3002
3003         if (avail_spare)
3004                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
3005
3006         zc.zc_cookie = VDEV_STATE_OFFLINE;
3007         zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
3008
3009         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
3010                 return (0);
3011
3012         switch (errno) {
3013         case EBUSY:
3014
3015                 /*
3016                  * There are no other replicas of this device.
3017                  */
3018                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
3019
3020         case EEXIST:
3021                 /*
3022                  * The log device has unplayed logs
3023                  */
3024                 return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
3025
3026         default:
3027                 return (zpool_standard_error(hdl, errno, msg));
3028         }
3029 }
3030
3031 /*
3032  * Mark the given vdev faulted.
3033  */
3034 int
3035 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
3036 {
3037         zfs_cmd_t zc = {"\0"};
3038         char msg[1024];
3039         libzfs_handle_t *hdl = zhp->zpool_hdl;
3040
3041         (void) snprintf(msg, sizeof (msg),
3042             dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
3043
3044         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3045         zc.zc_guid = guid;
3046         zc.zc_cookie = VDEV_STATE_FAULTED;
3047         zc.zc_obj = aux;
3048
3049         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
3050                 return (0);
3051
3052         switch (errno) {
3053         case EBUSY:
3054
3055                 /*
3056                  * There are no other replicas of this device.
3057                  */
3058                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
3059
3060         default:
3061                 return (zpool_standard_error(hdl, errno, msg));
3062         }
3063
3064 }
3065
3066 /*
3067  * Mark the given vdev degraded.
3068  */
3069 int
3070 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
3071 {
3072         zfs_cmd_t zc = {"\0"};
3073         char msg[1024];
3074         libzfs_handle_t *hdl = zhp->zpool_hdl;
3075
3076         (void) snprintf(msg, sizeof (msg),
3077             dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
3078
3079         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3080         zc.zc_guid = guid;
3081         zc.zc_cookie = VDEV_STATE_DEGRADED;
3082         zc.zc_obj = aux;
3083
3084         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
3085                 return (0);
3086
3087         return (zpool_standard_error(hdl, errno, msg));
3088 }
3089
/*
 * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
 * a hot spare.
 *
 * Walks the tree rooted at 'search' looking for a spare vdev whose
 * child at index 'which' is exactly the nvlist 'tgt' (pointer identity,
 * so 'tgt' must come from the same config tree as 'search').
 */
static boolean_t
is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
{
	nvlist_t **child;
	uint_t c, children;
	char *type;

	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
	    &children) == 0) {
		verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
		    &type) == 0);

		/*
		 * A spare group always has exactly two children: the
		 * original device and the spare that replaced it.
		 */
		if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
		    children == 2 && child[which] == tgt)
			return (B_TRUE);

		for (c = 0; c < children; c++)
			if (is_replacing_spare(child[c], tgt, which))
				return (B_TRUE);
	}

	return (B_FALSE);
}
3117
3118 /*
3119  * Attach new_disk (fully described by nvroot) to old_disk.
3120  * If 'replacing' is specified, the new disk will replace the old one.
3121  */
3122 int
3123 zpool_vdev_attach(zpool_handle_t *zhp,
3124     const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
3125 {
3126         zfs_cmd_t zc = {"\0"};
3127         char msg[1024];
3128         int ret;
3129         nvlist_t *tgt;
3130         boolean_t avail_spare, l2cache, islog;
3131         uint64_t val;
3132         char *newname;
3133         nvlist_t **child;
3134         uint_t children;
3135         nvlist_t *config_root;
3136         libzfs_handle_t *hdl = zhp->zpool_hdl;
3137         boolean_t rootpool = zpool_is_bootable(zhp);
3138
3139         if (replacing)
3140                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
3141                     "cannot replace %s with %s"), old_disk, new_disk);
3142         else
3143                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
3144                     "cannot attach %s to %s"), new_disk, old_disk);
3145
3146         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3147         if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
3148             &islog)) == NULL)
3149                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3150
3151         if (avail_spare)
3152                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
3153
3154         if (l2cache)
3155                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
3156
3157         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
3158         zc.zc_cookie = replacing;
3159
3160         if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
3161             &child, &children) != 0 || children != 1) {
3162                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3163                     "new device must be a single disk"));
3164                 return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
3165         }
3166
3167         verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
3168             ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
3169
3170         if ((newname = zpool_vdev_name(NULL, NULL, child[0], 0)) == NULL)
3171                 return (-1);
3172
3173         /*
3174          * If the target is a hot spare that has been swapped in, we can only
3175          * replace it with another hot spare.
3176          */
3177         if (replacing &&
3178             nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
3179             (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
3180             NULL) == NULL || !avail_spare) &&
3181             is_replacing_spare(config_root, tgt, 1)) {
3182                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3183                     "can only be replaced by another hot spare"));
3184                 free(newname);
3185                 return (zfs_error(hdl, EZFS_BADTARGET, msg));
3186         }
3187
3188         free(newname);
3189
3190         if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
3191                 return (-1);
3192
3193         ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);
3194
3195         zcmd_free_nvlists(&zc);
3196
3197         if (ret == 0) {
3198                 if (rootpool) {
3199                         /*
3200                          * XXX need a better way to prevent user from
3201                          * booting up a half-baked vdev.
3202                          */
3203                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
3204                             "sure to wait until resilver is done "
3205                             "before rebooting.\n"));
3206                 }
3207                 return (0);
3208         }
3209
3210         switch (errno) {
3211         case ENOTSUP:
3212                 /*
3213                  * Can't attach to or replace this type of vdev.
3214                  */
3215                 if (replacing) {
3216                         uint64_t version = zpool_get_prop_int(zhp,
3217                             ZPOOL_PROP_VERSION, NULL);
3218
3219                         if (islog)
3220                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3221                                     "cannot replace a log with a spare"));
3222                         else if (version >= SPA_VERSION_MULTI_REPLACE)
3223                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3224                                     "already in replacing/spare config; wait "
3225                                     "for completion or use 'zpool detach'"));
3226                         else
3227                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3228                                     "cannot replace a replacing device"));
3229                 } else {
3230                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3231                             "can only attach to mirrors and top-level "
3232                             "disks"));
3233                 }
3234                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
3235                 break;
3236
3237         case EINVAL:
3238                 /*
3239                  * The new device must be a single disk.
3240                  */
3241                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3242                     "new device must be a single disk"));
3243                 (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
3244                 break;
3245
3246         case EBUSY:
3247                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy, "
3248                     "or device removal is in progress"),
3249                     new_disk);
3250                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
3251                 break;
3252
3253         case EOVERFLOW:
3254                 /*
3255                  * The new device is too small.
3256                  */
3257                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3258                     "device is too small"));
3259                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
3260                 break;
3261
3262         case EDOM:
3263                 /*
3264                  * The new device has a different optimal sector size.
3265                  */
3266                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3267                     "new device has a different optimal sector size; use the "
3268                     "option '-o ashift=N' to override the optimal size"));
3269                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
3270                 break;
3271
3272         case ENAMETOOLONG:
3273                 /*
3274                  * The resulting top-level vdev spec won't fit in the label.
3275                  */
3276                 (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
3277                 break;
3278
3279         default:
3280                 (void) zpool_standard_error(hdl, errno, msg);
3281         }
3282
3283         return (-1);
3284 }
3285
3286 /*
3287  * Detach the specified device.
3288  */
3289 int
3290 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
3291 {
3292         zfs_cmd_t zc = {"\0"};
3293         char msg[1024];
3294         nvlist_t *tgt;
3295         boolean_t avail_spare, l2cache;
3296         libzfs_handle_t *hdl = zhp->zpool_hdl;
3297
3298         (void) snprintf(msg, sizeof (msg),
3299             dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
3300
3301         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3302         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3303             NULL)) == NULL)
3304                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3305
3306         if (avail_spare)
3307                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
3308
3309         if (l2cache)
3310                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
3311
3312         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
3313
3314         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
3315                 return (0);
3316
3317         switch (errno) {
3318
3319         case ENOTSUP:
3320                 /*
3321                  * Can't detach from this type of vdev.
3322                  */
3323                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
3324                     "applicable to mirror and replacing vdevs"));
3325                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
3326                 break;
3327
3328         case EBUSY:
3329                 /*
3330                  * There are no other replicas of this device.
3331                  */
3332                 (void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
3333                 break;
3334
3335         default:
3336                 (void) zpool_standard_error(hdl, errno, msg);
3337         }
3338
3339         return (-1);
3340 }
3341
3342 /*
3343  * Find a mirror vdev in the source nvlist.
3344  *
3345  * The mchild array contains a list of disks in one of the top-level mirrors
3346  * of the source pool.  The schild array contains a list of disks that the
3347  * user specified on the command line.  We loop over the mchild array to
3348  * see if any entry in the schild array matches.
3349  *
3350  * If a disk in the mchild array is found in the schild array, we return
3351  * the index of that entry.  Otherwise we return -1.
3352  */
3353 static int
3354 find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
3355     nvlist_t **schild, uint_t schildren)
3356 {
3357         uint_t mc;
3358
3359         for (mc = 0; mc < mchildren; mc++) {
3360                 uint_t sc;
3361                 char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
3362                     mchild[mc], 0);
3363
3364                 for (sc = 0; sc < schildren; sc++) {
3365                         char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
3366                             schild[sc], 0);
3367                         boolean_t result = (strcmp(mpath, spath) == 0);
3368
3369                         free(spath);
3370                         if (result) {
3371                                 free(mpath);
3372                                 return (mc);
3373                         }
3374                 }
3375
3376                 free(mpath);
3377         }
3378
3379         return (-1);
3380 }
3381
3382 /*
3383  * Split a mirror pool.  If newroot points to null, then a new nvlist
3384  * is generated and it is the responsibility of the caller to free it.
3385  */
3386 int
3387 zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
3388     nvlist_t *props, splitflags_t flags)
3389 {
3390         zfs_cmd_t zc = {"\0"};
3391         char msg[1024];
3392         nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
3393         nvlist_t **varray = NULL, *zc_props = NULL;
3394         uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
3395         libzfs_handle_t *hdl = zhp->zpool_hdl;
3396         uint64_t vers, readonly = B_FALSE;
3397         boolean_t freelist = B_FALSE, memory_err = B_TRUE;
3398         int retval = 0;
3399
3400         (void) snprintf(msg, sizeof (msg),
3401             dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
3402
3403         if (!zpool_name_valid(hdl, B_FALSE, newname))
3404                 return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
3405
3406         if ((config = zpool_get_config(zhp, NULL)) == NULL) {
3407                 (void) fprintf(stderr, gettext("Internal error: unable to "
3408                     "retrieve pool configuration\n"));
3409                 return (-1);
3410         }
3411
3412         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
3413             == 0);
3414         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);
3415
3416         if (props) {
3417                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
3418                 if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
3419                     props, vers, flags, msg)) == NULL)
3420                         return (-1);
3421                 (void) nvlist_lookup_uint64(zc_props,
3422                     zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly);
3423                 if (readonly) {
3424                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3425                             "property %s can only be set at import time"),
3426                             zpool_prop_to_name(ZPOOL_PROP_READONLY));
3427                         return (-1);
3428                 }
3429         }
3430
3431         if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
3432             &children) != 0) {
3433                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3434                     "Source pool is missing vdev tree"));
3435                 nvlist_free(zc_props);
3436                 return (-1);
3437         }
3438
3439         varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
3440         vcount = 0;
3441
3442         if (*newroot == NULL ||
3443             nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
3444             &newchild, &newchildren) != 0)
3445                 newchildren = 0;
3446
3447         for (c = 0; c < children; c++) {
3448                 uint64_t is_log = B_FALSE, is_hole = B_FALSE;
3449                 char *type;
3450                 nvlist_t **mchild, *vdev;
3451                 uint_t mchildren;
3452                 int entry;
3453
3454                 /*
3455                  * Unlike cache & spares, slogs are stored in the
3456                  * ZPOOL_CONFIG_CHILDREN array.  We filter them out here.
3457                  */
3458                 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
3459                     &is_log);
3460                 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
3461                     &is_hole);
3462                 if (is_log || is_hole) {
3463                         /*
3464                          * Create a hole vdev and put it in the config.
3465                          */
3466                         if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
3467                                 goto out;
3468                         if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
3469                             VDEV_TYPE_HOLE) != 0)
3470                                 goto out;
3471                         if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
3472                             1) != 0)
3473                                 goto out;
3474                         if (lastlog == 0)
3475                                 lastlog = vcount;
3476                         varray[vcount++] = vdev;
3477                         continue;
3478                 }
3479                 lastlog = 0;
3480                 verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
3481                     == 0);
3482                 if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
3483                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3484                             "Source pool must be composed only of mirrors\n"));
3485                         retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
3486                         goto out;
3487                 }
3488
3489                 verify(nvlist_lookup_nvlist_array(child[c],
3490                     ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
3491
3492                 /* find or add an entry for this top-level vdev */
3493                 if (newchildren > 0 &&
3494                     (entry = find_vdev_entry(zhp, mchild, mchildren,
3495                     newchild, newchildren)) >= 0) {
3496                         /* We found a disk that the user specified. */
3497                         vdev = mchild[entry];
3498                         ++found;
3499                 } else {
3500                         /* User didn't specify a disk for this vdev. */
3501                         vdev = mchild[mchildren - 1];
3502                 }
3503
3504                 if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
3505                         goto out;
3506         }
3507
3508         /* did we find every disk the user specified? */
3509         if (found != newchildren) {
3510                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
3511                     "include at most one disk from each mirror"));
3512                 retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
3513                 goto out;
3514         }
3515
3516         /* Prepare the nvlist for populating. */
3517         if (*newroot == NULL) {
3518                 if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
3519                         goto out;
3520                 freelist = B_TRUE;
3521                 if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
3522                     VDEV_TYPE_ROOT) != 0)
3523                         goto out;
3524         } else {
3525                 verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
3526         }
3527
3528         /* Add all the children we found */
3529         if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
3530             lastlog == 0 ? vcount : lastlog) != 0)
3531                 goto out;
3532
3533         /*
3534          * If we're just doing a dry run, exit now with success.
3535          */
3536         if (flags.dryrun) {
3537                 memory_err = B_FALSE;
3538                 freelist = B_FALSE;
3539                 goto out;
3540         }
3541
3542         /* now build up the config list & call the ioctl */
3543         if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
3544                 goto out;
3545
3546         if (nvlist_add_nvlist(newconfig,
3547             ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
3548             nvlist_add_string(newconfig,
3549             ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
3550             nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
3551                 goto out;
3552
3553         /*
3554          * The new pool is automatically part of the namespace unless we
3555          * explicitly export it.
3556          */
3557         if (!flags.import)
3558                 zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
3559         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3560         (void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
3561         if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
3562                 goto out;
3563         if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
3564                 goto out;
3565
3566         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
3567                 retval = zpool_standard_error(hdl, errno, msg);
3568                 goto out;
3569         }
3570
3571         freelist = B_FALSE;
3572         memory_err = B_FALSE;
3573
3574 out:
3575         if (varray != NULL) {
3576                 int v;
3577
3578                 for (v = 0; v < vcount; v++)
3579                         nvlist_free(varray[v]);
3580                 free(varray);
3581         }
3582         zcmd_free_nvlists(&zc);
3583         nvlist_free(zc_props);
3584         nvlist_free(newconfig);
3585         if (freelist) {
3586                 nvlist_free(*newroot);
3587                 *newroot = NULL;
3588         }
3589
3590         if (retval != 0)
3591                 return (retval);
3592
3593         if (memory_err)
3594                 return (no_memory(hdl));
3595
3596         return (0);
3597 }
3598
3599 /*
3600  * Remove the given device.
3601  */
3602 int
3603 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
3604 {
3605         zfs_cmd_t zc = {"\0"};
3606         char msg[1024];
3607         nvlist_t *tgt;
3608         boolean_t avail_spare, l2cache, islog;
3609         libzfs_handle_t *hdl = zhp->zpool_hdl;
3610         uint64_t version;
3611
3612         (void) snprintf(msg, sizeof (msg),
3613             dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
3614
3615         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3616         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3617             &islog)) == NULL)
3618                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3619
3620         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
3621         if (islog && version < SPA_VERSION_HOLES) {
3622                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3623                     "pool must be upgraded to support log removal"));
3624                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
3625         }
3626
3627         if (!islog && !avail_spare && !l2cache && zpool_is_bootable(zhp)) {
3628                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3629                     "root pool can not have removed devices, "
3630                     "because GRUB does not understand them"));
3631                 return (zfs_error(hdl, EINVAL, msg));
3632         }
3633
3634         zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
3635
3636         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
3637                 return (0);
3638
3639         switch (errno) {
3640
3641         case EINVAL:
3642                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3643                     "invalid config; all top-level vdevs must "
3644                     "have the same sector size and not be raidz."));
3645                 (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
3646                 break;
3647
3648         case EBUSY:
3649                 if (islog) {
3650                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3651                             "Mount encrypted datasets to replay logs."));
3652                 } else {
3653                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3654                             "Pool busy; removal may already be in progress"));
3655                 }
3656                 (void) zfs_error(hdl, EZFS_BUSY, msg);
3657                 break;
3658
3659         case EACCES:
3660                 if (islog) {
3661                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3662                             "Mount encrypted datasets to replay logs."));
3663                         (void) zfs_error(hdl, EZFS_BUSY, msg);
3664                 } else {
3665                         (void) zpool_standard_error(hdl, errno, msg);
3666                 }
3667                 break;
3668
3669         default:
3670                 (void) zpool_standard_error(hdl, errno, msg);
3671         }
3672         return (-1);
3673 }
3674
3675 int
3676 zpool_vdev_remove_cancel(zpool_handle_t *zhp)
3677 {
3678         zfs_cmd_t zc;
3679         char msg[1024];
3680         libzfs_handle_t *hdl = zhp->zpool_hdl;
3681
3682         (void) snprintf(msg, sizeof (msg),
3683             dgettext(TEXT_DOMAIN, "cannot cancel removal"));
3684
3685         bzero(&zc, sizeof (zc));
3686         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3687         zc.zc_cookie = 1;
3688
3689         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
3690                 return (0);
3691
3692         return (zpool_standard_error(hdl, errno, msg));
3693 }
3694
3695 int
3696 zpool_vdev_indirect_size(zpool_handle_t *zhp, const char *path,
3697     uint64_t *sizep)
3698 {
3699         char msg[1024];
3700         nvlist_t *tgt;
3701         boolean_t avail_spare, l2cache, islog;
3702         libzfs_handle_t *hdl = zhp->zpool_hdl;
3703
3704         (void) snprintf(msg, sizeof (msg),
3705             dgettext(TEXT_DOMAIN, "cannot determine indirect size of %s"),
3706             path);
3707
3708         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3709             &islog)) == NULL)
3710                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3711
3712         if (avail_spare || l2cache || islog) {
3713                 *sizep = 0;
3714                 return (0);
3715         }
3716
3717         if (nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_INDIRECT_SIZE, sizep) != 0) {
3718                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3719                     "indirect size not available"));
3720                 return (zfs_error(hdl, EINVAL, msg));
3721         }
3722         return (0);
3723 }
3724
3725 /*
3726  * Clear the errors for the pool, or the particular device if specified.
3727  */
3728 int
3729 zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
3730 {
3731         zfs_cmd_t zc = {"\0"};
3732         char msg[1024];
3733         nvlist_t *tgt;
3734         zpool_load_policy_t policy;
3735         boolean_t avail_spare, l2cache;
3736         libzfs_handle_t *hdl = zhp->zpool_hdl;
3737         nvlist_t *nvi = NULL;
3738         int error;
3739
3740         if (path)
3741                 (void) snprintf(msg, sizeof (msg),
3742                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3743                     path);
3744         else
3745                 (void) snprintf(msg, sizeof (msg),
3746                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3747                     zhp->zpool_name);
3748
3749         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3750         if (path) {
3751                 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
3752                     &l2cache, NULL)) == NULL)
3753                         return (zfs_error(hdl, EZFS_NODEVICE, msg));
3754
3755                 /*
3756                  * Don't allow error clearing for hot spares.  Do allow
3757                  * error clearing for l2cache devices.
3758                  */
3759                 if (avail_spare)
3760                         return (zfs_error(hdl, EZFS_ISSPARE, msg));
3761
3762                 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
3763                     &zc.zc_guid) == 0);
3764         }
3765
3766         zpool_get_load_policy(rewindnvl, &policy);
3767         zc.zc_cookie = policy.zlp_rewind;
3768
3769         if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
3770                 return (-1);
3771
3772         if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
3773                 return (-1);
3774
3775         while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
3776             errno == ENOMEM) {
3777                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3778                         zcmd_free_nvlists(&zc);
3779                         return (-1);
3780                 }
3781         }
3782
3783         if (!error || ((policy.zlp_rewind & ZPOOL_TRY_REWIND) &&
3784             errno != EPERM && errno != EACCES)) {
3785                 if (policy.zlp_rewind &
3786                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
3787                         (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
3788                         zpool_rewind_exclaim(hdl, zc.zc_name,
3789                             ((policy.zlp_rewind & ZPOOL_TRY_REWIND) != 0),
3790                             nvi);
3791                         nvlist_free(nvi);
3792                 }
3793                 zcmd_free_nvlists(&zc);
3794                 return (0);
3795         }
3796
3797         zcmd_free_nvlists(&zc);
3798         return (zpool_standard_error(hdl, errno, msg));
3799 }
3800
3801 /*
3802  * Similar to zpool_clear(), but takes a GUID (used by fmd).
3803  */
3804 int
3805 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
3806 {
3807         zfs_cmd_t zc = {"\0"};
3808         char msg[1024];
3809         libzfs_handle_t *hdl = zhp->zpool_hdl;
3810
3811         (void) snprintf(msg, sizeof (msg),
3812             dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
3813             (u_longlong_t)guid);
3814
3815         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3816         zc.zc_guid = guid;
3817         zc.zc_cookie = ZPOOL_NO_REWIND;
3818
3819         if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
3820                 return (0);
3821
3822         return (zpool_standard_error(hdl, errno, msg));
3823 }
3824
3825 /*
3826  * Change the GUID for a pool.
3827  */
3828 int
3829 zpool_reguid(zpool_handle_t *zhp)
3830 {
3831         char msg[1024];
3832         libzfs_handle_t *hdl = zhp->zpool_hdl;
3833         zfs_cmd_t zc = {"\0"};
3834
3835         (void) snprintf(msg, sizeof (msg),
3836             dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name);
3837
3838         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3839         if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0)
3840                 return (0);
3841
3842         return (zpool_standard_error(hdl, errno, msg));
3843 }
3844
3845 /*
3846  * Reopen the pool.
3847  */
3848 int
3849 zpool_reopen_one(zpool_handle_t *zhp, void *data)
3850 {
3851         libzfs_handle_t *hdl = zpool_get_handle(zhp);
3852         const char *pool_name = zpool_get_name(zhp);
3853         boolean_t *scrub_restart = data;
3854         int error;
3855
3856         error = lzc_reopen(pool_name, *scrub_restart);
3857         if (error) {
3858                 return (zpool_standard_error_fmt(hdl, error,
3859                     dgettext(TEXT_DOMAIN, "cannot reopen '%s'"), pool_name));
3860         }
3861
3862         return (0);
3863 }
3864
3865 /* call into libzfs_core to execute the sync IOCTL per pool */
3866 int
3867 zpool_sync_one(zpool_handle_t *zhp, void *data)
3868 {
3869         int ret;
3870         libzfs_handle_t *hdl = zpool_get_handle(zhp);
3871         const char *pool_name = zpool_get_name(zhp);
3872         boolean_t *force = data;
3873         nvlist_t *innvl = fnvlist_alloc();
3874
3875         fnvlist_add_boolean_value(innvl, "force", *force);
3876         if ((ret = lzc_sync(pool_name, innvl, NULL)) != 0) {
3877                 nvlist_free(innvl);
3878                 return (zpool_standard_error_fmt(hdl, ret,
3879                     dgettext(TEXT_DOMAIN, "sync '%s' failed"), pool_name));
3880         }
3881         nvlist_free(innvl);
3882
3883         return (0);
3884 }
3885
3886 #define PATH_BUF_LEN    64
3887
3888 /*
3889  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
3890  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
3891  * We also check if this is a whole disk, in which case we strip off the
3892  * trailing 's0' slice name.
3893  *
3894  * This routine is also responsible for identifying when disks have been
3895  * reconfigured in a new location.  The kernel will have opened the device by
3896  * devid, but the path will still refer to the old location.  To catch this, we
3897  * first do a path -> devid translation (which is fast for the common case).  If
3898  * the devid matches, we're done.  If not, we do a reverse devid -> path
3899  * translation and issue the appropriate ioctl() to update the path of the vdev.
3900  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
3901  * of these checks.
3902  */
3903 char *
3904 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
3905     int name_flags)
3906 {
3907         char *path, *type, *env;
3908         uint64_t value;
3909         char buf[PATH_BUF_LEN];
3910         char tmpbuf[PATH_BUF_LEN];
3911
3912         /*
3913          * vdev_name will be "root"/"root-0" for the root vdev, but it is the
3914          * zpool name that will be displayed to the user.
3915          */
3916         verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
3917         if (zhp != NULL && strcmp(type, "root") == 0)
3918                 return (zfs_strdup(hdl, zpool_get_name(zhp)));
3919
3920         env = getenv("ZPOOL_VDEV_NAME_PATH");
3921         if (env && (strtoul(env, NULL, 0) > 0 ||
3922             !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3923                 name_flags |= VDEV_NAME_PATH;
3924
3925         env = getenv("ZPOOL_VDEV_NAME_GUID");
3926         if (env && (strtoul(env, NULL, 0) > 0 ||
3927             !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3928                 name_flags |= VDEV_NAME_GUID;
3929
3930         env = getenv("ZPOOL_VDEV_NAME_FOLLOW_LINKS");
3931         if (env && (strtoul(env, NULL, 0) > 0 ||
3932             !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3933                 name_flags |= VDEV_NAME_FOLLOW_LINKS;
3934
3935         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &value) == 0 ||
3936             name_flags & VDEV_NAME_GUID) {
3937                 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value);
3938                 (void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)value);
3939                 path = buf;
3940         } else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
3941                 if (name_flags & VDEV_NAME_FOLLOW_LINKS) {
3942                         char *rp = realpath(path, NULL);
3943                         if (rp) {
3944                                 strlcpy(buf, rp, sizeof (buf));
3945                                 path = buf;
3946                                 free(rp);
3947                         }
3948                 }
3949
3950                 /*
3951                  * For a block device only use the name.
3952                  */
3953                 if ((strcmp(type, VDEV_TYPE_DISK) == 0) &&
3954                     !(name_flags & VDEV_NAME_PATH)) {
3955                         path = strrchr(path, '/');
3956                         path++;
3957                 }
3958
3959                 /*
3960                  * Remove the partition from the path it this is a whole disk.
3961                  */
3962                 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value)
3963                     == 0 && value && !(name_flags & VDEV_NAME_PATH)) {
3964                         return (zfs_strip_partition(path));
3965                 }
3966         } else {
3967                 path = type;
3968
3969                 /*
3970                  * If it's a raidz device, we need to stick in the parity level.
3971                  */
3972                 if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
3973                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
3974                             &value) == 0);
3975                         (void) snprintf(buf, sizeof (buf), "%s%llu", path,
3976                             (u_longlong_t)value);
3977                         path = buf;
3978                 }
3979
3980                 /*
3981                  * We identify each top-level vdev by using a <type-id>
3982                  * naming convention.
3983                  */
3984                 if (name_flags & VDEV_NAME_TYPE_ID) {
3985                         uint64_t id;
3986                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
3987                             &id) == 0);
3988                         (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s-%llu",
3989                             path, (u_longlong_t)id);
3990                         path = tmpbuf;
3991                 }
3992         }
3993
3994         return (zfs_strdup(hdl, path));
3995 }
3996
3997 static int
3998 zbookmark_mem_compare(const void *a, const void *b)
3999 {
4000         return (memcmp(a, b, sizeof (zbookmark_phys_t)));
4001 }
4002
4003 /*
4004  * Retrieve the persistent error log, uniquify the members, and return to the
4005  * caller.
4006  */
4007 int
4008 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
4009 {
4010         zfs_cmd_t zc = {"\0"};
4011         libzfs_handle_t *hdl = zhp->zpool_hdl;
4012         uint64_t count;
4013         zbookmark_phys_t *zb = NULL;
4014         int i;
4015
4016         /*
4017          * Retrieve the raw error list from the kernel.  If the number of errors
4018          * has increased, allocate more space and continue until we get the
4019          * entire list.
4020          */
4021         verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
4022             &count) == 0);
4023         if (count == 0)
4024                 return (0);
4025         zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
4026             count * sizeof (zbookmark_phys_t));
4027         zc.zc_nvlist_dst_size = count;
4028         (void) strcpy(zc.zc_name, zhp->zpool_name);
4029         for (;;) {
4030                 if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
4031                     &zc) != 0) {
4032                         free((void *)(uintptr_t)zc.zc_nvlist_dst);
4033                         if (errno == ENOMEM) {
4034                                 void *dst;
4035
4036                                 count = zc.zc_nvlist_dst_size;
4037                                 dst = zfs_alloc(zhp->zpool_hdl, count *
4038                                     sizeof (zbookmark_phys_t));
4039                                 zc.zc_nvlist_dst = (uintptr_t)dst;
4040                         } else {
4041                                 return (zpool_standard_error_fmt(hdl, errno,
4042                                     dgettext(TEXT_DOMAIN, "errors: List of "
4043                                     "errors unavailable")));
4044                         }
4045                 } else {
4046                         break;
4047                 }
4048         }
4049
4050         /*
4051          * Sort the resulting bookmarks.  This is a little confusing due to the
4052          * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
4053          * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks
4054          * _not_ copied as part of the process.  So we point the start of our
4055          * array appropriate and decrement the total number of elements.
4056          */
4057         zb = ((zbookmark_phys_t *)(uintptr_t)zc.zc_nvlist_dst) +
4058             zc.zc_nvlist_dst_size;
4059         count -= zc.zc_nvlist_dst_size;
4060
4061         qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare);
4062
4063         verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
4064
4065         /*
4066          * Fill in the nverrlistp with nvlist's of dataset and object numbers.
4067          */
4068         for (i = 0; i < count; i++) {
4069                 nvlist_t *nv;
4070
4071                 /* ignoring zb_blkid and zb_level for now */
4072                 if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
4073                     zb[i-1].zb_object == zb[i].zb_object)
4074                         continue;
4075
4076                 if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
4077                         goto nomem;
4078                 if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
4079                     zb[i].zb_objset) != 0) {
4080                         nvlist_free(nv);
4081                         goto nomem;
4082                 }
4083                 if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
4084                     zb[i].zb_object) != 0) {
4085                         nvlist_free(nv);
4086                         goto nomem;
4087                 }
4088                 if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
4089                         nvlist_free(nv);
4090                         goto nomem;
4091                 }
4092                 nvlist_free(nv);
4093         }
4094
4095         free((void *)(uintptr_t)zc.zc_nvlist_dst);
4096         return (0);
4097
4098 nomem:
4099         free((void *)(uintptr_t)zc.zc_nvlist_dst);
4100         return (no_memory(zhp->zpool_hdl));
4101 }
4102
4103 /*
4104  * Upgrade a ZFS pool to the latest on-disk version.
4105  */
4106 int
4107 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
4108 {
4109         zfs_cmd_t zc = {"\0"};
4110         libzfs_handle_t *hdl = zhp->zpool_hdl;
4111
4112         (void) strcpy(zc.zc_name, zhp->zpool_name);
4113         zc.zc_cookie = new_version;
4114
4115         if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
4116                 return (zpool_standard_error_fmt(hdl, errno,
4117                     dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
4118                     zhp->zpool_name));
4119         return (0);
4120 }
4121
4122 void
4123 zfs_save_arguments(int argc, char **argv, char *string, int len)
4124 {
4125         int i;
4126
4127         (void) strlcpy(string, basename(argv[0]), len);
4128         for (i = 1; i < argc; i++) {
4129                 (void) strlcat(string, " ", len);
4130                 (void) strlcat(string, argv[i], len);
4131         }
4132 }
4133
4134 int
4135 zpool_log_history(libzfs_handle_t *hdl, const char *message)
4136 {
4137         zfs_cmd_t zc = {"\0"};
4138         nvlist_t *args;
4139         int err;
4140
4141         args = fnvlist_alloc();
4142         fnvlist_add_string(args, "message", message);
4143         err = zcmd_write_src_nvlist(hdl, &zc, args);
4144         if (err == 0)
4145                 err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc);
4146         nvlist_free(args);
4147         zcmd_free_nvlists(&zc);
4148         return (err);
4149 }
4150
4151 /*
4152  * Perform ioctl to get some command history of a pool.
4153  *
4154  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
4155  * logical offset of the history buffer to start reading from.
4156  *
4157  * Upon return, 'off' is the next logical offset to read from and
4158  * 'len' is the actual amount of bytes read into 'buf'.
4159  */
4160 static int
4161 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
4162 {
4163         zfs_cmd_t zc = {"\0"};
4164         libzfs_handle_t *hdl = zhp->zpool_hdl;
4165
4166         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
4167
4168         zc.zc_history = (uint64_t)(uintptr_t)buf;
4169         zc.zc_history_len = *len;
4170         zc.zc_history_offset = *off;
4171
4172         if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
4173                 switch (errno) {
4174                 case EPERM:
4175                         return (zfs_error_fmt(hdl, EZFS_PERM,
4176                             dgettext(TEXT_DOMAIN,
4177                             "cannot show history for pool '%s'"),
4178                             zhp->zpool_name));
4179                 case ENOENT:
4180                         return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
4181                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
4182                             "'%s'"), zhp->zpool_name));
4183                 case ENOTSUP:
4184                         return (zfs_error_fmt(hdl, EZFS_BADVERSION,
4185                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
4186                             "'%s', pool must be upgraded"), zhp->zpool_name));
4187                 default:
4188                         return (zpool_standard_error_fmt(hdl, errno,
4189                             dgettext(TEXT_DOMAIN,
4190                             "cannot get history for '%s'"), zhp->zpool_name));
4191                 }
4192         }
4193
4194         *len = zc.zc_history_len;
4195         *off = zc.zc_history_offset;
4196
4197         return (0);
4198 }
4199
4200 /*
4201  * Retrieve the command history of a pool.
4202  */
4203 int
4204 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
4205 {
4206         char *buf;
4207         int buflen = 128 * 1024;
4208         uint64_t off = 0;
4209         nvlist_t **records = NULL;
4210         uint_t numrecords = 0;
4211         int err, i;
4212
4213         buf = malloc(buflen);
4214         if (buf == NULL)
4215                 return (ENOMEM);
4216         do {
4217                 uint64_t bytes_read = buflen;
4218                 uint64_t leftover;
4219
4220                 if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
4221                         break;
4222
4223                 /* if nothing else was read in, we're at EOF, just return */
4224                 if (!bytes_read)
4225                         break;
4226
4227                 if ((err = zpool_history_unpack(buf, bytes_read,
4228                     &leftover, &records, &numrecords)) != 0)
4229                         break;
4230                 off -= leftover;
4231                 if (leftover == bytes_read) {
4232                         /*
4233                          * no progress made, because buffer is not big enough
4234                          * to hold this record; resize and retry.
4235                          */
4236                         buflen *= 2;
4237                         free(buf);
4238                         buf = malloc(buflen);
4239                         if (buf == NULL)
4240                                 return (ENOMEM);
4241                 }
4242
4243                 /* CONSTCOND */
4244         } while (1);
4245
4246         free(buf);
4247
4248         if (!err) {
4249                 verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
4250                 verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
4251                     records, numrecords) == 0);
4252         }
4253         for (i = 0; i < numrecords; i++)
4254                 nvlist_free(records[i]);
4255         free(records);
4256
4257         return (err);
4258 }
4259
/*
 * Retrieve the next event given the passed 'zevent_fd' file descriptor.
 * If there is a new event available 'nvp' will contain a newly allocated
 * nvlist and 'dropped' will be set to the number of missed events since
 * the last call to this function.  When 'nvp' is set to NULL it indicates
 * no new events are available.  In either case the function returns 0 and
 * it is up to the caller to free 'nvp'.  In the case of a fatal error the
 * function will return a non-zero value.  When the function is called in
 * blocking mode (the default, unless the ZEVENT_NONBLOCK flag is passed),
 * it will not return until a new event is available.
 */
int
zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp,
    int *dropped, unsigned flags, int zevent_fd)
{
	zfs_cmd_t zc = {"\0"};
	int error = 0;

	*nvp = NULL;
	*dropped = 0;
	zc.zc_cleanup_fd = zevent_fd;

	/*
	 * NOTE(review): zc_guid appears to double as the flags word for
	 * this ioctl -- confirm against the kernel-side handler.
	 */
	if (flags & ZEVENT_NONBLOCK)
		zc.zc_guid = ZEVENT_NONBLOCK;

	if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0)
		return (-1);

retry:
	if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) != 0) {
		switch (errno) {
		case ESHUTDOWN:
			error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
			    dgettext(TEXT_DOMAIN, "zfs shutdown"));
			goto out;
		case ENOENT:
			/* Blocking error case should not occur */
			if (!(flags & ZEVENT_NONBLOCK))
				error = zpool_standard_error_fmt(hdl, errno,
				    dgettext(TEXT_DOMAIN, "cannot get event"));

			goto out;
		case ENOMEM:
			/* Destination buffer too small: grow it and retry. */
			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
				error = zfs_error_fmt(hdl, EZFS_NOMEM,
				    dgettext(TEXT_DOMAIN, "cannot get event"));
				goto out;
			} else {
				goto retry;
			}
		default:
			error = zpool_standard_error_fmt(hdl, errno,
			    dgettext(TEXT_DOMAIN, "cannot get event"));
			goto out;
		}
	}

	error = zcmd_read_dst_nvlist(hdl, &zc, nvp);
	if (error != 0)
		goto out;

	/* zc_cookie carries the count of events dropped since last read. */
	*dropped = (int)zc.zc_cookie;
out:
	zcmd_free_nvlists(&zc);

	return (error);
}
4327
4328 /*
4329  * Clear all events.
4330  */
4331 int
4332 zpool_events_clear(libzfs_handle_t *hdl, int *count)
4333 {
4334         zfs_cmd_t zc = {"\0"};
4335         char msg[1024];
4336
4337         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
4338             "cannot clear events"));
4339
4340         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0)
4341                 return (zpool_standard_error_fmt(hdl, errno, msg));
4342
4343         if (count != NULL)
4344                 *count = (int)zc.zc_cookie; /* # of events cleared */
4345
4346         return (0);
4347 }
4348
4349 /*
4350  * Seek to a specific EID, ZEVENT_SEEK_START, or ZEVENT_SEEK_END for
4351  * the passed zevent_fd file handle.  On success zero is returned,
4352  * otherwise -1 is returned and hdl->libzfs_error is set to the errno.
4353  */
4354 int
4355 zpool_events_seek(libzfs_handle_t *hdl, uint64_t eid, int zevent_fd)
4356 {
4357         zfs_cmd_t zc = {"\0"};
4358         int error = 0;
4359
4360         zc.zc_guid = eid;
4361         zc.zc_cleanup_fd = zevent_fd;
4362
4363         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_SEEK, &zc) != 0) {
4364                 switch (errno) {
4365                 case ENOENT:
4366                         error = zfs_error_fmt(hdl, EZFS_NOENT,
4367                             dgettext(TEXT_DOMAIN, "cannot get event"));
4368                         break;
4369
4370                 case ENOMEM:
4371                         error = zfs_error_fmt(hdl, EZFS_NOMEM,
4372                             dgettext(TEXT_DOMAIN, "cannot get event"));
4373                         break;
4374
4375                 default:
4376                         error = zpool_standard_error_fmt(hdl, errno,
4377                             dgettext(TEXT_DOMAIN, "cannot get event"));
4378                         break;
4379                 }
4380         }
4381
4382         return (error);
4383 }
4384
4385 void
4386 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
4387     char *pathname, size_t len)
4388 {
4389         zfs_cmd_t zc = {"\0"};
4390         boolean_t mounted = B_FALSE;
4391         char *mntpnt = NULL;
4392         char dsname[ZFS_MAX_DATASET_NAME_LEN];
4393
4394         if (dsobj == 0) {
4395                 /* special case for the MOS */
4396                 (void) snprintf(pathname, len, "<metadata>:<0x%llx>",
4397                     (longlong_t)obj);
4398                 return;
4399         }
4400
4401         /* get the dataset's name */
4402         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
4403         zc.zc_obj = dsobj;
4404         if (ioctl(zhp->zpool_hdl->libzfs_fd,
4405             ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
4406                 /* just write out a path of two object numbers */
4407                 (void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
4408                     (longlong_t)dsobj, (longlong_t)obj);
4409                 return;
4410         }
4411         (void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
4412
4413         /* find out if the dataset is mounted */
4414         mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
4415
4416         /* get the corrupted object's path */
4417         (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
4418         zc.zc_obj = obj;
4419         if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
4420             &zc) == 0) {
4421                 if (mounted) {
4422                         (void) snprintf(pathname, len, "%s%s", mntpnt,
4423                             zc.zc_value);
4424                 } else {
4425                         (void) snprintf(pathname, len, "%s:%s",
4426                             dsname, zc.zc_value);
4427                 }
4428         } else {
4429                 (void) snprintf(pathname, len, "%s:<0x%llx>", dsname,
4430                     (longlong_t)obj);
4431         }
4432         free(mntpnt);
4433 }
4434
4435 /*
4436  * Read the EFI label from the config, if a label does not exist then
4437  * pass back the error to the caller. If the caller has passed a non-NULL
4438  * diskaddr argument then we set it to the starting address of the EFI
4439  * partition.
4440  */
4441 static int
4442 read_efi_label(nvlist_t *config, diskaddr_t *sb)
4443 {
4444         char *path;
4445         int fd;
4446         char diskname[MAXPATHLEN];
4447         int err = -1;
4448
4449         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
4450                 return (err);
4451
4452         (void) snprintf(diskname, sizeof (diskname), "%s%s", DISK_ROOT,
4453             strrchr(path, '/'));
4454         if ((fd = open(diskname, O_RDONLY|O_DIRECT)) >= 0) {
4455                 struct dk_gpt *vtoc;
4456
4457                 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
4458                         if (sb != NULL)
4459                                 *sb = vtoc->efi_parts[0].p_start;
4460                         efi_free(vtoc);
4461                 }
4462                 (void) close(fd);
4463         }
4464         return (err);
4465 }
4466
4467 /*
4468  * determine where a partition starts on a disk in the current
4469  * configuration
4470  */
4471 static diskaddr_t
4472 find_start_block(nvlist_t *config)
4473 {
4474         nvlist_t **child;
4475         uint_t c, children;
4476         diskaddr_t sb = MAXOFFSET_T;
4477         uint64_t wholedisk;
4478
4479         if (nvlist_lookup_nvlist_array(config,
4480             ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
4481                 if (nvlist_lookup_uint64(config,
4482                     ZPOOL_CONFIG_WHOLE_DISK,
4483                     &wholedisk) != 0 || !wholedisk) {
4484                         return (MAXOFFSET_T);
4485                 }
4486                 if (read_efi_label(config, &sb) < 0)
4487                         sb = MAXOFFSET_T;
4488                 return (sb);
4489         }
4490
4491         for (c = 0; c < children; c++) {
4492                 sb = find_start_block(child[c]);
4493                 if (sb != MAXOFFSET_T) {
4494                         return (sb);
4495                 }
4496         }
4497         return (MAXOFFSET_T);
4498 }
4499
4500 static int
4501 zpool_label_disk_check(char *path)
4502 {
4503         struct dk_gpt *vtoc;
4504         int fd, err;
4505
4506         if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
4507                 return (errno);
4508
4509         if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
4510                 (void) close(fd);
4511                 return (err);
4512         }
4513
4514         if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
4515                 efi_free(vtoc);
4516                 (void) close(fd);
4517                 return (EIDRM);
4518         }
4519
4520         efi_free(vtoc);
4521         (void) close(fd);
4522         return (0);
4523 }
4524
/*
 * Generate a unique partition name for the ZFS member.  Partitions must
 * have unique names to ensure udev will be able to create symlinks under
 * /dev/disk/by-partlabel/ for all pool members.  The generated names are
 * of the form zfs-<16 hex digits>.
 */
static void
zpool_label_name(char *label_name, int label_size)
{
	uint64_t unique = 0;
	int fd = open("/dev/urandom", O_RDONLY);

	/* Prefer kernel entropy for the unique suffix. */
	if (fd >= 0) {
		if (read(fd, &unique, sizeof (unique)) != sizeof (unique))
			unique = 0;

		close(fd);
	}

	/* Weak fallback when /dev/urandom cannot be read. */
	if (unique == 0)
		unique = (((uint64_t)rand()) << 32) | (uint64_t)rand();

	snprintf(label_name, label_size, "zfs-%016llx",
	    (unsigned long long)unique);
}
4550
/*
 * Label an individual disk.  The name provided is the short name,
 * stripped of any leading /dev path.  Writes a fresh EFI (GPT) label
 * with one large data partition and the standard reserved partition,
 * then waits for udev and verifies the label reads back cleanly.
 * Returns 0 on success, or a zfs_error code with details set on 'hdl'.
 */
int
zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
{
	char path[MAXPATHLEN];
	struct dk_gpt *vtoc;
	int rval, fd;
	size_t resv = EFI_MIN_RESV_SIZE;
	uint64_t slice_size;
	diskaddr_t start_block;
	char errbuf[1024];

	/* prepare an error message just in case */
	(void) snprintf(errbuf, sizeof (errbuf),
	    dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);

	if (zhp) {
		nvlist_t *nvroot;

		verify(nvlist_lookup_nvlist(zhp->zpool_config,
		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);

		/* Reuse the pool's cached start block, or derive it once. */
		if (zhp->zpool_start_block == 0)
			start_block = find_start_block(nvroot);
		else
			start_block = zhp->zpool_start_block;
		zhp->zpool_start_block = start_block;
	} else {
		/* new pool */
		start_block = NEW_START_BLOCK;
	}

	(void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);

	if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) {
		/*
		 * This shouldn't happen.  We've long since verified that this
		 * is a valid device.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
		    "label '%s': unable to open device: %d"), path, errno);
		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
	}

	if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
		/*
		 * The only way this can fail is if we run out of memory, or we
		 * were unable to read the disk's capacity
		 */
		if (errno == ENOMEM)
			(void) no_memory(hdl);

		(void) close(fd);
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
		    "label '%s': unable to read disk capacity"), path);

		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
	}

	/*
	 * Size the main data slice: whole disk minus the reserved area and
	 * the start offset, aligned down to the partition end alignment.
	 */
	slice_size = vtoc->efi_last_u_lba + 1;
	slice_size -= EFI_MIN_RESV_SIZE;
	if (start_block == MAXOFFSET_T)
		start_block = NEW_START_BLOCK;
	slice_size -= start_block;
	slice_size = P2ALIGN(slice_size, PARTITION_END_ALIGNMENT);

	vtoc->efi_parts[0].p_start = start_block;
	vtoc->efi_parts[0].p_size = slice_size;

	/*
	 * Why we use V_USR: V_BACKUP confuses users, and is considered
	 * disposable by some EFI utilities (since EFI doesn't have a backup
	 * slice).  V_UNASSIGNED is supposed to be used only for zero size
	 * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
	 * etc. were all pretty specific.  V_USR is as close to reality as we
	 * can get, in the absence of V_OTHER.
	 */
	vtoc->efi_parts[0].p_tag = V_USR;
	zpool_label_name(vtoc->efi_parts[0].p_name, EFI_PART_NAME_LEN);

	/* Partition 8 is the traditional reserved slice. */
	vtoc->efi_parts[8].p_start = slice_size + start_block;
	vtoc->efi_parts[8].p_size = resv;
	vtoc->efi_parts[8].p_tag = V_RESERVED;

	rval = efi_write(fd, vtoc);

	/* Flush the buffers to disk and invalidate the page cache. */
	(void) fsync(fd);
	(void) ioctl(fd, BLKFLSBUF);

	/* Ask the kernel to re-read the new partition table. */
	if (rval == 0)
		rval = efi_rescan(fd);

	/*
	 * Some block drivers (like pcata) may not support EFI GPT labels.
	 * Print out a helpful error message directing the user to manually
	 * label the disk and give a specific slice.
	 */
	if (rval != 0) {
		(void) close(fd);
		efi_free(vtoc);

		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
		    "parted(8) and then provide a specific slice: %d"), rval);
		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
	}

	(void) close(fd);
	efi_free(vtoc);

	(void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
	(void) zfs_append_partition(path, MAXPATHLEN);

	/* Wait for udev to signal that the new partition has settled. */
	rval = zpool_label_disk_wait(path, DISK_LABEL_WAIT);
	if (rval) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
		    "detect device partitions on '%s': %d"), path, rval);
		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
	}

	/* We can't be too paranoid.  Read the label back and verify it. */
	(void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
	rval = zpool_label_disk_check(path);
	if (rval) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
		    "EFI label on '%s' is damaged.  Ensure\nthis device "
		    "is not in use, and is functioning properly: %d"),
		    path, rval);
		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
	}

	return (0);
}
4688
4689 /*
4690  * Wait while the specified activity is in progress in the pool.
4691  */
4692 int
4693 zpool_wait(zpool_handle_t *zhp, zpool_wait_activity_t activity)
4694 {
4695         boolean_t missing;
4696
4697         int error = zpool_wait_status(zhp, activity, &missing, NULL);
4698
4699         if (missing) {
4700                 (void) zpool_standard_error_fmt(zhp->zpool_hdl, ENOENT,
4701                     dgettext(TEXT_DOMAIN, "error waiting in pool '%s'"),
4702                     zhp->zpool_name);
4703                 return (ENOENT);
4704         } else {
4705                 return (error);
4706         }
4707 }
4708
4709 /*
4710  * Wait for the given activity and return the status of the wait (whether or not
4711  * any waiting was done) in the 'waited' parameter. Non-existent pools are
4712  * reported via the 'missing' parameter, rather than by printing an error
4713  * message. This is convenient when this function is called in a loop over a
4714  * long period of time (as it is, for example, by zpool's wait cmd). In that
4715  * scenario, a pool being exported or destroyed should be considered a normal
4716  * event, so we don't want to print an error when we find that the pool doesn't
4717  * exist.
4718  */
4719 int
4720 zpool_wait_status(zpool_handle_t *zhp, zpool_wait_activity_t activity,
4721     boolean_t *missing, boolean_t *waited)
4722 {
4723         int error = lzc_wait(zhp->zpool_name, activity, waited);
4724         *missing = (error == ENOENT);
4725         if (*missing)
4726                 return (0);
4727
4728         if (error != 0) {
4729                 (void) zpool_standard_error_fmt(zhp->zpool_hdl, error,
4730                     dgettext(TEXT_DOMAIN, "error waiting in pool '%s'"),
4731                     zhp->zpool_name);
4732         }
4733
4734         return (error);
4735 }