]> granicus.if.org Git - zfs/blob - lib/libzfs/libzfs_pool.c
cstyle: Resolve C style issues
[zfs] / lib / libzfs / libzfs_pool.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25  * Copyright (c) 2012 by Delphix. All rights reserved.
26  */
27
28 #include <ctype.h>
29 #include <errno.h>
30 #include <devid.h>
31 #include <fcntl.h>
32 #include <libintl.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <strings.h>
36 #include <unistd.h>
37 #include <libgen.h>
38 #include <zone.h>
39 #include <sys/stat.h>
40 #include <sys/efi_partition.h>
41 #include <sys/vtoc.h>
42 #include <sys/zfs_ioctl.h>
43 #include <dlfcn.h>
44
45 #include "zfs_namecheck.h"
46 #include "zfs_prop.h"
47 #include "libzfs_impl.h"
48 #include "zfs_comutil.h"
49 #include "zfeature_common.h"
50
/*
 * Forward declaration: pool_uses_efi() below calls read_efi_label() before
 * any definition of it has been seen.
 */
static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
52
/*
 * Context flags passed to zpool_valid_proplist() describing when the
 * properties are being validated.
 *
 * The fields are declared unsigned on purpose: a signed one-bit bit-field
 * can only hold 0 and -1, so storing 1 (or B_TRUE) into it would read back
 * as -1 and break any '== 1' / '== B_TRUE' comparison.
 */
typedef struct prop_flags {
        unsigned int create:1;  /* Validate property on creation */
        unsigned int import:1;  /* Validate property on import */
} prop_flags_t;
57
58 /*
59  * ====================================================================
60  *   zpool property functions
61  * ====================================================================
62  */
63
64 static int
65 zpool_get_all_props(zpool_handle_t *zhp)
66 {
67         zfs_cmd_t zc = {"\0"};
68         libzfs_handle_t *hdl = zhp->zpool_hdl;
69
70         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
71
72         if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
73                 return (-1);
74
75         while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
76                 if (errno == ENOMEM) {
77                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
78                                 zcmd_free_nvlists(&zc);
79                                 return (-1);
80                         }
81                 } else {
82                         zcmd_free_nvlists(&zc);
83                         return (-1);
84                 }
85         }
86
87         if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
88                 zcmd_free_nvlists(&zc);
89                 return (-1);
90         }
91
92         zcmd_free_nvlists(&zc);
93
94         return (0);
95 }
96
97 static int
98 zpool_props_refresh(zpool_handle_t *zhp)
99 {
100         nvlist_t *old_props;
101
102         old_props = zhp->zpool_props;
103
104         if (zpool_get_all_props(zhp) != 0)
105                 return (-1);
106
107         nvlist_free(old_props);
108         return (0);
109 }
110
111 static char *
112 zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
113     zprop_source_t *src)
114 {
115         nvlist_t *nv, *nvl;
116         uint64_t ival;
117         char *value;
118         zprop_source_t source;
119
120         nvl = zhp->zpool_props;
121         if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
122                 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
123                 source = ival;
124                 verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
125         } else {
126                 source = ZPROP_SRC_DEFAULT;
127                 if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
128                         value = "-";
129         }
130
131         if (src)
132                 *src = source;
133
134         return (value);
135 }
136
137 uint64_t
138 zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
139 {
140         nvlist_t *nv, *nvl;
141         uint64_t value;
142         zprop_source_t source;
143
144         if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
145                 /*
146                  * zpool_get_all_props() has most likely failed because
147                  * the pool is faulted, but if all we need is the top level
148                  * vdev's guid then get it from the zhp config nvlist.
149                  */
150                 if ((prop == ZPOOL_PROP_GUID) &&
151                     (nvlist_lookup_nvlist(zhp->zpool_config,
152                     ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
153                     (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
154                     == 0)) {
155                         return (value);
156                 }
157                 return (zpool_prop_default_numeric(prop));
158         }
159
160         nvl = zhp->zpool_props;
161         if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
162                 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
163                 source = value;
164                 verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
165         } else {
166                 source = ZPROP_SRC_DEFAULT;
167                 value = zpool_prop_default_numeric(prop);
168         }
169
170         if (src)
171                 *src = source;
172
173         return (value);
174 }
175
176 /*
177  * Map VDEV STATE to printed strings.
178  */
179 char *
180 zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
181 {
182         switch (state) {
183         default:
184                 break;
185         case VDEV_STATE_CLOSED:
186         case VDEV_STATE_OFFLINE:
187                 return (gettext("OFFLINE"));
188         case VDEV_STATE_REMOVED:
189                 return (gettext("REMOVED"));
190         case VDEV_STATE_CANT_OPEN:
191                 if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
192                         return (gettext("FAULTED"));
193                 else if (aux == VDEV_AUX_SPLIT_POOL)
194                         return (gettext("SPLIT"));
195                 else
196                         return (gettext("UNAVAIL"));
197         case VDEV_STATE_FAULTED:
198                 return (gettext("FAULTED"));
199         case VDEV_STATE_DEGRADED:
200                 return (gettext("DEGRADED"));
201         case VDEV_STATE_HEALTHY:
202                 return (gettext("ONLINE"));
203         }
204
205         return (gettext("UNKNOWN"));
206 }
207
208 /*
209  * Map POOL STATE to printed strings.
210  */
211 const char *
212 zpool_pool_state_to_name(pool_state_t state)
213 {
214         switch (state) {
215         default:
216                 break;
217         case POOL_STATE_ACTIVE:
218                 return (gettext("ACTIVE"));
219         case POOL_STATE_EXPORTED:
220                 return (gettext("EXPORTED"));
221         case POOL_STATE_DESTROYED:
222                 return (gettext("DESTROYED"));
223         case POOL_STATE_SPARE:
224                 return (gettext("SPARE"));
225         case POOL_STATE_L2CACHE:
226                 return (gettext("L2CACHE"));
227         case POOL_STATE_UNINITIALIZED:
228                 return (gettext("UNINITIALIZED"));
229         case POOL_STATE_UNAVAIL:
230                 return (gettext("UNAVAIL"));
231         case POOL_STATE_POTENTIALLY_ACTIVE:
232                 return (gettext("POTENTIALLY_ACTIVE"));
233         }
234
235         return (gettext("UNKNOWN"));
236 }
237
238 /*
239  * API compatibility wrapper around zpool_get_prop_literal
240  */
241 int
242 zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
243     zprop_source_t *srctype)
244 {
245         return (zpool_get_prop_literal(zhp, prop, buf, len, srctype, B_FALSE));
246 }
247
248 /*
249  * Get a zpool property value for 'prop' and return the value in
250  * a pre-allocated buffer.
251  */
252 int
253 zpool_get_prop_literal(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
254     size_t len, zprop_source_t *srctype, boolean_t literal)
255 {
256         uint64_t intval;
257         const char *strval;
258         zprop_source_t src = ZPROP_SRC_NONE;
259         nvlist_t *nvroot;
260         vdev_stat_t *vs;
261         uint_t vsc;
262
263         if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
264                 switch (prop) {
265                 case ZPOOL_PROP_NAME:
266                         (void) strlcpy(buf, zpool_get_name(zhp), len);
267                         break;
268
269                 case ZPOOL_PROP_HEALTH:
270                         (void) strlcpy(buf, "FAULTED", len);
271                         break;
272
273                 case ZPOOL_PROP_GUID:
274                         intval = zpool_get_prop_int(zhp, prop, &src);
275                         (void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
276                         break;
277
278                 case ZPOOL_PROP_ALTROOT:
279                 case ZPOOL_PROP_CACHEFILE:
280                 case ZPOOL_PROP_COMMENT:
281                         if (zhp->zpool_props != NULL ||
282                             zpool_get_all_props(zhp) == 0) {
283                                 (void) strlcpy(buf,
284                                     zpool_get_prop_string(zhp, prop, &src),
285                                     len);
286                                 if (srctype != NULL)
287                                         *srctype = src;
288                                 return (0);
289                         }
290                         /* FALLTHROUGH */
291                 default:
292                         (void) strlcpy(buf, "-", len);
293                         break;
294                 }
295
296                 if (srctype != NULL)
297                         *srctype = src;
298                 return (0);
299         }
300
301         if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
302             prop != ZPOOL_PROP_NAME)
303                 return (-1);
304
305         switch (zpool_prop_get_type(prop)) {
306         case PROP_TYPE_STRING:
307                 (void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
308                     len);
309                 break;
310
311         case PROP_TYPE_NUMBER:
312                 intval = zpool_get_prop_int(zhp, prop, &src);
313
314                 switch (prop) {
315                 case ZPOOL_PROP_SIZE:
316                 case ZPOOL_PROP_ALLOCATED:
317                 case ZPOOL_PROP_FREE:
318                 case ZPOOL_PROP_FREEING:
319                 case ZPOOL_PROP_EXPANDSZ:
320                 case ZPOOL_PROP_ASHIFT:
321                         if (literal)
322                                 (void) snprintf(buf, len, "%llu",
323                                         (u_longlong_t)intval);
324                         else
325                                 (void) zfs_nicenum(intval, buf, len);
326                         break;
327
328                 case ZPOOL_PROP_CAPACITY:
329                         (void) snprintf(buf, len, "%llu%%",
330                             (u_longlong_t)intval);
331                         break;
332
333                 case ZPOOL_PROP_DEDUPRATIO:
334                         (void) snprintf(buf, len, "%llu.%02llux",
335                             (u_longlong_t)(intval / 100),
336                             (u_longlong_t)(intval % 100));
337                         break;
338
339                 case ZPOOL_PROP_HEALTH:
340                         verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
341                             ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
342                         verify(nvlist_lookup_uint64_array(nvroot,
343                             ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
344                             == 0);
345
346                         (void) strlcpy(buf, zpool_state_to_name(intval,
347                             vs->vs_aux), len);
348                         break;
349                 case ZPOOL_PROP_VERSION:
350                         if (intval >= SPA_VERSION_FEATURES) {
351                                 (void) snprintf(buf, len, "-");
352                                 break;
353                         }
354                         /* FALLTHROUGH */
355                 default:
356                         (void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
357                 }
358                 break;
359
360         case PROP_TYPE_INDEX:
361                 intval = zpool_get_prop_int(zhp, prop, &src);
362                 if (zpool_prop_index_to_string(prop, intval, &strval)
363                     != 0)
364                         return (-1);
365                 (void) strlcpy(buf, strval, len);
366                 break;
367
368         default:
369                 abort();
370         }
371
372         if (srctype)
373                 *srctype = src;
374
375         return (0);
376 }
377
378 /*
379  * Check if the bootfs name has the same pool name as it is set to.
380  * Assuming bootfs is a valid dataset name.
381  */
382 static boolean_t
383 bootfs_name_valid(const char *pool, char *bootfs)
384 {
385         int len = strlen(pool);
386
387         if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
388                 return (B_FALSE);
389
390         if (strncmp(pool, bootfs, len) == 0 &&
391             (bootfs[len] == '/' || bootfs[len] == '\0'))
392                 return (B_TRUE);
393
394         return (B_FALSE);
395 }
396
397 #if defined(__sun__) || defined(__sun)
398 /*
399  * Inspect the configuration to determine if any of the devices contain
400  * an EFI label.
401  */
402 static boolean_t
403 pool_uses_efi(nvlist_t *config)
404 {
405         nvlist_t **child;
406         uint_t c, children;
407
408         if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
409             &child, &children) != 0)
410                 return (read_efi_label(config, NULL) >= 0);
411
412         for (c = 0; c < children; c++) {
413                 if (pool_uses_efi(child[c]))
414                         return (B_TRUE);
415         }
416         return (B_FALSE);
417 }
418 #endif
419
420 boolean_t
421 zpool_is_bootable(zpool_handle_t *zhp)
422 {
423         char bootfs[ZPOOL_MAXNAMELEN];
424
425         return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
426             sizeof (bootfs), NULL) == 0 && strncmp(bootfs, "-",
427             sizeof (bootfs)) != 0);
428 }
429
430
431 /*
432  * Given an nvlist of zpool properties to be set, validate that they are
433  * correct, and parse any numeric properties (index, boolean, etc) if they are
434  * specified as strings.
435  */
436 static nvlist_t *
437 zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
438     nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
439 {
440         nvpair_t *elem;
441         nvlist_t *retprops;
442         zpool_prop_t prop;
443         char *strval;
444         uint64_t intval;
445         char *slash, *check;
446         struct stat64 statbuf;
447         zpool_handle_t *zhp;
448         nvlist_t *nvroot;
449
450         if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
451                 (void) no_memory(hdl);
452                 return (NULL);
453         }
454
455         elem = NULL;
456         while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
457                 const char *propname = nvpair_name(elem);
458
459                 prop = zpool_name_to_prop(propname);
460                 if (prop == ZPROP_INVAL && zpool_prop_feature(propname)) {
461                         int err;
462                         zfeature_info_t *feature;
463                         char *fname = strchr(propname, '@') + 1;
464
465                         err = zfeature_lookup_name(fname, &feature);
466                         if (err != 0) {
467                                 ASSERT3U(err, ==, ENOENT);
468                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
469                                     "invalid feature '%s'"), fname);
470                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
471                                 goto error;
472                         }
473
474                         if (nvpair_type(elem) != DATA_TYPE_STRING) {
475                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
476                                     "'%s' must be a string"), propname);
477                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
478                                 goto error;
479                         }
480
481                         (void) nvpair_value_string(elem, &strval);
482                         if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0) {
483                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
484                                     "property '%s' can only be set to "
485                                     "'enabled'"), propname);
486                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
487                                 goto error;
488                         }
489
490                         if (nvlist_add_uint64(retprops, propname, 0) != 0) {
491                                 (void) no_memory(hdl);
492                                 goto error;
493                         }
494                         continue;
495                 }
496
497                 /*
498                  * Make sure this property is valid and applies to this type.
499                  */
500                 if (prop == ZPROP_INVAL) {
501                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
502                             "invalid property '%s'"), propname);
503                         (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
504                         goto error;
505                 }
506
507                 if (zpool_prop_readonly(prop)) {
508                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
509                             "is readonly"), propname);
510                         (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
511                         goto error;
512                 }
513
514                 if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
515                     &strval, &intval, errbuf) != 0)
516                         goto error;
517
518                 /*
519                  * Perform additional checking for specific properties.
520                  */
521                 switch (prop) {
522                 default:
523                         break;
524                 case ZPOOL_PROP_VERSION:
525                         if (intval < version ||
526                             !SPA_VERSION_IS_SUPPORTED(intval)) {
527                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
528                                     "property '%s' number %d is invalid."),
529                                     propname, intval);
530                                 (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
531                                 goto error;
532                         }
533                         break;
534
535                 case ZPOOL_PROP_ASHIFT:
536                         if (!flags.create) {
537                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
538                                     "property '%s' can only be set at "
539                                     "creation time"), propname);
540                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
541                                 goto error;
542                         }
543
544                         if (intval != 0 && (intval < 9 || intval > 13)) {
545                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
546                                     "property '%s' number %d is invalid."),
547                                     propname, intval);
548                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
549                                 goto error;
550                         }
551                         break;
552
553                 case ZPOOL_PROP_BOOTFS:
554                         if (flags.create || flags.import) {
555                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
556                                     "property '%s' cannot be set at creation "
557                                     "or import time"), propname);
558                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
559                                 goto error;
560                         }
561
562                         if (version < SPA_VERSION_BOOTFS) {
563                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
564                                     "pool must be upgraded to support "
565                                     "'%s' property"), propname);
566                                 (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
567                                 goto error;
568                         }
569
570                         /*
571                          * bootfs property value has to be a dataset name and
572                          * the dataset has to be in the same pool as it sets to.
573                          */
574                         if (strval[0] != '\0' && !bootfs_name_valid(poolname,
575                             strval)) {
576                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
577                                     "is an invalid name"), strval);
578                                 (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
579                                 goto error;
580                         }
581
582                         if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
583                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
584                                     "could not open pool '%s'"), poolname);
585                                 (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
586                                 goto error;
587                         }
588                         verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
589                             ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
590
591 #if defined(__sun__) || defined(__sun)
592                         /*
593                          * bootfs property cannot be set on a disk which has
594                          * been EFI labeled.
595                          */
596                         if (pool_uses_efi(nvroot)) {
597                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
598                                     "property '%s' not supported on "
599                                     "EFI labeled devices"), propname);
600                                 (void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf);
601                                 zpool_close(zhp);
602                                 goto error;
603                         }
604 #endif
605                         zpool_close(zhp);
606                         break;
607
608                 case ZPOOL_PROP_ALTROOT:
609                         if (!flags.create && !flags.import) {
610                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
611                                     "property '%s' can only be set during pool "
612                                     "creation or import"), propname);
613                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
614                                 goto error;
615                         }
616
617                         if (strval[0] != '/') {
618                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
619                                     "bad alternate root '%s'"), strval);
620                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
621                                 goto error;
622                         }
623                         break;
624
625                 case ZPOOL_PROP_CACHEFILE:
626                         if (strval[0] == '\0')
627                                 break;
628
629                         if (strcmp(strval, "none") == 0)
630                                 break;
631
632                         if (strval[0] != '/') {
633                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
634                                     "property '%s' must be empty, an "
635                                     "absolute path, or 'none'"), propname);
636                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
637                                 goto error;
638                         }
639
640                         slash = strrchr(strval, '/');
641
642                         if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
643                             strcmp(slash, "/..") == 0) {
644                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
645                                     "'%s' is not a valid file"), strval);
646                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
647                                 goto error;
648                         }
649
650                         *slash = '\0';
651
652                         if (strval[0] != '\0' &&
653                             (stat64(strval, &statbuf) != 0 ||
654                             !S_ISDIR(statbuf.st_mode))) {
655                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
656                                     "'%s' is not a valid directory"),
657                                     strval);
658                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
659                                 goto error;
660                         }
661
662                         *slash = '/';
663                         break;
664
665                 case ZPOOL_PROP_COMMENT:
666                         for (check = strval; *check != '\0'; check++) {
667                                 if (!isprint(*check)) {
668                                         zfs_error_aux(hdl,
669                                             dgettext(TEXT_DOMAIN,
670                                             "comment may only have printable "
671                                             "characters"));
672                                         (void) zfs_error(hdl, EZFS_BADPROP,
673                                             errbuf);
674                                         goto error;
675                                 }
676                         }
677                         if (strlen(strval) > ZPROP_MAX_COMMENT) {
678                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
679                                     "comment must not exceed %d characters"),
680                                     ZPROP_MAX_COMMENT);
681                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
682                                 goto error;
683                         }
684                         break;
685                 case ZPOOL_PROP_READONLY:
686                         if (!flags.import) {
687                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
688                                     "property '%s' can only be set at "
689                                     "import time"), propname);
690                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
691                                 goto error;
692                         }
693                         break;
694                 }
695         }
696
697         return (retprops);
698 error:
699         nvlist_free(retprops);
700         return (NULL);
701 }
702
703 /*
704  * Set zpool property : propname=propval.
705  */
706 int
707 zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
708 {
709         zfs_cmd_t zc = {"\0"};
710         int ret = -1;
711         char errbuf[1024];
712         nvlist_t *nvl = NULL;
713         nvlist_t *realprops;
714         uint64_t version;
715         prop_flags_t flags = { 0 };
716
717         (void) snprintf(errbuf, sizeof (errbuf),
718             dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
719             zhp->zpool_name);
720
721         if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
722                 return (no_memory(zhp->zpool_hdl));
723
724         if (nvlist_add_string(nvl, propname, propval) != 0) {
725                 nvlist_free(nvl);
726                 return (no_memory(zhp->zpool_hdl));
727         }
728
729         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
730         if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
731             zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
732                 nvlist_free(nvl);
733                 return (-1);
734         }
735
736         nvlist_free(nvl);
737         nvl = realprops;
738
739         /*
740          * Execute the corresponding ioctl() to set this property.
741          */
742         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
743
744         if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
745                 nvlist_free(nvl);
746                 return (-1);
747         }
748
749         ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
750
751         zcmd_free_nvlists(&zc);
752         nvlist_free(nvl);
753
754         if (ret)
755                 (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
756         else
757                 (void) zpool_props_refresh(zhp);
758
759         return (ret);
760 }
761
762 int
763 zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
764 {
765         libzfs_handle_t *hdl = zhp->zpool_hdl;
766         zprop_list_t *entry;
767         char buf[ZFS_MAXPROPLEN];
768         nvlist_t *features = NULL;
769         nvpair_t *nvp;
770         zprop_list_t **last;
771         boolean_t firstexpand = (NULL == *plp);
772         int i;
773
774         if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
775                 return (-1);
776
777         last = plp;
778         while (*last != NULL)
779                 last = &(*last)->pl_next;
780
781         if ((*plp)->pl_all)
782                 features = zpool_get_features(zhp);
783
784         if ((*plp)->pl_all && firstexpand) {
785                 for (i = 0; i < SPA_FEATURES; i++) {
786                         zprop_list_t *entry = zfs_alloc(hdl,
787                             sizeof (zprop_list_t));
788                         entry->pl_prop = ZPROP_INVAL;
789                         entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s",
790                             spa_feature_table[i].fi_uname);
791                         entry->pl_width = strlen(entry->pl_user_prop);
792                         entry->pl_all = B_TRUE;
793
794                         *last = entry;
795                         last = &entry->pl_next;
796                 }
797         }
798
799         /* add any unsupported features */
800         for (nvp = nvlist_next_nvpair(features, NULL);
801             nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) {
802                 char *propname;
803                 boolean_t found;
804                 zprop_list_t *entry;
805
806                 if (zfeature_is_supported(nvpair_name(nvp)))
807                         continue;
808
809                 propname = zfs_asprintf(hdl, "unsupported@%s",
810                     nvpair_name(nvp));
811
812                 /*
813                  * Before adding the property to the list make sure that no
814                  * other pool already added the same property.
815                  */
816                 found = B_FALSE;
817                 entry = *plp;
818                 while (entry != NULL) {
819                         if (entry->pl_user_prop != NULL &&
820                             strcmp(propname, entry->pl_user_prop) == 0) {
821                                 found = B_TRUE;
822                                 break;
823                         }
824                         entry = entry->pl_next;
825                 }
826                 if (found) {
827                         free(propname);
828                         continue;
829                 }
830
831                 entry = zfs_alloc(hdl, sizeof (zprop_list_t));
832                 entry->pl_prop = ZPROP_INVAL;
833                 entry->pl_user_prop = propname;
834                 entry->pl_width = strlen(entry->pl_user_prop);
835                 entry->pl_all = B_TRUE;
836
837                 *last = entry;
838                 last = &entry->pl_next;
839         }
840
841         for (entry = *plp; entry != NULL; entry = entry->pl_next) {
842
843                 if (entry->pl_fixed)
844                         continue;
845
846                 if (entry->pl_prop != ZPROP_INVAL &&
847                     zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
848                     NULL) == 0) {
849                         if (strlen(buf) > entry->pl_width)
850                                 entry->pl_width = strlen(buf);
851                 }
852         }
853
854         return (0);
855 }
856
857 /*
858  * Get the state for the given feature on the given ZFS pool.
859  */
860 int
861 zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
862     size_t len)
863 {
864         uint64_t refcount;
865         boolean_t found = B_FALSE;
866         nvlist_t *features = zpool_get_features(zhp);
867         boolean_t supported;
868         const char *feature = strchr(propname, '@') + 1;
869
870         supported = zpool_prop_feature(propname);
871         ASSERT(supported || zpool_prop_unsupported(propname));
872
873         /*
874          * Convert from feature name to feature guid. This conversion is
875          * unecessary for unsupported@... properties because they already
876          * use guids.
877          */
878         if (supported) {
879                 int ret;
880                 zfeature_info_t *fi;
881
882                 ret = zfeature_lookup_name(feature, &fi);
883                 if (ret != 0) {
884                         (void) strlcpy(buf, "-", len);
885                         return (ENOTSUP);
886                 }
887                 feature = fi->fi_guid;
888         }
889
890         if (nvlist_lookup_uint64(features, feature, &refcount) == 0)
891                 found = B_TRUE;
892
893         if (supported) {
894                 if (!found) {
895                         (void) strlcpy(buf, ZFS_FEATURE_DISABLED, len);
896                 } else  {
897                         if (refcount == 0)
898                                 (void) strlcpy(buf, ZFS_FEATURE_ENABLED, len);
899                         else
900                                 (void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len);
901                 }
902         } else {
903                 if (found) {
904                         if (refcount == 0) {
905                                 (void) strcpy(buf, ZFS_UNSUPPORTED_INACTIVE);
906                         } else {
907                                 (void) strcpy(buf, ZFS_UNSUPPORTED_READONLY);
908                         }
909                 } else {
910                         (void) strlcpy(buf, "-", len);
911                         return (ENOTSUP);
912                 }
913         }
914
915         return (0);
916 }
917
/*
 * Don't start the slice at the default block of 34; many storage
 * devices will use a stripe width of 128k, other vendors prefer a 1m
 * alignment.  It is best to play it safe and ensure a 1m alignment
 * given 512B blocks.  When the block size is larger by a power of 2
 * we will still be 1m aligned.  Some devices are sensitive to the
 * partition ending alignment as well.
 *
 * Both values are block counts: 2048 blocks of 512 bytes is 1 MiB.
 */
#define NEW_START_BLOCK         2048
#define PARTITION_END_ALIGNMENT 2048
928
929 /*
930  * Validate the given pool name, optionally putting an extended error message in
931  * 'buf'.
932  */
933 boolean_t
934 zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
935 {
936         namecheck_err_t why;
937         char what;
938         int ret;
939
940         ret = pool_namecheck(pool, &why, &what);
941
942         /*
943          * The rules for reserved pool names were extended at a later point.
944          * But we need to support users with existing pools that may now be
945          * invalid.  So we only check for this expanded set of names during a
946          * create (or import), and only in userland.
947          */
948         if (ret == 0 && !isopen &&
949             (strncmp(pool, "mirror", 6) == 0 ||
950             strncmp(pool, "raidz", 5) == 0 ||
951             strncmp(pool, "spare", 5) == 0 ||
952             strcmp(pool, "log") == 0)) {
953                 if (hdl != NULL)
954                         zfs_error_aux(hdl,
955                             dgettext(TEXT_DOMAIN, "name is reserved"));
956                 return (B_FALSE);
957         }
958
959
960         if (ret != 0) {
961                 if (hdl != NULL) {
962                         switch (why) {
963                         case NAME_ERR_TOOLONG:
964                                 zfs_error_aux(hdl,
965                                     dgettext(TEXT_DOMAIN, "name is too long"));
966                                 break;
967
968                         case NAME_ERR_INVALCHAR:
969                                 zfs_error_aux(hdl,
970                                     dgettext(TEXT_DOMAIN, "invalid character "
971                                     "'%c' in pool name"), what);
972                                 break;
973
974                         case NAME_ERR_NOLETTER:
975                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
976                                     "name must begin with a letter"));
977                                 break;
978
979                         case NAME_ERR_RESERVED:
980                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
981                                     "name is reserved"));
982                                 break;
983
984                         case NAME_ERR_DISKLIKE:
985                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
986                                     "pool name is reserved"));
987                                 break;
988
989                         case NAME_ERR_LEADING_SLASH:
990                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
991                                     "leading slash in name"));
992                                 break;
993
994                         case NAME_ERR_EMPTY_COMPONENT:
995                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
996                                     "empty component in name"));
997                                 break;
998
999                         case NAME_ERR_TRAILING_SLASH:
1000                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1001                                     "trailing slash in name"));
1002                                 break;
1003
1004                         case NAME_ERR_MULTIPLE_AT:
1005                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1006                                     "multiple '@' delimiters in name"));
1007                                 break;
1008                         case NAME_ERR_NO_AT:
1009                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1010                                     "permission set is missing '@'"));
1011                                 break;
1012                         }
1013                 }
1014                 return (B_FALSE);
1015         }
1016
1017         return (B_TRUE);
1018 }
1019
1020 /*
1021  * Open a handle to the given pool, even if the pool is currently in the FAULTED
1022  * state.
1023  */
1024 zpool_handle_t *
1025 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
1026 {
1027         zpool_handle_t *zhp;
1028         boolean_t missing;
1029
1030         /*
1031          * Make sure the pool name is valid.
1032          */
1033         if (!zpool_name_valid(hdl, B_TRUE, pool)) {
1034                 (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1035                     dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1036                     pool);
1037                 return (NULL);
1038         }
1039
1040         if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1041                 return (NULL);
1042
1043         zhp->zpool_hdl = hdl;
1044         (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1045
1046         if (zpool_refresh_stats(zhp, &missing) != 0) {
1047                 zpool_close(zhp);
1048                 return (NULL);
1049         }
1050
1051         if (missing) {
1052                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
1053                 (void) zfs_error_fmt(hdl, EZFS_NOENT,
1054                     dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
1055                 zpool_close(zhp);
1056                 return (NULL);
1057         }
1058
1059         return (zhp);
1060 }
1061
1062 /*
1063  * Like the above, but silent on error.  Used when iterating over pools (because
1064  * the configuration cache may be out of date).
1065  */
1066 int
1067 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
1068 {
1069         zpool_handle_t *zhp;
1070         boolean_t missing;
1071
1072         if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1073                 return (-1);
1074
1075         zhp->zpool_hdl = hdl;
1076         (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1077
1078         if (zpool_refresh_stats(zhp, &missing) != 0) {
1079                 zpool_close(zhp);
1080                 return (-1);
1081         }
1082
1083         if (missing) {
1084                 zpool_close(zhp);
1085                 *ret = NULL;
1086                 return (0);
1087         }
1088
1089         *ret = zhp;
1090         return (0);
1091 }
1092
1093 /*
1094  * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
1095  * state.
1096  */
1097 zpool_handle_t *
1098 zpool_open(libzfs_handle_t *hdl, const char *pool)
1099 {
1100         zpool_handle_t *zhp;
1101
1102         if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
1103                 return (NULL);
1104
1105         if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
1106                 (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
1107                     dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
1108                 zpool_close(zhp);
1109                 return (NULL);
1110         }
1111
1112         return (zhp);
1113 }
1114
1115 /*
1116  * Close the handle.  Simply frees the memory associated with the handle.
1117  */
1118 void
1119 zpool_close(zpool_handle_t *zhp)
1120 {
1121         if (zhp->zpool_config)
1122                 nvlist_free(zhp->zpool_config);
1123         if (zhp->zpool_old_config)
1124                 nvlist_free(zhp->zpool_old_config);
1125         if (zhp->zpool_props)
1126                 nvlist_free(zhp->zpool_props);
1127         free(zhp);
1128 }
1129
1130 /*
1131  * Return the name of the pool.
1132  */
1133 const char *
1134 zpool_get_name(zpool_handle_t *zhp)
1135 {
1136         return (zhp->zpool_name);
1137 }
1138
1139
1140 /*
1141  * Return the state of the pool (ACTIVE or UNAVAILABLE)
1142  */
1143 int
1144 zpool_get_state(zpool_handle_t *zhp)
1145 {
1146         return (zhp->zpool_state);
1147 }
1148
1149 /*
1150  * Create the named pool, using the provided vdev list.  It is assumed
1151  * that the consumer has already validated the contents of the nvlist, so we
1152  * don't have to worry about error semantics.
1153  */
1154 int
1155 zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
1156     nvlist_t *props, nvlist_t *fsprops)
1157 {
1158         zfs_cmd_t zc = {"\0"};
1159         nvlist_t *zc_fsprops = NULL;
1160         nvlist_t *zc_props = NULL;
1161         char msg[1024];
1162         int ret = -1;
1163
1164         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1165             "cannot create '%s'"), pool);
1166
1167         if (!zpool_name_valid(hdl, B_FALSE, pool))
1168                 return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
1169
1170         if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1171                 return (-1);
1172
1173         if (props) {
1174                 prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };
1175
1176                 if ((zc_props = zpool_valid_proplist(hdl, pool, props,
1177                     SPA_VERSION_1, flags, msg)) == NULL) {
1178                         goto create_failed;
1179                 }
1180         }
1181
1182         if (fsprops) {
1183                 uint64_t zoned;
1184                 char *zonestr;
1185
1186                 zoned = ((nvlist_lookup_string(fsprops,
1187                     zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
1188                     strcmp(zonestr, "on") == 0);
1189
1190                 if ((zc_fsprops = zfs_valid_proplist(hdl,
1191                     ZFS_TYPE_FILESYSTEM, fsprops, zoned, NULL, msg)) == NULL) {
1192                         goto create_failed;
1193                 }
1194                 if (!zc_props &&
1195                     (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
1196                         goto create_failed;
1197                 }
1198                 if (nvlist_add_nvlist(zc_props,
1199                     ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
1200                         goto create_failed;
1201                 }
1202         }
1203
1204         if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
1205                 goto create_failed;
1206
1207         (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
1208
1209         if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {
1210
1211                 zcmd_free_nvlists(&zc);
1212                 nvlist_free(zc_props);
1213                 nvlist_free(zc_fsprops);
1214
1215                 switch (errno) {
1216                 case EBUSY:
1217                         /*
1218                          * This can happen if the user has specified the same
1219                          * device multiple times.  We can't reliably detect this
1220                          * until we try to add it and see we already have a
1221                          * label.  This can also happen under if the device is
1222                          * part of an active md or lvm device.
1223                          */
1224                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1225                             "one or more vdevs refer to the same device, or "
1226                             "one of\nthe devices is part of an active md or "
1227                             "lvm device"));
1228                         return (zfs_error(hdl, EZFS_BADDEV, msg));
1229
1230                 case EOVERFLOW:
1231                         /*
1232                          * This occurs when one of the devices is below
1233                          * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
1234                          * device was the problem device since there's no
1235                          * reliable way to determine device size from userland.
1236                          */
1237                         {
1238                                 char buf[64];
1239
1240                                 zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
1241
1242                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1243                                     "one or more devices is less than the "
1244                                     "minimum size (%s)"), buf);
1245                         }
1246                         return (zfs_error(hdl, EZFS_BADDEV, msg));
1247
1248                 case ENOSPC:
1249                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1250                             "one or more devices is out of space"));
1251                         return (zfs_error(hdl, EZFS_BADDEV, msg));
1252
1253                 case ENOTBLK:
1254                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1255                             "cache device must be a disk or disk slice"));
1256                         return (zfs_error(hdl, EZFS_BADDEV, msg));
1257
1258                 default:
1259                         return (zpool_standard_error(hdl, errno, msg));
1260                 }
1261         }
1262
1263 create_failed:
1264         zcmd_free_nvlists(&zc);
1265         nvlist_free(zc_props);
1266         nvlist_free(zc_fsprops);
1267         return (ret);
1268 }
1269
1270 /*
1271  * Destroy the given pool.  It is up to the caller to ensure that there are no
1272  * datasets left in the pool.
1273  */
1274 int
1275 zpool_destroy(zpool_handle_t *zhp, const char *log_str)
1276 {
1277         zfs_cmd_t zc = {"\0"};
1278         zfs_handle_t *zfp = NULL;
1279         libzfs_handle_t *hdl = zhp->zpool_hdl;
1280         char msg[1024];
1281
1282         if (zhp->zpool_state == POOL_STATE_ACTIVE &&
1283             (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
1284                 return (-1);
1285
1286         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1287         zc.zc_history = (uint64_t)(uintptr_t)log_str;
1288
1289         if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
1290                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1291                     "cannot destroy '%s'"), zhp->zpool_name);
1292
1293                 if (errno == EROFS) {
1294                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1295                             "one or more devices is read only"));
1296                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1297                 } else {
1298                         (void) zpool_standard_error(hdl, errno, msg);
1299                 }
1300
1301                 if (zfp)
1302                         zfs_close(zfp);
1303                 return (-1);
1304         }
1305
1306         if (zfp) {
1307                 remove_mountpoint(zfp);
1308                 zfs_close(zfp);
1309         }
1310
1311         return (0);
1312 }
1313
1314 /*
1315  * Add the given vdevs to the pool.  The caller must have already performed the
1316  * necessary verification to ensure that the vdev specification is well-formed.
1317  */
1318 int
1319 zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
1320 {
1321         zfs_cmd_t zc = {"\0"};
1322         int ret;
1323         libzfs_handle_t *hdl = zhp->zpool_hdl;
1324         char msg[1024];
1325         nvlist_t **spares, **l2cache;
1326         uint_t nspares, nl2cache;
1327
1328         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1329             "cannot add to '%s'"), zhp->zpool_name);
1330
1331         if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1332             SPA_VERSION_SPARES &&
1333             nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1334             &spares, &nspares) == 0) {
1335                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1336                     "upgraded to add hot spares"));
1337                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
1338         }
1339
1340 #if defined(__sun__) || defined(__sun)
1341         if (zpool_is_bootable(zhp) && nvlist_lookup_nvlist_array(nvroot,
1342             ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) {
1343                 uint64_t s;
1344
1345                 for (s = 0; s < nspares; s++) {
1346                         char *path;
1347
1348                         if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
1349                             &path) == 0 && pool_uses_efi(spares[s])) {
1350                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1351                                     "device '%s' contains an EFI label and "
1352                                     "cannot be used on root pools."),
1353                                     zpool_vdev_name(hdl, NULL, spares[s],
1354                                     B_FALSE));
1355                                 return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
1356                         }
1357                 }
1358         }
1359 #endif
1360
1361         if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1362             SPA_VERSION_L2CACHE &&
1363             nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
1364             &l2cache, &nl2cache) == 0) {
1365                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1366                     "upgraded to add cache devices"));
1367                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
1368         }
1369
1370         if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1371                 return (-1);
1372         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1373
1374         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
1375                 switch (errno) {
1376                 case EBUSY:
1377                         /*
1378                          * This can happen if the user has specified the same
1379                          * device multiple times.  We can't reliably detect this
1380                          * until we try to add it and see we already have a
1381                          * label.
1382                          */
1383                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1384                             "one or more vdevs refer to the same device"));
1385                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1386                         break;
1387
1388                 case EOVERFLOW:
1389                         /*
1390                          * This occurrs when one of the devices is below
1391                          * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
1392                          * device was the problem device since there's no
1393                          * reliable way to determine device size from userland.
1394                          */
1395                         {
1396                                 char buf[64];
1397
1398                                 zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
1399
1400                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1401                                     "device is less than the minimum "
1402                                     "size (%s)"), buf);
1403                         }
1404                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1405                         break;
1406
1407                 case ENOTSUP:
1408                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1409                             "pool must be upgraded to add these vdevs"));
1410                         (void) zfs_error(hdl, EZFS_BADVERSION, msg);
1411                         break;
1412
1413                 case ENOTBLK:
1414                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1415                             "cache device must be a disk or disk slice"));
1416                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1417                         break;
1418
1419                 default:
1420                         (void) zpool_standard_error(hdl, errno, msg);
1421                 }
1422
1423                 ret = -1;
1424         } else {
1425                 ret = 0;
1426         }
1427
1428         zcmd_free_nvlists(&zc);
1429
1430         return (ret);
1431 }
1432
1433 /*
1434  * Exports the pool from the system.  The caller must ensure that there are no
1435  * mounted datasets in the pool.
1436  */
1437 static int
1438 zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce,
1439     const char *log_str)
1440 {
1441         zfs_cmd_t zc = {"\0"};
1442         char msg[1024];
1443
1444         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1445             "cannot export '%s'"), zhp->zpool_name);
1446
1447         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1448         zc.zc_cookie = force;
1449         zc.zc_guid = hardforce;
1450         zc.zc_history = (uint64_t)(uintptr_t)log_str;
1451
1452         if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
1453                 switch (errno) {
1454                 case EXDEV:
1455                         zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
1456                             "use '-f' to override the following errors:\n"
1457                             "'%s' has an active shared spare which could be"
1458                             " used by other pools once '%s' is exported."),
1459                             zhp->zpool_name, zhp->zpool_name);
1460                         return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
1461                             msg));
1462                 default:
1463                         return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
1464                             msg));
1465                 }
1466         }
1467
1468         return (0);
1469 }
1470
1471 int
1472 zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str)
1473 {
1474         return (zpool_export_common(zhp, force, B_FALSE, log_str));
1475 }
1476
1477 int
1478 zpool_export_force(zpool_handle_t *zhp, const char *log_str)
1479 {
1480         return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str));
1481 }
1482
1483 static void
1484 zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
1485     nvlist_t *config)
1486 {
1487         nvlist_t *nv = NULL;
1488         uint64_t rewindto;
1489         int64_t loss = -1;
1490         struct tm t;
1491         char timestr[128];
1492
1493         if (!hdl->libzfs_printerr || config == NULL)
1494                 return;
1495
1496         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
1497             nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) {
1498                 return;
1499         }
1500
1501         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1502                 return;
1503         (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1504
1505         if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1506             strftime(timestr, 128, "%c", &t) != 0) {
1507                 if (dryrun) {
1508                         (void) printf(dgettext(TEXT_DOMAIN,
1509                             "Would be able to return %s "
1510                             "to its state as of %s.\n"),
1511                             name, timestr);
1512                 } else {
1513                         (void) printf(dgettext(TEXT_DOMAIN,
1514                             "Pool %s returned to its state as of %s.\n"),
1515                             name, timestr);
1516                 }
1517                 if (loss > 120) {
1518                         (void) printf(dgettext(TEXT_DOMAIN,
1519                             "%s approximately %lld "),
1520                             dryrun ? "Would discard" : "Discarded",
1521                             ((longlong_t)loss + 30) / 60);
1522                         (void) printf(dgettext(TEXT_DOMAIN,
1523                             "minutes of transactions.\n"));
1524                 } else if (loss > 0) {
1525                         (void) printf(dgettext(TEXT_DOMAIN,
1526                             "%s approximately %lld "),
1527                             dryrun ? "Would discard" : "Discarded",
1528                             (longlong_t)loss);
1529                         (void) printf(dgettext(TEXT_DOMAIN,
1530                             "seconds of transactions.\n"));
1531                 }
1532         }
1533 }
1534
1535 void
1536 zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
1537     nvlist_t *config)
1538 {
1539         nvlist_t *nv = NULL;
1540         int64_t loss = -1;
1541         uint64_t edata = UINT64_MAX;
1542         uint64_t rewindto;
1543         struct tm t;
1544         char timestr[128];
1545
1546         if (!hdl->libzfs_printerr)
1547                 return;
1548
1549         if (reason >= 0)
1550                 (void) printf(dgettext(TEXT_DOMAIN, "action: "));
1551         else
1552                 (void) printf(dgettext(TEXT_DOMAIN, "\t"));
1553
1554         /* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
1555         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
1556             nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 ||
1557             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1558                 goto no_info;
1559
1560         (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1561         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
1562             &edata);
1563
1564         (void) printf(dgettext(TEXT_DOMAIN,
1565             "Recovery is possible, but will result in some data loss.\n"));
1566
1567         if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1568             strftime(timestr, 128, "%c", &t) != 0) {
1569                 (void) printf(dgettext(TEXT_DOMAIN,
1570                     "\tReturning the pool to its state as of %s\n"
1571                     "\tshould correct the problem.  "),
1572                     timestr);
1573         } else {
1574                 (void) printf(dgettext(TEXT_DOMAIN,
1575                     "\tReverting the pool to an earlier state "
1576                     "should correct the problem.\n\t"));
1577         }
1578
1579         if (loss > 120) {
1580                 (void) printf(dgettext(TEXT_DOMAIN,
1581                     "Approximately %lld minutes of data\n"
1582                     "\tmust be discarded, irreversibly.  "),
1583                     ((longlong_t)loss + 30) / 60);
1584         } else if (loss > 0) {
1585                 (void) printf(dgettext(TEXT_DOMAIN,
1586                     "Approximately %lld seconds of data\n"
1587                     "\tmust be discarded, irreversibly.  "),
1588                     (longlong_t)loss);
1589         }
1590         if (edata != 0 && edata != UINT64_MAX) {
1591                 if (edata == 1) {
1592                         (void) printf(dgettext(TEXT_DOMAIN,
1593                             "After rewind, at least\n"
1594                             "\tone persistent user-data error will remain.  "));
1595                 } else {
1596                         (void) printf(dgettext(TEXT_DOMAIN,
1597                             "After rewind, several\n"
1598                             "\tpersistent user-data errors will remain.  "));
1599                 }
1600         }
1601         (void) printf(dgettext(TEXT_DOMAIN,
1602             "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
1603             reason >= 0 ? "clear" : "import", name);
1604
1605         (void) printf(dgettext(TEXT_DOMAIN,
1606             "A scrub of the pool\n"
1607             "\tis strongly recommended after recovery.\n"));
1608         return;
1609
1610 no_info:
1611         (void) printf(dgettext(TEXT_DOMAIN,
1612             "Destroy and re-create the pool from\n\ta backup source.\n"));
1613 }
1614
1615 /*
1616  * zpool_import() is a contracted interface. Should be kept the same
1617  * if possible.
1618  *
1619  * Applications should use zpool_import_props() to import a pool with
1620  * new properties value to be set.
1621  */
1622 int
1623 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1624     char *altroot)
1625 {
1626         nvlist_t *props = NULL;
1627         int ret;
1628
1629         if (altroot != NULL) {
1630                 if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1631                         return (zfs_error_fmt(hdl, EZFS_NOMEM,
1632                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1633                             newname));
1634                 }
1635
1636                 if (nvlist_add_string(props,
1637                     zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
1638                     nvlist_add_string(props,
1639                     zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
1640                         nvlist_free(props);
1641                         return (zfs_error_fmt(hdl, EZFS_NOMEM,
1642                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1643                             newname));
1644                 }
1645         }
1646
1647         ret = zpool_import_props(hdl, config, newname, props,
1648             ZFS_IMPORT_NORMAL);
1649         if (props)
1650                 nvlist_free(props);
1651         return (ret);
1652 }
1653
1654 static void
1655 print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
1656     int indent)
1657 {
1658         nvlist_t **child;
1659         uint_t c, children;
1660         char *vname;
1661         uint64_t is_log = 0;
1662
1663         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
1664             &is_log);
1665
1666         if (name != NULL)
1667                 (void) printf("\t%*s%s%s\n", indent, "", name,
1668                     is_log ? " [log]" : "");
1669
1670         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1671             &child, &children) != 0)
1672                 return;
1673
1674         for (c = 0; c < children; c++) {
1675                 vname = zpool_vdev_name(hdl, NULL, child[c], B_TRUE);
1676                 print_vdev_tree(hdl, vname, child[c], indent + 2);
1677                 free(vname);
1678         }
1679 }
1680
1681 void
1682 zpool_print_unsup_feat(nvlist_t *config)
1683 {
1684         nvlist_t *nvinfo, *unsup_feat;
1685         nvpair_t *nvp;
1686
1687         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) ==
1688             0);
1689         verify(nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT,
1690             &unsup_feat) == 0);
1691
1692         for (nvp = nvlist_next_nvpair(unsup_feat, NULL); nvp != NULL;
1693             nvp = nvlist_next_nvpair(unsup_feat, nvp)) {
1694                 char *desc;
1695
1696                 verify(nvpair_type(nvp) == DATA_TYPE_STRING);
1697                 verify(nvpair_value_string(nvp, &desc) == 0);
1698
1699                 if (strlen(desc) > 0)
1700                         (void) printf("\t%s (%s)\n", nvpair_name(nvp), desc);
1701                 else
1702                         (void) printf("\t%s\n", nvpair_name(nvp));
1703         }
1704 }
1705
1706 /*
1707  * Import the given pool using the known configuration and a list of
1708  * properties to be set. The configuration should have come from
1709  * zpool_find_import(). The 'newname' parameters control whether the pool
1710  * is imported with a different name.
1711  */
1712 int
1713 zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1714     nvlist_t *props, int flags)
1715 {
1716         zfs_cmd_t zc = {"\0"};
1717         zpool_rewind_policy_t policy;
1718         nvlist_t *nv = NULL;
1719         nvlist_t *nvinfo = NULL;
1720         nvlist_t *missing = NULL;
1721         char *thename;
1722         char *origname;
1723         int ret;
1724         int error = 0;
1725         char errbuf[1024];
1726
1727         verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1728             &origname) == 0);
1729
1730         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1731             "cannot import pool '%s'"), origname);
1732
1733         if (newname != NULL) {
1734                 if (!zpool_name_valid(hdl, B_FALSE, newname))
1735                         return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1736                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1737                             newname));
1738                 thename = (char *)newname;
1739         } else {
1740                 thename = origname;
1741         }
1742
1743         if (props) {
1744                 uint64_t version;
1745                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
1746
1747                 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1748                     &version) == 0);
1749
1750                 if ((props = zpool_valid_proplist(hdl, origname,
1751                     props, version, flags, errbuf)) == NULL) {
1752                         return (-1);
1753                 } else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1754                         nvlist_free(props);
1755                         return (-1);
1756                 }
1757         }
1758
1759         (void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1760
1761         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1762             &zc.zc_guid) == 0);
1763
1764         if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1765                 nvlist_free(props);
1766                 return (-1);
1767         }
1768         if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
1769                 nvlist_free(props);
1770                 return (-1);
1771         }
1772
1773         zc.zc_cookie = flags;
1774         while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
1775             errno == ENOMEM) {
1776                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
1777                         zcmd_free_nvlists(&zc);
1778                         return (-1);
1779                 }
1780         }
1781         if (ret != 0)
1782                 error = errno;
1783
1784         (void) zcmd_read_dst_nvlist(hdl, &zc, &nv);
1785         zpool_get_rewind_policy(config, &policy);
1786
1787         if (error) {
1788                 char desc[1024];
1789
1790                 /*
1791                  * Dry-run failed, but we print out what success
1792                  * looks like if we found a best txg
1793                  */
1794                 if (policy.zrp_request & ZPOOL_TRY_REWIND) {
1795                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
1796                             B_TRUE, nv);
1797                         nvlist_free(nv);
1798                         return (-1);
1799                 }
1800
1801                 if (newname == NULL)
1802                         (void) snprintf(desc, sizeof (desc),
1803                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1804                             thename);
1805                 else
1806                         (void) snprintf(desc, sizeof (desc),
1807                             dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1808                             origname, thename);
1809
1810                 switch (error) {
1811                 case ENOTSUP:
1812                         if (nv != NULL && nvlist_lookup_nvlist(nv,
1813                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1814                             nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) {
1815                                 (void) printf(dgettext(TEXT_DOMAIN, "This "
1816                                     "pool uses the following feature(s) not "
1817                                     "supported by this system:\n"));
1818                                 zpool_print_unsup_feat(nv);
1819                                 if (nvlist_exists(nvinfo,
1820                                     ZPOOL_CONFIG_CAN_RDONLY)) {
1821                                         (void) printf(dgettext(TEXT_DOMAIN,
1822                                             "All unsupported features are only "
1823                                             "required for writing to the pool."
1824                                             "\nThe pool can be imported using "
1825                                             "'-o readonly=on'.\n"));
1826                                 }
1827                         }
1828                         /*
1829                          * Unsupported version.
1830                          */
1831                         (void) zfs_error(hdl, EZFS_BADVERSION, desc);
1832                         break;
1833
1834                 case EINVAL:
1835                         (void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
1836                         break;
1837
1838                 case EROFS:
1839                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1840                             "one or more devices is read only"));
1841                         (void) zfs_error(hdl, EZFS_BADDEV, desc);
1842                         break;
1843
1844                 case ENXIO:
1845                         if (nv && nvlist_lookup_nvlist(nv,
1846                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1847                             nvlist_lookup_nvlist(nvinfo,
1848                             ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
1849                                 (void) printf(dgettext(TEXT_DOMAIN,
1850                                     "The devices below are missing, use "
1851                                     "'-m' to import the pool anyway:\n"));
1852                                 print_vdev_tree(hdl, NULL, missing, 2);
1853                                 (void) printf("\n");
1854                         }
1855                         (void) zpool_standard_error(hdl, error, desc);
1856                         break;
1857
1858                 case EEXIST:
1859                         (void) zpool_standard_error(hdl, error, desc);
1860                         break;
1861
1862                 case EBUSY:
1863                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1864                             "one or more devices are already in use\n"));
1865                         (void) zfs_error(hdl, EZFS_BADDEV, desc);
1866                         break;
1867
1868                 default:
1869                         (void) zpool_standard_error(hdl, error, desc);
1870                         zpool_explain_recover(hdl,
1871                             newname ? origname : thename, -error, nv);
1872                         break;
1873                 }
1874
1875                 nvlist_free(nv);
1876                 ret = -1;
1877         } else {
1878                 zpool_handle_t *zhp;
1879
1880                 /*
1881                  * This should never fail, but play it safe anyway.
1882                  */
1883                 if (zpool_open_silent(hdl, thename, &zhp) != 0)
1884                         ret = -1;
1885                 else if (zhp != NULL)
1886                         zpool_close(zhp);
1887                 if (policy.zrp_request &
1888                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
1889                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
1890                             ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), nv);
1891                 }
1892                 nvlist_free(nv);
1893                 return (0);
1894         }
1895
1896         zcmd_free_nvlists(&zc);
1897         nvlist_free(props);
1898
1899         return (ret);
1900 }
1901
1902 /*
1903  * Scan the pool.
1904  */
1905 int
1906 zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
1907 {
1908         zfs_cmd_t zc = {"\0"};
1909         char msg[1024];
1910         libzfs_handle_t *hdl = zhp->zpool_hdl;
1911
1912         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1913         zc.zc_cookie = func;
1914
1915         if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 ||
1916             (errno == ENOENT && func != POOL_SCAN_NONE))
1917                 return (0);
1918
1919         if (func == POOL_SCAN_SCRUB) {
1920                 (void) snprintf(msg, sizeof (msg),
1921                     dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
1922         } else if (func == POOL_SCAN_NONE) {
1923                 (void) snprintf(msg, sizeof (msg),
1924                     dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
1925                     zc.zc_name);
1926         } else {
1927                 assert(!"unexpected result");
1928         }
1929
1930         if (errno == EBUSY) {
1931                 nvlist_t *nvroot;
1932                 pool_scan_stat_t *ps = NULL;
1933                 uint_t psc;
1934
1935                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
1936                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
1937                 (void) nvlist_lookup_uint64_array(nvroot,
1938                     ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
1939                 if (ps && ps->pss_func == POOL_SCAN_SCRUB)
1940                         return (zfs_error(hdl, EZFS_SCRUBBING, msg));
1941                 else
1942                         return (zfs_error(hdl, EZFS_RESILVERING, msg));
1943         } else if (errno == ENOENT) {
1944                 return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
1945         } else {
1946                 return (zpool_standard_error(hdl, errno, msg));
1947         }
1948 }
1949
1950 /*
1951  * Find a vdev that matches the search criteria specified. We use the
1952  * the nvpair name to determine how we should look for the device.
1953  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
1954  * spare; but FALSE if its an INUSE spare.
1955  */
1956 static nvlist_t *
1957 vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
1958     boolean_t *l2cache, boolean_t *log)
1959 {
1960         uint_t c, children;
1961         nvlist_t **child;
1962         nvlist_t *ret;
1963         uint64_t is_log;
1964         char *srchkey;
1965         nvpair_t *pair = nvlist_next_nvpair(search, NULL);
1966
1967         /* Nothing to look for */
1968         if (search == NULL || pair == NULL)
1969                 return (NULL);
1970
1971         /* Obtain the key we will use to search */
1972         srchkey = nvpair_name(pair);
1973
1974         switch (nvpair_type(pair)) {
1975         case DATA_TYPE_UINT64:
1976                 if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
1977                         uint64_t srchval, theguid;
1978
1979                         verify(nvpair_value_uint64(pair, &srchval) == 0);
1980                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
1981                             &theguid) == 0);
1982                         if (theguid == srchval)
1983                                 return (nv);
1984                 }
1985                 break;
1986
1987         case DATA_TYPE_STRING: {
1988                 char *srchval, *val;
1989
1990                 verify(nvpair_value_string(pair, &srchval) == 0);
1991                 if (nvlist_lookup_string(nv, srchkey, &val) != 0)
1992                         break;
1993
1994                 /*
1995                  * Search for the requested value. Special cases:
1996                  *
1997                  * - ZPOOL_CONFIG_PATH for whole disk entries.  These end in
1998                  *   "-part1", or "p1".  The suffix is hidden from the user,
1999                  *   but included in the string, so this matches around it.
2000                  * - ZPOOL_CONFIG_PATH for short names zfs_strcmp_shortname()
2001                  *   is used to check all possible expanded paths.
2002                  * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
2003                  *
2004                  * Otherwise, all other searches are simple string compares.
2005                  */
2006                 if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0) {
2007                         uint64_t wholedisk = 0;
2008
2009                         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
2010                             &wholedisk);
2011                         if (zfs_strcmp_pathname(srchval, val, wholedisk) == 0)
2012                                 return (nv);
2013
2014                 } else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
2015                         char *type, *idx, *end, *p;
2016                         uint64_t id, vdev_id;
2017
2018                         /*
2019                          * Determine our vdev type, keeping in mind
2020                          * that the srchval is composed of a type and
2021                          * vdev id pair (i.e. mirror-4).
2022                          */
2023                         if ((type = strdup(srchval)) == NULL)
2024                                 return (NULL);
2025
2026                         if ((p = strrchr(type, '-')) == NULL) {
2027                                 free(type);
2028                                 break;
2029                         }
2030                         idx = p + 1;
2031                         *p = '\0';
2032
2033                         /*
2034                          * If the types don't match then keep looking.
2035                          */
2036                         if (strncmp(val, type, strlen(val)) != 0) {
2037                                 free(type);
2038                                 break;
2039                         }
2040
2041                         verify(strncmp(type, VDEV_TYPE_RAIDZ,
2042                             strlen(VDEV_TYPE_RAIDZ)) == 0 ||
2043                             strncmp(type, VDEV_TYPE_MIRROR,
2044                             strlen(VDEV_TYPE_MIRROR)) == 0);
2045                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
2046                             &id) == 0);
2047
2048                         errno = 0;
2049                         vdev_id = strtoull(idx, &end, 10);
2050
2051                         free(type);
2052                         if (errno != 0)
2053                                 return (NULL);
2054
2055                         /*
2056                          * Now verify that we have the correct vdev id.
2057                          */
2058                         if (vdev_id == id)
2059                                 return (nv);
2060                 }
2061
2062                 /*
2063                  * Common case
2064                  */
2065                 if (strcmp(srchval, val) == 0)
2066                         return (nv);
2067                 break;
2068         }
2069
2070         default:
2071                 break;
2072         }
2073
2074         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
2075             &child, &children) != 0)
2076                 return (NULL);
2077
2078         for (c = 0; c < children; c++) {
2079                 if ((ret = vdev_to_nvlist_iter(child[c], search,
2080                     avail_spare, l2cache, NULL)) != NULL) {
2081                         /*
2082                          * The 'is_log' value is only set for the toplevel
2083                          * vdev, not the leaf vdevs.  So we always lookup the
2084                          * log device from the root of the vdev tree (where
2085                          * 'log' is non-NULL).
2086                          */
2087                         if (log != NULL &&
2088                             nvlist_lookup_uint64(child[c],
2089                             ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
2090                             is_log) {
2091                                 *log = B_TRUE;
2092                         }
2093                         return (ret);
2094                 }
2095         }
2096
2097         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
2098             &child, &children) == 0) {
2099                 for (c = 0; c < children; c++) {
2100                         if ((ret = vdev_to_nvlist_iter(child[c], search,
2101                             avail_spare, l2cache, NULL)) != NULL) {
2102                                 *avail_spare = B_TRUE;
2103                                 return (ret);
2104                         }
2105                 }
2106         }
2107
2108         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
2109             &child, &children) == 0) {
2110                 for (c = 0; c < children; c++) {
2111                         if ((ret = vdev_to_nvlist_iter(child[c], search,
2112                             avail_spare, l2cache, NULL)) != NULL) {
2113                                 *l2cache = B_TRUE;
2114                                 return (ret);
2115                         }
2116                 }
2117         }
2118
2119         return (NULL);
2120 }
2121
2122 /*
2123  * Given a physical path (minus the "/devices" prefix), find the
2124  * associated vdev.
2125  */
2126 nvlist_t *
2127 zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
2128     boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
2129 {
2130         nvlist_t *search, *nvroot, *ret;
2131
2132         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2133         verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
2134
2135         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2136             &nvroot) == 0);
2137
2138         *avail_spare = B_FALSE;
2139         *l2cache = B_FALSE;
2140         if (log != NULL)
2141                 *log = B_FALSE;
2142         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2143         nvlist_free(search);
2144
2145         return (ret);
2146 }
2147
2148 /*
2149  * Determine if we have an "interior" top-level vdev (i.e mirror/raidz).
2150  */
2151 boolean_t
2152 zpool_vdev_is_interior(const char *name)
2153 {
2154         if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
2155             strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
2156                 return (B_TRUE);
2157         return (B_FALSE);
2158 }
2159
2160 nvlist_t *
2161 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
2162     boolean_t *l2cache, boolean_t *log)
2163 {
2164         char *end;
2165         nvlist_t *nvroot, *search, *ret;
2166         uint64_t guid;
2167
2168         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2169
2170         guid = strtoull(path, &end, 10);
2171         if (guid != 0 && *end == '\0') {
2172                 verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
2173         } else if (zpool_vdev_is_interior(path)) {
2174                 verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
2175         } else {
2176                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
2177         }
2178
2179         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2180             &nvroot) == 0);
2181
2182         *avail_spare = B_FALSE;
2183         *l2cache = B_FALSE;
2184         if (log != NULL)
2185                 *log = B_FALSE;
2186         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2187         nvlist_free(search);
2188
2189         return (ret);
2190 }
2191
2192 static int
2193 vdev_online(nvlist_t *nv)
2194 {
2195         uint64_t ival;
2196
2197         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
2198             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
2199             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
2200                 return (0);
2201
2202         return (1);
2203 }
2204
2205 /*
2206  * Helper function for zpool_get_physpaths().
2207  */
2208 static int
2209 vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
2210     size_t *bytes_written)
2211 {
2212         size_t bytes_left, pos, rsz;
2213         char *tmppath;
2214         const char *format;
2215
2216         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
2217             &tmppath) != 0)
2218                 return (EZFS_NODEVICE);
2219
2220         pos = *bytes_written;
2221         bytes_left = physpath_size - pos;
2222         format = (pos == 0) ? "%s" : " %s";
2223
2224         rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
2225         *bytes_written += rsz;
2226
2227         if (rsz >= bytes_left) {
2228                 /* if physpath was not copied properly, clear it */
2229                 if (bytes_left != 0) {
2230                         physpath[pos] = 0;
2231                 }
2232                 return (EZFS_NOSPC);
2233         }
2234         return (0);
2235 }
2236
2237 static int
2238 vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
2239     size_t *rsz, boolean_t is_spare)
2240 {
2241         char *type;
2242         int ret;
2243
2244         if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
2245                 return (EZFS_INVALCONFIG);
2246
2247         if (strcmp(type, VDEV_TYPE_DISK) == 0) {
2248                 /*
2249                  * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
2250                  * For a spare vdev, we only want to boot from the active
2251                  * spare device.
2252                  */
2253                 if (is_spare) {
2254                         uint64_t spare = 0;
2255                         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
2256                             &spare);
2257                         if (!spare)
2258                                 return (EZFS_INVALCONFIG);
2259                 }
2260
2261                 if (vdev_online(nv)) {
2262                         if ((ret = vdev_get_one_physpath(nv, physpath,
2263                             phypath_size, rsz)) != 0)
2264                                 return (ret);
2265                 }
2266         } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
2267             strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
2268             (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
2269                 nvlist_t **child;
2270                 uint_t count;
2271                 int i, ret;
2272
2273                 if (nvlist_lookup_nvlist_array(nv,
2274                     ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
2275                         return (EZFS_INVALCONFIG);
2276
2277                 for (i = 0; i < count; i++) {
2278                         ret = vdev_get_physpaths(child[i], physpath,
2279                             phypath_size, rsz, is_spare);
2280                         if (ret == EZFS_NOSPC)
2281                                 return (ret);
2282                 }
2283         }
2284
2285         return (EZFS_POOL_INVALARG);
2286 }
2287
2288 /*
2289  * Get phys_path for a root pool config.
2290  * Return 0 on success; non-zero on failure.
2291  */
2292 static int
2293 zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
2294 {
2295         size_t rsz;
2296         nvlist_t *vdev_root;
2297         nvlist_t **child;
2298         uint_t count;
2299         char *type;
2300
2301         rsz = 0;
2302
2303         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2304             &vdev_root) != 0)
2305                 return (EZFS_INVALCONFIG);
2306
2307         if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
2308             nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
2309             &child, &count) != 0)
2310                 return (EZFS_INVALCONFIG);
2311
2312 #if defined(__sun__) || defined(__sun)
2313         /*
2314          * root pool can not have EFI labeled disks and can only have
2315          * a single top-level vdev.
2316          */
2317         if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1 ||
2318             pool_uses_efi(vdev_root))
2319                 return (EZFS_POOL_INVALARG);
2320 #endif
2321
2322         (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
2323             B_FALSE);
2324
2325         /* No online devices */
2326         if (rsz == 0)
2327                 return (EZFS_NODEVICE);
2328
2329         return (0);
2330 }
2331
2332 /*
2333  * Get phys_path for a root pool
2334  * Return 0 on success; non-zero on failure.
2335  */
2336 int
2337 zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
2338 {
2339         return (zpool_get_config_physpath(zhp->zpool_config, physpath,
2340             phypath_size));
2341 }
2342
2343 /*
2344  * If the device has being dynamically expanded then we need to relabel
2345  * the disk to use the new unallocated space.
2346  */
2347 static int
2348 zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg)
2349 {
2350         int fd, error;
2351
2352         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
2353                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2354                     "relabel '%s': unable to open device: %d"), path, errno);
2355                 return (zfs_error(hdl, EZFS_OPENFAILED, msg));
2356         }
2357
2358         /*
2359          * It's possible that we might encounter an error if the device
2360          * does not have any unallocated space left. If so, we simply
2361          * ignore that error and continue on.
2362          *
2363          * Also, we don't call efi_rescan() - that would just return EBUSY.
2364          * The module will do it for us in vdev_disk_open().
2365          */
2366         error = efi_use_whole_disk(fd);
2367         (void) close(fd);
2368         if (error && error != VT_ENOSPC) {
2369                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2370                     "relabel '%s': unable to read disk capacity"), path);
2371                 return (zfs_error(hdl, EZFS_NOCAP, msg));
2372         }
2373         return (0);
2374 }
2375
2376 /*
2377  * Bring the specified vdev online.   The 'flags' parameter is a set of the
2378  * ZFS_ONLINE_* flags.
2379  */
2380 int
2381 zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
2382     vdev_state_t *newstate)
2383 {
2384         zfs_cmd_t zc = {"\0"};
2385         char msg[1024];
2386         nvlist_t *tgt;
2387         boolean_t avail_spare, l2cache, islog;
2388         libzfs_handle_t *hdl = zhp->zpool_hdl;
2389         int error;
2390
2391         if (flags & ZFS_ONLINE_EXPAND) {
2392                 (void) snprintf(msg, sizeof (msg),
2393                     dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
2394         } else {
2395                 (void) snprintf(msg, sizeof (msg),
2396                     dgettext(TEXT_DOMAIN, "cannot online %s"), path);
2397         }
2398
2399         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2400         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2401             &islog)) == NULL)
2402                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2403
2404         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2405
2406         if (avail_spare)
2407                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2408
2409         if (flags & ZFS_ONLINE_EXPAND ||
2410             zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
2411                 uint64_t wholedisk = 0;
2412
2413                 (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
2414                     &wholedisk);
2415
2416                 /*
2417                  * XXX - L2ARC 1.0 devices can't support expansion.
2418                  */
2419                 if (l2cache) {
2420                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2421                             "cannot expand cache devices"));
2422                         return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
2423                 }
2424
2425                 if (wholedisk) {
2426                         const char *fullpath = path;
2427                         char buf[MAXPATHLEN];
2428
2429                         if (path[0] != '/') {
2430                                 error = zfs_resolve_shortname(path, buf,
2431                                     sizeof (buf));
2432                                 if (error != 0)
2433                                         return (zfs_error(hdl, EZFS_NODEVICE,
2434                                             msg));
2435
2436                                 fullpath = buf;
2437                         }
2438
2439                         error = zpool_relabel_disk(hdl, fullpath, msg);
2440                         if (error != 0)
2441                                 return (error);
2442                 }
2443         }
2444
2445         zc.zc_cookie = VDEV_STATE_ONLINE;
2446         zc.zc_obj = flags;
2447
2448         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
2449                 if (errno == EINVAL) {
2450                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
2451                             "from this pool into a new one.  Use '%s' "
2452                             "instead"), "zpool detach");
2453                         return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
2454                 }
2455                 return (zpool_standard_error(hdl, errno, msg));
2456         }
2457
2458         *newstate = zc.zc_cookie;
2459         return (0);
2460 }
2461
2462 /*
2463  * Take the specified vdev offline
2464  */
2465 int
2466 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
2467 {
2468         zfs_cmd_t zc = {"\0"};
2469         char msg[1024];
2470         nvlist_t *tgt;
2471         boolean_t avail_spare, l2cache;
2472         libzfs_handle_t *hdl = zhp->zpool_hdl;
2473
2474         (void) snprintf(msg, sizeof (msg),
2475             dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
2476
2477         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2478         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2479             NULL)) == NULL)
2480                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2481
2482         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2483
2484         if (avail_spare)
2485                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2486
2487         zc.zc_cookie = VDEV_STATE_OFFLINE;
2488         zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
2489
2490         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2491                 return (0);
2492
2493         switch (errno) {
2494         case EBUSY:
2495
2496                 /*
2497                  * There are no other replicas of this device.
2498                  */
2499                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2500
2501         case EEXIST:
2502                 /*
2503                  * The log device has unplayed logs
2504                  */
2505                 return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
2506
2507         default:
2508                 return (zpool_standard_error(hdl, errno, msg));
2509         }
2510 }
2511
2512 /*
2513  * Mark the given vdev faulted.
2514  */
2515 int
2516 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2517 {
2518         zfs_cmd_t zc = {"\0"};
2519         char msg[1024];
2520         libzfs_handle_t *hdl = zhp->zpool_hdl;
2521
2522         (void) snprintf(msg, sizeof (msg),
2523             dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
2524
2525         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2526         zc.zc_guid = guid;
2527         zc.zc_cookie = VDEV_STATE_FAULTED;
2528         zc.zc_obj = aux;
2529
2530         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2531                 return (0);
2532
2533         switch (errno) {
2534         case EBUSY:
2535
2536                 /*
2537                  * There are no other replicas of this device.
2538                  */
2539                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2540
2541         default:
2542                 return (zpool_standard_error(hdl, errno, msg));
2543         }
2544
2545 }
2546
2547 /*
2548  * Mark the given vdev degraded.
2549  */
2550 int
2551 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2552 {
2553         zfs_cmd_t zc = {"\0"};
2554         char msg[1024];
2555         libzfs_handle_t *hdl = zhp->zpool_hdl;
2556
2557         (void) snprintf(msg, sizeof (msg),
2558             dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
2559
2560         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2561         zc.zc_guid = guid;
2562         zc.zc_cookie = VDEV_STATE_DEGRADED;
2563         zc.zc_obj = aux;
2564
2565         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2566                 return (0);
2567
2568         return (zpool_standard_error(hdl, errno, msg));
2569 }
2570
2571 /*
2572  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
2573  * a hot spare.
2574  */
2575 static boolean_t
2576 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
2577 {
2578         nvlist_t **child;
2579         uint_t c, children;
2580         char *type;
2581
2582         if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
2583             &children) == 0) {
2584                 verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
2585                     &type) == 0);
2586
2587                 if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
2588                     children == 2 && child[which] == tgt)
2589                         return (B_TRUE);
2590
2591                 for (c = 0; c < children; c++)
2592                         if (is_replacing_spare(child[c], tgt, which))
2593                                 return (B_TRUE);
2594         }
2595
2596         return (B_FALSE);
2597 }
2598
2599 /*
2600  * Attach new_disk (fully described by nvroot) to old_disk.
2601  * If 'replacing' is specified, the new disk will replace the old one.
2602  */
2603 int
2604 zpool_vdev_attach(zpool_handle_t *zhp,
2605     const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
2606 {
2607         zfs_cmd_t zc = {"\0"};
2608         char msg[1024];
2609         int ret;
2610         nvlist_t *tgt;
2611         boolean_t avail_spare, l2cache, islog;
2612         uint64_t val;
2613         char *newname;
2614         nvlist_t **child;
2615         uint_t children;
2616         nvlist_t *config_root;
2617         libzfs_handle_t *hdl = zhp->zpool_hdl;
2618         boolean_t rootpool = zpool_is_bootable(zhp);
2619
2620         if (replacing)
2621                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2622                     "cannot replace %s with %s"), old_disk, new_disk);
2623         else
2624                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2625                     "cannot attach %s to %s"), new_disk, old_disk);
2626
2627 #if defined(__sun__) || defined(__sun)
2628         /*
2629          * If this is a root pool, make sure that we're not attaching an
2630          * EFI labeled device.
2631          */
2632         if (rootpool && pool_uses_efi(nvroot)) {
2633                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2634                     "EFI labeled devices are not supported on root pools."));
2635                 return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
2636         }
2637 #endif
2638
2639         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2640         if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
2641             &islog)) == 0)
2642                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2643
2644         if (avail_spare)
2645                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2646
2647         if (l2cache)
2648                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2649
2650         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2651         zc.zc_cookie = replacing;
2652
2653         if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
2654             &child, &children) != 0 || children != 1) {
2655                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2656                     "new device must be a single disk"));
2657                 return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
2658         }
2659
2660         verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
2661             ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
2662
2663         if ((newname = zpool_vdev_name(NULL, NULL, child[0], B_FALSE)) == NULL)
2664                 return (-1);
2665
2666         /*
2667          * If the target is a hot spare that has been swapped in, we can only
2668          * replace it with another hot spare.
2669          */
2670         if (replacing &&
2671             nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
2672             (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
2673             NULL) == NULL || !avail_spare) &&
2674             is_replacing_spare(config_root, tgt, 1)) {
2675                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2676                     "can only be replaced by another hot spare"));
2677                 free(newname);
2678                 return (zfs_error(hdl, EZFS_BADTARGET, msg));
2679         }
2680
2681         free(newname);
2682
2683         if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
2684                 return (-1);
2685
2686         ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);
2687
2688         zcmd_free_nvlists(&zc);
2689
2690         if (ret == 0) {
2691                 if (rootpool) {
2692                         /*
2693                          * XXX need a better way to prevent user from
2694                          * booting up a half-baked vdev.
2695                          */
2696                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
2697                             "sure to wait until resilver is done "
2698                             "before rebooting.\n"));
2699                 }
2700                 return (0);
2701         }
2702
2703         switch (errno) {
2704         case ENOTSUP:
2705                 /*
2706                  * Can't attach to or replace this type of vdev.
2707                  */
2708                 if (replacing) {
2709                         uint64_t version = zpool_get_prop_int(zhp,
2710                             ZPOOL_PROP_VERSION, NULL);
2711
2712                         if (islog)
2713                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2714                                     "cannot replace a log with a spare"));
2715                         else if (version >= SPA_VERSION_MULTI_REPLACE)
2716                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2717                                     "already in replacing/spare config; wait "
2718                                     "for completion or use 'zpool detach'"));
2719                         else
2720                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2721                                     "cannot replace a replacing device"));
2722                 } else {
2723                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2724                             "can only attach to mirrors and top-level "
2725                             "disks"));
2726                 }
2727                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
2728                 break;
2729
2730         case EINVAL:
2731                 /*
2732                  * The new device must be a single disk.
2733                  */
2734                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2735                     "new device must be a single disk"));
2736                 (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
2737                 break;
2738
2739         case EBUSY:
2740                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
2741                     new_disk);
2742                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2743                 break;
2744
2745         case EOVERFLOW:
2746                 /*
2747                  * The new device is too small.
2748                  */
2749                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2750                     "device is too small"));
2751                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2752                 break;
2753
2754         case EDOM:
2755                 /*
2756                  * The new device has a different alignment requirement.
2757                  */
2758                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2759                     "devices have different sector alignment"));
2760                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2761                 break;
2762
2763         case ENAMETOOLONG:
2764                 /*
2765                  * The resulting top-level vdev spec won't fit in the label.
2766                  */
2767                 (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
2768                 break;
2769
2770         default:
2771                 (void) zpool_standard_error(hdl, errno, msg);
2772         }
2773
2774         return (-1);
2775 }
2776
2777 /*
2778  * Detach the specified device.
2779  */
2780 int
2781 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
2782 {
2783         zfs_cmd_t zc = {"\0"};
2784         char msg[1024];
2785         nvlist_t *tgt;
2786         boolean_t avail_spare, l2cache;
2787         libzfs_handle_t *hdl = zhp->zpool_hdl;
2788
2789         (void) snprintf(msg, sizeof (msg),
2790             dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
2791
2792         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2793         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2794             NULL)) == 0)
2795                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2796
2797         if (avail_spare)
2798                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2799
2800         if (l2cache)
2801                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2802
2803         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2804
2805         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
2806                 return (0);
2807
2808         switch (errno) {
2809
2810         case ENOTSUP:
2811                 /*
2812                  * Can't detach from this type of vdev.
2813                  */
2814                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
2815                     "applicable to mirror and replacing vdevs"));
2816                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
2817                 break;
2818
2819         case EBUSY:
2820                 /*
2821                  * There are no other replicas of this device.
2822                  */
2823                 (void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
2824                 break;
2825
2826         default:
2827                 (void) zpool_standard_error(hdl, errno, msg);
2828         }
2829
2830         return (-1);
2831 }
2832
2833 /*
2834  * Find a mirror vdev in the source nvlist.
2835  *
2836  * The mchild array contains a list of disks in one of the top-level mirrors
2837  * of the source pool.  The schild array contains a list of disks that the
2838  * user specified on the command line.  We loop over the mchild array to
2839  * see if any entry in the schild array matches.
2840  *
2841  * If a disk in the mchild array is found in the schild array, we return
2842  * the index of that entry.  Otherwise we return -1.
2843  */
2844 static int
2845 find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
2846     nvlist_t **schild, uint_t schildren)
2847 {
2848         uint_t mc;
2849
2850         for (mc = 0; mc < mchildren; mc++) {
2851                 uint_t sc;
2852                 char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
2853                     mchild[mc], B_FALSE);
2854
2855                 for (sc = 0; sc < schildren; sc++) {
2856                         char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
2857                             schild[sc], B_FALSE);
2858                         boolean_t result = (strcmp(mpath, spath) == 0);
2859
2860                         free(spath);
2861                         if (result) {
2862                                 free(mpath);
2863                                 return (mc);
2864                         }
2865                 }
2866
2867                 free(mpath);
2868         }
2869
2870         return (-1);
2871 }
2872
2873 /*
2874  * Split a mirror pool.  If newroot points to null, then a new nvlist
2875  * is generated and it is the responsibility of the caller to free it.
2876  */
2877 int
2878 zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
2879     nvlist_t *props, splitflags_t flags)
2880 {
2881         zfs_cmd_t zc = {"\0"};
2882         char msg[1024];
2883         nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
2884         nvlist_t **varray = NULL, *zc_props = NULL;
2885         uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
2886         libzfs_handle_t *hdl = zhp->zpool_hdl;
2887         uint64_t vers;
2888         boolean_t freelist = B_FALSE, memory_err = B_TRUE;
2889         int retval = 0;
2890
2891         (void) snprintf(msg, sizeof (msg),
2892             dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
2893
2894         if (!zpool_name_valid(hdl, B_FALSE, newname))
2895                 return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
2896
2897         if ((config = zpool_get_config(zhp, NULL)) == NULL) {
2898                 (void) fprintf(stderr, gettext("Internal error: unable to "
2899                     "retrieve pool configuration\n"));
2900                 return (-1);
2901         }
2902
2903         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
2904             == 0);
2905         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);
2906
2907         if (props) {
2908                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
2909                 if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
2910                     props, vers, flags, msg)) == NULL)
2911                         return (-1);
2912         }
2913
2914         if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
2915             &children) != 0) {
2916                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2917                     "Source pool is missing vdev tree"));
2918                 if (zc_props)
2919                         nvlist_free(zc_props);
2920                 return (-1);
2921         }
2922
2923         varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
2924         vcount = 0;
2925
2926         if (*newroot == NULL ||
2927             nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
2928             &newchild, &newchildren) != 0)
2929                 newchildren = 0;
2930
2931         for (c = 0; c < children; c++) {
2932                 uint64_t is_log = B_FALSE, is_hole = B_FALSE;
2933                 char *type;
2934                 nvlist_t **mchild, *vdev;
2935                 uint_t mchildren;
2936                 int entry;
2937
2938                 /*
2939                  * Unlike cache & spares, slogs are stored in the
2940                  * ZPOOL_CONFIG_CHILDREN array.  We filter them out here.
2941                  */
2942                 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
2943                     &is_log);
2944                 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
2945                     &is_hole);
2946                 if (is_log || is_hole) {
2947                         /*
2948                          * Create a hole vdev and put it in the config.
2949                          */
2950                         if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
2951                                 goto out;
2952                         if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
2953                             VDEV_TYPE_HOLE) != 0)
2954                                 goto out;
2955                         if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
2956                             1) != 0)
2957                                 goto out;
2958                         if (lastlog == 0)
2959                                 lastlog = vcount;
2960                         varray[vcount++] = vdev;
2961                         continue;
2962                 }
2963                 lastlog = 0;
2964                 verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
2965                     == 0);
2966                 if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
2967                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2968                             "Source pool must be composed only of mirrors\n"));
2969                         retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
2970                         goto out;
2971                 }
2972
2973                 verify(nvlist_lookup_nvlist_array(child[c],
2974                     ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
2975
2976                 /* find or add an entry for this top-level vdev */
2977                 if (newchildren > 0 &&
2978                     (entry = find_vdev_entry(zhp, mchild, mchildren,
2979                     newchild, newchildren)) >= 0) {
2980                         /* We found a disk that the user specified. */
2981                         vdev = mchild[entry];
2982                         ++found;
2983                 } else {
2984                         /* User didn't specify a disk for this vdev. */
2985                         vdev = mchild[mchildren - 1];
2986                 }
2987
2988                 if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
2989                         goto out;
2990         }
2991
2992         /* did we find every disk the user specified? */
2993         if (found != newchildren) {
2994                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
2995                     "include at most one disk from each mirror"));
2996                 retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
2997                 goto out;
2998         }
2999
3000         /* Prepare the nvlist for populating. */
3001         if (*newroot == NULL) {
3002                 if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
3003                         goto out;
3004                 freelist = B_TRUE;
3005                 if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
3006                     VDEV_TYPE_ROOT) != 0)
3007                         goto out;
3008         } else {
3009                 verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
3010         }
3011
3012         /* Add all the children we found */
3013         if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
3014             lastlog == 0 ? vcount : lastlog) != 0)
3015                 goto out;
3016
3017         /*
3018          * If we're just doing a dry run, exit now with success.
3019          */
3020         if (flags.dryrun) {
3021                 memory_err = B_FALSE;
3022                 freelist = B_FALSE;
3023                 goto out;
3024         }
3025
3026         /* now build up the config list & call the ioctl */
3027         if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
3028                 goto out;
3029
3030         if (nvlist_add_nvlist(newconfig,
3031             ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
3032             nvlist_add_string(newconfig,
3033             ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
3034             nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
3035                 goto out;
3036
3037         /*
3038          * The new pool is automatically part of the namespace unless we
3039          * explicitly export it.
3040          */
3041         if (!flags.import)
3042                 zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
3043         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3044         (void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
3045         if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
3046                 goto out;
3047         if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
3048                 goto out;
3049
3050         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
3051                 retval = zpool_standard_error(hdl, errno, msg);
3052                 goto out;
3053         }
3054
3055         freelist = B_FALSE;
3056         memory_err = B_FALSE;
3057
3058 out:
3059         if (varray != NULL) {
3060                 int v;
3061
3062                 for (v = 0; v < vcount; v++)
3063                         nvlist_free(varray[v]);
3064                 free(varray);
3065         }
3066         zcmd_free_nvlists(&zc);
3067         if (zc_props)
3068                 nvlist_free(zc_props);
3069         if (newconfig)
3070                 nvlist_free(newconfig);
3071         if (freelist) {
3072                 nvlist_free(*newroot);
3073                 *newroot = NULL;
3074         }
3075
3076         if (retval != 0)
3077                 return (retval);
3078
3079         if (memory_err)
3080                 return (no_memory(hdl));
3081
3082         return (0);
3083 }
3084
3085 /*
3086  * Remove the given device.  Currently, this is supported only for hot spares
3087  * and level 2 cache devices.
3088  */
3089 int
3090 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
3091 {
3092         zfs_cmd_t zc = {"\0"};
3093         char msg[1024];
3094         nvlist_t *tgt;
3095         boolean_t avail_spare, l2cache, islog;
3096         libzfs_handle_t *hdl = zhp->zpool_hdl;
3097         uint64_t version;
3098
3099         (void) snprintf(msg, sizeof (msg),
3100             dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
3101
3102         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3103         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3104             &islog)) == 0)
3105                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3106         /*
3107          * XXX - this should just go away.
3108          */
3109         if (!avail_spare && !l2cache && !islog) {
3110                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3111                     "only inactive hot spares, cache, top-level, "
3112                     "or log devices can be removed"));
3113                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3114         }
3115
3116         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
3117         if (islog && version < SPA_VERSION_HOLES) {
3118                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3119                     "pool must be upgrade to support log removal"));
3120                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
3121         }
3122
3123         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
3124
3125         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
3126                 return (0);
3127
3128         return (zpool_standard_error(hdl, errno, msg));
3129 }
3130
3131 /*
3132  * Clear the errors for the pool, or the particular device if specified.
3133  */
3134 int
3135 zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
3136 {
3137         zfs_cmd_t zc = {"\0"};
3138         char msg[1024];
3139         nvlist_t *tgt;
3140         zpool_rewind_policy_t policy;
3141         boolean_t avail_spare, l2cache;
3142         libzfs_handle_t *hdl = zhp->zpool_hdl;
3143         nvlist_t *nvi = NULL;
3144         int error;
3145
3146         if (path)
3147                 (void) snprintf(msg, sizeof (msg),
3148                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3149                     path);
3150         else
3151                 (void) snprintf(msg, sizeof (msg),
3152                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3153                     zhp->zpool_name);
3154
3155         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3156         if (path) {
3157                 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
3158                     &l2cache, NULL)) == 0)
3159                         return (zfs_error(hdl, EZFS_NODEVICE, msg));
3160
3161                 /*
3162                  * Don't allow error clearing for hot spares.  Do allow
3163                  * error clearing for l2cache devices.
3164                  */
3165                 if (avail_spare)
3166                         return (zfs_error(hdl, EZFS_ISSPARE, msg));
3167
3168                 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
3169                     &zc.zc_guid) == 0);
3170         }
3171
3172         zpool_get_rewind_policy(rewindnvl, &policy);
3173         zc.zc_cookie = policy.zrp_request;
3174
3175         if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
3176                 return (-1);
3177
3178         if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
3179                 return (-1);
3180
3181         while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
3182             errno == ENOMEM) {
3183                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3184                         zcmd_free_nvlists(&zc);
3185                         return (-1);
3186                 }
3187         }
3188
3189         if (!error || ((policy.zrp_request & ZPOOL_TRY_REWIND) &&
3190             errno != EPERM && errno != EACCES)) {
3191                 if (policy.zrp_request &
3192                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
3193                         (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
3194                         zpool_rewind_exclaim(hdl, zc.zc_name,
3195                             ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0),
3196                             nvi);
3197                         nvlist_free(nvi);
3198                 }
3199                 zcmd_free_nvlists(&zc);
3200                 return (0);
3201         }
3202
3203         zcmd_free_nvlists(&zc);
3204         return (zpool_standard_error(hdl, errno, msg));
3205 }
3206
3207 /*
3208  * Similar to zpool_clear(), but takes a GUID (used by fmd).
3209  */
3210 int
3211 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
3212 {
3213         zfs_cmd_t zc = {"\0"};
3214         char msg[1024];
3215         libzfs_handle_t *hdl = zhp->zpool_hdl;
3216
3217         (void) snprintf(msg, sizeof (msg),
3218             dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
3219             (u_longlong_t)guid);
3220
3221         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3222         zc.zc_guid = guid;
3223         zc.zc_cookie = ZPOOL_NO_REWIND;
3224
3225         if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
3226                 return (0);
3227
3228         return (zpool_standard_error(hdl, errno, msg));
3229 }
3230
3231 /*
3232  * Change the GUID for a pool.
3233  */
3234 int
3235 zpool_reguid(zpool_handle_t *zhp)
3236 {
3237         char msg[1024];
3238         libzfs_handle_t *hdl = zhp->zpool_hdl;
3239         zfs_cmd_t zc = {"\0"};
3240
3241         (void) snprintf(msg, sizeof (msg),
3242             dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name);
3243
3244         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3245         if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0)
3246                 return (0);
3247
3248         return (zpool_standard_error(hdl, errno, msg));
3249 }
3250
3251 /*
3252  * Reopen the pool.
3253  */
3254 int
3255 zpool_reopen(zpool_handle_t *zhp)
3256 {
3257         zfs_cmd_t zc = {"\0"};
3258         char msg[1024];
3259         libzfs_handle_t *hdl = zhp->zpool_hdl;
3260
3261         (void) snprintf(msg, sizeof (msg),
3262             dgettext(TEXT_DOMAIN, "cannot reopen '%s'"),
3263             zhp->zpool_name);
3264
3265         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3266         if (zfs_ioctl(hdl, ZFS_IOC_POOL_REOPEN, &zc) == 0)
3267                 return (0);
3268         return (zpool_standard_error(hdl, errno, msg));
3269 }
3270
3271 /*
3272  * Convert from a devid string to a path.
3273  */
3274 static char *
3275 devid_to_path(char *devid_str)
3276 {
3277         ddi_devid_t devid;
3278         char *minor;
3279         char *path;
3280         devid_nmlist_t *list = NULL;
3281         int ret;
3282
3283         if (devid_str_decode(devid_str, &devid, &minor) != 0)
3284                 return (NULL);
3285
3286         ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
3287
3288         devid_str_free(minor);
3289         devid_free(devid);
3290
3291         if (ret != 0)
3292                 return (NULL);
3293
3294         if ((path = strdup(list[0].devname)) == NULL)
3295                 return (NULL);
3296
3297         devid_free_nmlist(list);
3298
3299         return (path);
3300 }
3301
3302 /*
3303  * Convert from a path to a devid string.
3304  */
3305 static char *
3306 path_to_devid(const char *path)
3307 {
3308         int fd;
3309         ddi_devid_t devid;
3310         char *minor, *ret;
3311
3312         if ((fd = open(path, O_RDONLY)) < 0)
3313                 return (NULL);
3314
3315         minor = NULL;
3316         ret = NULL;
3317         if (devid_get(fd, &devid) == 0) {
3318                 if (devid_get_minor_name(fd, &minor) == 0)
3319                         ret = devid_str_encode(devid, minor);
3320                 if (minor != NULL)
3321                         devid_str_free(minor);
3322                 devid_free(devid);
3323         }
3324         (void) close(fd);
3325
3326         return (ret);
3327 }
3328
3329 /*
3330  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
3331  * ignore any failure here, since a common case is for an unprivileged user to
3332  * type 'zpool status', and we'll display the correct information anyway.
3333  */
3334 static void
3335 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
3336 {
3337         zfs_cmd_t zc = {"\0"};
3338
3339         (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3340         (void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
3341         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3342             &zc.zc_guid) == 0);
3343
3344         (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
3345 }
3346
3347 /*
3348  * Remove partition suffix from a vdev path.  Partition suffixes may take three
3349  * forms: "-partX", "pX", or "X", where X is a string of digits.  The second
3350  * case only occurs when the suffix is preceded by a digit, i.e. "md0p0" The
3351  * third case only occurs when preceded by a string matching the regular
3352  * expression "^[hs]d[a-z]+", i.e. a scsi or ide disk.
3353  */
3354 static char *
3355 strip_partition(libzfs_handle_t *hdl, char *path)
3356 {
3357         char *tmp = zfs_strdup(hdl, path);
3358         char *part = NULL, *d = NULL;
3359
3360         if ((part = strstr(tmp, "-part")) && part != tmp) {
3361                 d = part + 5;
3362         } else if ((part = strrchr(tmp, 'p')) &&
3363             part > tmp + 1 && isdigit(*(part-1))) {
3364                 d = part + 1;
3365         } else if ((tmp[0] == 'h' || tmp[0] == 's') && tmp[1] == 'd') {
3366                 for (d = &tmp[2]; isalpha(*d); part = ++d);
3367         }
3368         if (part && d && *d != '\0') {
3369                 for (; isdigit(*d); d++);
3370                 if (*d == '\0')
3371                         *part = '\0';
3372         }
3373         return (tmp);
3374 }
3375
3376 #define PATH_BUF_LEN    64
3377
3378 /*
3379  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
3380  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
3381  * We also check if this is a whole disk, in which case we strip off the
3382  * trailing 's0' slice name.
3383  *
3384  * This routine is also responsible for identifying when disks have been
3385  * reconfigured in a new location.  The kernel will have opened the device by
3386  * devid, but the path will still refer to the old location.  To catch this, we
3387  * first do a path -> devid translation (which is fast for the common case).  If
3388  * the devid matches, we're done.  If not, we do a reverse devid -> path
3389  * translation and issue the appropriate ioctl() to update the path of the vdev.
3390  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
3391  * of these checks.
3392  */
3393 char *
3394 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
3395     boolean_t verbose)
3396 {
3397         char *path, *devid, *type;
3398         uint64_t value;
3399         char buf[PATH_BUF_LEN];
3400         char tmpbuf[PATH_BUF_LEN];
3401         vdev_stat_t *vs;
3402         uint_t vsc;
3403
3404         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
3405             &value) == 0) {
3406                 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3407                     &value) == 0);
3408                 (void) snprintf(buf, sizeof (buf), "%llu",
3409                     (u_longlong_t)value);
3410                 path = buf;
3411         } else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
3412                 /*
3413                  * If the device is dead (faulted, offline, etc) then don't
3414                  * bother opening it.  Otherwise we may be forcing the user to
3415                  * open a misbehaving device, which can have undesirable
3416                  * effects.
3417                  */
3418                 if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
3419                     (uint64_t **)&vs, &vsc) != 0 ||
3420                     vs->vs_state >= VDEV_STATE_DEGRADED) &&
3421                     zhp != NULL &&
3422                     nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
3423                         /*
3424                          * Determine if the current path is correct.
3425                          */
3426                         char *newdevid = path_to_devid(path);
3427
3428                         if (newdevid == NULL ||
3429                             strcmp(devid, newdevid) != 0) {
3430                                 char *newpath;
3431
3432                                 if ((newpath = devid_to_path(devid)) != NULL) {
3433                                         /*
3434                                          * Update the path appropriately.
3435                                          */
3436                                         set_path(zhp, nv, newpath);
3437                                         if (nvlist_add_string(nv,
3438                                             ZPOOL_CONFIG_PATH, newpath) == 0)
3439                                                 verify(nvlist_lookup_string(nv,
3440                                                     ZPOOL_CONFIG_PATH,
3441                                                     &path) == 0);
3442                                         free(newpath);
3443                                 }
3444                         }
3445
3446                         if (newdevid)
3447                                 devid_str_free(newdevid);
3448                 }
3449
3450                 /*
3451                  * For a block device only use the name.
3452                  */
3453                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
3454                 if (strcmp(type, VDEV_TYPE_DISK) == 0) {
3455                         path = strrchr(path, '/');
3456                         path++;
3457                 }
3458
3459                 /*
3460                  * Remove the partition from the path it this is a whole disk.
3461                  */
3462                 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
3463                     &value) == 0 && value) {
3464                         return (strip_partition(hdl, path));
3465                 }
3466         } else {
3467                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
3468
3469                 /*
3470                  * If it's a raidz device, we need to stick in the parity level.
3471                  */
3472                 if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
3473
3474                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
3475                             &value) == 0);
3476                         (void) snprintf(buf, sizeof (buf), "%s%llu", path,
3477                             (u_longlong_t)value);
3478                         path = buf;
3479                 }
3480
3481                 /*
3482                  * We identify each top-level vdev by using a <type-id>
3483                  * naming convention.
3484                  */
3485                 if (verbose) {
3486                         uint64_t id;
3487
3488                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
3489                             &id) == 0);
3490                         (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s-%llu",
3491                             path, (u_longlong_t)id);
3492                         path = tmpbuf;
3493                 }
3494         }
3495
3496         return (zfs_strdup(hdl, path));
3497 }
3498
3499 static int
3500 zbookmark_compare(const void *a, const void *b)
3501 {
3502         return (memcmp(a, b, sizeof (zbookmark_t)));
3503 }
3504
3505 /*
3506  * Retrieve the persistent error log, uniquify the members, and return to the
3507  * caller.
3508  */
3509 int
3510 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
3511 {
3512         zfs_cmd_t zc = {"\0"};
3513         uint64_t count;
3514         zbookmark_t *zb = NULL;
3515         int i;
3516
3517         /*
3518          * Retrieve the raw error list from the kernel.  If the number of errors
3519          * has increased, allocate more space and continue until we get the
3520          * entire list.
3521          */
3522         verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
3523             &count) == 0);
3524         if (count == 0)
3525                 return (0);
3526         if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
3527             count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
3528                 return (-1);
3529         zc.zc_nvlist_dst_size = count;
3530         (void) strcpy(zc.zc_name, zhp->zpool_name);
3531         for (;;) {
3532                 if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
3533                     &zc) != 0) {
3534                         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3535                         if (errno == ENOMEM) {
3536                                 count = zc.zc_nvlist_dst_size;
3537                                 if ((zc.zc_nvlist_dst = (uintptr_t)
3538                                     zfs_alloc(zhp->zpool_hdl, count *
3539                                     sizeof (zbookmark_t))) == (uintptr_t)NULL)
3540                                         return (-1);
3541                         } else {
3542                                 return (-1);
3543                         }
3544                 } else {
3545                         break;
3546                 }
3547         }
3548
3549         /*
3550          * Sort the resulting bookmarks.  This is a little confusing due to the
3551          * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
3552          * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks
3553          * _not_ copied as part of the process.  So we point the start of our
3554          * array appropriate and decrement the total number of elements.
3555          */
3556         zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
3557             zc.zc_nvlist_dst_size;
3558         count -= zc.zc_nvlist_dst_size;
3559
3560         qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
3561
3562         verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
3563
3564         /*
3565          * Fill in the nverrlistp with nvlist's of dataset and object numbers.
3566          */
3567         for (i = 0; i < count; i++) {
3568                 nvlist_t *nv;
3569
3570                 /* ignoring zb_blkid and zb_level for now */
3571                 if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
3572                     zb[i-1].zb_object == zb[i].zb_object)
3573                         continue;
3574
3575                 if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
3576                         goto nomem;
3577                 if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
3578                     zb[i].zb_objset) != 0) {
3579                         nvlist_free(nv);
3580                         goto nomem;
3581                 }
3582                 if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
3583                     zb[i].zb_object) != 0) {
3584                         nvlist_free(nv);
3585                         goto nomem;
3586                 }
3587                 if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
3588                         nvlist_free(nv);
3589                         goto nomem;
3590                 }
3591                 nvlist_free(nv);
3592         }
3593
3594         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3595         return (0);
3596
3597 nomem:
3598         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3599         return (no_memory(zhp->zpool_hdl));
3600 }
3601
3602 /*
3603  * Upgrade a ZFS pool to the latest on-disk version.
3604  */
3605 int
3606 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
3607 {
3608         zfs_cmd_t zc = {"\0"};
3609         libzfs_handle_t *hdl = zhp->zpool_hdl;
3610
3611         (void) strcpy(zc.zc_name, zhp->zpool_name);
3612         zc.zc_cookie = new_version;
3613
3614         if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
3615                 return (zpool_standard_error_fmt(hdl, errno,
3616                     dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
3617                     zhp->zpool_name));
3618         return (0);
3619 }
3620
/*
 * Rebuild the command line into 'string' (capacity 'len' bytes): the
 * basename of argv[0] followed by each remaining argument, separated by
 * single spaces.  Output that does not fit is truncated by strlcpy/strlcat.
 */
void
zfs_save_arguments(int argc, char **argv, char *string, int len)
{
        int arg = 1;

        (void) strlcpy(string, basename(argv[0]), len);

        while (arg < argc) {
                (void) strlcat(string, " ", len);
                (void) strlcat(string, argv[arg], len);
                arg++;
        }
}
3632
3633 int
3634 zpool_log_history(libzfs_handle_t *hdl, const char *message)
3635 {
3636         zfs_cmd_t zc = {"\0"};
3637         nvlist_t *args;
3638         int err;
3639
3640         args = fnvlist_alloc();
3641         fnvlist_add_string(args, "message", message);
3642         err = zcmd_write_src_nvlist(hdl, &zc, args);
3643         if (err == 0)
3644                 err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc);
3645         nvlist_free(args);
3646         zcmd_free_nvlists(&zc);
3647         return (err);
3648 }
3649
3650 /*
3651  * Perform ioctl to get some command history of a pool.
3652  *
3653  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
3654  * logical offset of the history buffer to start reading from.
3655  *
3656  * Upon return, 'off' is the next logical offset to read from and
3657  * 'len' is the actual amount of bytes read into 'buf'.
3658  */
3659 static int
3660 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
3661 {
3662         zfs_cmd_t zc = {"\0"};
3663         libzfs_handle_t *hdl = zhp->zpool_hdl;
3664
3665         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3666
3667         zc.zc_history = (uint64_t)(uintptr_t)buf;
3668         zc.zc_history_len = *len;
3669         zc.zc_history_offset = *off;
3670
3671         if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
3672                 switch (errno) {
3673                 case EPERM:
3674                         return (zfs_error_fmt(hdl, EZFS_PERM,
3675                             dgettext(TEXT_DOMAIN,
3676                             "cannot show history for pool '%s'"),
3677                             zhp->zpool_name));
3678                 case ENOENT:
3679                         return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
3680                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
3681                             "'%s'"), zhp->zpool_name));
3682                 case ENOTSUP:
3683                         return (zfs_error_fmt(hdl, EZFS_BADVERSION,
3684                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
3685                             "'%s', pool must be upgraded"), zhp->zpool_name));
3686                 default:
3687                         return (zpool_standard_error_fmt(hdl, errno,
3688                             dgettext(TEXT_DOMAIN,
3689                             "cannot get history for '%s'"), zhp->zpool_name));
3690                 }
3691         }
3692
3693         *len = zc.zc_history_len;
3694         *off = zc.zc_history_offset;
3695
3696         return (0);
3697 }
3698
3699 /*
3700  * Process the buffer of nvlists, unpacking and storing each nvlist record
3701  * into 'records'.  'leftover' is set to the number of bytes that weren't
3702  * processed as there wasn't a complete record.
3703  */
3704 int
3705 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
3706     nvlist_t ***records, uint_t *numrecords)
3707 {
3708         uint64_t reclen;
3709         nvlist_t *nv;
3710         int i;
3711
3712         while (bytes_read > sizeof (reclen)) {
3713
3714                 /* get length of packed record (stored as little endian) */
3715                 for (i = 0, reclen = 0; i < sizeof (reclen); i++)
3716                         reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
3717
3718                 if (bytes_read < sizeof (reclen) + reclen)
3719                         break;
3720
3721                 /* unpack record */
3722                 if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
3723                         return (ENOMEM);
3724                 bytes_read -= sizeof (reclen) + reclen;
3725                 buf += sizeof (reclen) + reclen;
3726
3727                 /* add record to nvlist array */
3728                 (*numrecords)++;
3729                 if (ISP2(*numrecords + 1)) {
3730                         *records = realloc(*records,
3731                             *numrecords * 2 * sizeof (nvlist_t *));
3732                 }
3733                 (*records)[*numrecords - 1] = nv;
3734         }
3735
3736         *leftover = bytes_read;
3737         return (0);
3738 }
3739
3740 #define HIS_BUF_LEN     (128*1024)
3741
3742 /*
3743  * Retrieve the command history of a pool.
3744  */
3745 int
3746 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
3747 {
3748         char buf[HIS_BUF_LEN];
3749         uint64_t off = 0;
3750         nvlist_t **records = NULL;
3751         uint_t numrecords = 0;
3752         int err, i;
3753
3754         do {
3755                 uint64_t bytes_read = sizeof (buf);
3756                 uint64_t leftover;
3757
3758                 if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
3759                         break;
3760
3761                 /* if nothing else was read in, we're at EOF, just return */
3762                 if (!bytes_read)
3763                         break;
3764
3765                 if ((err = zpool_history_unpack(buf, bytes_read,
3766                     &leftover, &records, &numrecords)) != 0)
3767                         break;
3768                 off -= leftover;
3769
3770                 /* CONSTCOND */
3771         } while (1);
3772
3773         if (!err) {
3774                 verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
3775                 verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
3776                     records, numrecords) == 0);
3777         }
3778         for (i = 0; i < numrecords; i++)
3779                 nvlist_free(records[i]);
3780         free(records);
3781
3782         return (err);
3783 }
3784
3785 /*
3786  * Retrieve the next event.  If there is a new event available 'nvp' will
3787  * contain a newly allocated nvlist and 'dropped' will be set to the number
3788  * of missed events since the last call to this function.  When 'nvp' is
3789  * set to NULL it indicates no new events are available.  In either case
3790  * the function returns 0 and it is up to the caller to free 'nvp'.  In
3791  * the case of a fatal error the function will return a non-zero value.
3792  * When the function is called in blocking mode it will not return until
3793  * a new event is available.
3794  */
3795 int
3796 zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp,
3797     int *dropped, int block, int cleanup_fd)
3798 {
3799         zfs_cmd_t zc = {"\0"};
3800         int error = 0;
3801
3802         *nvp = NULL;
3803         *dropped = 0;
3804         zc.zc_cleanup_fd = cleanup_fd;
3805
3806         if (!block)
3807                 zc.zc_guid = ZEVENT_NONBLOCK;
3808
3809         if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0)
3810                 return (-1);
3811
3812 retry:
3813         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) != 0) {
3814                 switch (errno) {
3815                 case ESHUTDOWN:
3816                         error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
3817                             dgettext(TEXT_DOMAIN, "zfs shutdown"));
3818                         goto out;
3819                 case ENOENT:
3820                         /* Blocking error case should not occur */
3821                         if (block)
3822                                 error = zpool_standard_error_fmt(hdl, errno,
3823                                     dgettext(TEXT_DOMAIN, "cannot get event"));
3824
3825                         goto out;
3826                 case ENOMEM:
3827                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3828                                 error = zfs_error_fmt(hdl, EZFS_NOMEM,
3829                                     dgettext(TEXT_DOMAIN, "cannot get event"));
3830                                 goto out;
3831                         } else {
3832                                 goto retry;
3833                         }
3834                 default:
3835                         error = zpool_standard_error_fmt(hdl, errno,
3836                             dgettext(TEXT_DOMAIN, "cannot get event"));
3837                         goto out;
3838                 }
3839         }
3840
3841         error = zcmd_read_dst_nvlist(hdl, &zc, nvp);
3842         if (error != 0)
3843                 goto out;
3844
3845         *dropped = (int)zc.zc_cookie;
3846 out:
3847         zcmd_free_nvlists(&zc);
3848
3849         return (error);
3850 }
3851
3852 /*
3853  * Clear all events.
3854  */
3855 int
3856 zpool_events_clear(libzfs_handle_t *hdl, int *count)
3857 {
3858         zfs_cmd_t zc = {"\0"};
3859         char msg[1024];
3860
3861         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
3862             "cannot clear events"));
3863
3864         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0)
3865                 return (zpool_standard_error_fmt(hdl, errno, msg));
3866
3867         if (count != NULL)
3868                 *count = (int)zc.zc_cookie; /* # of events cleared */
3869
3870         return (0);
3871 }
3872
3873 void
3874 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
3875     char *pathname, size_t len)
3876 {
3877         zfs_cmd_t zc = {"\0"};
3878         boolean_t mounted = B_FALSE;
3879         char *mntpnt = NULL;
3880         char dsname[MAXNAMELEN];
3881
3882         if (dsobj == 0) {
3883                 /* special case for the MOS */
3884                 (void) snprintf(pathname, len, "<metadata>:<0x%llx>",
3885                     (longlong_t)obj);
3886                 return;
3887         }
3888
3889         /* get the dataset's name */
3890         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3891         zc.zc_obj = dsobj;
3892         if (ioctl(zhp->zpool_hdl->libzfs_fd,
3893             ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
3894                 /* just write out a path of two object numbers */
3895                 (void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
3896                     (longlong_t)dsobj, (longlong_t)obj);
3897                 return;
3898         }
3899         (void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
3900
3901         /* find out if the dataset is mounted */
3902         mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
3903
3904         /* get the corrupted object's path */
3905         (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
3906         zc.zc_obj = obj;
3907         if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
3908             &zc) == 0) {
3909                 if (mounted) {
3910                         (void) snprintf(pathname, len, "%s%s", mntpnt,
3911                             zc.zc_value);
3912                 } else {
3913                         (void) snprintf(pathname, len, "%s:%s",
3914                             dsname, zc.zc_value);
3915                 }
3916         } else {
3917                 (void) snprintf(pathname, len, "%s:<0x%llx>", dsname,
3918                     (longlong_t)obj);
3919         }
3920         free(mntpnt);
3921 }
3922
3923 /*
3924  * Read the EFI label from the config, if a label does not exist then
3925  * pass back the error to the caller. If the caller has passed a non-NULL
3926  * diskaddr argument then we set it to the starting address of the EFI
3927  * partition.
3928  */
3929 static int
3930 read_efi_label(nvlist_t *config, diskaddr_t *sb)
3931 {
3932         char *path;
3933         int fd;
3934         char diskname[MAXPATHLEN];
3935         int err = -1;
3936
3937         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
3938                 return (err);
3939
3940         (void) snprintf(diskname, sizeof (diskname), "%s%s", DISK_ROOT,
3941             strrchr(path, '/'));
3942         if ((fd = open(diskname, O_RDWR|O_DIRECT)) >= 0) {
3943                 struct dk_gpt *vtoc;
3944
3945                 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
3946                         if (sb != NULL)
3947                                 *sb = vtoc->efi_parts[0].p_start;
3948                         efi_free(vtoc);
3949                 }
3950                 (void) close(fd);
3951         }
3952         return (err);
3953 }
3954
3955 /*
3956  * determine where a partition starts on a disk in the current
3957  * configuration
3958  */
3959 static diskaddr_t
3960 find_start_block(nvlist_t *config)
3961 {
3962         nvlist_t **child;
3963         uint_t c, children;
3964         diskaddr_t sb = MAXOFFSET_T;
3965         uint64_t wholedisk;
3966
3967         if (nvlist_lookup_nvlist_array(config,
3968             ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
3969                 if (nvlist_lookup_uint64(config,
3970                     ZPOOL_CONFIG_WHOLE_DISK,
3971                     &wholedisk) != 0 || !wholedisk) {
3972                         return (MAXOFFSET_T);
3973                 }
3974                 if (read_efi_label(config, &sb) < 0)
3975                         sb = MAXOFFSET_T;
3976                 return (sb);
3977         }
3978
3979         for (c = 0; c < children; c++) {
3980                 sb = find_start_block(child[c]);
3981                 if (sb != MAXOFFSET_T) {
3982                         return (sb);
3983                 }
3984         }
3985         return (MAXOFFSET_T);
3986 }
3987
3988 int
3989 zpool_label_disk_wait(char *path, int timeout)
3990 {
3991         struct stat64 statbuf;
3992         int i;
3993
3994         /*
3995          * Wait timeout miliseconds for a newly created device to be available
3996          * from the given path.  There is a small window when a /dev/ device
3997          * will exist and the udev link will not, so we must wait for the
3998          * symlink.  Depending on the udev rules this may take a few seconds.
3999          */
4000         for (i = 0; i < timeout; i++) {
4001                 usleep(1000);
4002
4003                 errno = 0;
4004                 if ((stat64(path, &statbuf) == 0) && (errno == 0))
4005                         return (0);
4006         }
4007
4008         return (ENOENT);
4009 }
4010
4011 int
4012 zpool_label_disk_check(char *path)
4013 {
4014         struct dk_gpt *vtoc;
4015         int fd, err;
4016
4017         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0)
4018                 return (errno);
4019
4020         if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
4021                 (void) close(fd);
4022                 return (err);
4023         }
4024
4025         if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
4026                 efi_free(vtoc);
4027                 (void) close(fd);
4028                 return (EIDRM);
4029         }
4030
4031         efi_free(vtoc);
4032         (void) close(fd);
4033         return (0);
4034 }
4035
4036 /*
4037  * Label an individual disk.  The name provided is the short name,
4038  * stripped of any leading /dev path.
4039  */
4040 int
4041 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
4042 {
4043         char path[MAXPATHLEN];
4044         struct dk_gpt *vtoc;
4045         int rval, fd;
4046         size_t resv = EFI_MIN_RESV_SIZE;
4047         uint64_t slice_size;
4048         diskaddr_t start_block;
4049         char errbuf[1024];
4050
4051         /* prepare an error message just in case */
4052         (void) snprintf(errbuf, sizeof (errbuf),
4053             dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
4054
4055         if (zhp) {
4056                 nvlist_t *nvroot;
4057
4058 #if defined(__sun__) || defined(__sun)
4059                 if (zpool_is_bootable(zhp)) {
4060                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4061                             "EFI labeled devices are not supported on root "
4062                             "pools."));
4063                         return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf));
4064                 }
4065 #endif
4066
4067                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
4068                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
4069
4070                 if (zhp->zpool_start_block == 0)
4071                         start_block = find_start_block(nvroot);
4072                 else
4073                         start_block = zhp->zpool_start_block;
4074                 zhp->zpool_start_block = start_block;
4075         } else {
4076                 /* new pool */
4077                 start_block = NEW_START_BLOCK;
4078         }
4079
4080         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4081
4082         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
4083                 /*
4084                  * This shouldn't happen.  We've long since verified that this
4085                  * is a valid device.
4086                  */
4087                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
4088                     "label '%s': unable to open device: %d"), path, errno);
4089                 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
4090         }
4091
4092         if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
4093                 /*
4094                  * The only way this can fail is if we run out of memory, or we
4095                  * were unable to read the disk's capacity
4096                  */
4097                 if (errno == ENOMEM)
4098                         (void) no_memory(hdl);
4099
4100                 (void) close(fd);
4101                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
4102                     "label '%s': unable to read disk capacity"), path);
4103
4104                 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
4105         }
4106
4107         slice_size = vtoc->efi_last_u_lba + 1;
4108         slice_size -= EFI_MIN_RESV_SIZE;
4109         if (start_block == MAXOFFSET_T)
4110                 start_block = NEW_START_BLOCK;
4111         slice_size -= start_block;
4112         slice_size = P2ALIGN(slice_size, PARTITION_END_ALIGNMENT);
4113
4114         vtoc->efi_parts[0].p_start = start_block;
4115         vtoc->efi_parts[0].p_size = slice_size;
4116
4117         /*
4118          * Why we use V_USR: V_BACKUP confuses users, and is considered
4119          * disposable by some EFI utilities (since EFI doesn't have a backup
4120          * slice).  V_UNASSIGNED is supposed to be used only for zero size
4121          * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
4122          * etc. were all pretty specific.  V_USR is as close to reality as we
4123          * can get, in the absence of V_OTHER.
4124          */
4125         vtoc->efi_parts[0].p_tag = V_USR;
4126         (void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
4127
4128         vtoc->efi_parts[8].p_start = slice_size + start_block;
4129         vtoc->efi_parts[8].p_size = resv;
4130         vtoc->efi_parts[8].p_tag = V_RESERVED;
4131
4132         if ((rval = efi_write(fd, vtoc)) != 0 || (rval = efi_rescan(fd)) != 0) {
4133                 /*
4134                  * Some block drivers (like pcata) may not support EFI
4135                  * GPT labels.  Print out a helpful error message dir-
4136                  * ecting the user to manually label the disk and give
4137                  * a specific slice.
4138                  */
4139                 (void) close(fd);
4140                 efi_free(vtoc);
4141
4142                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
4143                     "parted(8) and then provide a specific slice: %d"), rval);
4144                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4145         }
4146
4147         (void) close(fd);
4148         efi_free(vtoc);
4149
4150         /* Wait for the first expected partition to appear. */
4151
4152         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4153         (void) zfs_append_partition(path, MAXPATHLEN);
4154
4155         rval = zpool_label_disk_wait(path, 3000);
4156         if (rval) {
4157                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
4158                     "detect device partitions on '%s': %d"), path, rval);
4159                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4160         }
4161
4162         /* We can't be to paranoid.  Read the label back and verify it. */
4163         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4164         rval = zpool_label_disk_check(path);
4165         if (rval) {
4166                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
4167                     "EFI label on '%s' is damaged.  Ensure\nthis device "
4168                     "is not in in use, and is functioning properly: %d"),
4169                     path, rval);
4170                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4171         }
4172
4173         return (0);
4174 }