/*
 * lib/libzfs/libzfs_pool.c — from the ZFS source tree.
 * (Scraped git-web page header removed; commit: "Add libtpool (thread pools)")
 */
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
26  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
27  * Copyright (c) 2017 Datto Inc.
28  */
29
30 #include <ctype.h>
31 #include <errno.h>
32 #include <devid.h>
33 #include <fcntl.h>
34 #include <libintl.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <strings.h>
38 #include <unistd.h>
39 #include <libgen.h>
40 #include <zone.h>
41 #include <sys/stat.h>
42 #include <sys/efi_partition.h>
43 #include <sys/systeminfo.h>
44 #include <sys/vtoc.h>
45 #include <sys/zfs_ioctl.h>
46 #include <dlfcn.h>
47
48 #include "zfs_namecheck.h"
49 #include "zfs_prop.h"
50 #include "libzfs_impl.h"
51 #include "zfs_comutil.h"
52 #include "zfeature_common.h"
53
/* Forward declaration; defined later in this file (past this chunk). */
static int read_efi_label(nvlist_t *config, diskaddr_t *sb);

/*
 * Flags telling zpool_valid_proplist() which context-specific checks to
 * apply when validating a property nvlist.
 */
typedef struct prop_flags {
	int create:1;	/* Validate property on creation */
	int import:1;	/* Validate property on import */
} prop_flags_t;
60
61 /*
62  * ====================================================================
63  *   zpool property functions
64  * ====================================================================
65  */
66
67 static int
68 zpool_get_all_props(zpool_handle_t *zhp)
69 {
70         zfs_cmd_t zc = {"\0"};
71         libzfs_handle_t *hdl = zhp->zpool_hdl;
72
73         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
74
75         if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
76                 return (-1);
77
78         while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
79                 if (errno == ENOMEM) {
80                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
81                                 zcmd_free_nvlists(&zc);
82                                 return (-1);
83                         }
84                 } else {
85                         zcmd_free_nvlists(&zc);
86                         return (-1);
87                 }
88         }
89
90         if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
91                 zcmd_free_nvlists(&zc);
92                 return (-1);
93         }
94
95         zcmd_free_nvlists(&zc);
96
97         return (0);
98 }
99
100 static int
101 zpool_props_refresh(zpool_handle_t *zhp)
102 {
103         nvlist_t *old_props;
104
105         old_props = zhp->zpool_props;
106
107         if (zpool_get_all_props(zhp) != 0)
108                 return (-1);
109
110         nvlist_free(old_props);
111         return (0);
112 }
113
114 static char *
115 zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
116     zprop_source_t *src)
117 {
118         nvlist_t *nv, *nvl;
119         uint64_t ival;
120         char *value;
121         zprop_source_t source;
122
123         nvl = zhp->zpool_props;
124         if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
125                 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
126                 source = ival;
127                 verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
128         } else {
129                 source = ZPROP_SRC_DEFAULT;
130                 if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
131                         value = "-";
132         }
133
134         if (src)
135                 *src = source;
136
137         return (value);
138 }
139
140 uint64_t
141 zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
142 {
143         nvlist_t *nv, *nvl;
144         uint64_t value;
145         zprop_source_t source;
146
147         if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
148                 /*
149                  * zpool_get_all_props() has most likely failed because
150                  * the pool is faulted, but if all we need is the top level
151                  * vdev's guid then get it from the zhp config nvlist.
152                  */
153                 if ((prop == ZPOOL_PROP_GUID) &&
154                     (nvlist_lookup_nvlist(zhp->zpool_config,
155                     ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
156                     (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
157                     == 0)) {
158                         return (value);
159                 }
160                 return (zpool_prop_default_numeric(prop));
161         }
162
163         nvl = zhp->zpool_props;
164         if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
165                 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
166                 source = value;
167                 verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
168         } else {
169                 source = ZPROP_SRC_DEFAULT;
170                 value = zpool_prop_default_numeric(prop);
171         }
172
173         if (src)
174                 *src = source;
175
176         return (value);
177 }
178
179 /*
180  * Map VDEV STATE to printed strings.
181  */
182 char *
183 zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
184 {
185         switch (state) {
186         case VDEV_STATE_CLOSED:
187         case VDEV_STATE_OFFLINE:
188                 return (gettext("OFFLINE"));
189         case VDEV_STATE_REMOVED:
190                 return (gettext("REMOVED"));
191         case VDEV_STATE_CANT_OPEN:
192                 if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
193                         return (gettext("FAULTED"));
194                 else if (aux == VDEV_AUX_SPLIT_POOL)
195                         return (gettext("SPLIT"));
196                 else
197                         return (gettext("UNAVAIL"));
198         case VDEV_STATE_FAULTED:
199                 return (gettext("FAULTED"));
200         case VDEV_STATE_DEGRADED:
201                 return (gettext("DEGRADED"));
202         case VDEV_STATE_HEALTHY:
203                 return (gettext("ONLINE"));
204
205         default:
206                 break;
207         }
208
209         return (gettext("UNKNOWN"));
210 }
211
212 /*
213  * Map POOL STATE to printed strings.
214  */
215 const char *
216 zpool_pool_state_to_name(pool_state_t state)
217 {
218         switch (state) {
219         default:
220                 break;
221         case POOL_STATE_ACTIVE:
222                 return (gettext("ACTIVE"));
223         case POOL_STATE_EXPORTED:
224                 return (gettext("EXPORTED"));
225         case POOL_STATE_DESTROYED:
226                 return (gettext("DESTROYED"));
227         case POOL_STATE_SPARE:
228                 return (gettext("SPARE"));
229         case POOL_STATE_L2CACHE:
230                 return (gettext("L2CACHE"));
231         case POOL_STATE_UNINITIALIZED:
232                 return (gettext("UNINITIALIZED"));
233         case POOL_STATE_UNAVAIL:
234                 return (gettext("UNAVAIL"));
235         case POOL_STATE_POTENTIALLY_ACTIVE:
236                 return (gettext("POTENTIALLY_ACTIVE"));
237         }
238
239         return (gettext("UNKNOWN"));
240 }
241
/*
 * Get a zpool property value for 'prop' and return the value in
 * a pre-allocated buffer.
 *
 * 'buf'/'len' receive the formatted value; 'srctype' (optional) receives
 * where the value came from; 'literal' selects raw numeric output over the
 * human-readable ("nice") formatting.  Returns 0 on success, -1 on failure.
 */
int
zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
    size_t len, zprop_source_t *srctype, boolean_t literal)
{
	uint64_t intval;
	const char *strval;
	zprop_source_t src = ZPROP_SRC_NONE;
	nvlist_t *nvroot;
	vdev_stat_t *vs;
	uint_t vsc;

	/*
	 * For an unavailable pool only a handful of properties can still be
	 * answered; everything else is reported as "-".
	 */
	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
		switch (prop) {
		case ZPOOL_PROP_NAME:
			(void) strlcpy(buf, zpool_get_name(zhp), len);
			break;

		case ZPOOL_PROP_HEALTH:
			(void) strlcpy(buf, "FAULTED", len);
			break;

		case ZPOOL_PROP_GUID:
			intval = zpool_get_prop_int(zhp, prop, &src);
			(void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
			break;

		case ZPOOL_PROP_ALTROOT:
		case ZPOOL_PROP_CACHEFILE:
		case ZPOOL_PROP_COMMENT:
			/* these may be answerable from the cached nvlist */
			if (zhp->zpool_props != NULL ||
			    zpool_get_all_props(zhp) == 0) {
				(void) strlcpy(buf,
				    zpool_get_prop_string(zhp, prop, &src),
				    len);
				break;
			}
			/* FALLTHROUGH */
		default:
			(void) strlcpy(buf, "-", len);
			break;
		}

		if (srctype != NULL)
			*srctype = src;
		return (0);
	}

	/*
	 * NAME can be answered without the property nvlist; anything else
	 * fails if the nvlist cannot be fetched.
	 */
	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
	    prop != ZPOOL_PROP_NAME)
		return (-1);

	switch (zpool_prop_get_type(prop)) {
	case PROP_TYPE_STRING:
		(void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
		    len);
		break;

	case PROP_TYPE_NUMBER:
		intval = zpool_get_prop_int(zhp, prop, &src);

		switch (prop) {
		case ZPOOL_PROP_SIZE:
		case ZPOOL_PROP_ALLOCATED:
		case ZPOOL_PROP_FREE:
		case ZPOOL_PROP_FREEING:
		case ZPOOL_PROP_LEAKED:
		case ZPOOL_PROP_ASHIFT:
			if (literal)
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			else
				(void) zfs_nicenum(intval, buf, len);
			break;

		case ZPOOL_PROP_EXPANDSZ:
			/* zero expandsize is displayed as "-" */
			if (intval == 0) {
				(void) strlcpy(buf, "-", len);
			} else if (literal) {
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			} else {
				(void) zfs_nicebytes(intval, buf, len);
			}
			break;

		case ZPOOL_PROP_CAPACITY:
			if (literal) {
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			} else {
				/* non-literal capacity gets a '%' suffix */
				(void) snprintf(buf, len, "%llu%%",
				    (u_longlong_t)intval);
			}
			break;

		case ZPOOL_PROP_FRAGMENTATION:
			/* UINT64_MAX is the "fragmentation unknown" sentinel */
			if (intval == UINT64_MAX) {
				(void) strlcpy(buf, "-", len);
			} else if (literal) {
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			} else {
				(void) snprintf(buf, len, "%llu%%",
				    (u_longlong_t)intval);
			}
			break;

		case ZPOOL_PROP_DEDUPRATIO:
			/* the ratio is stored fixed-point, scaled by 100 */
			if (literal)
				(void) snprintf(buf, len, "%llu.%02llu",
				    (u_longlong_t)(intval / 100),
				    (u_longlong_t)(intval % 100));
			else
				(void) snprintf(buf, len, "%llu.%02llux",
				    (u_longlong_t)(intval / 100),
				    (u_longlong_t)(intval % 100));
			break;

		case ZPOOL_PROP_HEALTH:
			/* health needs vs_aux from the root vdev's stats */
			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
			verify(nvlist_lookup_uint64_array(nvroot,
			    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
			    == 0);

			(void) strlcpy(buf, zpool_state_to_name(intval,
			    vs->vs_aux), len);
			break;
		case ZPOOL_PROP_VERSION:
			/* feature-flag pools have no simple version number */
			if (intval >= SPA_VERSION_FEATURES) {
				(void) snprintf(buf, len, "-");
				break;
			}
			/* FALLTHROUGH */
		default:
			(void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
		}
		break;

	case PROP_TYPE_INDEX:
		intval = zpool_get_prop_int(zhp, prop, &src);
		if (zpool_prop_index_to_string(prop, intval, &strval)
		    != 0)
			return (-1);
		(void) strlcpy(buf, strval, len);
		break;

	default:
		/* unreachable unless the property table is corrupt */
		abort();
	}

	if (srctype)
		*srctype = src;

	return (0);
}
402
403 /*
404  * Check if the bootfs name has the same pool name as it is set to.
405  * Assuming bootfs is a valid dataset name.
406  */
407 static boolean_t
408 bootfs_name_valid(const char *pool, char *bootfs)
409 {
410         int len = strlen(pool);
411
412         if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
413                 return (B_FALSE);
414
415         if (strncmp(pool, bootfs, len) == 0 &&
416             (bootfs[len] == '/' || bootfs[len] == '\0'))
417                 return (B_TRUE);
418
419         return (B_FALSE);
420 }
421
422 boolean_t
423 zpool_is_bootable(zpool_handle_t *zhp)
424 {
425         char bootfs[ZFS_MAX_DATASET_NAME_LEN];
426
427         return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
428             sizeof (bootfs), NULL, B_FALSE) == 0 && strncmp(bootfs, "-",
429             sizeof (bootfs)) != 0);
430 }
431

/*
 * Given an nvlist of zpool properties to be set, validate that they are
 * correct, and parse any numeric properties (index, boolean, etc) if they are
 * specified as strings.
 *
 * Returns a freshly allocated nvlist of validated/parsed properties, or
 * NULL on failure (the error is reported via 'hdl' using 'errbuf' as the
 * message prefix).  The caller owns and must free the returned nvlist.
 */
static nvlist_t *
zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
    nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
{
	nvpair_t *elem;
	nvlist_t *retprops;
	zpool_prop_t prop;
	char *strval;
	uint64_t intval;
	char *slash, *check;
	struct stat64 statbuf;
	zpool_handle_t *zhp;

	if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
		(void) no_memory(hdl);
		return (NULL);
	}

	elem = NULL;
	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
		const char *propname = nvpair_name(elem);

		prop = zpool_name_to_prop(propname);
		if (prop == ZPROP_INVAL && zpool_prop_feature(propname)) {
			int err;
			/* feature property names have the form "feature@<name>" */
			char *fname = strchr(propname, '@') + 1;

			err = zfeature_lookup_name(fname, NULL);
			if (err != 0) {
				ASSERT3U(err, ==, ENOENT);
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "invalid feature '%s'"), fname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (nvpair_type(elem) != DATA_TYPE_STRING) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' must be a string"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			(void) nvpair_value_string(elem, &strval);
			if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0 &&
			    strcmp(strval, ZFS_FEATURE_DISABLED) != 0) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set to "
				    "'enabled' or 'disabled'"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			/* accepted features are recorded as <name>=0 */
			if (nvlist_add_uint64(retprops, propname, 0) != 0) {
				(void) no_memory(hdl);
				goto error;
			}
			continue;
		}

		/*
		 * Make sure this property is valid and applies to this type.
		 */
		if (prop == ZPROP_INVAL) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "invalid property '%s'"), propname);
			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
			goto error;
		}

		if (zpool_prop_readonly(prop)) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
			    "is readonly"), propname);
			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
			goto error;
		}

		/* parse string forms of numeric values into retprops */
		if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
		    &strval, &intval, errbuf) != 0)
			goto error;

		/*
		 * Perform additional checking for specific properties.
		 */
		switch (prop) {
		case ZPOOL_PROP_VERSION:
			/* only upgrades to supported on-disk versions */
			if (intval < version ||
			    !SPA_VERSION_IS_SUPPORTED(intval)) {
				/*
				 * NOTE(review): intval is uint64_t but is
				 * printed with %d — format/argument mismatch;
				 * should be %llu with a (u_longlong_t) cast.
				 * Confirm against upstream before changing.
				 */
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' number %d is invalid."),
				    propname, intval);
				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
				goto error;
			}
			break;

		case ZPOOL_PROP_ASHIFT:
			/* 0 means "auto-detect"; otherwise range-checked */
			if (intval != 0 &&
			    (intval < ASHIFT_MIN || intval > ASHIFT_MAX)) {
				/*
				 * NOTE(review): same %d-vs-uint64_t mismatch
				 * for intval as in the VERSION case above.
				 */
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "invalid '%s=%d' property: only values "
				    "between %" PRId32 " and %" PRId32 " "
				    "are allowed.\n"),
				    propname, intval, ASHIFT_MIN, ASHIFT_MAX);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;

		case ZPOOL_PROP_BOOTFS:
			if (flags.create || flags.import) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' cannot be set at creation "
				    "or import time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (version < SPA_VERSION_BOOTFS) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "pool must be upgraded to support "
				    "'%s' property"), propname);
				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
				goto error;
			}

			/*
			 * bootfs property value has to be a dataset name and
			 * the dataset has to be in the same pool as it sets to.
			 */
			if (strval[0] != '\0' && !bootfs_name_valid(poolname,
			    strval)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
				    "is an invalid name"), strval);
				(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
				goto error;
			}

			/* verify the pool itself can be opened */
			if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "could not open pool '%s'"), poolname);
				(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
				goto error;
			}
			zpool_close(zhp);
			break;

		case ZPOOL_PROP_ALTROOT:
			if (!flags.create && !flags.import) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set during pool "
				    "creation or import"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			/* altroot must be an absolute path */
			if (strval[0] != '/') {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "bad alternate root '%s'"), strval);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}
			break;

		case ZPOOL_PROP_CACHEFILE:
			/* empty and "none" are accepted as-is */
			if (strval[0] == '\0')
				break;

			if (strcmp(strval, "none") == 0)
				break;

			if (strval[0] != '/') {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' must be empty, an "
				    "absolute path, or 'none'"), propname);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}

			/* strval starts with '/', so strrchr cannot be NULL */
			slash = strrchr(strval, '/');

			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
			    strcmp(slash, "/..") == 0) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' is not a valid file"), strval);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}

			/* temporarily split off the filename to stat the dir */
			*slash = '\0';

			if (strval[0] != '\0' &&
			    (stat64(strval, &statbuf) != 0 ||
			    !S_ISDIR(statbuf.st_mode))) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' is not a valid directory"),
				    strval);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}

			/* restore the full path before continuing */
			*slash = '/';
			break;

		case ZPOOL_PROP_COMMENT:
			/*
			 * NOTE(review): isprint() on a plain char is
			 * implementation-defined for negative values; a cast
			 * to (unsigned char) would be safer — confirm.
			 */
			for (check = strval; *check != '\0'; check++) {
				if (!isprint(*check)) {
					zfs_error_aux(hdl,
					    dgettext(TEXT_DOMAIN,
					    "comment may only have printable "
					    "characters"));
					(void) zfs_error(hdl, EZFS_BADPROP,
					    errbuf);
					goto error;
				}
			}
			if (strlen(strval) > ZPROP_MAX_COMMENT) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "comment must not exceed %d characters"),
				    ZPROP_MAX_COMMENT);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		case ZPOOL_PROP_READONLY:
			if (!flags.import) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set at "
				    "import time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		case ZPOOL_PROP_TNAME:
			if (!flags.create) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set at "
				    "creation time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		case ZPOOL_PROP_MULTIHOST:
			if (get_system_hostid() == 0) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "requires a non-zero system hostid"));
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		default:
			/*
			 * NOTE(review): unlike every other failure above,
			 * this only logs and falls through (break, not goto
			 * error), so an undefined property is still added to
			 * retprops by zprop_parse_value() — confirm intended.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "property '%s'(%d) not defined"), propname, prop);
			break;
		}
	}

	return (retprops);
error:
	nvlist_free(retprops);
	return (NULL);
}
700
701 /*
702  * Set zpool property : propname=propval.
703  */
704 int
705 zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
706 {
707         zfs_cmd_t zc = {"\0"};
708         int ret = -1;
709         char errbuf[1024];
710         nvlist_t *nvl = NULL;
711         nvlist_t *realprops;
712         uint64_t version;
713         prop_flags_t flags = { 0 };
714
715         (void) snprintf(errbuf, sizeof (errbuf),
716             dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
717             zhp->zpool_name);
718
719         if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
720                 return (no_memory(zhp->zpool_hdl));
721
722         if (nvlist_add_string(nvl, propname, propval) != 0) {
723                 nvlist_free(nvl);
724                 return (no_memory(zhp->zpool_hdl));
725         }
726
727         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
728         if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
729             zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
730                 nvlist_free(nvl);
731                 return (-1);
732         }
733
734         nvlist_free(nvl);
735         nvl = realprops;
736
737         /*
738          * Execute the corresponding ioctl() to set this property.
739          */
740         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
741
742         if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
743                 nvlist_free(nvl);
744                 return (-1);
745         }
746
747         ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
748
749         zcmd_free_nvlists(&zc);
750         nvlist_free(nvl);
751
752         if (ret)
753                 (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
754         else
755                 (void) zpool_props_refresh(zhp);
756
757         return (ret);
758 }
759
/*
 * Expand the given property list to cover everything displayable for this
 * pool: the standard pool properties, a feature@<name> entry for every
 * feature this build knows about (added once, on the first expansion), and
 * an unsupported@<guid> entry for each on-disk feature this build does not
 * recognize.  Also widens each non-fixed column to fit its current value.
 * Returns 0 on success, -1 on allocation/lookup failure.
 */
int
zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
{
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	zprop_list_t *entry;
	char buf[ZFS_MAXPROPLEN];
	nvlist_t *features = NULL;
	nvpair_t *nvp;
	zprop_list_t **last;
	boolean_t firstexpand = (NULL == *plp);	/* first call for this list? */
	int i;

	/* Populate the list with the standard pool properties. */
	if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
		return (-1);

	/* Walk to the tail so new entries are appended in order. */
	last = plp;
	while (*last != NULL)
		last = &(*last)->pl_next;

	if ((*plp)->pl_all)
		features = zpool_get_features(zhp);

	/* Append one feature@<name> entry per known feature, exactly once. */
	if ((*plp)->pl_all && firstexpand) {
		for (i = 0; i < SPA_FEATURES; i++) {
			zprop_list_t *entry = zfs_alloc(hdl,
			    sizeof (zprop_list_t));
			entry->pl_prop = ZPROP_INVAL;
			entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s",
			    spa_feature_table[i].fi_uname);
			entry->pl_width = strlen(entry->pl_user_prop);
			entry->pl_all = B_TRUE;

			*last = entry;
			last = &entry->pl_next;
		}
	}

	/* add any unsupported features */
	for (nvp = nvlist_next_nvpair(features, NULL);
	    nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) {
		char *propname;
		boolean_t found;
		zprop_list_t *entry;

		if (zfeature_is_supported(nvpair_name(nvp)))
			continue;

		/* Unsupported features are keyed by guid rather than name. */
		propname = zfs_asprintf(hdl, "unsupported@%s",
		    nvpair_name(nvp));

		/*
		 * Before adding the property to the list make sure that no
		 * other pool already added the same property.
		 */
		found = B_FALSE;
		entry = *plp;
		while (entry != NULL) {
			if (entry->pl_user_prop != NULL &&
			    strcmp(propname, entry->pl_user_prop) == 0) {
				found = B_TRUE;
				break;
			}
			entry = entry->pl_next;
		}
		if (found) {
			/* Duplicate: drop the freshly built name. */
			free(propname);
			continue;
		}

		entry = zfs_alloc(hdl, sizeof (zprop_list_t));
		entry->pl_prop = ZPROP_INVAL;
		entry->pl_user_prop = propname;	/* entry now owns propname */
		entry->pl_width = strlen(entry->pl_user_prop);
		entry->pl_all = B_TRUE;

		*last = entry;
		last = &entry->pl_next;
	}

	/* Widen each non-fixed column to fit its current value. */
	for (entry = *plp; entry != NULL; entry = entry->pl_next) {

		if (entry->pl_fixed)
			continue;

		if (entry->pl_prop != ZPROP_INVAL &&
		    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
		    NULL, B_FALSE) == 0) {
			if (strlen(buf) > entry->pl_width)
				entry->pl_width = strlen(buf);
		}
	}

	return (0);
}
854
855 /*
856  * Get the state for the given feature on the given ZFS pool.
857  */
858 int
859 zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
860     size_t len)
861 {
862         uint64_t refcount;
863         boolean_t found = B_FALSE;
864         nvlist_t *features = zpool_get_features(zhp);
865         boolean_t supported;
866         const char *feature = strchr(propname, '@') + 1;
867
868         supported = zpool_prop_feature(propname);
869         ASSERT(supported || zpool_prop_unsupported(propname));
870
871         /*
872          * Convert from feature name to feature guid. This conversion is
873          * unnecessary for unsupported@... properties because they already
874          * use guids.
875          */
876         if (supported) {
877                 int ret;
878                 spa_feature_t fid;
879
880                 ret = zfeature_lookup_name(feature, &fid);
881                 if (ret != 0) {
882                         (void) strlcpy(buf, "-", len);
883                         return (ENOTSUP);
884                 }
885                 feature = spa_feature_table[fid].fi_guid;
886         }
887
888         if (nvlist_lookup_uint64(features, feature, &refcount) == 0)
889                 found = B_TRUE;
890
891         if (supported) {
892                 if (!found) {
893                         (void) strlcpy(buf, ZFS_FEATURE_DISABLED, len);
894                 } else  {
895                         if (refcount == 0)
896                                 (void) strlcpy(buf, ZFS_FEATURE_ENABLED, len);
897                         else
898                                 (void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len);
899                 }
900         } else {
901                 if (found) {
902                         if (refcount == 0) {
903                                 (void) strcpy(buf, ZFS_UNSUPPORTED_INACTIVE);
904                         } else {
905                                 (void) strcpy(buf, ZFS_UNSUPPORTED_READONLY);
906                         }
907                 } else {
908                         (void) strlcpy(buf, "-", len);
909                         return (ENOTSUP);
910                 }
911         }
912
913         return (0);
914 }
915
/*
 * Don't start the slice at the default block of 34; many storage
 * devices will use a stripe width of 128k, other vendors prefer a 1m
 * alignment.  It is best to play it safe and ensure a 1m alignment
 * given 512B blocks.  When the block size is larger by a power of 2
 * we will still be 1m aligned.  Some devices are sensitive to the
 * partition ending alignment as well.
 */
/* 2048 sectors * 512B = 1 MiB for both the start offset and end alignment */
#define	NEW_START_BLOCK		2048
#define	PARTITION_END_ALIGNMENT	2048
926
/*
 * Validate the given pool name, optionally putting an extended error message in
 * 'buf'.
 *
 * Returns B_TRUE when the name is acceptable.  'isopen' indicates the name
 * refers to an existing pool being opened, which relaxes the reserved-name
 * checks that only apply at create/import time.  'hdl' may be NULL, in
 * which case no auxiliary error text is recorded.
 */
boolean_t
zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
{
	namecheck_err_t why;	/* reason code when pool_namecheck() fails */
	char what;		/* offending character for INVALCHAR */
	int ret;

	ret = pool_namecheck(pool, &why, &what);

	/*
	 * The rules for reserved pool names were extended at a later point.
	 * But we need to support users with existing pools that may now be
	 * invalid.  So we only check for this expanded set of names during a
	 * create (or import), and only in userland.
	 */
	if (ret == 0 && !isopen &&
	    (strncmp(pool, "mirror", 6) == 0 ||
	    strncmp(pool, "raidz", 5) == 0 ||
	    strncmp(pool, "spare", 5) == 0 ||
	    strcmp(pool, "log") == 0)) {
		if (hdl != NULL)
			zfs_error_aux(hdl,
			    dgettext(TEXT_DOMAIN, "name is reserved"));
		return (B_FALSE);
	}


	if (ret != 0) {
		/* Map the namecheck reason to a human-readable aux message. */
		if (hdl != NULL) {
			switch (why) {
			case NAME_ERR_TOOLONG:
				zfs_error_aux(hdl,
				    dgettext(TEXT_DOMAIN, "name is too long"));
				break;

			case NAME_ERR_INVALCHAR:
				zfs_error_aux(hdl,
				    dgettext(TEXT_DOMAIN, "invalid character "
				    "'%c' in pool name"), what);
				break;

			case NAME_ERR_NOLETTER:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "name must begin with a letter"));
				break;

			case NAME_ERR_RESERVED:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "name is reserved"));
				break;

			case NAME_ERR_DISKLIKE:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "pool name is reserved"));
				break;

			case NAME_ERR_LEADING_SLASH:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "leading slash in name"));
				break;

			case NAME_ERR_EMPTY_COMPONENT:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "empty component in name"));
				break;

			case NAME_ERR_TRAILING_SLASH:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "trailing slash in name"));
				break;

			case NAME_ERR_MULTIPLE_DELIMITERS:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "multiple '@' and/or '#' delimiters in "
				    "name"));
				break;

			case NAME_ERR_NO_AT:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "permission set is missing '@'"));
				break;

			default:
				/* New namecheck codes land here until added. */
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "(%d) not defined"), why);
				break;
			}
		}
		return (B_FALSE);
	}

	return (B_TRUE);
}
1024
1025 /*
1026  * Open a handle to the given pool, even if the pool is currently in the FAULTED
1027  * state.
1028  */
1029 zpool_handle_t *
1030 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
1031 {
1032         zpool_handle_t *zhp;
1033         boolean_t missing;
1034
1035         /*
1036          * Make sure the pool name is valid.
1037          */
1038         if (!zpool_name_valid(hdl, B_TRUE, pool)) {
1039                 (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1040                     dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1041                     pool);
1042                 return (NULL);
1043         }
1044
1045         if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1046                 return (NULL);
1047
1048         zhp->zpool_hdl = hdl;
1049         (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1050
1051         if (zpool_refresh_stats(zhp, &missing) != 0) {
1052                 zpool_close(zhp);
1053                 return (NULL);
1054         }
1055
1056         if (missing) {
1057                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
1058                 (void) zfs_error_fmt(hdl, EZFS_NOENT,
1059                     dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
1060                 zpool_close(zhp);
1061                 return (NULL);
1062         }
1063
1064         return (zhp);
1065 }
1066
1067 /*
1068  * Like the above, but silent on error.  Used when iterating over pools (because
1069  * the configuration cache may be out of date).
1070  */
1071 int
1072 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
1073 {
1074         zpool_handle_t *zhp;
1075         boolean_t missing;
1076
1077         if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1078                 return (-1);
1079
1080         zhp->zpool_hdl = hdl;
1081         (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1082
1083         if (zpool_refresh_stats(zhp, &missing) != 0) {
1084                 zpool_close(zhp);
1085                 return (-1);
1086         }
1087
1088         if (missing) {
1089                 zpool_close(zhp);
1090                 *ret = NULL;
1091                 return (0);
1092         }
1093
1094         *ret = zhp;
1095         return (0);
1096 }
1097
1098 /*
1099  * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
1100  * state.
1101  */
1102 zpool_handle_t *
1103 zpool_open(libzfs_handle_t *hdl, const char *pool)
1104 {
1105         zpool_handle_t *zhp;
1106
1107         if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
1108                 return (NULL);
1109
1110         if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
1111                 (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
1112                     dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
1113                 zpool_close(zhp);
1114                 return (NULL);
1115         }
1116
1117         return (zhp);
1118 }
1119
1120 /*
1121  * Close the handle.  Simply frees the memory associated with the handle.
1122  */
1123 void
1124 zpool_close(zpool_handle_t *zhp)
1125 {
1126         nvlist_free(zhp->zpool_config);
1127         nvlist_free(zhp->zpool_old_config);
1128         nvlist_free(zhp->zpool_props);
1129         free(zhp);
1130 }
1131
/*
 * Return the name of the pool.
 */
const char *
zpool_get_name(zpool_handle_t *zhp)
{
	/* The string is owned by the handle; callers must not free it. */
	return (zhp->zpool_name);
}
1140
1141
/*
 * Return the state of the pool (ACTIVE or UNAVAILABLE)
 */
int
zpool_get_state(zpool_handle_t *zhp)
{
	/* Cached on the handle; refreshed by zpool_refresh_stats(). */
	return (zhp->zpool_state);
}
1150
/*
 * Create the named pool, using the provided vdev list.  It is assumed
 * that the consumer has already validated the contents of the nvlist, so we
 * don't have to worry about error semantics.
 *
 * 'props' (pool properties) and 'fsprops' (root dataset properties) may
 * each be NULL.  Returns 0 on success; on failure returns -1 or the value
 * of zfs_error()/zpool_standard_error() with an error recorded on 'hdl'.
 */
int
zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
    nvlist_t *props, nvlist_t *fsprops)
{
	zfs_cmd_t zc = {"\0"};
	nvlist_t *zc_fsprops = NULL;
	nvlist_t *zc_props = NULL;
	char msg[1024];
	int ret = -1;

	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
	    "cannot create '%s'"), pool);

	if (!zpool_name_valid(hdl, B_FALSE, pool))
		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));

	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);

	if (props) {
		prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };

		/* Normalize and validate the pool properties. */
		if ((zc_props = zpool_valid_proplist(hdl, pool, props,
		    SPA_VERSION_1, flags, msg)) == NULL) {
			goto create_failed;
		}
	}

	if (fsprops) {
		uint64_t zoned;
		char *zonestr;

		zoned = ((nvlist_lookup_string(fsprops,
		    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
		    strcmp(zonestr, "on") == 0);

		/* Validate the root dataset properties... */
		if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM,
		    fsprops, zoned, NULL, NULL, msg)) == NULL) {
			goto create_failed;
		}
		if (!zc_props &&
		    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
			goto create_failed;
		}
		/* ...and nest them under the pool props for the kernel. */
		if (nvlist_add_nvlist(zc_props,
		    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
			goto create_failed;
		}
	}

	if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
		goto create_failed;

	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));

	if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {

		/* Each case below returns directly, so free here first. */
		zcmd_free_nvlists(&zc);
		nvlist_free(zc_props);
		nvlist_free(zc_fsprops);

		switch (errno) {
		case EBUSY:
			/*
			 * This can happen if the user has specified the same
			 * device multiple times.  We can't reliably detect this
			 * until we try to add it and see we already have a
			 * label.  This can also happen under if the device is
			 * part of an active md or lvm device.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more vdevs refer to the same device, or "
			    "one of\nthe devices is part of an active md or "
			    "lvm device"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ERANGE:
			/*
			 * This happens if the record size is smaller or larger
			 * than the allowed size range, or not a power of 2.
			 *
			 * NOTE: although zfs_valid_proplist is called earlier,
			 * this case may have slipped through since the
			 * pool does not exist yet and it is therefore
			 * impossible to read properties e.g. max blocksize
			 * from the pool.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "record size invalid"));
			return (zfs_error(hdl, EZFS_BADPROP, msg));

		case EOVERFLOW:
			/*
			 * This occurs when one of the devices is below
			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
			 * device was the problem device since there's no
			 * reliable way to determine device size from userland.
			 */
			{
				char buf[64];

				zfs_nicebytes(SPA_MINDEVSIZE, buf,
				    sizeof (buf));

				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "one or more devices is less than the "
				    "minimum size (%s)"), buf);
			}
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ENOSPC:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more devices is out of space"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ENOTBLK:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cache device must be a disk or disk slice"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		default:
			return (zpool_standard_error(hdl, errno, msg));
		}
	}

	/* The success path (ret == 0) also lands here to free the nvlists. */
create_failed:
	zcmd_free_nvlists(&zc);
	nvlist_free(zc_props);
	nvlist_free(zc_fsprops);
	return (ret);
}
1287
1288 /*
1289  * Destroy the given pool.  It is up to the caller to ensure that there are no
1290  * datasets left in the pool.
1291  */
1292 int
1293 zpool_destroy(zpool_handle_t *zhp, const char *log_str)
1294 {
1295         zfs_cmd_t zc = {"\0"};
1296         zfs_handle_t *zfp = NULL;
1297         libzfs_handle_t *hdl = zhp->zpool_hdl;
1298         char msg[1024];
1299
1300         if (zhp->zpool_state == POOL_STATE_ACTIVE &&
1301             (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
1302                 return (-1);
1303
1304         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1305         zc.zc_history = (uint64_t)(uintptr_t)log_str;
1306
1307         if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
1308                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1309                     "cannot destroy '%s'"), zhp->zpool_name);
1310
1311                 if (errno == EROFS) {
1312                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1313                             "one or more devices is read only"));
1314                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1315                 } else {
1316                         (void) zpool_standard_error(hdl, errno, msg);
1317                 }
1318
1319                 if (zfp)
1320                         zfs_close(zfp);
1321                 return (-1);
1322         }
1323
1324         if (zfp) {
1325                 remove_mountpoint(zfp);
1326                 zfs_close(zfp);
1327         }
1328
1329         return (0);
1330 }
1331
/*
 * Add the given vdevs to the pool.  The caller must have already performed the
 * necessary verification to ensure that the vdev specification is well-formed.
 *
 * Returns 0 on success; on failure records an error on the handle and
 * returns -1 or a zfs_error() code.
 */
int
zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
{
	zfs_cmd_t zc = {"\0"};
	int ret;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	char msg[1024];
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
	    "cannot add to '%s'"), zhp->zpool_name);

	/* Hot spares require at least SPA_VERSION_SPARES on disk. */
	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
	    SPA_VERSION_SPARES &&
	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
		    "upgraded to add hot spares"));
		return (zfs_error(hdl, EZFS_BADVERSION, msg));
	}

	/* Cache devices require at least SPA_VERSION_L2CACHE on disk. */
	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
	    SPA_VERSION_L2CACHE &&
	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
		    "upgraded to add cache devices"));
		return (zfs_error(hdl, EZFS_BADVERSION, msg));
	}

	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);
	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
		switch (errno) {
		case EBUSY:
			/*
			 * This can happen if the user has specified the same
			 * device multiple times.  We can't reliably detect this
			 * until we try to add it and see we already have a
			 * label.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more vdevs refer to the same device"));
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		case EOVERFLOW:
			/*
			 * This occurs when one of the devices is below
			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
			 * device was the problem device since there's no
			 * reliable way to determine device size from userland.
			 */
			{
				char buf[64];

				zfs_nicebytes(SPA_MINDEVSIZE, buf,
				    sizeof (buf));

				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "device is less than the minimum "
				    "size (%s)"), buf);
			}
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		case ENOTSUP:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "pool must be upgraded to add these vdevs"));
			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
			break;

		case ENOTBLK:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cache device must be a disk or disk slice"));
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		default:
			(void) zpool_standard_error(hdl, errno, msg);
		}

		ret = -1;
	} else {
		ret = 0;
	}

	/* Free the packed nvlist buffers in either case. */
	zcmd_free_nvlists(&zc);

	return (ret);
}
1430
1431 /*
1432  * Exports the pool from the system.  The caller must ensure that there are no
1433  * mounted datasets in the pool.
1434  */
1435 static int
1436 zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce,
1437     const char *log_str)
1438 {
1439         zfs_cmd_t zc = {"\0"};
1440         char msg[1024];
1441
1442         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1443             "cannot export '%s'"), zhp->zpool_name);
1444
1445         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1446         zc.zc_cookie = force;
1447         zc.zc_guid = hardforce;
1448         zc.zc_history = (uint64_t)(uintptr_t)log_str;
1449
1450         if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
1451                 switch (errno) {
1452                 case EXDEV:
1453                         zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
1454                             "use '-f' to override the following errors:\n"
1455                             "'%s' has an active shared spare which could be"
1456                             " used by other pools once '%s' is exported."),
1457                             zhp->zpool_name, zhp->zpool_name);
1458                         return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
1459                             msg));
1460                 default:
1461                         return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
1462                             msg));
1463                 }
1464         }
1465
1466         return (0);
1467 }
1468
/*
 * Export the pool; 'force' maps to a non-hard forced export (see
 * zpool_export_common()).
 */
int
zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str)
{
	return (zpool_export_common(zhp, force, B_FALSE, log_str));
}
1474
/*
 * Export the pool with both force and hardforce set (see
 * zpool_export_common()).
 */
int
zpool_export_force(zpool_handle_t *zhp, const char *log_str)
{
	return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str));
}
1480
1481 static void
1482 zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
1483     nvlist_t *config)
1484 {
1485         nvlist_t *nv = NULL;
1486         uint64_t rewindto;
1487         int64_t loss = -1;
1488         struct tm t;
1489         char timestr[128];
1490
1491         if (!hdl->libzfs_printerr || config == NULL)
1492                 return;
1493
1494         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
1495             nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) {
1496                 return;
1497         }
1498
1499         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1500                 return;
1501         (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1502
1503         if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1504             strftime(timestr, 128, "%c", &t) != 0) {
1505                 if (dryrun) {
1506                         (void) printf(dgettext(TEXT_DOMAIN,
1507                             "Would be able to return %s "
1508                             "to its state as of %s.\n"),
1509                             name, timestr);
1510                 } else {
1511                         (void) printf(dgettext(TEXT_DOMAIN,
1512                             "Pool %s returned to its state as of %s.\n"),
1513                             name, timestr);
1514                 }
1515                 if (loss > 120) {
1516                         (void) printf(dgettext(TEXT_DOMAIN,
1517                             "%s approximately %lld "),
1518                             dryrun ? "Would discard" : "Discarded",
1519                             ((longlong_t)loss + 30) / 60);
1520                         (void) printf(dgettext(TEXT_DOMAIN,
1521                             "minutes of transactions.\n"));
1522                 } else if (loss > 0) {
1523                         (void) printf(dgettext(TEXT_DOMAIN,
1524                             "%s approximately %lld "),
1525                             dryrun ? "Would discard" : "Discarded",
1526                             (longlong_t)loss);
1527                         (void) printf(dgettext(TEXT_DOMAIN,
1528                             "seconds of transactions.\n"));
1529                 }
1530         }
1531 }
1532
1533 void
1534 zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
1535     nvlist_t *config)
1536 {
1537         nvlist_t *nv = NULL;
1538         int64_t loss = -1;
1539         uint64_t edata = UINT64_MAX;
1540         uint64_t rewindto;
1541         struct tm t;
1542         char timestr[128];
1543
1544         if (!hdl->libzfs_printerr)
1545                 return;
1546
1547         if (reason >= 0)
1548                 (void) printf(dgettext(TEXT_DOMAIN, "action: "));
1549         else
1550                 (void) printf(dgettext(TEXT_DOMAIN, "\t"));
1551
1552         /* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
1553         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
1554             nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 ||
1555             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1556                 goto no_info;
1557
1558         (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1559         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
1560             &edata);
1561
1562         (void) printf(dgettext(TEXT_DOMAIN,
1563             "Recovery is possible, but will result in some data loss.\n"));
1564
1565         if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1566             strftime(timestr, 128, "%c", &t) != 0) {
1567                 (void) printf(dgettext(TEXT_DOMAIN,
1568                     "\tReturning the pool to its state as of %s\n"
1569                     "\tshould correct the problem.  "),
1570                     timestr);
1571         } else {
1572                 (void) printf(dgettext(TEXT_DOMAIN,
1573                     "\tReverting the pool to an earlier state "
1574                     "should correct the problem.\n\t"));
1575         }
1576
1577         if (loss > 120) {
1578                 (void) printf(dgettext(TEXT_DOMAIN,
1579                     "Approximately %lld minutes of data\n"
1580                     "\tmust be discarded, irreversibly.  "),
1581                     ((longlong_t)loss + 30) / 60);
1582         } else if (loss > 0) {
1583                 (void) printf(dgettext(TEXT_DOMAIN,
1584                     "Approximately %lld seconds of data\n"
1585                     "\tmust be discarded, irreversibly.  "),
1586                     (longlong_t)loss);
1587         }
1588         if (edata != 0 && edata != UINT64_MAX) {
1589                 if (edata == 1) {
1590                         (void) printf(dgettext(TEXT_DOMAIN,
1591                             "After rewind, at least\n"
1592                             "\tone persistent user-data error will remain.  "));
1593                 } else {
1594                         (void) printf(dgettext(TEXT_DOMAIN,
1595                             "After rewind, several\n"
1596                             "\tpersistent user-data errors will remain.  "));
1597                 }
1598         }
1599         (void) printf(dgettext(TEXT_DOMAIN,
1600             "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
1601             reason >= 0 ? "clear" : "import", name);
1602
1603         (void) printf(dgettext(TEXT_DOMAIN,
1604             "A scrub of the pool\n"
1605             "\tis strongly recommended after recovery.\n"));
1606         return;
1607
1608 no_info:
1609         (void) printf(dgettext(TEXT_DOMAIN,
1610             "Destroy and re-create the pool from\n\ta backup source.\n"));
1611 }
1612
1613 /*
1614  * zpool_import() is a contracted interface. Should be kept the same
1615  * if possible.
1616  *
1617  * Applications should use zpool_import_props() to import a pool with
1618  * new properties value to be set.
1619  */
1620 int
1621 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1622     char *altroot)
1623 {
1624         nvlist_t *props = NULL;
1625         int ret;
1626
1627         if (altroot != NULL) {
1628                 if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1629                         return (zfs_error_fmt(hdl, EZFS_NOMEM,
1630                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1631                             newname));
1632                 }
1633
1634                 if (nvlist_add_string(props,
1635                     zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
1636                     nvlist_add_string(props,
1637                     zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
1638                         nvlist_free(props);
1639                         return (zfs_error_fmt(hdl, EZFS_NOMEM,
1640                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1641                             newname));
1642                 }
1643         }
1644
1645         ret = zpool_import_props(hdl, config, newname, props,
1646             ZFS_IMPORT_NORMAL);
1647         nvlist_free(props);
1648         return (ret);
1649 }
1650
1651 static void
1652 print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
1653     int indent)
1654 {
1655         nvlist_t **child;
1656         uint_t c, children;
1657         char *vname;
1658         uint64_t is_log = 0;
1659
1660         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
1661             &is_log);
1662
1663         if (name != NULL)
1664                 (void) printf("\t%*s%s%s\n", indent, "", name,
1665                     is_log ? " [log]" : "");
1666
1667         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1668             &child, &children) != 0)
1669                 return;
1670
1671         for (c = 0; c < children; c++) {
1672                 vname = zpool_vdev_name(hdl, NULL, child[c], VDEV_NAME_TYPE_ID);
1673                 print_vdev_tree(hdl, vname, child[c], indent + 2);
1674                 free(vname);
1675         }
1676 }
1677
1678 void
1679 zpool_print_unsup_feat(nvlist_t *config)
1680 {
1681         nvlist_t *nvinfo, *unsup_feat;
1682         nvpair_t *nvp;
1683
1684         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) ==
1685             0);
1686         verify(nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT,
1687             &unsup_feat) == 0);
1688
1689         for (nvp = nvlist_next_nvpair(unsup_feat, NULL); nvp != NULL;
1690             nvp = nvlist_next_nvpair(unsup_feat, nvp)) {
1691                 char *desc;
1692
1693                 verify(nvpair_type(nvp) == DATA_TYPE_STRING);
1694                 verify(nvpair_value_string(nvp, &desc) == 0);
1695
1696                 if (strlen(desc) > 0)
1697                         (void) printf("\t%s (%s)\n", nvpair_name(nvp), desc);
1698                 else
1699                         (void) printf("\t%s\n", nvpair_name(nvp));
1700         }
1701 }
1702
1703 /*
1704  * Import the given pool using the known configuration and a list of
1705  * properties to be set. The configuration should have come from
1706  * zpool_find_import(). The 'newname' parameters control whether the pool
1707  * is imported with a different name.
1708  */
1709 int
1710 zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1711     nvlist_t *props, int flags)
1712 {
1713         zfs_cmd_t zc = {"\0"};
1714         zpool_rewind_policy_t policy;
1715         nvlist_t *nv = NULL;
1716         nvlist_t *nvinfo = NULL;
1717         nvlist_t *missing = NULL;
1718         char *thename;
1719         char *origname;
1720         int ret;
1721         int error = 0;
1722         char errbuf[1024];
1723
1724         verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1725             &origname) == 0);
1726
1727         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1728             "cannot import pool '%s'"), origname);
1729
1730         if (newname != NULL) {
1731                 if (!zpool_name_valid(hdl, B_FALSE, newname))
1732                         return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1733                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1734                             newname));
1735                 thename = (char *)newname;
1736         } else {
1737                 thename = origname;
1738         }
1739
1740         if (props != NULL) {
1741                 uint64_t version;
1742                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
1743
1744                 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1745                     &version) == 0);
1746
1747                 if ((props = zpool_valid_proplist(hdl, origname,
1748                     props, version, flags, errbuf)) == NULL)
1749                         return (-1);
1750                 if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1751                         nvlist_free(props);
1752                         return (-1);
1753                 }
1754                 nvlist_free(props);
1755         }
1756
1757         (void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1758
1759         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1760             &zc.zc_guid) == 0);
1761
1762         if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1763                 zcmd_free_nvlists(&zc);
1764                 return (-1);
1765         }
1766         if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
1767                 zcmd_free_nvlists(&zc);
1768                 return (-1);
1769         }
1770
1771         zc.zc_cookie = flags;
1772         while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
1773             errno == ENOMEM) {
1774                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
1775                         zcmd_free_nvlists(&zc);
1776                         return (-1);
1777                 }
1778         }
1779         if (ret != 0)
1780                 error = errno;
1781
1782         (void) zcmd_read_dst_nvlist(hdl, &zc, &nv);
1783
1784         zcmd_free_nvlists(&zc);
1785
1786         zpool_get_rewind_policy(config, &policy);
1787
1788         if (error) {
1789                 char desc[1024];
1790                 char aux[256];
1791
1792                 /*
1793                  * Dry-run failed, but we print out what success
1794                  * looks like if we found a best txg
1795                  */
1796                 if (policy.zrp_request & ZPOOL_TRY_REWIND) {
1797                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
1798                             B_TRUE, nv);
1799                         nvlist_free(nv);
1800                         return (-1);
1801                 }
1802
1803                 if (newname == NULL)
1804                         (void) snprintf(desc, sizeof (desc),
1805                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1806                             thename);
1807                 else
1808                         (void) snprintf(desc, sizeof (desc),
1809                             dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1810                             origname, thename);
1811
1812                 switch (error) {
1813                 case ENOTSUP:
1814                         if (nv != NULL && nvlist_lookup_nvlist(nv,
1815                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1816                             nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) {
1817                                 (void) printf(dgettext(TEXT_DOMAIN, "This "
1818                                     "pool uses the following feature(s) not "
1819                                     "supported by this system:\n"));
1820                                 zpool_print_unsup_feat(nv);
1821                                 if (nvlist_exists(nvinfo,
1822                                     ZPOOL_CONFIG_CAN_RDONLY)) {
1823                                         (void) printf(dgettext(TEXT_DOMAIN,
1824                                             "All unsupported features are only "
1825                                             "required for writing to the pool."
1826                                             "\nThe pool can be imported using "
1827                                             "'-o readonly=on'.\n"));
1828                                 }
1829                         }
1830                         /*
1831                          * Unsupported version.
1832                          */
1833                         (void) zfs_error(hdl, EZFS_BADVERSION, desc);
1834                         break;
1835
1836                 case EREMOTEIO:
1837                         if (nv != NULL && nvlist_lookup_nvlist(nv,
1838                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0) {
1839                                 char *hostname = "<unknown>";
1840                                 uint64_t hostid = 0;
1841                                 mmp_state_t mmp_state;
1842
1843                                 mmp_state = fnvlist_lookup_uint64(nvinfo,
1844                                     ZPOOL_CONFIG_MMP_STATE);
1845
1846                                 if (nvlist_exists(nvinfo,
1847                                     ZPOOL_CONFIG_MMP_HOSTNAME))
1848                                         hostname = fnvlist_lookup_string(nvinfo,
1849                                             ZPOOL_CONFIG_MMP_HOSTNAME);
1850
1851                                 if (nvlist_exists(nvinfo,
1852                                     ZPOOL_CONFIG_MMP_HOSTID))
1853                                         hostid = fnvlist_lookup_uint64(nvinfo,
1854                                             ZPOOL_CONFIG_MMP_HOSTID);
1855
1856                                 if (mmp_state == MMP_STATE_ACTIVE) {
1857                                         (void) snprintf(aux, sizeof (aux),
1858                                             dgettext(TEXT_DOMAIN, "pool is imp"
1859                                             "orted on host '%s' (hostid=%lx).\n"
1860                                             "Export the pool on the other "
1861                                             "system, then run 'zpool import'."),
1862                                             hostname, (unsigned long) hostid);
1863                                 } else if (mmp_state == MMP_STATE_NO_HOSTID) {
1864                                         (void) snprintf(aux, sizeof (aux),
1865                                             dgettext(TEXT_DOMAIN, "pool has "
1866                                             "the multihost property on and "
1867                                             "the\nsystem's hostid is not set. "
1868                                             "Set a unique system hostid with "
1869                                             "the zgenhostid(8) command.\n"));
1870                                 }
1871
1872                                 (void) zfs_error_aux(hdl, aux);
1873                         }
1874                         (void) zfs_error(hdl, EZFS_ACTIVE_POOL, desc);
1875                         break;
1876
1877                 case EINVAL:
1878                         (void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
1879                         break;
1880
1881                 case EROFS:
1882                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1883                             "one or more devices is read only"));
1884                         (void) zfs_error(hdl, EZFS_BADDEV, desc);
1885                         break;
1886
1887                 case ENXIO:
1888                         if (nv && nvlist_lookup_nvlist(nv,
1889                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1890                             nvlist_lookup_nvlist(nvinfo,
1891                             ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
1892                                 (void) printf(dgettext(TEXT_DOMAIN,
1893                                     "The devices below are missing, use "
1894                                     "'-m' to import the pool anyway:\n"));
1895                                 print_vdev_tree(hdl, NULL, missing, 2);
1896                                 (void) printf("\n");
1897                         }
1898                         (void) zpool_standard_error(hdl, error, desc);
1899                         break;
1900
1901                 case EEXIST:
1902                         (void) zpool_standard_error(hdl, error, desc);
1903                         break;
1904
1905                 case EBUSY:
1906                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1907                             "one or more devices are already in use\n"));
1908                         (void) zfs_error(hdl, EZFS_BADDEV, desc);
1909                         break;
1910                 case ENAMETOOLONG:
1911                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1912                             "new name of at least one dataset is longer than "
1913                             "the maximum allowable length"));
1914                         (void) zfs_error(hdl, EZFS_NAMETOOLONG, desc);
1915                         break;
1916                 default:
1917                         (void) zpool_standard_error(hdl, error, desc);
1918                         zpool_explain_recover(hdl,
1919                             newname ? origname : thename, -error, nv);
1920                         break;
1921                 }
1922
1923                 nvlist_free(nv);
1924                 ret = -1;
1925         } else {
1926                 zpool_handle_t *zhp;
1927
1928                 /*
1929                  * This should never fail, but play it safe anyway.
1930                  */
1931                 if (zpool_open_silent(hdl, thename, &zhp) != 0)
1932                         ret = -1;
1933                 else if (zhp != NULL)
1934                         zpool_close(zhp);
1935                 if (policy.zrp_request &
1936                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
1937                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
1938                             ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), nv);
1939                 }
1940                 nvlist_free(nv);
1941                 return (0);
1942         }
1943
1944         return (ret);
1945 }
1946
1947 /*
1948  * Scan the pool.
1949  */
1950 int
1951 zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
1952 {
1953         zfs_cmd_t zc = {"\0"};
1954         char msg[1024];
1955         int err;
1956         libzfs_handle_t *hdl = zhp->zpool_hdl;
1957
1958         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1959         zc.zc_cookie = func;
1960         zc.zc_flags = cmd;
1961
1962         if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0)
1963                 return (0);
1964
1965         err = errno;
1966
1967         /* ECANCELED on a scrub means we resumed a paused scrub */
1968         if (err == ECANCELED && func == POOL_SCAN_SCRUB &&
1969             cmd == POOL_SCRUB_NORMAL)
1970                 return (0);
1971
1972         if (err == ENOENT && func != POOL_SCAN_NONE && cmd == POOL_SCRUB_NORMAL)
1973                 return (0);
1974
1975         if (func == POOL_SCAN_SCRUB) {
1976                 if (cmd == POOL_SCRUB_PAUSE) {
1977                         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1978                             "cannot pause scrubbing %s"), zc.zc_name);
1979                 } else {
1980                         assert(cmd == POOL_SCRUB_NORMAL);
1981                         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1982                             "cannot scrub %s"), zc.zc_name);
1983                 }
1984         } else if (func == POOL_SCAN_NONE) {
1985                 (void) snprintf(msg, sizeof (msg),
1986                     dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
1987                     zc.zc_name);
1988         } else {
1989                 assert(!"unexpected result");
1990         }
1991
1992         if (err == EBUSY) {
1993                 nvlist_t *nvroot;
1994                 pool_scan_stat_t *ps = NULL;
1995                 uint_t psc;
1996
1997                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
1998                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
1999                 (void) nvlist_lookup_uint64_array(nvroot,
2000                     ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
2001                 if (ps && ps->pss_func == POOL_SCAN_SCRUB) {
2002                         if (cmd == POOL_SCRUB_PAUSE)
2003                                 return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
2004                         else
2005                                 return (zfs_error(hdl, EZFS_SCRUBBING, msg));
2006                 } else {
2007                         return (zfs_error(hdl, EZFS_RESILVERING, msg));
2008                 }
2009         } else if (err == ENOENT) {
2010                 return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
2011         } else {
2012                 return (zpool_standard_error(hdl, err, msg));
2013         }
2014 }
2015
2016 /*
2017  * Find a vdev that matches the search criteria specified. We use the
2018  * the nvpair name to determine how we should look for the device.
2019  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
2020  * spare; but FALSE if its an INUSE spare.
2021  */
static nvlist_t *
vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
    boolean_t *l2cache, boolean_t *log)
{
	uint_t c, children;
	nvlist_t **child;
	nvlist_t *ret;
	uint64_t is_log;
	char *srchkey;
	/* Only the first pair of 'search' is consulted. */
	nvpair_t *pair = nvlist_next_nvpair(search, NULL);

	/* Nothing to look for */
	if (search == NULL || pair == NULL)
		return (NULL);

	/* Obtain the key we will use to search */
	srchkey = nvpair_name(pair);

	/*
	 * First try to match this vdev itself; the type of the search
	 * pair's value selects the comparison strategy.
	 */
	switch (nvpair_type(pair)) {
	case DATA_TYPE_UINT64:
		/* uint64 searches match only on the vdev guid. */
		if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
			uint64_t srchval, theguid;

			verify(nvpair_value_uint64(pair, &srchval) == 0);
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
			    &theguid) == 0);
			if (theguid == srchval)
				return (nv);
		}
		break;

	case DATA_TYPE_STRING: {
		char *srchval, *val;

		verify(nvpair_value_string(pair, &srchval) == 0);
		/* This vdev lacks the searched key; try its children. */
		if (nvlist_lookup_string(nv, srchkey, &val) != 0)
			break;

		/*
		 * Search for the requested value. Special cases:
		 *
		 * - ZPOOL_CONFIG_PATH for whole disk entries.  These end in
		 *   "-part1", or "p1".  The suffix is hidden from the user,
		 *   but included in the string, so this matches around it.
		 * - ZPOOL_CONFIG_PATH for short names zfs_strcmp_shortname()
		 *   is used to check all possible expanded paths.
		 * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
		 *
		 * Otherwise, all other searches are simple string compares.
		 */
		if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0) {
			uint64_t wholedisk = 0;

			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
			    &wholedisk);
			if (zfs_strcmp_pathname(srchval, val, wholedisk) == 0)
				return (nv);

		} else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
			char *type, *idx, *end, *p;
			uint64_t id, vdev_id;

			/*
			 * Determine our vdev type, keeping in mind
			 * that the srchval is composed of a type and
			 * vdev id pair (i.e. mirror-4).
			 */
			if ((type = strdup(srchval)) == NULL)
				return (NULL);

			if ((p = strrchr(type, '-')) == NULL) {
				free(type);
				break;
			}
			/* Split "type-id" in place at the last '-'. */
			idx = p + 1;
			*p = '\0';

			/*
			 * If the types don't match then keep looking.
			 */
			if (strncmp(val, type, strlen(val)) != 0) {
				free(type);
				break;
			}

			verify(strncmp(type, VDEV_TYPE_RAIDZ,
			    strlen(VDEV_TYPE_RAIDZ)) == 0 ||
			    strncmp(type, VDEV_TYPE_MIRROR,
			    strlen(VDEV_TYPE_MIRROR)) == 0);
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
			    &id) == 0);

			errno = 0;
			vdev_id = strtoull(idx, &end, 10);

			free(type);
			if (errno != 0)
				return (NULL);

			/*
			 * Now verify that we have the correct vdev id.
			 */
			if (vdev_id == id)
				return (nv);
		}

		/*
		 * Common case
		 */
		if (strcmp(srchval, val) == 0)
			return (nv);
		break;
	}

	default:
		break;
	}

	/* No match here; descend into the regular children, if any. */
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		return (NULL);

	for (c = 0; c < children; c++) {
		if ((ret = vdev_to_nvlist_iter(child[c], search,
		    avail_spare, l2cache, NULL)) != NULL) {
			/*
			 * The 'is_log' value is only set for the toplevel
			 * vdev, not the leaf vdevs.  So we always lookup the
			 * log device from the root of the vdev tree (where
			 * 'log' is non-NULL).
			 */
			if (log != NULL &&
			    nvlist_lookup_uint64(child[c],
			    ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
			    is_log) {
				*log = B_TRUE;
			}
			return (ret);
		}
	}

	/* Then search the spares, flagging any match as a spare. */
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if ((ret = vdev_to_nvlist_iter(child[c], search,
			    avail_spare, l2cache, NULL)) != NULL) {
				*avail_spare = B_TRUE;
				return (ret);
			}
		}
	}

	/* Finally search the L2 cache devices. */
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if ((ret = vdev_to_nvlist_iter(child[c], search,
			    avail_spare, l2cache, NULL)) != NULL) {
				*l2cache = B_TRUE;
				return (ret);
			}
		}
	}

	return (NULL);
}
2187
2188 /*
2189  * Given a physical path (minus the "/devices" prefix), find the
2190  * associated vdev.
2191  */
2192 nvlist_t *
2193 zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
2194     boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
2195 {
2196         nvlist_t *search, *nvroot, *ret;
2197
2198         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2199         verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
2200
2201         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2202             &nvroot) == 0);
2203
2204         *avail_spare = B_FALSE;
2205         *l2cache = B_FALSE;
2206         if (log != NULL)
2207                 *log = B_FALSE;
2208         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2209         nvlist_free(search);
2210
2211         return (ret);
2212 }
2213
2214 /*
2215  * Determine if we have an "interior" top-level vdev (i.e mirror/raidz).
2216  */
2217 boolean_t
2218 zpool_vdev_is_interior(const char *name)
2219 {
2220         if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
2221             strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
2222                 return (B_TRUE);
2223         return (B_FALSE);
2224 }
2225
2226 nvlist_t *
2227 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
2228     boolean_t *l2cache, boolean_t *log)
2229 {
2230         char *end;
2231         nvlist_t *nvroot, *search, *ret;
2232         uint64_t guid;
2233
2234         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2235
2236         guid = strtoull(path, &end, 0);
2237         if (guid != 0 && *end == '\0') {
2238                 verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
2239         } else if (zpool_vdev_is_interior(path)) {
2240                 verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
2241         } else {
2242                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
2243         }
2244
2245         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2246             &nvroot) == 0);
2247
2248         *avail_spare = B_FALSE;
2249         *l2cache = B_FALSE;
2250         if (log != NULL)
2251                 *log = B_FALSE;
2252         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2253         nvlist_free(search);
2254
2255         return (ret);
2256 }
2257
2258 static int
2259 vdev_is_online(nvlist_t *nv)
2260 {
2261         uint64_t ival;
2262
2263         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
2264             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
2265             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
2266                 return (0);
2267
2268         return (1);
2269 }
2270
2271 /*
2272  * Helper function for zpool_get_physpaths().
2273  */
2274 static int
2275 vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
2276     size_t *bytes_written)
2277 {
2278         size_t bytes_left, pos, rsz;
2279         char *tmppath;
2280         const char *format;
2281
2282         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
2283             &tmppath) != 0)
2284                 return (EZFS_NODEVICE);
2285
2286         pos = *bytes_written;
2287         bytes_left = physpath_size - pos;
2288         format = (pos == 0) ? "%s" : " %s";
2289
2290         rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
2291         *bytes_written += rsz;
2292
2293         if (rsz >= bytes_left) {
2294                 /* if physpath was not copied properly, clear it */
2295                 if (bytes_left != 0) {
2296                         physpath[pos] = 0;
2297                 }
2298                 return (EZFS_NOSPC);
2299         }
2300         return (0);
2301 }
2302
/*
 * Recursively walk the vdev tree rooted at 'nv' and append the physical
 * path of every online leaf disk to 'physpath' (space separated),
 * updating '*rsz' with the total bytes written.
 *
 * NOTE: this helper deliberately falls through to EZFS_POOL_INVALARG even
 * after successfully appending paths; the only return value the callers
 * treat specially is EZFS_NOSPC (output buffer full).  Success is judged
 * by the caller from '*rsz' instead of the return code.
 */
static int
vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
    size_t *rsz, boolean_t is_spare)
{
	char *type;
	int ret;

	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
		return (EZFS_INVALCONFIG);

	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
		/*
		 * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
		 * For a spare vdev, we only want to boot from the active
		 * spare device.
		 */
		if (is_spare) {
			uint64_t spare = 0;
			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
			    &spare);
			if (!spare)
				return (EZFS_INVALCONFIG);
		}

		/* Only online disks contribute a physical path. */
		if (vdev_is_online(nv)) {
			if ((ret = vdev_get_one_physpath(nv, physpath,
			    phypath_size, rsz)) != 0)
				return (ret);
		}
	} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
	    strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
	    strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
	    (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
		/*
		 * Interior vdev: recurse into the children.  Note the last
		 * clause above is an assignment, not a comparison: it sets
		 * 'is_spare' so leaf disks under a spare vdev are filtered
		 * by the active-spare check in the disk branch.
		 */
		nvlist_t **child;
		uint_t count;
		int i, ret;

		if (nvlist_lookup_nvlist_array(nv,
		    ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
			return (EZFS_INVALCONFIG);

		for (i = 0; i < count; i++) {
			/* Abort the walk only when the buffer is full. */
			ret = vdev_get_physpaths(child[i], physpath,
			    phypath_size, rsz, is_spare);
			if (ret == EZFS_NOSPC)
				return (ret);
		}
	}

	return (EZFS_POOL_INVALARG);
}
2354
2355 /*
2356  * Get phys_path for a root pool config.
2357  * Return 0 on success; non-zero on failure.
2358  */
2359 static int
2360 zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
2361 {
2362         size_t rsz;
2363         nvlist_t *vdev_root;
2364         nvlist_t **child;
2365         uint_t count;
2366         char *type;
2367
2368         rsz = 0;
2369
2370         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2371             &vdev_root) != 0)
2372                 return (EZFS_INVALCONFIG);
2373
2374         if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
2375             nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
2376             &child, &count) != 0)
2377                 return (EZFS_INVALCONFIG);
2378
2379         /*
2380          * root pool can only have a single top-level vdev.
2381          */
2382         if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1)
2383                 return (EZFS_POOL_INVALARG);
2384
2385         (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
2386             B_FALSE);
2387
2388         /* No online devices */
2389         if (rsz == 0)
2390                 return (EZFS_NODEVICE);
2391
2392         return (0);
2393 }
2394
2395 /*
2396  * Get phys_path for a root pool
2397  * Return 0 on success; non-zero on failure.
2398  */
2399 int
2400 zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
2401 {
2402         return (zpool_get_config_physpath(zhp->zpool_config, physpath,
2403             phypath_size));
2404 }
2405
2406 /*
2407  * If the device has being dynamically expanded then we need to relabel
2408  * the disk to use the new unallocated space.
2409  */
2410 static int
2411 zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg)
2412 {
2413         int fd, error;
2414
2415         if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
2416                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2417                     "relabel '%s': unable to open device: %d"), path, errno);
2418                 return (zfs_error(hdl, EZFS_OPENFAILED, msg));
2419         }
2420
2421         /*
2422          * It's possible that we might encounter an error if the device
2423          * does not have any unallocated space left. If so, we simply
2424          * ignore that error and continue on.
2425          *
2426          * Also, we don't call efi_rescan() - that would just return EBUSY.
2427          * The module will do it for us in vdev_disk_open().
2428          */
2429         error = efi_use_whole_disk(fd);
2430
2431         /* Flush the buffers to disk and invalidate the page cache. */
2432         (void) fsync(fd);
2433         (void) ioctl(fd, BLKFLSBUF);
2434
2435         (void) close(fd);
2436         if (error && error != VT_ENOSPC) {
2437                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2438                     "relabel '%s': unable to read disk capacity"), path);
2439                 return (zfs_error(hdl, EZFS_NOCAP, msg));
2440         }
2441
2442         return (0);
2443 }
2444
2445 /*
2446  * Convert a vdev path to a GUID.  Returns GUID or 0 on error.
2447  *
2448  * If is_spare, is_l2cache, or is_log is non-NULL, then store within it
2449  * if the VDEV is a spare, l2cache, or log device.  If they're NULL then
2450  * ignore them.
2451  */
2452 static uint64_t
2453 zpool_vdev_path_to_guid_impl(zpool_handle_t *zhp, const char *path,
2454     boolean_t *is_spare, boolean_t *is_l2cache, boolean_t *is_log)
2455 {
2456         uint64_t guid;
2457         boolean_t spare = B_FALSE, l2cache = B_FALSE, log = B_FALSE;
2458         nvlist_t *tgt;
2459
2460         if ((tgt = zpool_find_vdev(zhp, path, &spare, &l2cache,
2461             &log)) == NULL)
2462                 return (0);
2463
2464         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &guid) == 0);
2465         if (is_spare != NULL)
2466                 *is_spare = spare;
2467         if (is_l2cache != NULL)
2468                 *is_l2cache = l2cache;
2469         if (is_log != NULL)
2470                 *is_log = log;
2471
2472         return (guid);
2473 }
2474
2475 /* Convert a vdev path to a GUID.  Returns GUID or 0 on error. */
2476 uint64_t
2477 zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path)
2478 {
2479         return (zpool_vdev_path_to_guid_impl(zhp, path, NULL, NULL, NULL));
2480 }
2481
/*
 * Bring the specified vdev online.   The 'flags' parameter is a set of the
 * ZFS_ONLINE_* flags.  On success, '*newstate' receives the vdev state
 * reported back by the kernel.
 */
int
zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
    vdev_state_t *newstate)
{
	zfs_cmd_t zc = {"\0"};
	char msg[1024];
	nvlist_t *tgt;
	boolean_t avail_spare, l2cache, islog;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	int error;

	/* Pick the error-message prefix based on the requested operation. */
	if (flags & ZFS_ONLINE_EXPAND) {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
	} else {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
	}

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
	    &islog)) == NULL)
		return (zfs_error(hdl, EZFS_NODEVICE, msg));

	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);

	/* An available (inactive) hot spare cannot be onlined. */
	if (avail_spare)
		return (zfs_error(hdl, EZFS_ISSPARE, msg));

	/*
	 * Expansion was either requested explicitly or is implied by the
	 * pool's 'autoexpand' property being set.
	 */
	if (flags & ZFS_ONLINE_EXPAND ||
	    zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
		uint64_t wholedisk = 0;

		(void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
		    &wholedisk);

		/*
		 * XXX - L2ARC 1.0 devices can't support expansion.
		 */
		if (l2cache) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cannot expand cache devices"));
			return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
		}

		if (wholedisk) {
			const char *fullpath = path;
			char buf[MAXPATHLEN];

			/* Expand a short device name to an absolute path. */
			if (path[0] != '/') {
				error = zfs_resolve_shortname(path, buf,
				    sizeof (buf));
				if (error != 0)
					return (zfs_error(hdl, EZFS_NODEVICE,
					    msg));

				fullpath = buf;
			}

			/* Relabel so the kernel can use the new capacity. */
			error = zpool_relabel_disk(hdl, fullpath, msg);
			if (error != 0)
				return (error);
		}
	}

	zc.zc_cookie = VDEV_STATE_ONLINE;
	zc.zc_obj = flags;

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
		/*
		 * EINVAL indicates the device now belongs to a pool that
		 * was split off from this one; it must be detached instead.
		 */
		if (errno == EINVAL) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
			    "from this pool into a new one.  Use '%s' "
			    "instead"), "zpool detach");
			return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
		}
		return (zpool_standard_error(hdl, errno, msg));
	}

	/* Report the state the kernel left the vdev in. */
	*newstate = zc.zc_cookie;
	return (0);
}
2567
2568 /*
2569  * Take the specified vdev offline
2570  */
2571 int
2572 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
2573 {
2574         zfs_cmd_t zc = {"\0"};
2575         char msg[1024];
2576         nvlist_t *tgt;
2577         boolean_t avail_spare, l2cache;
2578         libzfs_handle_t *hdl = zhp->zpool_hdl;
2579
2580         (void) snprintf(msg, sizeof (msg),
2581             dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
2582
2583         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2584         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2585             NULL)) == NULL)
2586                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2587
2588         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2589
2590         if (avail_spare)
2591                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2592
2593         zc.zc_cookie = VDEV_STATE_OFFLINE;
2594         zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
2595
2596         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2597                 return (0);
2598
2599         switch (errno) {
2600         case EBUSY:
2601
2602                 /*
2603                  * There are no other replicas of this device.
2604                  */
2605                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2606
2607         case EEXIST:
2608                 /*
2609                  * The log device has unplayed logs
2610                  */
2611                 return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
2612
2613         default:
2614                 return (zpool_standard_error(hdl, errno, msg));
2615         }
2616 }
2617
2618 /*
2619  * Mark the given vdev faulted.
2620  */
2621 int
2622 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2623 {
2624         zfs_cmd_t zc = {"\0"};
2625         char msg[1024];
2626         libzfs_handle_t *hdl = zhp->zpool_hdl;
2627
2628         (void) snprintf(msg, sizeof (msg),
2629             dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
2630
2631         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2632         zc.zc_guid = guid;
2633         zc.zc_cookie = VDEV_STATE_FAULTED;
2634         zc.zc_obj = aux;
2635
2636         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2637                 return (0);
2638
2639         switch (errno) {
2640         case EBUSY:
2641
2642                 /*
2643                  * There are no other replicas of this device.
2644                  */
2645                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2646
2647         default:
2648                 return (zpool_standard_error(hdl, errno, msg));
2649         }
2650
2651 }
2652
2653 /*
2654  * Mark the given vdev degraded.
2655  */
2656 int
2657 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2658 {
2659         zfs_cmd_t zc = {"\0"};
2660         char msg[1024];
2661         libzfs_handle_t *hdl = zhp->zpool_hdl;
2662
2663         (void) snprintf(msg, sizeof (msg),
2664             dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
2665
2666         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2667         zc.zc_guid = guid;
2668         zc.zc_cookie = VDEV_STATE_DEGRADED;
2669         zc.zc_obj = aux;
2670
2671         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2672                 return (0);
2673
2674         return (zpool_standard_error(hdl, errno, msg));
2675 }
2676
2677 /*
2678  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
2679  * a hot spare.
2680  */
2681 static boolean_t
2682 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
2683 {
2684         nvlist_t **child;
2685         uint_t c, children;
2686         char *type;
2687
2688         if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
2689             &children) == 0) {
2690                 verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
2691                     &type) == 0);
2692
2693                 if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
2694                     children == 2 && child[which] == tgt)
2695                         return (B_TRUE);
2696
2697                 for (c = 0; c < children; c++)
2698                         if (is_replacing_spare(child[c], tgt, which))
2699                                 return (B_TRUE);
2700         }
2701
2702         return (B_FALSE);
2703 }
2704
/*
 * Attach new_disk (fully described by nvroot) to old_disk.
 * If 'replacing' is specified, the new disk will replace the old one.
 *
 * Returns 0 on success, -1 on failure with the libzfs error state set.
 */
int
zpool_vdev_attach(zpool_handle_t *zhp,
    const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
{
	zfs_cmd_t zc = {"\0"};
	char msg[1024];
	int ret;
	nvlist_t *tgt;
	boolean_t avail_spare, l2cache, islog;
	uint64_t val;
	char *newname;
	nvlist_t **child;
	uint_t children;
	nvlist_t *config_root;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	boolean_t rootpool = zpool_is_bootable(zhp);

	if (replacing)
		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
		    "cannot replace %s with %s"), old_disk, new_disk);
	else
		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
		    "cannot attach %s to %s"), new_disk, old_disk);

	/* Locate the vdev being attached to / replaced. */
	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
	    &islog)) == 0)
		return (zfs_error(hdl, EZFS_NODEVICE, msg));

	/* Spares and cache devices cannot be attach/replace targets. */
	if (avail_spare)
		return (zfs_error(hdl, EZFS_ISSPARE, msg));

	if (l2cache)
		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));

	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
	zc.zc_cookie = replacing;

	/* The incoming vdev spec must describe exactly one disk. */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0 || children != 1) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "new device must be a single disk"));
		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
	}

	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);

	if ((newname = zpool_vdev_name(NULL, NULL, child[0], 0)) == NULL)
		return (-1);

	/*
	 * If the target is a hot spare that has been swapped in, we can only
	 * replace it with another hot spare.
	 */
	if (replacing &&
	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
	    (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
	    NULL) == NULL || !avail_spare) &&
	    is_replacing_spare(config_root, tgt, 1)) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "can only be replaced by another hot spare"));
		free(newname);
		return (zfs_error(hdl, EZFS_BADTARGET, msg));
	}

	free(newname);

	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);

	ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);

	zcmd_free_nvlists(&zc);

	if (ret == 0) {
		if (rootpool) {
			/*
			 * XXX need a better way to prevent user from
			 * booting up a half-baked vdev.
			 */
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
			    "sure to wait until resilver is done "
			    "before rebooting.\n"));
		}
		return (0);
	}

	/* The ioctl failed; map errno to a meaningful libzfs error. */
	switch (errno) {
	case ENOTSUP:
		/*
		 * Can't attach to or replace this type of vdev.
		 */
		if (replacing) {
			uint64_t version = zpool_get_prop_int(zhp,
			    ZPOOL_PROP_VERSION, NULL);

			if (islog)
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "cannot replace a log with a spare"));
			else if (version >= SPA_VERSION_MULTI_REPLACE)
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "already in replacing/spare config; wait "
				    "for completion or use 'zpool detach'"));
			else
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "cannot replace a replacing device"));
		} else {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "can only attach to mirrors and top-level "
			    "disks"));
		}
		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
		break;

	case EINVAL:
		/*
		 * The new device must be a single disk.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "new device must be a single disk"));
		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
		break;

	case EBUSY:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
		    new_disk);
		(void) zfs_error(hdl, EZFS_BADDEV, msg);
		break;

	case EOVERFLOW:
		/*
		 * The new device is too small.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "device is too small"));
		(void) zfs_error(hdl, EZFS_BADDEV, msg);
		break;

	case EDOM:
		/*
		 * The new device has a different optimal sector size.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "new device has a different optimal sector size; use the "
		    "option '-o ashift=N' to override the optimal size"));
		(void) zfs_error(hdl, EZFS_BADDEV, msg);
		break;

	case ENAMETOOLONG:
		/*
		 * The resulting top-level vdev spec won't fit in the label.
		 */
		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
		break;

	default:
		(void) zpool_standard_error(hdl, errno, msg);
	}

	return (-1);
}
2871
2872 /*
2873  * Detach the specified device.
2874  */
2875 int
2876 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
2877 {
2878         zfs_cmd_t zc = {"\0"};
2879         char msg[1024];
2880         nvlist_t *tgt;
2881         boolean_t avail_spare, l2cache;
2882         libzfs_handle_t *hdl = zhp->zpool_hdl;
2883
2884         (void) snprintf(msg, sizeof (msg),
2885             dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
2886
2887         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2888         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2889             NULL)) == 0)
2890                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2891
2892         if (avail_spare)
2893                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2894
2895         if (l2cache)
2896                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2897
2898         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2899
2900         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
2901                 return (0);
2902
2903         switch (errno) {
2904
2905         case ENOTSUP:
2906                 /*
2907                  * Can't detach from this type of vdev.
2908                  */
2909                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
2910                     "applicable to mirror and replacing vdevs"));
2911                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
2912                 break;
2913
2914         case EBUSY:
2915                 /*
2916                  * There are no other replicas of this device.
2917                  */
2918                 (void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
2919                 break;
2920
2921         default:
2922                 (void) zpool_standard_error(hdl, errno, msg);
2923         }
2924
2925         return (-1);
2926 }
2927
2928 /*
2929  * Find a mirror vdev in the source nvlist.
2930  *
2931  * The mchild array contains a list of disks in one of the top-level mirrors
2932  * of the source pool.  The schild array contains a list of disks that the
2933  * user specified on the command line.  We loop over the mchild array to
2934  * see if any entry in the schild array matches.
2935  *
2936  * If a disk in the mchild array is found in the schild array, we return
2937  * the index of that entry.  Otherwise we return -1.
2938  */
2939 static int
2940 find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
2941     nvlist_t **schild, uint_t schildren)
2942 {
2943         uint_t mc;
2944
2945         for (mc = 0; mc < mchildren; mc++) {
2946                 uint_t sc;
2947                 char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
2948                     mchild[mc], 0);
2949
2950                 for (sc = 0; sc < schildren; sc++) {
2951                         char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
2952                             schild[sc], 0);
2953                         boolean_t result = (strcmp(mpath, spath) == 0);
2954
2955                         free(spath);
2956                         if (result) {
2957                                 free(mpath);
2958                                 return (mc);
2959                         }
2960                 }
2961
2962                 free(mpath);
2963         }
2964
2965         return (-1);
2966 }
2967
/*
 * Split a mirror pool.  If newroot points to null, then a new nvlist
 * is generated and it is the responsibility of the caller to free it.
 *
 * One half of each top-level mirror is split off into a new pool named
 * 'newname'.  'props' optionally carries properties for the new pool;
 * 'flags' selects dry-run and import behavior.  All cleanup funnels
 * through the 'out' label; 'freelist' and 'memory_err' record what must
 * be released or reported on exit.
 */
int
zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
    nvlist_t *props, splitflags_t flags)
{
	zfs_cmd_t zc = {"\0"};
	char msg[1024];
	nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
	nvlist_t **varray = NULL, *zc_props = NULL;
	uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	uint64_t vers;
	boolean_t freelist = B_FALSE, memory_err = B_TRUE;
	int retval = 0;

	(void) snprintf(msg, sizeof (msg),
	    dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);

	if (!zpool_name_valid(hdl, B_FALSE, newname))
		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));

	if ((config = zpool_get_config(zhp, NULL)) == NULL) {
		(void) fprintf(stderr, gettext("Internal error: unable to "
		    "retrieve pool configuration\n"));
		return (-1);
	}

	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
	    == 0);
	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);

	if (props) {
		/* NOTE: this local 'flags' shadows the parameter. */
		prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
		if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
		    props, vers, flags, msg)) == NULL)
			return (-1);
	}

	if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
	    &children) != 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "Source pool is missing vdev tree"));
		nvlist_free(zc_props);
		return (-1);
	}

	/* varray collects the vdevs that will form the new pool's tree. */
	varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
	vcount = 0;

	if (*newroot == NULL ||
	    nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
	    &newchild, &newchildren) != 0)
		newchildren = 0;

	for (c = 0; c < children; c++) {
		uint64_t is_log = B_FALSE, is_hole = B_FALSE;
		char *type;
		nvlist_t **mchild, *vdev;
		uint_t mchildren;
		int entry;

		/*
		 * Unlike cache & spares, slogs are stored in the
		 * ZPOOL_CONFIG_CHILDREN array.  We filter them out here.
		 */
		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
		    &is_log);
		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
		    &is_hole);
		if (is_log || is_hole) {
			/*
			 * Create a hole vdev and put it in the config.
			 */
			if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
				goto out;
			if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
			    VDEV_TYPE_HOLE) != 0)
				goto out;
			if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
			    1) != 0)
				goto out;
			/* Remember where the trailing run of holes begins. */
			if (lastlog == 0)
				lastlog = vcount;
			varray[vcount++] = vdev;
			continue;
		}
		lastlog = 0;
		verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
		    == 0);
		if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "Source pool must be composed only of mirrors\n"));
			retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
			goto out;
		}

		verify(nvlist_lookup_nvlist_array(child[c],
		    ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);

		/* find or add an entry for this top-level vdev */
		if (newchildren > 0 &&
		    (entry = find_vdev_entry(zhp, mchild, mchildren,
		    newchild, newchildren)) >= 0) {
			/* We found a disk that the user specified. */
			vdev = mchild[entry];
			++found;
		} else {
			/* User didn't specify a disk for this vdev. */
			vdev = mchild[mchildren - 1];
		}

		if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
			goto out;
	}

	/* did we find every disk the user specified? */
	if (found != newchildren) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
		    "include at most one disk from each mirror"));
		retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
		goto out;
	}

	/* Prepare the nvlist for populating. */
	if (*newroot == NULL) {
		if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
			goto out;
		freelist = B_TRUE;
		if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
		    VDEV_TYPE_ROOT) != 0)
			goto out;
	} else {
		verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
	}

	/* Add all the children we found */
	if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
	    lastlog == 0 ? vcount : lastlog) != 0)
		goto out;

	/*
	 * If we're just doing a dry run, exit now with success.
	 */
	if (flags.dryrun) {
		memory_err = B_FALSE;
		freelist = B_FALSE;
		goto out;
	}

	/* now build up the config list & call the ioctl */
	if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
		goto out;

	if (nvlist_add_nvlist(newconfig,
	    ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
	    nvlist_add_string(newconfig,
	    ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
	    nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
		goto out;

	/*
	 * The new pool is automatically part of the namespace unless we
	 * explicitly export it.
	 */
	if (!flags.import)
		zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	(void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
	if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
		goto out;
	if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
		goto out;

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
		retval = zpool_standard_error(hdl, errno, msg);
		goto out;
	}

	/* Success: the caller keeps *newroot; nothing more to report. */
	freelist = B_FALSE;
	memory_err = B_FALSE;

out:
	if (varray != NULL) {
		int v;

		for (v = 0; v < vcount; v++)
			nvlist_free(varray[v]);
		free(varray);
	}
	zcmd_free_nvlists(&zc);
	nvlist_free(zc_props);
	nvlist_free(newconfig);
	if (freelist) {
		nvlist_free(*newroot);
		*newroot = NULL;
	}

	if (retval != 0)
		return (retval);

	if (memory_err)
		return (no_memory(hdl));

	return (0);
}
3176
3177 /*
3178  * Remove the given device.  Currently, this is supported only for hot spares,
3179  * cache, and log devices.
3180  */
3181 int
3182 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
3183 {
3184         zfs_cmd_t zc = {"\0"};
3185         char msg[1024];
3186         nvlist_t *tgt;
3187         boolean_t avail_spare, l2cache, islog;
3188         libzfs_handle_t *hdl = zhp->zpool_hdl;
3189         uint64_t version;
3190
3191         (void) snprintf(msg, sizeof (msg),
3192             dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
3193
3194         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3195         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3196             &islog)) == 0)
3197                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3198         /*
3199          * XXX - this should just go away.
3200          */
3201         if (!avail_spare && !l2cache && !islog) {
3202                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3203                     "only inactive hot spares, cache, "
3204                     "or log devices can be removed"));
3205                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3206         }
3207
3208         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
3209         if (islog && version < SPA_VERSION_HOLES) {
3210                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3211                     "pool must be upgrade to support log removal"));
3212                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
3213         }
3214
3215         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
3216
3217         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
3218                 return (0);
3219
3220         return (zpool_standard_error(hdl, errno, msg));
3221 }
3222
3223 /*
3224  * Clear the errors for the pool, or the particular device if specified.
3225  */
3226 int
3227 zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
3228 {
3229         zfs_cmd_t zc = {"\0"};
3230         char msg[1024];
3231         nvlist_t *tgt;
3232         zpool_rewind_policy_t policy;
3233         boolean_t avail_spare, l2cache;
3234         libzfs_handle_t *hdl = zhp->zpool_hdl;
3235         nvlist_t *nvi = NULL;
3236         int error;
3237
3238         if (path)
3239                 (void) snprintf(msg, sizeof (msg),
3240                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3241                     path);
3242         else
3243                 (void) snprintf(msg, sizeof (msg),
3244                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3245                     zhp->zpool_name);
3246
3247         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3248         if (path) {
3249                 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
3250                     &l2cache, NULL)) == 0)
3251                         return (zfs_error(hdl, EZFS_NODEVICE, msg));
3252
3253                 /*
3254                  * Don't allow error clearing for hot spares.  Do allow
3255                  * error clearing for l2cache devices.
3256                  */
3257                 if (avail_spare)
3258                         return (zfs_error(hdl, EZFS_ISSPARE, msg));
3259
3260                 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
3261                     &zc.zc_guid) == 0);
3262         }
3263
3264         zpool_get_rewind_policy(rewindnvl, &policy);
3265         zc.zc_cookie = policy.zrp_request;
3266
3267         if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
3268                 return (-1);
3269
3270         if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
3271                 return (-1);
3272
3273         while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
3274             errno == ENOMEM) {
3275                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3276                         zcmd_free_nvlists(&zc);
3277                         return (-1);
3278                 }
3279         }
3280
3281         if (!error || ((policy.zrp_request & ZPOOL_TRY_REWIND) &&
3282             errno != EPERM && errno != EACCES)) {
3283                 if (policy.zrp_request &
3284                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
3285                         (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
3286                         zpool_rewind_exclaim(hdl, zc.zc_name,
3287                             ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0),
3288                             nvi);
3289                         nvlist_free(nvi);
3290                 }
3291                 zcmd_free_nvlists(&zc);
3292                 return (0);
3293         }
3294
3295         zcmd_free_nvlists(&zc);
3296         return (zpool_standard_error(hdl, errno, msg));
3297 }
3298
3299 /*
3300  * Similar to zpool_clear(), but takes a GUID (used by fmd).
3301  */
3302 int
3303 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
3304 {
3305         zfs_cmd_t zc = {"\0"};
3306         char msg[1024];
3307         libzfs_handle_t *hdl = zhp->zpool_hdl;
3308
3309         (void) snprintf(msg, sizeof (msg),
3310             dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
3311             (u_longlong_t)guid);
3312
3313         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3314         zc.zc_guid = guid;
3315         zc.zc_cookie = ZPOOL_NO_REWIND;
3316
3317         if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
3318                 return (0);
3319
3320         return (zpool_standard_error(hdl, errno, msg));
3321 }
3322
3323 /*
3324  * Change the GUID for a pool.
3325  */
3326 int
3327 zpool_reguid(zpool_handle_t *zhp)
3328 {
3329         char msg[1024];
3330         libzfs_handle_t *hdl = zhp->zpool_hdl;
3331         zfs_cmd_t zc = {"\0"};
3332
3333         (void) snprintf(msg, sizeof (msg),
3334             dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name);
3335
3336         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3337         if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0)
3338                 return (0);
3339
3340         return (zpool_standard_error(hdl, errno, msg));
3341 }
3342
3343 /*
3344  * Reopen the pool.
3345  */
3346 int
3347 zpool_reopen(zpool_handle_t *zhp)
3348 {
3349         zfs_cmd_t zc = {"\0"};
3350         char msg[1024];
3351         libzfs_handle_t *hdl = zhp->zpool_hdl;
3352
3353         (void) snprintf(msg, sizeof (msg),
3354             dgettext(TEXT_DOMAIN, "cannot reopen '%s'"),
3355             zhp->zpool_name);
3356
3357         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3358         if (zfs_ioctl(hdl, ZFS_IOC_POOL_REOPEN, &zc) == 0)
3359                 return (0);
3360         return (zpool_standard_error(hdl, errno, msg));
3361 }
3362
3363 /* call into libzfs_core to execute the sync IOCTL per pool */
3364 int
3365 zpool_sync_one(zpool_handle_t *zhp, void *data)
3366 {
3367         int ret;
3368         libzfs_handle_t *hdl = zpool_get_handle(zhp);
3369         const char *pool_name = zpool_get_name(zhp);
3370         boolean_t *force = data;
3371         nvlist_t *innvl = fnvlist_alloc();
3372
3373         fnvlist_add_boolean_value(innvl, "force", *force);
3374         if ((ret = lzc_sync(pool_name, innvl, NULL)) != 0) {
3375                 nvlist_free(innvl);
3376                 return (zpool_standard_error_fmt(hdl, ret,
3377                     dgettext(TEXT_DOMAIN, "sync '%s' failed"), pool_name));
3378         }
3379         nvlist_free(innvl);
3380
3381         return (0);
3382 }
3383
3384 #if defined(__sun__) || defined(__sun)
3385 /*
3386  * Convert from a devid string to a path.
3387  */
3388 static char *
3389 devid_to_path(char *devid_str)
3390 {
3391         ddi_devid_t devid;
3392         char *minor;
3393         char *path;
3394         devid_nmlist_t *list = NULL;
3395         int ret;
3396
3397         if (devid_str_decode(devid_str, &devid, &minor) != 0)
3398                 return (NULL);
3399
3400         ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
3401
3402         devid_str_free(minor);
3403         devid_free(devid);
3404
3405         if (ret != 0)
3406                 return (NULL);
3407
3408         /*
3409          * In a case the strdup() fails, we will just return NULL below.
3410          */
3411         path = strdup(list[0].devname);
3412
3413         devid_free_nmlist(list);
3414
3415         return (path);
3416 }
3417
3418 /*
3419  * Convert from a path to a devid string.
3420  */
3421 static char *
3422 path_to_devid(const char *path)
3423 {
3424         int fd;
3425         ddi_devid_t devid;
3426         char *minor, *ret;
3427
3428         if ((fd = open(path, O_RDONLY)) < 0)
3429                 return (NULL);
3430
3431         minor = NULL;
3432         ret = NULL;
3433         if (devid_get(fd, &devid) == 0) {
3434                 if (devid_get_minor_name(fd, &minor) == 0)
3435                         ret = devid_str_encode(devid, minor);
3436                 if (minor != NULL)
3437                         devid_str_free(minor);
3438                 devid_free(devid);
3439         }
3440         (void) close(fd);
3441
3442         return (ret);
3443 }
3444
3445 /*
3446  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
3447  * ignore any failure here, since a common case is for an unprivileged user to
3448  * type 'zpool status', and we'll display the correct information anyway.
3449  */
3450 static void
3451 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
3452 {
3453         zfs_cmd_t zc = {"\0"};
3454
3455         (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3456         (void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
3457         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3458             &zc.zc_guid) == 0);
3459
3460         (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
3461 }
3462 #endif /* sun */
3463
3464 /*
3465  * Remove partition suffix from a vdev path.  Partition suffixes may take three
3466  * forms: "-partX", "pX", or "X", where X is a string of digits.  The second
3467  * case only occurs when the suffix is preceded by a digit, i.e. "md0p0" The
3468  * third case only occurs when preceded by a string matching the regular
3469  * expression "^([hsv]|xv)d[a-z]+", i.e. a scsi, ide, virtio or xen disk.
3470  *
3471  * caller must free the returned string
3472  */
3473 char *
3474 zfs_strip_partition(char *path)
3475 {
3476         char *tmp = strdup(path);
3477         char *part = NULL, *d = NULL;
3478         if (!tmp)
3479                 return (NULL);
3480
3481         if ((part = strstr(tmp, "-part")) && part != tmp) {
3482                 d = part + 5;
3483         } else if ((part = strrchr(tmp, 'p')) &&
3484             part > tmp + 1 && isdigit(*(part-1))) {
3485                 d = part + 1;
3486         } else if ((tmp[0] == 'h' || tmp[0] == 's' || tmp[0] == 'v') &&
3487             tmp[1] == 'd') {
3488                 for (d = &tmp[2]; isalpha(*d); part = ++d) { }
3489         } else if (strncmp("xvd", tmp, 3) == 0) {
3490                 for (d = &tmp[3]; isalpha(*d); part = ++d) { }
3491         }
3492         if (part && d && *d != '\0') {
3493                 for (; isdigit(*d); d++) { }
3494                 if (*d == '\0')
3495                         *part = '\0';
3496         }
3497
3498         return (tmp);
3499 }
3500
/*
 * Same as zfs_strip_partition, but allows "/dev/" to be in the pathname
 *
 * path:        /dev/sda1
 * returns:     /dev/sda
 *
 * Returned string must be freed.
 */
char *
zfs_strip_partition_path(char *path)
{
	char *newpath = strdup(path);
	char *sd_offset;
	char *new_sd;

	if (!newpath)
		return (NULL);

	/*
	 * Point to the "sda1" part of "/dev/sda1".  If the path has no
	 * directory component, operate on the whole string instead of
	 * dereferencing strrchr()'s NULL return.
	 */
	sd_offset = strrchr(newpath, '/');
	sd_offset = (sd_offset == NULL) ? newpath : sd_offset + 1;

	/* Get our new name "sda" */
	new_sd = zfs_strip_partition(sd_offset);
	if (!new_sd) {
		free(newpath);
		return (NULL);
	}

	/* Paste the "sda" where "sda1" was; it is never longer than before */
	strlcpy(sd_offset, new_sd, strlen(sd_offset) + 1);

	/* Free temporary "sda" */
	free(new_sd);

	return (newpath);
}
3537
3538 #define PATH_BUF_LEN    64
3539
/*
 * Given a vdev, return the name to display in iostat.  If the vdev has a path,
 * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
 * We also check if this is a whole disk, in which case we strip off the
 * trailing 's0' slice name.
 *
 * This routine is also responsible for identifying when disks have been
 * reconfigured in a new location.  The kernel will have opened the device by
 * devid, but the path will still refer to the old location.  To catch this, we
 * first do a path -> devid translation (which is fast for the common case).  If
 * the devid matches, we're done.  If not, we do a reverse devid -> path
 * translation and issue the appropriate ioctl() to update the path of the vdev.
 * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
 * of these checks.  (The devid rewrite only happens in the Solaris build; see
 * the #if below.)
 *
 * The returned string is allocated with zfs_strdup()/strdup() and must be
 * freed by the caller.
 */
char *
zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
    int name_flags)
{
	char *path, *type, *env;
	uint64_t value;
	char buf[PATH_BUF_LEN];
	char tmpbuf[PATH_BUF_LEN];

	/*
	 * Environment variables may force the PATH/GUID/FOLLOW_LINKS naming
	 * behavior; any positive number, "YES", or "ON" (case-insensitive
	 * prefix match) enables the corresponding flag.
	 */
	env = getenv("ZPOOL_VDEV_NAME_PATH");
	if (env && (strtoul(env, NULL, 0) > 0 ||
	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
		name_flags |= VDEV_NAME_PATH;

	env = getenv("ZPOOL_VDEV_NAME_GUID");
	if (env && (strtoul(env, NULL, 0) > 0 ||
	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
		name_flags |= VDEV_NAME_GUID;

	env = getenv("ZPOOL_VDEV_NAME_FOLLOW_LINKS");
	if (env && (strtoul(env, NULL, 0) > 0 ||
	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
		name_flags |= VDEV_NAME_FOLLOW_LINKS;

	/* Missing devices and GUID-mode naming both display the raw GUID. */
	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &value) == 0 ||
	    name_flags & VDEV_NAME_GUID) {
		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value);
		(void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)value);
		path = buf;
	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
#if defined(__sun__) || defined(__sun)
		/*
		 * Live VDEV path updates to a kernel VDEV during a
		 * zpool_vdev_name lookup are not supported on Linux.
		 */
		char *devid;
		vdev_stat_t *vs;
		uint_t vsc;

		/*
		 * If the device is dead (faulted, offline, etc) then don't
		 * bother opening it.  Otherwise we may be forcing the user to
		 * open a misbehaving device, which can have undesirable
		 * effects.
		 */
		if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
		    (uint64_t **)&vs, &vsc) != 0 ||
		    vs->vs_state >= VDEV_STATE_DEGRADED) &&
		    zhp != NULL &&
		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
			/*
			 * Determine if the current path is correct.
			 */
			char *newdevid = path_to_devid(path);

			if (newdevid == NULL ||
			    strcmp(devid, newdevid) != 0) {
				char *newpath;

				if ((newpath = devid_to_path(devid)) != NULL) {
					/*
					 * Update the path appropriately.
					 */
					set_path(zhp, nv, newpath);
					if (nvlist_add_string(nv,
					    ZPOOL_CONFIG_PATH, newpath) == 0)
						verify(nvlist_lookup_string(nv,
						    ZPOOL_CONFIG_PATH,
						    &path) == 0);
					free(newpath);
				}
			}

			if (newdevid)
				devid_str_free(newdevid);
		}
#endif /* sun */

		/* Optionally resolve symlinks (e.g. /dev/disk/by-id names). */
		if (name_flags & VDEV_NAME_FOLLOW_LINKS) {
			char *rp = realpath(path, NULL);
			if (rp) {
				strlcpy(buf, rp, sizeof (buf));
				path = buf;
				free(rp);
			}
		}

		/*
		 * For a block device only use the name.
		 */
		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
		if ((strcmp(type, VDEV_TYPE_DISK) == 0) &&
		    !(name_flags & VDEV_NAME_PATH)) {
			path = strrchr(path, '/');
			path++;
		}

		/*
		 * Remove the partition from the path if this is a whole disk.
		 */
		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value)
		    == 0 && value && !(name_flags & VDEV_NAME_PATH)) {
			return (zfs_strip_partition(path));
		}
	} else {
		/* No path: fall back to the vdev type (mirror, raidz, ...). */
		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);

		/*
		 * If it's a raidz device, we need to stick in the parity level.
		 */
		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
			    &value) == 0);
			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
			    (u_longlong_t)value);
			path = buf;
		}

		/*
		 * We identify each top-level vdev by using a <type-id>
		 * naming convention.
		 */
		if (name_flags & VDEV_NAME_TYPE_ID) {
			uint64_t id;
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
			    &id) == 0);
			(void) snprintf(tmpbuf, sizeof (tmpbuf), "%s-%llu",
			    path, (u_longlong_t)id);
			path = tmpbuf;
		}
	}

	return (zfs_strdup(hdl, path));
}
3689
3690 static int
3691 zbookmark_mem_compare(const void *a, const void *b)
3692 {
3693         return (memcmp(a, b, sizeof (zbookmark_phys_t)));
3694 }
3695
/*
 * Retrieve the persistent error log, uniquify the members, and return to the
 * caller.
 *
 * On success *nverrlistp is a freshly allocated nvlist whose "ejk" entries
 * each hold a ZPOOL_ERR_DATASET/ZPOOL_ERR_OBJECT pair; the caller frees it.
 */
int
zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
{
	zfs_cmd_t zc = {"\0"};
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	uint64_t count;
	zbookmark_phys_t *zb = NULL;
	int i;

	/*
	 * Retrieve the raw error list from the kernel.  If the number of errors
	 * has increased, allocate more space and continue until we get the
	 * entire list.
	 */
	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
	    &count) == 0);
	if (count == 0)
		return (0);
	/* zfs_alloc() aborts via no_memory() on failure, so no NULL check. */
	zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
	    count * sizeof (zbookmark_phys_t));
	zc.zc_nvlist_dst_size = count;
	(void) strcpy(zc.zc_name, zhp->zpool_name);
	for (;;) {
		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
		    &zc) != 0) {
			/* Buffer is stale either way; retry only on ENOMEM. */
			free((void *)(uintptr_t)zc.zc_nvlist_dst);
			if (errno == ENOMEM) {
				void *dst;

				/* Kernel reported the size it now needs. */
				count = zc.zc_nvlist_dst_size;
				dst = zfs_alloc(zhp->zpool_hdl, count *
				    sizeof (zbookmark_phys_t));
				zc.zc_nvlist_dst = (uintptr_t)dst;
			} else {
				return (zpool_standard_error_fmt(hdl, errno,
				    dgettext(TEXT_DOMAIN, "errors: List of "
				    "errors unavailable")));
			}
		} else {
			break;
		}
	}

	/*
	 * Sort the resulting bookmarks.  This is a little confusing due to the
	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
	 * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks
	 * _not_ copied as part of the process.  So we point the start of our
	 * array appropriately and decrement the total number of elements.
	 */
	zb = ((zbookmark_phys_t *)(uintptr_t)zc.zc_nvlist_dst) +
	    zc.zc_nvlist_dst_size;
	count -= zc.zc_nvlist_dst_size;

	qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare);

	/* NOTE(review): KM_SLEEP is a kernel flag; 0 is conventional here. */
	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);

	/*
	 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
	 */
	for (i = 0; i < count; i++) {
		nvlist_t *nv;

		/* ignoring zb_blkid and zb_level for now */
		/* skip adjacent duplicates (list is sorted above) */
		if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
		    zb[i-1].zb_object == zb[i].zb_object)
			continue;

		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
			goto nomem;
		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
		    zb[i].zb_objset) != 0) {
			nvlist_free(nv);
			goto nomem;
		}
		if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
		    zb[i].zb_object) != 0) {
			nvlist_free(nv);
			goto nomem;
		}
		/* "ejk" is the (arbitrary) key consumers iterate over */
		if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
			nvlist_free(nv);
			goto nomem;
		}
		nvlist_free(nv);
	}

	free((void *)(uintptr_t)zc.zc_nvlist_dst);
	return (0);

nomem:
	free((void *)(uintptr_t)zc.zc_nvlist_dst);
	return (no_memory(zhp->zpool_hdl));
}
3795
3796 /*
3797  * Upgrade a ZFS pool to the latest on-disk version.
3798  */
3799 int
3800 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
3801 {
3802         zfs_cmd_t zc = {"\0"};
3803         libzfs_handle_t *hdl = zhp->zpool_hdl;
3804
3805         (void) strcpy(zc.zc_name, zhp->zpool_name);
3806         zc.zc_cookie = new_version;
3807
3808         if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
3809                 return (zpool_standard_error_fmt(hdl, errno,
3810                     dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
3811                     zhp->zpool_name));
3812         return (0);
3813 }
3814
/*
 * Flatten the command line into 'string' (at most 'len' bytes, always
 * NUL-terminated): program basename followed by space-separated arguments.
 */
void
zfs_save_arguments(int argc, char **argv, char *string, int len)
{
	int arg;

	(void) strlcpy(string, basename(argv[0]), len);
	for (arg = 1; arg < argc; arg++) {
		(void) strlcat(string, " ", len);
		(void) strlcat(string, argv[arg], len);
	}
}
3826
3827 int
3828 zpool_log_history(libzfs_handle_t *hdl, const char *message)
3829 {
3830         zfs_cmd_t zc = {"\0"};
3831         nvlist_t *args;
3832         int err;
3833
3834         args = fnvlist_alloc();
3835         fnvlist_add_string(args, "message", message);
3836         err = zcmd_write_src_nvlist(hdl, &zc, args);
3837         if (err == 0)
3838                 err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc);
3839         nvlist_free(args);
3840         zcmd_free_nvlists(&zc);
3841         return (err);
3842 }
3843
3844 /*
3845  * Perform ioctl to get some command history of a pool.
3846  *
3847  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
3848  * logical offset of the history buffer to start reading from.
3849  *
3850  * Upon return, 'off' is the next logical offset to read from and
3851  * 'len' is the actual amount of bytes read into 'buf'.
3852  */
3853 static int
3854 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
3855 {
3856         zfs_cmd_t zc = {"\0"};
3857         libzfs_handle_t *hdl = zhp->zpool_hdl;
3858
3859         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3860
3861         zc.zc_history = (uint64_t)(uintptr_t)buf;
3862         zc.zc_history_len = *len;
3863         zc.zc_history_offset = *off;
3864
3865         if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
3866                 switch (errno) {
3867                 case EPERM:
3868                         return (zfs_error_fmt(hdl, EZFS_PERM,
3869                             dgettext(TEXT_DOMAIN,
3870                             "cannot show history for pool '%s'"),
3871                             zhp->zpool_name));
3872                 case ENOENT:
3873                         return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
3874                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
3875                             "'%s'"), zhp->zpool_name));
3876                 case ENOTSUP:
3877                         return (zfs_error_fmt(hdl, EZFS_BADVERSION,
3878                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
3879                             "'%s', pool must be upgraded"), zhp->zpool_name));
3880                 default:
3881                         return (zpool_standard_error_fmt(hdl, errno,
3882                             dgettext(TEXT_DOMAIN,
3883                             "cannot get history for '%s'"), zhp->zpool_name));
3884                 }
3885         }
3886
3887         *len = zc.zc_history_len;
3888         *off = zc.zc_history_offset;
3889
3890         return (0);
3891 }
3892
/*
 * Process the buffer of nvlists, unpacking and storing each nvlist record
 * into 'records'.  'leftover' is set to the number of bytes that weren't
 * processed as there wasn't a complete record.
 *
 * Returns 0 on success or ENOMEM on failure; on failure any records
 * already appended remain owned by the caller.
 */
int
zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
    nvlist_t ***records, uint_t *numrecords)
{
	uint64_t reclen;
	nvlist_t *nv;
	int i;
	void *tmp;

	while (bytes_read > sizeof (reclen)) {

		/* get length of packed record (stored as little endian) */
		for (i = 0, reclen = 0; i < sizeof (reclen); i++)
			reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);

		/* stop at a partial record; caller retries with more data */
		if (bytes_read < sizeof (reclen) + reclen)
			break;

		/* unpack record */
		/*
		 * NOTE(review): any nvlist_unpack() failure is reported as
		 * ENOMEM, even for merely malformed data — confirm callers
		 * only treat this as a generic error.
		 */
		if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
			return (ENOMEM);
		bytes_read -= sizeof (reclen) + reclen;
		buf += sizeof (reclen) + reclen;

		/* add record to nvlist array */
		(*numrecords)++;
		/*
		 * Grow the array by doubling whenever the next element count
		 * reaches a power of two; works from a NULL *records since
		 * realloc(NULL, ...) behaves like malloc().
		 */
		if (ISP2(*numrecords + 1)) {
			tmp = realloc(*records,
			    *numrecords * 2 * sizeof (nvlist_t *));
			if (tmp == NULL) {
				nvlist_free(nv);
				(*numrecords)--;
				return (ENOMEM);
			}
			*records = tmp;
		}
		(*records)[*numrecords - 1] = nv;
	}

	*leftover = bytes_read;
	return (0);
}
3940
3941 /*
3942  * Retrieve the command history of a pool.
3943  */
3944 int
3945 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
3946 {
3947         char *buf;
3948         int buflen = 128 * 1024;
3949         uint64_t off = 0;
3950         nvlist_t **records = NULL;
3951         uint_t numrecords = 0;
3952         int err, i;
3953
3954         buf = malloc(buflen);
3955         if (buf == NULL)
3956                 return (ENOMEM);
3957         do {
3958                 uint64_t bytes_read = buflen;
3959                 uint64_t leftover;
3960
3961                 if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
3962                         break;
3963
3964                 /* if nothing else was read in, we're at EOF, just return */
3965                 if (!bytes_read)
3966                         break;
3967
3968                 if ((err = zpool_history_unpack(buf, bytes_read,
3969                     &leftover, &records, &numrecords)) != 0)
3970                         break;
3971                 off -= leftover;
3972                 if (leftover == bytes_read) {
3973                         /*
3974                          * no progress made, because buffer is not big enough
3975                          * to hold this record; resize and retry.
3976                          */
3977                         buflen *= 2;
3978                         free(buf);
3979                         buf = malloc(buflen);
3980                         if (buf == NULL)
3981                                 return (ENOMEM);
3982                 }
3983
3984                 /* CONSTCOND */
3985         } while (1);
3986
3987         free(buf);
3988
3989         if (!err) {
3990                 verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
3991                 verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
3992                     records, numrecords) == 0);
3993         }
3994         for (i = 0; i < numrecords; i++)
3995                 nvlist_free(records[i]);
3996         free(records);
3997
3998         return (err);
3999 }
4000
4001 /*
4002  * Retrieve the next event given the passed 'zevent_fd' file descriptor.
4003  * If there is a new event available 'nvp' will contain a newly allocated
4004  * nvlist and 'dropped' will be set to the number of missed events since
4005  * the last call to this function.  When 'nvp' is set to NULL it indicates
4006  * no new events are available.  In either case the function returns 0 and
4007  * it is up to the caller to free 'nvp'.  In the case of a fatal error the
4008  * function will return a non-zero value.  When the function is called in
4009  * blocking mode (the default, unless the ZEVENT_NONBLOCK flag is passed),
4010  * it will not return until a new event is available.
4011  */
4012 int
4013 zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp,
4014     int *dropped, unsigned flags, int zevent_fd)
4015 {
4016         zfs_cmd_t zc = {"\0"};
4017         int error = 0;
4018
4019         *nvp = NULL;
4020         *dropped = 0;
4021         zc.zc_cleanup_fd = zevent_fd;
4022
4023         if (flags & ZEVENT_NONBLOCK)
4024                 zc.zc_guid = ZEVENT_NONBLOCK;
4025
4026         if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0)
4027                 return (-1);
4028
4029 retry:
4030         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) != 0) {
4031                 switch (errno) {
4032                 case ESHUTDOWN:
4033                         error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
4034                             dgettext(TEXT_DOMAIN, "zfs shutdown"));
4035                         goto out;
4036                 case ENOENT:
4037                         /* Blocking error case should not occur */
4038                         if (!(flags & ZEVENT_NONBLOCK))
4039                                 error = zpool_standard_error_fmt(hdl, errno,
4040                                     dgettext(TEXT_DOMAIN, "cannot get event"));
4041
4042                         goto out;
4043                 case ENOMEM:
4044                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
4045                                 error = zfs_error_fmt(hdl, EZFS_NOMEM,
4046                                     dgettext(TEXT_DOMAIN, "cannot get event"));
4047                                 goto out;
4048                         } else {
4049                                 goto retry;
4050                         }
4051                 default:
4052                         error = zpool_standard_error_fmt(hdl, errno,
4053                             dgettext(TEXT_DOMAIN, "cannot get event"));
4054                         goto out;
4055                 }
4056         }
4057
4058         error = zcmd_read_dst_nvlist(hdl, &zc, nvp);
4059         if (error != 0)
4060                 goto out;
4061
4062         *dropped = (int)zc.zc_cookie;
4063 out:
4064         zcmd_free_nvlists(&zc);
4065
4066         return (error);
4067 }
4068
4069 /*
4070  * Clear all events.
4071  */
4072 int
4073 zpool_events_clear(libzfs_handle_t *hdl, int *count)
4074 {
4075         zfs_cmd_t zc = {"\0"};
4076         char msg[1024];
4077
4078         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
4079             "cannot clear events"));
4080
4081         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0)
4082                 return (zpool_standard_error_fmt(hdl, errno, msg));
4083
4084         if (count != NULL)
4085                 *count = (int)zc.zc_cookie; /* # of events cleared */
4086
4087         return (0);
4088 }
4089
4090 /*
4091  * Seek to a specific EID, ZEVENT_SEEK_START, or ZEVENT_SEEK_END for
4092  * the passed zevent_fd file handle.  On success zero is returned,
4093  * otherwise -1 is returned and hdl->libzfs_error is set to the errno.
4094  */
4095 int
4096 zpool_events_seek(libzfs_handle_t *hdl, uint64_t eid, int zevent_fd)
4097 {
4098         zfs_cmd_t zc = {"\0"};
4099         int error = 0;
4100
4101         zc.zc_guid = eid;
4102         zc.zc_cleanup_fd = zevent_fd;
4103
4104         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_SEEK, &zc) != 0) {
4105                 switch (errno) {
4106                 case ENOENT:
4107                         error = zfs_error_fmt(hdl, EZFS_NOENT,
4108                             dgettext(TEXT_DOMAIN, "cannot get event"));
4109                         break;
4110
4111                 case ENOMEM:
4112                         error = zfs_error_fmt(hdl, EZFS_NOMEM,
4113                             dgettext(TEXT_DOMAIN, "cannot get event"));
4114                         break;
4115
4116                 default:
4117                         error = zpool_standard_error_fmt(hdl, errno,
4118                             dgettext(TEXT_DOMAIN, "cannot get event"));
4119                         break;
4120                 }
4121         }
4122
4123         return (error);
4124 }
4125
4126 void
4127 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
4128     char *pathname, size_t len)
4129 {
4130         zfs_cmd_t zc = {"\0"};
4131         boolean_t mounted = B_FALSE;
4132         char *mntpnt = NULL;
4133         char dsname[ZFS_MAX_DATASET_NAME_LEN];
4134
4135         if (dsobj == 0) {
4136                 /* special case for the MOS */
4137                 (void) snprintf(pathname, len, "<metadata>:<0x%llx>",
4138                     (longlong_t)obj);
4139                 return;
4140         }
4141
4142         /* get the dataset's name */
4143         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
4144         zc.zc_obj = dsobj;
4145         if (ioctl(zhp->zpool_hdl->libzfs_fd,
4146             ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
4147                 /* just write out a path of two object numbers */
4148                 (void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
4149                     (longlong_t)dsobj, (longlong_t)obj);
4150                 return;
4151         }
4152         (void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
4153
4154         /* find out if the dataset is mounted */
4155         mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
4156
4157         /* get the corrupted object's path */
4158         (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
4159         zc.zc_obj = obj;
4160         if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
4161             &zc) == 0) {
4162                 if (mounted) {
4163                         (void) snprintf(pathname, len, "%s%s", mntpnt,
4164                             zc.zc_value);
4165                 } else {
4166                         (void) snprintf(pathname, len, "%s:%s",
4167                             dsname, zc.zc_value);
4168                 }
4169         } else {
4170                 (void) snprintf(pathname, len, "%s:<0x%llx>", dsname,
4171                     (longlong_t)obj);
4172         }
4173         free(mntpnt);
4174 }
4175
4176 /*
4177  * Read the EFI label from the config, if a label does not exist then
4178  * pass back the error to the caller. If the caller has passed a non-NULL
4179  * diskaddr argument then we set it to the starting address of the EFI
4180  * partition.
4181  */
4182 static int
4183 read_efi_label(nvlist_t *config, diskaddr_t *sb)
4184 {
4185         char *path;
4186         int fd;
4187         char diskname[MAXPATHLEN];
4188         int err = -1;
4189
4190         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
4191                 return (err);
4192
4193         (void) snprintf(diskname, sizeof (diskname), "%s%s", DISK_ROOT,
4194             strrchr(path, '/'));
4195         if ((fd = open(diskname, O_RDONLY|O_DIRECT)) >= 0) {
4196                 struct dk_gpt *vtoc;
4197
4198                 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
4199                         if (sb != NULL)
4200                                 *sb = vtoc->efi_parts[0].p_start;
4201                         efi_free(vtoc);
4202                 }
4203                 (void) close(fd);
4204         }
4205         return (err);
4206 }
4207
4208 /*
4209  * determine where a partition starts on a disk in the current
4210  * configuration
4211  */
4212 static diskaddr_t
4213 find_start_block(nvlist_t *config)
4214 {
4215         nvlist_t **child;
4216         uint_t c, children;
4217         diskaddr_t sb = MAXOFFSET_T;
4218         uint64_t wholedisk;
4219
4220         if (nvlist_lookup_nvlist_array(config,
4221             ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
4222                 if (nvlist_lookup_uint64(config,
4223                     ZPOOL_CONFIG_WHOLE_DISK,
4224                     &wholedisk) != 0 || !wholedisk) {
4225                         return (MAXOFFSET_T);
4226                 }
4227                 if (read_efi_label(config, &sb) < 0)
4228                         sb = MAXOFFSET_T;
4229                 return (sb);
4230         }
4231
4232         for (c = 0; c < children; c++) {
4233                 sb = find_start_block(child[c]);
4234                 if (sb != MAXOFFSET_T) {
4235                         return (sb);
4236                 }
4237         }
4238         return (MAXOFFSET_T);
4239 }
4240
4241 static int
4242 zpool_label_disk_check(char *path)
4243 {
4244         struct dk_gpt *vtoc;
4245         int fd, err;
4246
4247         if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
4248                 return (errno);
4249
4250         if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
4251                 (void) close(fd);
4252                 return (err);
4253         }
4254
4255         if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
4256                 efi_free(vtoc);
4257                 (void) close(fd);
4258                 return (EIDRM);
4259         }
4260
4261         efi_free(vtoc);
4262         (void) close(fd);
4263         return (0);
4264 }
4265
/*
 * Generate a unique partition name for the ZFS member.  Partitions must
 * have unique names to ensure udev will be able to create symlinks under
 * /dev/disk/by-partlabel/ for all pool members.  The partition names are
 * of the form <pool>-<unique-id>.
 */
static void
zpool_label_name(char *label_name, int label_size)
{
	uint64_t id = 0;
	int fd;

	fd = open("/dev/urandom", O_RDONLY);
	if (fd >= 0) {
		unsigned char *p = (unsigned char *)&id;
		size_t resid = sizeof (id);
		ssize_t n;

		/*
		 * read() may return fewer bytes than requested or fail
		 * with EINTR; loop until the id is completely filled so
		 * a short read does not yield a partially-random value.
		 */
		while (resid != 0) {
			n = read(fd, p, resid);
			if (n <= 0) {
				if (n < 0 && errno == EINTR)
					continue;
				id = 0;	/* fall back to rand() below */
				break;
			}
			p += (size_t)n;
			resid -= (size_t)n;
		}

		close(fd);
	}

	/*
	 * NOTE(review): with a typical RAND_MAX of 2^31-1 this fallback
	 * never sets bits 31 and 63; acceptable for a last-resort id.
	 */
	if (id == 0)
		id = (((uint64_t)rand()) << 32) | (uint64_t)rand();

	snprintf(label_name, label_size, "zfs-%016llx",
	    (unsigned long long)id);
}
4291
4292 /*
4293  * Label an individual disk.  The name provided is the short name,
4294  * stripped of any leading /dev path.
4295  */
4296 int
4297 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
4298 {
4299         char path[MAXPATHLEN];
4300         struct dk_gpt *vtoc;
4301         int rval, fd;
4302         size_t resv = EFI_MIN_RESV_SIZE;
4303         uint64_t slice_size;
4304         diskaddr_t start_block;
4305         char errbuf[1024];
4306
4307         /* prepare an error message just in case */
4308         (void) snprintf(errbuf, sizeof (errbuf),
4309             dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
4310
4311         if (zhp) {
4312                 nvlist_t *nvroot;
4313
4314                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
4315                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
4316
4317                 if (zhp->zpool_start_block == 0)
4318                         start_block = find_start_block(nvroot);
4319                 else
4320                         start_block = zhp->zpool_start_block;
4321                 zhp->zpool_start_block = start_block;
4322         } else {
4323                 /* new pool */
4324                 start_block = NEW_START_BLOCK;
4325         }
4326
4327         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4328
4329         if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) {
4330                 /*
4331                  * This shouldn't happen.  We've long since verified that this
4332                  * is a valid device.
4333                  */
4334                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
4335                     "label '%s': unable to open device: %d"), path, errno);
4336                 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
4337         }
4338
4339         if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
4340                 /*
4341                  * The only way this can fail is if we run out of memory, or we
4342                  * were unable to read the disk's capacity
4343                  */
4344                 if (errno == ENOMEM)
4345                         (void) no_memory(hdl);
4346
4347                 (void) close(fd);
4348                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
4349                     "label '%s': unable to read disk capacity"), path);
4350
4351                 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
4352         }
4353
4354         slice_size = vtoc->efi_last_u_lba + 1;
4355         slice_size -= EFI_MIN_RESV_SIZE;
4356         if (start_block == MAXOFFSET_T)
4357                 start_block = NEW_START_BLOCK;
4358         slice_size -= start_block;
4359         slice_size = P2ALIGN(slice_size, PARTITION_END_ALIGNMENT);
4360
4361         vtoc->efi_parts[0].p_start = start_block;
4362         vtoc->efi_parts[0].p_size = slice_size;
4363
4364         /*
4365          * Why we use V_USR: V_BACKUP confuses users, and is considered
4366          * disposable by some EFI utilities (since EFI doesn't have a backup
4367          * slice).  V_UNASSIGNED is supposed to be used only for zero size
4368          * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
4369          * etc. were all pretty specific.  V_USR is as close to reality as we
4370          * can get, in the absence of V_OTHER.
4371          */
4372         vtoc->efi_parts[0].p_tag = V_USR;
4373         zpool_label_name(vtoc->efi_parts[0].p_name, EFI_PART_NAME_LEN);
4374
4375         vtoc->efi_parts[8].p_start = slice_size + start_block;
4376         vtoc->efi_parts[8].p_size = resv;
4377         vtoc->efi_parts[8].p_tag = V_RESERVED;
4378
4379         rval = efi_write(fd, vtoc);
4380
4381         /* Flush the buffers to disk and invalidate the page cache. */
4382         (void) fsync(fd);
4383         (void) ioctl(fd, BLKFLSBUF);
4384
4385         if (rval == 0)
4386                 rval = efi_rescan(fd);
4387
4388         /*
4389          * Some block drivers (like pcata) may not support EFI GPT labels.
4390          * Print out a helpful error message directing the user to manually
4391          * label the disk and give a specific slice.
4392          */
4393         if (rval != 0) {
4394                 (void) close(fd);
4395                 efi_free(vtoc);
4396
4397                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
4398                     "parted(8) and then provide a specific slice: %d"), rval);
4399                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4400         }
4401
4402         (void) close(fd);
4403         efi_free(vtoc);
4404
4405         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4406         (void) zfs_append_partition(path, MAXPATHLEN);
4407
4408         /* Wait to udev to signal use the device has settled. */
4409         rval = zpool_label_disk_wait(path, DISK_LABEL_WAIT);
4410         if (rval) {
4411                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
4412                     "detect device partitions on '%s': %d"), path, rval);
4413                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4414         }
4415
4416         /* We can't be to paranoid.  Read the label back and verify it. */
4417         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4418         rval = zpool_label_disk_check(path);
4419         if (rval) {
4420                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
4421                     "EFI label on '%s' is damaged.  Ensure\nthis device "
4422                     "is not in in use, and is functioning properly: %d"),
4423                     path, rval);
4424                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4425         }
4426
4427         return (0);
4428 }
4429
/*
 * Allocate and return the underlying device name for a device mapper device.
 * If a device mapper device maps to multiple devices, return the first device.
 *
 * For example, dm_name = "/dev/dm-0" could return "/dev/sda". Symlinks to a
 * DM device (like /dev/disk/by-vdev/A0) are also allowed.
 *
 * Returns device name, or NULL on error or no match.  If dm_name is not a DM
 * device then return NULL.
 *
 * NOTE: The returned name string must be *freed*.
 */
char *
dm_get_underlying_path(char *dm_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char *realp;
	char *tmp = NULL;
	char *path = NULL;
	char *dev_str;
	int size;

	if (dm_name == NULL)
		return (NULL);

	/* dm name may be a symlink (like /dev/disk/by-vdev/A0) */
	realp = realpath(dm_name, NULL);
	if (realp == NULL)
		return (NULL);

	/*
	 * If they preface 'dev' with a path (like "/dev") then strip it off.
	 * We just want the 'dm-N' part.
	 */
	tmp = strrchr(realp, '/');
	if (tmp != NULL)
		dev_str = tmp + 1;    /* +1 since we want the chr after '/' */
	else
		dev_str = realp;	/* no '/': use the resolved name as-is */

	size = asprintf(&tmp, "/sys/block/%s/slaves/", dev_str);
	if (size == -1 || !tmp) {
		tmp = NULL;	/* asprintf leaves tmp undefined on failure */
		goto end;
	}

	dp = opendir(tmp);
	if (dp == NULL)
		goto end;

	/* Return first sd* entry in /sys/block/dm-N/slaves/ */
	while ((ep = readdir(dp))) {
		if (ep->d_type != DT_DIR) {	/* skip "." and ".." dirs */
			size = asprintf(&path, "/dev/%s", ep->d_name);
			if (size == -1)
				path = NULL;	/* undefined on failure */
			break;
		}
	}

end:
	if (dp != NULL)
		closedir(dp);
	free(tmp);
	free(realp);
	return (path);
}
4494
/*
 * Return 1 if device is a device mapper or multipath device.
 * Return 0 if not.
 */
int
zfs_dev_is_dm(char *dev_name)
{
	char *underlying;

	underlying = dm_get_underlying_path(dev_name);
	if (underlying == NULL)
		return (0);

	/* We only needed to know a slave exists; discard its name. */
	free(underlying);
	return (1);
}
4511
4512 /*
4513  * By "whole disk" we mean an entire physical disk (something we can
4514  * label, toggle the write cache on, etc.) as opposed to the full
4515  * capacity of a pseudo-device such as lofi or did.  We act as if we
4516  * are labeling the disk, which should be a pretty good test of whether
4517  * it's a viable device or not.  Returns B_TRUE if it is and B_FALSE if
4518  * it isn't.
4519  */
4520 int
4521 zfs_dev_is_whole_disk(char *dev_name)
4522 {
4523         struct dk_gpt *label;
4524         int fd;
4525
4526         if ((fd = open(dev_name, O_RDONLY | O_DIRECT)) < 0)
4527                 return (0);
4528
4529         if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
4530                 (void) close(fd);
4531                 return (0);
4532         }
4533
4534         efi_free(label);
4535         (void) close(fd);
4536
4537         return (1);
4538 }
4539
/*
 * Lookup the underlying device for a device name
 *
 * Often you'll have a symlink to a device, a partition device,
 * or a multipath device, and want to look up the underlying device.
 * This function returns the underlying device name.  If the device
 * name is already the underlying device, then just return the same
 * name.  If the device is a DM device with multiple underlying devices
 * then return the first one.
 *
 * For example:
 *
 * 1. /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 -> ../../sda
 * dev_name:    /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001
 * returns:     /dev/sda
 *
 * 2. /dev/mapper/mpatha (made up of /dev/sda and /dev/sdb)
 * dev_name:    /dev/mapper/mpatha
 * returns:     /dev/sda (first device)
 *
 * 3. /dev/sda (already the underlying device)
 * dev_name:    /dev/sda
 * returns:     /dev/sda
 *
 * 4. /dev/dm-3 (mapped to /dev/sda)
 * dev_name:    /dev/dm-3
 * returns:     /dev/sda
 *
 * 5. /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 -> ../../sdb9
 * dev_name:    /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9
 * returns:     /dev/sdb
 *
 * 6. /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a -> ../dev/sda2
 * dev_name:    /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a
 * returns:     /dev/sda
 *
 * Returns underlying device name, or NULL on error or no match.
 *
 * NOTE: The returned name string must be *freed*.
 */
char *
zfs_get_underlying_path(char *dev_name)
{
	char *resolved;
	char *stripped = NULL;

	if (dev_name == NULL)
		return (NULL);

	/* Prefer the DM slave device; otherwise just resolve symlinks. */
	resolved = dm_get_underlying_path(dev_name);
	if (resolved == NULL)
		resolved = realpath(dev_name, NULL);

	if (resolved != NULL) {
		stripped = zfs_strip_partition_path(resolved);
		free(resolved);
	}

	return (stripped);
}
4602
/*
 * Given a dev name like "sda", return the full enclosure sysfs path to
 * the disk.  You can also pass in the name with "/dev" prepended
 * to it (like /dev/sda).
 *
 * For example, disk "sda" in enclosure slot 1:
 *     dev:            "sda"
 *     returns:        "/sys/class/enclosure/1:0:3:0/Slot 1"
 *
 * 'dev' must be a non-devicemapper device.
 *
 * Returned string must be freed.  Returns NULL on error or no match.
 */
char *
zfs_get_enclosure_sysfs_path(char *dev_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char buf[MAXPATHLEN];
	char *tmp1 = NULL;
	char *tmp2 = NULL;
	char *tmp3 = NULL;
	char *path = NULL;
	ssize_t size;
	int tmpsize;

	if (dev_name == NULL)
		return (NULL);

	/* If they preface 'dev' with a path (like "/dev") then strip it off */
	tmp1 = strrchr(dev_name, '/');
	if (tmp1 != NULL)
		dev_name = tmp1 + 1;    /* +1 since we want the chr after '/' */

	tmpsize = asprintf(&tmp1, "/sys/block/%s/device", dev_name);
	if (tmpsize == -1 || tmp1 == NULL) {
		tmp1 = NULL;	/* asprintf leaves tmp1 undefined on failure */
		goto end;
	}

	dp = opendir(tmp1);
	if (dp == NULL)
		goto end;	/* tmp1 is released at 'end' */

	/*
	 * Look though all sysfs entries in /sys/block/<dev>/device for
	 * the enclosure symlink.
	 */
	while ((ep = readdir(dp))) {
		/* Ignore everything that's not our enclosure_device link */
		if (strstr(ep->d_name, "enclosure_device") == NULL)
			continue;

		if (asprintf(&tmp2, "%s/%s", tmp1, ep->d_name) == -1 ||
		    tmp2 == NULL) {
			tmp2 = NULL;	/* undefined on asprintf failure */
			break;
		}

		size = readlink(tmp2, buf, sizeof (buf));

		/* Did readlink fail or crop the link name? */
		if (size == -1 || size >= sizeof (buf)) {
			free(tmp2);
			tmp2 = NULL;	/* To make free() at the end a NOP */
			break;
		}

		/*
		 * We got a valid link.  readlink() doesn't terminate strings
		 * so we have to do it.
		 */
		buf[size] = '\0';

		/*
		 * Our link will look like:
		 *
		 * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1"
		 *
		 * We want to grab the "enclosure/1:0:3:0/SLOT 1" part
		 */
		tmp3 = strstr(buf, "enclosure");
		if (tmp3 == NULL)
			break;

		if (asprintf(&path, "/sys/class/%s", tmp3) == -1) {
			/* If asprintf() fails, 'path' is undefined */
			path = NULL;
			break;
		}

		/*
		 * Found it.  Stop here so later matching entries cannot
		 * leak the strings we just allocated.
		 */
		break;
	}

end:
	free(tmp2);
	free(tmp1);

	if (dp != NULL)
		closedir(dp);

	return (path);
}