case HELP_PROMOTE:
return (gettext("\tpromote <clone-filesystem>\n"));
case HELP_RECEIVE:
- return (gettext("\treceive [-vnFu] <filesystem|volume|"
+ return (gettext("\treceive [-vnsFu] <filesystem|volume|"
"snapshot>\n"
- "\treceive [-vnFu] [-o origin=<snapshot>] [-d | -e] "
- "<filesystem>\n"));
+ "\treceive [-vnsFu] [-o origin=<snapshot>] [-d | -e] "
+ "<filesystem>\n"
+ "\treceive -A <filesystem|volume>\n"));
case HELP_RENAME:
return (gettext("\trename [-f] <filesystem|volume|snapshot> "
"<filesystem|volume|snapshot>\n"
return (gettext("\tsend [-DnPpRvLe] [-[iI] snapshot] "
"<snapshot>\n"
"\tsend [-Le] [-i snapshot|bookmark] "
- "<filesystem|volume|snapshot>\n"));
+ "<filesystem|volume|snapshot>\n"
+ "\tsend [-nvPe] -t <receive_resume_token>\n"));
case HELP_SET:
return (gettext("\tset <property=value> ... "
"<filesystem|volume|snapshot> ...\n"));
{
char *fromname = NULL;
char *toname = NULL;
+ char *resume_token = NULL;
char *cp;
zfs_handle_t *zhp;
sendflags_t flags = { 0 };
boolean_t extraverbose = B_FALSE;
/* check options */
- while ((c = getopt(argc, argv, ":i:I:RDpvnPLe")) != -1) {
+ while ((c = getopt(argc, argv, ":i:I:RDpvnPLet:")) != -1) {
switch (c) {
case 'i':
if (fromname)
case 'e':
flags.embed_data = B_TRUE;
break;
+ case 't':
+ resume_token = optarg;
+ break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
argc -= optind;
argv += optind;
- /* check number of arguments */
- if (argc < 1) {
- (void) fprintf(stderr, gettext("missing snapshot argument\n"));
- usage(B_FALSE);
- }
- if (argc > 1) {
- (void) fprintf(stderr, gettext("too many arguments\n"));
- usage(B_FALSE);
+ if (resume_token != NULL) {
+ if (fromname != NULL || flags.replicate || flags.props ||
+ flags.dedup) {
+ (void) fprintf(stderr,
+ gettext("invalid flags combined with -t\n"));
+ usage(B_FALSE);
+ }
+ if (argc != 0) {
+ (void) fprintf(stderr, gettext("no additional "
+ "arguments are permitted with -t\n"));
+ usage(B_FALSE);
+ }
+ } else {
+ if (argc < 1) {
+ (void) fprintf(stderr,
+ gettext("missing snapshot argument\n"));
+ usage(B_FALSE);
+ }
+ if (argc > 1) {
+ (void) fprintf(stderr, gettext("too many arguments\n"));
+ usage(B_FALSE);
+ }
}
if (!flags.dryrun && isatty(STDOUT_FILENO)) {
return (1);
}
+ if (resume_token != NULL) {
+ return (zfs_send_resume(g_zfs, &flags, STDOUT_FILENO,
+ resume_token));
+ }
+
/*
* Special case sending a filesystem, or from a bookmark.
*/
}
/*
- * zfs receive [-vnFu] [-d | -e] <fs@snap>
- *
* Restore a backup stream from stdin.
*/
static int
{
int c, err;
recvflags_t flags = { 0 };
+ boolean_t abort_resumable = B_FALSE;
+
nvlist_t *props;
nvpair_t *nvp = NULL;
nomem();
/* check options */
- while ((c = getopt(argc, argv, ":o:denuvF")) != -1) {
+ while ((c = getopt(argc, argv, ":o:denuvFsA")) != -1) {
switch (c) {
case 'o':
if (parseprop(props, optarg) != 0)
case 'v':
flags.verbose = B_TRUE;
break;
+ case 's':
+ flags.resumable = B_TRUE;
+ break;
case 'F':
flags.force = B_TRUE;
break;
+ case 'A':
+ abort_resumable = B_TRUE;
+ break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
}
}
+ if (abort_resumable) {
+ if (flags.isprefix || flags.istail || flags.dryrun ||
+ flags.resumable || flags.nomount) {
+ (void) fprintf(stderr, gettext("invalid option"));
+ usage(B_FALSE);
+ }
+
+ char namebuf[ZFS_MAXNAMELEN];
+ (void) snprintf(namebuf, sizeof (namebuf),
+ "%s/%%recv", argv[0]);
+
+ if (zfs_dataset_exists(g_zfs, namebuf,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) {
+ zfs_handle_t *zhp = zfs_open(g_zfs,
+ namebuf, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+ if (zhp == NULL)
+ return (1);
+ err = zfs_destroy(zhp, B_FALSE);
+ } else {
+ zfs_handle_t *zhp = zfs_open(g_zfs,
+ argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+ if (zhp == NULL)
+ usage(B_FALSE);
+ if (!zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) ||
+ zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
+ NULL, 0, NULL, NULL, 0, B_TRUE) == -1) {
+ (void) fprintf(stderr,
+ gettext("'%s' does not have any "
+ "resumable receive state to abort\n"),
+ argv[0]);
+ return (1);
+ }
+ err = zfs_destroy(zhp, B_FALSE);
+ }
+
+ return (err != 0);
+ }
+
if (isatty(STDIN_FILENO)) {
(void) fprintf(stderr,
gettext("Error: Backup stream can not be read "
"You must redirect standard input.\n"));
return (1);
}
-
err = zfs_receive(g_zfs, argv[0], props, &flags, STDIN_FILENO, NULL);
return (err != 0);
return (0);
}
+ /*
+ * If this filesystem is inconsistent and has a receive resume
+ * token, we can not mount it.
+ */
+ if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) &&
+ zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
+ NULL, 0, NULL, NULL, 0, B_TRUE) == 0) {
+ if (!explicit)
+ return (0);
+
+ (void) fprintf(stderr, gettext("cannot %s '%s': "
+ "Contains partially-completed state from "
+ "\"zfs receive -r\", which can be resumed with "
+ "\"zfs send -t\"\n"),
+ cmdname, zfs_get_name(zhp));
+ return (1);
+ }
+
/*
* At this point, we have verified that the mountpoint and/or
* shareopts are appropriate for auto management. If the
(longlong_t)saved_cksum.zc_word[1],
(longlong_t)saved_cksum.zc_word[2],
(longlong_t)saved_cksum.zc_word[3]);
- exit(1);
+ return (0);
}
return (sizeof (*drr));
}
if (verbose)
(void) printf("\n");
- if ((DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
- DMU_COMPOUNDSTREAM) && drr->drr_payloadlen != 0) {
+ if (drr->drr_payloadlen != 0) {
nvlist_t *nv;
int sz = drr->drr_payloadlen;
AC_PATH_TOOL(SHUF, shuf, "")
AC_PATH_TOOL(SLEEP, sleep, "")
AC_PATH_TOOL(SORT, sort, "")
+ AC_PATH_TOOL(STAT, stat, "")
AC_PATH_TOOL(STRINGS, strings, "")
AC_PATH_TOOL(SU, su, "")
AC_PATH_TOOL(SUM, sum, "")
AC_PATH_TOOL(TAR, tar, "")
AC_PATH_TOOL(TOUCH, touch, "")
AC_PATH_TOOL(TR, tr, "")
+ AC_PATH_TOOL(TRUNCATE, truncate, "")
AC_PATH_TOOL(TRUE, true, "")
AC_PATH_TOOL(UMASK, umask, "")
AC_PATH_TOOL(UMOUNT, umount, "")
AC_PATH_TOOL(SHARE, exportfs, "")
AC_PATH_TOOL(SWAP, swapon, "")
AC_PATH_TOOL(SWAPADD, swapon, "")
- AC_PATH_TOOL(TRUNCATE, truncate, "")
AC_PATH_TOOL(UDEVADM, udevadm, "")
AC_PATH_TOOL(UFSDUMP, dump, "")
AC_PATH_TOOL(UFSRESTORE, restore, "")
extern int zfs_send(zfs_handle_t *, const char *, const char *,
sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **);
extern int zfs_send_one(zfs_handle_t *, const char *, int, enum lzc_send_flags);
+extern int zfs_send_resume(libzfs_handle_t *, sendflags_t *, int outfd,
+ const char *);
+extern nvlist_t *zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl,
+ const char *token);
extern int zfs_promote(zfs_handle_t *);
extern int zfs_hold(zfs_handle_t *, const char *, const char *,
/* set "canmount=off" on all modified filesystems */
boolean_t canmountoff;
+ /*
+ * Mark the file systems as "resumable" and do not destroy them if the
+ * receive is interrupted
+ */
+ boolean_t resumable;
+
/* byteswap flag is used internally; callers need not specify */
boolean_t byteswap;
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
*/
#ifndef _LIBZFS_CORE_H
};
int lzc_send(const char *, const char *, int, enum lzc_send_flags);
+int lzc_send_resume(const char *, const char *, int,
+ enum lzc_send_flags, uint64_t, uint64_t);
int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int);
+int lzc_receive_resumable(const char *, nvlist_t *, const char *,
+ boolean_t, int);
int lzc_send_space(const char *, const char *, uint64_t *);
boolean_t lzc_exists(const char *);
*/
/*
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
*/
#ifndef _SYS_DMU_IMPL_H
uint64_t dsa_featureflags;
uint64_t dsa_last_data_object;
uint64_t dsa_last_data_offset;
+ uint64_t dsa_resume_object;
+ uint64_t dsa_resume_offset;
} dmu_sendarg_t;
void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *);
struct dsl_dataset;
struct drr_begin;
struct avl_tree;
+struct dmu_replay_record;
-int dmu_send(const char *tosnap, const char *fromsnap,
- boolean_t embedok, boolean_t large_block_ok,
- int outfd, struct vnode *vp, offset_t *off);
+extern const char *recv_clone_name;
+
+int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
+ boolean_t large_block_ok, int outfd, uint64_t resumeobj, uint64_t resumeoff,
+ struct vnode *vp, offset_t *off);
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
uint64_t *sizep);
int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg,
typedef struct dmu_recv_cookie {
struct dsl_dataset *drc_ds;
+ struct dmu_replay_record *drc_drr_begin;
struct drr_begin *drc_drrb;
const char *drc_tofs;
const char *drc_tosnap;
boolean_t drc_newfs;
boolean_t drc_byteswap;
boolean_t drc_force;
+ boolean_t drc_resumable;
struct avl_tree *drc_guid_to_ds_map;
zio_cksum_t drc_cksum;
uint64_t drc_newsnapobj;
cred_t *drc_cred;
} dmu_recv_cookie_t;
-int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
- boolean_t force, char *origin, dmu_recv_cookie_t *drc);
+int dmu_recv_begin(char *tofs, char *tosnap,
+ struct dmu_replay_record *drr_begin,
+ boolean_t force, boolean_t resumable, char *origin, dmu_recv_cookie_t *drc);
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp,
int cleanup_fd, uint64_t *action_handlep);
int dmu_recv_end(dmu_recv_cookie_t *drc, void *owner);
int traverse_dataset(struct dsl_dataset *ds,
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
+int traverse_dataset_resume(struct dsl_dataset *ds, uint64_t txg_start,
+ zbookmark_phys_t *resume, int flags, blkptr_cb_t func, void *arg);
int traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr,
uint64_t txg_start, zbookmark_phys_t *resume, int flags,
blkptr_cb_t func, void *arg);
*/
#define DS_FIELD_LARGE_DNODE "org.zfsonlinux:large_dnode"
+/*
+ * These fields are set on datasets that are in the middle of a resumable
+ * receive, and allow the sender to resume the send if it is interrupted.
+ */
+#define DS_FIELD_RESUME_FROMGUID "com.delphix:resume_fromguid"
+#define DS_FIELD_RESUME_TONAME "com.delphix:resume_toname"
+#define DS_FIELD_RESUME_TOGUID "com.delphix:resume_toguid"
+#define DS_FIELD_RESUME_OBJECT "com.delphix:resume_object"
+#define DS_FIELD_RESUME_OFFSET "com.delphix:resume_offset"
+#define DS_FIELD_RESUME_BYTES "com.delphix:resume_bytes"
+#define DS_FIELD_RESUME_EMBEDOK "com.delphix:resume_embedok"
+
/*
* DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
* name lookups should be performed case-insensitively.
kmutex_t ds_sendstream_lock;
list_t ds_sendstreams;
+ /*
+ * When in the middle of a resumable receive, tracks how much
+ * progress we have made.
+ */
+ uint64_t ds_resume_object[TXG_SIZE];
+ uint64_t ds_resume_offset[TXG_SIZE];
+ uint64_t ds_resume_bytes[TXG_SIZE];
+
/* Protected by our dsl_dir's dd_lock */
list_t ds_prop_cbs;
void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
+boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds);
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
void dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds,
zprop_source_t source, uint64_t value, dmu_tx_t *tx);
void dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx);
+boolean_t dsl_dataset_is_zapified(dsl_dataset_t *ds);
+boolean_t dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds);
int dsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result);
void dsl_dataset_deactivate_feature(uint64_t dsobj,
ZFS_PROP_REDUNDANT_METADATA,
ZFS_PROP_OVERLAY,
ZFS_PROP_PREV_SNAP,
+ ZFS_PROP_RECEIVE_RESUME_TOKEN,
ZFS_NUM_PROPS
} zfs_prop_t;
* Feature flags for zfs send streams (flags in drr_versioninfo)
*/
-#define DMU_BACKUP_FEATURE_DEDUP (1<<0)
-#define DMU_BACKUP_FEATURE_DEDUPPROPS (1<<1)
-#define DMU_BACKUP_FEATURE_SA_SPILL (1<<2)
+#define DMU_BACKUP_FEATURE_DEDUP (1 << 0)
+#define DMU_BACKUP_FEATURE_DEDUPPROPS (1 << 1)
+#define DMU_BACKUP_FEATURE_SA_SPILL (1 << 2)
/* flags #3 - #15 are reserved for incompatible closed-source implementations */
-#define DMU_BACKUP_FEATURE_EMBED_DATA (1<<16)
-#define DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 (1<<17)
+#define DMU_BACKUP_FEATURE_EMBED_DATA (1 << 16)
+#define DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 (1 << 17)
/* flag #18 is reserved for a Delphix feature */
-#define DMU_BACKUP_FEATURE_LARGE_BLOCKS (1<<19)
-/* flag #20 is reserved for resumable streams */
-#define DMU_BACKUP_FEATURE_LARGE_DNODE (1<<21)
+#define DMU_BACKUP_FEATURE_LARGE_BLOCKS (1 << 19)
+#define DMU_BACKUP_FEATURE_RESUMING (1 << 20)
+#define DMU_BACKUP_FEATURE_LARGE_DNODE (1 << 21)
/*
* Mask of all supported backup features
#define DMU_BACKUP_FEATURE_MASK (DMU_BACKUP_FEATURE_DEDUP | \
DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL | \
DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 | \
- DMU_BACKUP_FEATURE_LARGE_BLOCKS | DMU_BACKUP_FEATURE_LARGE_DNODE)
+ DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_LARGE_BLOCKS | \
+ DMU_BACKUP_FEATURE_LARGE_DNODE)
/* Are all features in the given flag word currently supported? */
#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK))
+typedef enum dmu_send_resume_token_version {
+ ZFS_SEND_RESUME_TOKEN_VERSION = 1
+} dmu_send_resume_token_version_t;
+
/*
* The drr_versioninfo field of the dmu_replay_record has the
* following layout:
ZFS_CASE_MIXED
} zfs_case_t;
+/*
+ * Note: this struct must have the same layout in 32-bit and 64-bit, so
+ * that 32-bit processes (like /sbin/zfs) can pass it to the 64-bit
+ * kernel. Therefore, we add padding to it so that no "hidden" padding
+ * is automatically added on 64-bit (but not on 32-bit).
+ */
typedef struct zfs_cmd {
char zc_name[MAXPATHLEN]; /* name of pool or dataset */
uint64_t zc_nvlist_src; /* really (char *) */
uint64_t zc_iflags; /* internal to zfs(7fs) */
zfs_share_t zc_share;
dmu_objset_stats_t zc_objset_stats;
- struct drr_begin zc_begin_record;
+ dmu_replay_record_t zc_begin_record;
zinject_record_t zc_inject_record;
uint32_t zc_defer_destroy;
uint32_t zc_flags;
uint64_t zc_action_handle;
int zc_cleanup_fd;
uint8_t zc_simple;
- uint8_t zc_pad[3]; /* alignment */
+ boolean_t zc_resumable;
+ uint8_t zc_pad[2]; /* alignment */
uint64_t zc_sendobj;
uint64_t zc_fromobj;
uint64_t zc_createtxg;
#define ZVOL_OBJ 1ULL
#define ZVOL_ZAP_OBJ 2ULL
+extern void *zvol_tag;
+
extern void zvol_create_minors(spa_t *spa, const char *name, boolean_t async);
extern void zvol_remove_minors(spa_t *spa, const char *name, boolean_t async);
extern void zvol_rename_minors(spa_t *spa, const char *oldname,
return (value);
}
-static char *
+static const char *
getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source)
{
nvlist_t *nv;
- char *value;
+ const char *value;
*source = NULL;
if (nvlist_lookup_nvlist(zhp->zfs_props,
zfs_prop_to_name(prop), &nv) == 0) {
- verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
+ value = fnvlist_lookup_string(nv, ZPROP_VALUE);
(void) nvlist_lookup_string(nv, ZPROP_SOURCE, source);
} else {
verify(!zhp->zfs_props_table ||
zhp->zfs_props_table[prop] == B_TRUE);
- if ((value = (char *)zfs_prop_default_string(prop)) == NULL)
- value = "";
+ value = zfs_prop_default_string(prop);
*source = "";
}
{
char *source = NULL;
uint64_t val;
- char *str;
+ const char *str;
const char *strval;
boolean_t received = zfs_is_recvd_props_mode(zhp);
break;
case ZFS_PROP_ORIGIN:
- (void) strlcpy(propbuf, getprop_string(zhp, prop, &source),
- proplen);
- /*
- * If there is no parent at all, return failure to indicate that
- * it doesn't apply to this dataset.
- */
- if (propbuf[0] == '\0')
+ str = getprop_string(zhp, prop, &source);
+ if (str == NULL)
return (-1);
+ (void) strlcpy(propbuf, str, proplen);
break;
case ZFS_PROP_CLONES:
break;
case PROP_TYPE_STRING:
- (void) strlcpy(propbuf,
- getprop_string(zhp, prop, &source), proplen);
+ str = getprop_string(zhp, prop, &source);
+ if (str == NULL)
+ return (-1);
+ (void) strlcpy(propbuf, str, proplen);
break;
case PROP_TYPE_INDEX:
return (0);
}
+ /*
+ * If this filesystem is inconsistent and has a receive resume
+ * token, we can not mount it.
+ */
+ if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) &&
+ zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
+ NULL, 0, NULL, NULL, 0, B_TRUE) == 0) {
+ zfs_close(zhp);
+ return (0);
+ }
+
libzfs_add_handle(cbp, zhp);
if (zfs_iter_filesystems(zhp, mount_cb, cbp) != 0) {
zfs_close(zhp);
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved
#include "zfs_prop.h"
#include "zfs_fletcher.h"
#include "libzfs_impl.h"
+#include <zlib.h>
#include <sys/zio_checksum.h>
#include <sys/ddt.h>
#include <sys/socket.h>
static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *,
recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int,
uint64_t *);
+static int guid_to_name(libzfs_handle_t *, const char *,
+ uint64_t, boolean_t, char *);
static const zio_cksum_t zero_cksum = { { 0 } };
{
dedup_arg_t *dda = arg;
char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE);
- dmu_replay_record_t thedrr;
+ dmu_replay_record_t thedrr = { 0 };
dmu_replay_record_t *drr = &thedrr;
FILE *ofp;
int outfd;
DMU_BACKUP_FEATURE_DEDUPPROPS);
DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
- if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
- DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
+ if (drr->drr_payloadlen != 0) {
sz = drr->drr_payloadlen;
if (sz > SPA_MAXBLOCKSIZE) {
send_progress_thread(void *arg)
{
progress_arg_t *pa = arg;
-
zfs_cmd_t zc = {"\0"};
zfs_handle_t *zhp = pa->pa_zhp;
libzfs_handle_t *hdl = zhp->zfs_hdl;
unsigned long long bytes;
char buf[16];
-
time_t t;
struct tm *tm;
- assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
if (!pa->pa_parsable)
}
}
+static void
+send_print_verbose(FILE *fout, const char *tosnap, const char *fromsnap,
+ uint64_t size, boolean_t parsable)
+{
+ if (parsable) {
+ if (fromsnap != NULL) {
+ (void) fprintf(fout, "incremental\t%s\t%s",
+ fromsnap, tosnap);
+ } else {
+ (void) fprintf(fout, "full\t%s",
+ tosnap);
+ }
+ } else {
+ if (fromsnap != NULL) {
+ if (strchr(fromsnap, '@') == NULL &&
+ strchr(fromsnap, '#') == NULL) {
+ (void) fprintf(fout, dgettext(TEXT_DOMAIN,
+ "send from @%s to %s"),
+ fromsnap, tosnap);
+ } else {
+ (void) fprintf(fout, dgettext(TEXT_DOMAIN,
+ "send from %s to %s"),
+ fromsnap, tosnap);
+ }
+ } else {
+ (void) fprintf(fout, dgettext(TEXT_DOMAIN,
+ "full send of %s"),
+ tosnap);
+ }
+ }
+
+ if (size != 0) {
+ if (parsable) {
+ (void) fprintf(fout, "\t%llu",
+ (longlong_t)size);
+ } else {
+ char buf[16];
+ zfs_nicenum(size, buf, sizeof (buf));
+ (void) fprintf(fout, dgettext(TEXT_DOMAIN,
+ " estimated size is %s"), buf);
+ }
+ }
+ (void) fprintf(fout, "\n");
+}
+
static int
dump_snapshot(zfs_handle_t *zhp, void *arg)
{
(sdd->fromorigin || sdd->replicate);
if (sdd->verbose) {
- uint64_t size;
- err = estimate_ioctl(zhp, sdd->prevsnap_obj,
+ uint64_t size = 0;
+ (void) estimate_ioctl(zhp, sdd->prevsnap_obj,
fromorigin, &size);
- if (sdd->parsable) {
- if (sdd->prevsnap[0] != '\0') {
- (void) fprintf(fout, "incremental\t%s\t%s",
- sdd->prevsnap, zhp->zfs_name);
- } else {
- (void) fprintf(fout, "full\t%s",
- zhp->zfs_name);
- }
- } else {
- (void) fprintf(fout, dgettext(TEXT_DOMAIN,
- "send from @%s to %s"),
- sdd->prevsnap, zhp->zfs_name);
- }
- if (err == 0) {
- if (sdd->parsable) {
- (void) fprintf(fout, "\t%llu\n",
- (longlong_t)size);
- } else {
- char buf[16];
- zfs_nicenum(size, buf, sizeof (buf));
- (void) fprintf(fout, dgettext(TEXT_DOMAIN,
- " estimated size is %s\n"), buf);
- }
- sdd->size += size;
- } else {
- (void) fprintf(fout, "\n");
- }
+ send_print_verbose(fout, zhp->zfs_name,
+ sdd->prevsnap[0] ? sdd->prevsnap : NULL,
+ size, sdd->parsable);
+ sdd->size += size;
}
if (!sdd->dryrun) {
return (0);
}
+nvlist_t *
+zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token)
+{
+ unsigned int version;
+ int nread, i;
+ unsigned long long checksum, packed_len;
+
+ /*
+ * Decode token header, which is:
+ * <token version>-<checksum of payload>-<uncompressed payload length>
+ * Note that the only supported token version is 1.
+ */
+ nread = sscanf(token, "%u-%llx-%llx-",
+ &version, &checksum, &packed_len);
+ if (nread != 3) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "resume token is corrupt (invalid format)"));
+ return (NULL);
+ }
+
+ if (version != ZFS_SEND_RESUME_TOKEN_VERSION) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "resume token is corrupt (invalid version %u)"),
+ version);
+ return (NULL);
+ }
+
+ /* convert hexadecimal representation to binary */
+ token = strrchr(token, '-') + 1;
+ int len = strlen(token) / 2;
+ unsigned char *compressed = zfs_alloc(hdl, len);
+ for (i = 0; i < len; i++) {
+ nread = sscanf(token + i * 2, "%2hhx", compressed + i);
+ if (nread != 1) {
+ free(compressed);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "resume token is corrupt "
+ "(payload is not hex-encoded)"));
+ return (NULL);
+ }
+ }
+
+ /* verify checksum */
+ zio_cksum_t cksum;
+ fletcher_4_native(compressed, len, &cksum);
+ if (cksum.zc_word[0] != checksum) {
+ free(compressed);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "resume token is corrupt (incorrect checksum)"));
+ return (NULL);
+ }
+
+ /* uncompress */
+ void *packed = zfs_alloc(hdl, packed_len);
+ uLongf packed_len_long = packed_len;
+ if (uncompress(packed, &packed_len_long, compressed, len) != Z_OK ||
+ packed_len_long != packed_len) {
+ free(packed);
+ free(compressed);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "resume token is corrupt (decompression failed)"));
+ return (NULL);
+ }
+
+ /* unpack nvlist */
+ nvlist_t *nv;
+ int error = nvlist_unpack(packed, packed_len, &nv, KM_SLEEP);
+ free(packed);
+ free(compressed);
+ if (error != 0) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "resume token is corrupt (nvlist_unpack failed)"));
+ return (NULL);
+ }
+ return (nv);
+}
+
+int
+zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
+ const char *resume_token)
+{
+ char errbuf[1024];
+ char *toname;
+ char *fromname = NULL;
+ uint64_t resumeobj, resumeoff, toguid, fromguid, bytes;
+ zfs_handle_t *zhp;
+ int error = 0;
+ char name[ZFS_MAXNAMELEN];
+ enum lzc_send_flags lzc_flags = 0;
+
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+ "cannot resume send"));
+
+ nvlist_t *resume_nvl =
+ zfs_send_resume_token_to_nvlist(hdl, resume_token);
+ if (resume_nvl == NULL) {
+ /*
+ * zfs_error_aux has already been set by
+ * zfs_send_resume_token_to_nvlist
+ */
+ return (zfs_error(hdl, EZFS_FAULT, errbuf));
+ }
+ if (flags->verbose) {
+ (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+ "resume token contents:\n"));
+ nvlist_print(stderr, resume_nvl);
+ }
+
+ if (nvlist_lookup_string(resume_nvl, "toname", &toname) != 0 ||
+ nvlist_lookup_uint64(resume_nvl, "object", &resumeobj) != 0 ||
+ nvlist_lookup_uint64(resume_nvl, "offset", &resumeoff) != 0 ||
+ nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 ||
+ nvlist_lookup_uint64(resume_nvl, "toguid", &toguid) != 0) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "resume token is corrupt"));
+ return (zfs_error(hdl, EZFS_FAULT, errbuf));
+ }
+ fromguid = 0;
+ (void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid);
+
+ if (flags->embed_data || nvlist_exists(resume_nvl, "embedok"))
+ lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
+
+ if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) {
+ if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "'%s' is no longer the same snapshot used in "
+ "the initial send"), toname);
+ } else {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "'%s' used in the initial send no longer exists"),
+ toname);
+ }
+ return (zfs_error(hdl, EZFS_BADPATH, errbuf));
+ }
+ zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
+ if (zhp == NULL) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "unable to access '%s'"), name);
+ return (zfs_error(hdl, EZFS_BADPATH, errbuf));
+ }
+
+ if (fromguid != 0) {
+ if (guid_to_name(hdl, toname, fromguid, B_TRUE, name) != 0) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "incremental source %#llx no longer exists"),
+ (longlong_t)fromguid);
+ return (zfs_error(hdl, EZFS_BADPATH, errbuf));
+ }
+ fromname = name;
+ }
+
+ if (flags->verbose) {
+ uint64_t size = 0;
+ error = lzc_send_space(zhp->zfs_name, fromname, &size);
+ if (error == 0)
+ size = MAX(0, (int64_t)(size - bytes));
+ send_print_verbose(stderr, zhp->zfs_name, fromname,
+ size, flags->parsable);
+ }
+
+ if (!flags->dryrun) {
+ progress_arg_t pa = { 0 };
+ pthread_t tid;
+ /*
+ * If progress reporting is requested, spawn a new thread to
+ * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
+ */
+ if (flags->progress) {
+ pa.pa_zhp = zhp;
+ pa.pa_fd = outfd;
+ pa.pa_parsable = flags->parsable;
+
+ error = pthread_create(&tid, NULL,
+ send_progress_thread, &pa);
+ if (error != 0) {
+ zfs_close(zhp);
+ return (error);
+ }
+ }
+
+ error = lzc_send_resume(zhp->zfs_name, fromname, outfd,
+ lzc_flags, resumeobj, resumeoff);
+
+ if (flags->progress) {
+ (void) pthread_cancel(tid);
+ (void) pthread_join(tid, NULL);
+ }
+
+ char errbuf[1024];
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+ "warning: cannot send '%s'"), zhp->zfs_name);
+
+ zfs_close(zhp);
+
+ switch (error) {
+ case 0:
+ return (0);
+ case EXDEV:
+ case ENOENT:
+ case EDQUOT:
+ case EFBIG:
+ case EIO:
+ case ENOLINK:
+ case ENOSPC:
+ case ENOSTR:
+ case ENXIO:
+ case EPIPE:
+ case ERANGE:
+ case EFAULT:
+ case EROFS:
+ zfs_error_aux(hdl, strerror(errno));
+ return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
+
+ default:
+ return (zfs_standard_error(hdl, errno, errbuf));
+ }
+ }
+
+
+ zfs_close(zhp);
+
+ return (error);
+}
+
/*
* Generate a send stream for the dataset identified by the argument zhp.
*
dmu_replay_record_t drr = { 0 };
char *packbuf = NULL;
size_t buflen = 0;
- zio_cksum_t zc = { { 0 } };
+ zio_cksum_t zc;
+
+ ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
if (flags->replicate || flags->props) {
nvlist_t *hdrnv;
typedef struct guid_to_name_data {
uint64_t guid;
+ boolean_t bookmark_ok;
char *name;
char *skip;
} guid_to_name_data_t;
guid_to_name_cb(zfs_handle_t *zhp, void *arg)
{
guid_to_name_data_t *gtnd = arg;
+ const char *slash;
int err;
if (gtnd->skip != NULL &&
- strcmp(zhp->zfs_name, gtnd->skip) == 0) {
+ (slash = strrchr(zhp->zfs_name, '/')) != NULL &&
+ strcmp(slash + 1, gtnd->skip) == 0) {
+ zfs_close(zhp);
return (0);
}
- if (zhp->zfs_dmustats.dds_guid == gtnd->guid) {
+ if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid) {
(void) strcpy(gtnd->name, zhp->zfs_name);
zfs_close(zhp);
return (EEXIST);
}
err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
+ if (err != EEXIST && gtnd->bookmark_ok)
+ err = zfs_iter_bookmarks(zhp, guid_to_name_cb, gtnd);
zfs_close(zhp);
return (err);
}
*/
static int
guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
- char *name)
+ boolean_t bookmark_ok, char *name)
{
- /* exhaustive search all local snapshots */
char pname[ZFS_MAXNAMELEN];
guid_to_name_data_t gtnd;
- int err = 0;
- zfs_handle_t *zhp;
- char *cp;
gtnd.guid = guid;
+ gtnd.bookmark_ok = bookmark_ok;
gtnd.name = name;
gtnd.skip = NULL;
- (void) strlcpy(pname, parent, sizeof (pname));
-
/*
- * Search progressively larger portions of the hierarchy. This will
+ * Search progressively larger portions of the hierarchy, starting
+ * with the filesystem specified by 'parent'. This will
* select the "most local" version of the origin snapshot in the case
* that there are multiple matching snapshots in the system.
*/
- while ((cp = strrchr(pname, '/')) != NULL) {
-
+ (void) strlcpy(pname, parent, sizeof (pname));
+ char *cp = strrchr(pname, '@');
+ if (cp == NULL)
+ cp = strchr(pname, '\0');
+ for (; cp != NULL; cp = strrchr(pname, '/')) {
/* Chop off the last component and open the parent */
*cp = '\0';
- zhp = make_dataset_handle(hdl, pname);
+ zfs_handle_t *zhp = make_dataset_handle(hdl, pname);
if (zhp == NULL)
continue;
-
- err = zfs_iter_children(zhp, guid_to_name_cb, >nd);
+ int err = guid_to_name_cb(zfs_handle_dup(zhp), >nd);
+ if (err != EEXIST)
+ err = zfs_iter_children(zhp, guid_to_name_cb, >nd);
+ if (err != EEXIST && bookmark_ok)
+ err = zfs_iter_bookmarks(zhp, guid_to_name_cb, >nd);
zfs_close(zhp);
if (err == EEXIST)
return (0);
/*
- * Remember the dataset that we already searched, so we
- * skip it next time through.
+ * Remember the last portion of the dataset so we skip it next
+ * time through (as we've already searched that portion of the
+ * hierarchy).
*/
- gtnd.skip = pname;
+ gtnd.skip = strrchr(pname, '/') + 1;
}
return (ENOENT);
switch (drr->drr_type) {
case DRR_BEGIN:
- /* NB: not to be used on v2 stream packages */
if (drr->drr_payloadlen != 0) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "invalid substream header"));
- return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+ (void) recv_read(hdl, fd, buf,
+ drr->drr_payloadlen, B_FALSE, NULL);
}
break;
return (-1);
}
+static void
+recv_ecksum_set_aux(libzfs_handle_t *hdl, const char *target_snap,
+ boolean_t resumable)
+{
+ char target_fs[ZFS_MAXNAMELEN];
+
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "checksum mismatch or incomplete stream"));
+
+ if (!resumable)
+ return;
+ (void) strlcpy(target_fs, target_snap, sizeof (target_fs));
+ *strchr(target_fs, '@') = '\0';
+ zfs_handle_t *zhp = zfs_open(hdl, target_fs,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+ if (zhp == NULL)
+ return;
+
+ char token_buf[ZFS_MAXPROPLEN];
+ int error = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
+ token_buf, sizeof (token_buf),
+ NULL, NULL, 0, B_TRUE);
+ if (error == 0) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "checksum mismatch or incomplete stream.\n"
+ "Partially received snapshot is saved.\n"
+ "A resuming stream can be generated on the sending "
+ "system by running:\n"
+ " zfs send -t %s"),
+ token_buf);
+ }
+ zfs_close(zhp);
+}
+
/*
* Restores a backup of tosnap from the file descriptor specified by infd.
*/
*/
if (drrb->drr_flags & DRR_FLAG_CLONE) {
if (guid_to_name(hdl, zc.zc_value,
- drrb->drr_fromguid, zc.zc_string) != 0) {
+ drrb->drr_fromguid, B_FALSE, zc.zc_string) != 0) {
zcmd_free_nvlists(&zc);
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"local origin for clone %s does not exist"),
zc.zc_string);
}
+ boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
+ DMU_BACKUP_FEATURE_RESUMING;
stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
- (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap);
+ (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming;
if (stream_wantsnewfs) {
/*
char suffix[ZFS_MAXNAMELEN];
(void) strcpy(suffix, strrchr(zc.zc_value, '/'));
if (guid_to_name(hdl, zc.zc_name, parent_snapguid,
- zc.zc_value) == 0) {
+ B_FALSE, zc.zc_value) == 0) {
*strchr(zc.zc_value, '@') = '\0';
(void) strcat(zc.zc_value, suffix);
}
char snap[ZFS_MAXNAMELEN];
(void) strcpy(snap, strchr(zc.zc_value, '@'));
if (guid_to_name(hdl, zc.zc_name, drrb->drr_fromguid,
- zc.zc_value) == 0) {
+ B_FALSE, zc.zc_value) == 0) {
*strchr(zc.zc_value, '@') = '\0';
(void) strcat(zc.zc_value, snap);
}
zfs_handle_t *zhp;
/*
- * Destination fs exists. Therefore this should either
- * be an incremental, or the stream specifies a new fs
- * (full stream or clone) and they want us to blow it
- * away (and have therefore specified -F and removed any
- * snapshots).
+ * Destination fs exists. It must be one of these cases:
+ * - an incremental send stream
+ * - the stream specifies a new fs (full stream or clone)
+ * and they want us to blow away the existing fs (and
+ * have therefore specified -F and removed any snapshots)
+ * - we are resuming a failed receive.
*/
if (stream_wantsnewfs) {
if (!flags->force) {
return (-1);
}
}
+
+ /*
+ * If we are resuming a newfs, set newfs here so that we will
+ * mount it if the recv succeeds this time. We can tell
+ * that it was a newfs on the first recv because the fs
+ * itself will be inconsistent (if the fs existed when we
+ * did the first recv, we would have received it into
+ * .../%recv).
+ */
+ if (resuming && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT))
+ newfs = B_TRUE;
+
zfs_close(zhp);
} else {
/*
newfs = B_TRUE;
}
- zc.zc_begin_record = drr_noswap->drr_u.drr_begin;
+ zc.zc_begin_record = *drr_noswap;
zc.zc_cookie = infd;
zc.zc_guid = flags->force;
+ zc.zc_resumable = flags->resumable;
if (flags->verbose) {
(void) printf("%s %s stream of %s into %s\n",
flags->dryrun ? "would receive" : "receiving",
(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
break;
case ECKSUM:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "invalid stream (checksum mismatch)"));
+ recv_ecksum_set_aux(hdl, zc.zc_value, flags->resumable);
(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
break;
case ENOTSUP:
* Restores a backup of tosnap from the file descriptor specified by infd.
* Return 0 on total success, -2 if some things couldn't be
* destroyed/renamed/promoted, -1 if some things couldn't be received.
- * (-1 will override -2).
+ * (-1 will override -2, if -1 and the resumable flag was specified the
+ * transfer can be resumed if the sending side supports it).
*/
int
zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props,
int
lzc_send(const char *snapname, const char *from, int fd,
enum lzc_send_flags flags)
+{
+ return (lzc_send_resume(snapname, from, fd, flags, 0, 0));
+}
+
+int
+lzc_send_resume(const char *snapname, const char *from, int fd,
+ enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
{
nvlist_t *args;
int err;
fnvlist_add_boolean(args, "largeblockok");
if (flags & LZC_SEND_FLAG_EMBED_DATA)
fnvlist_add_boolean(args, "embedok");
+ if (resumeobj != 0 || resumeoff != 0) {
+ fnvlist_add_uint64(args, "resume_object", resumeobj);
+ fnvlist_add_uint64(args, "resume_offset", resumeoff);
+ }
err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
nvlist_free(args);
return (err);
return (0);
}
-/*
- * The simplest receive case: receive from the specified fd, creating the
- * specified snapshot. Apply the specified properties a "received" properties
- * (which can be overridden by locally-set properties). If the stream is a
- * clone, its origin snapshot must be specified by 'origin'. The 'force'
- * flag will cause the target filesystem to be rolled back or destroyed if
- * necessary to receive.
- *
- * Return 0 on success or an errno on failure.
- *
- * Note: this interface does not work on dedup'd streams
- * (those with DMU_BACKUP_FEATURE_DEDUP).
- */
-int
-lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
- boolean_t force, int fd)
+static int
+lzc_receive_impl(const char *snapname, nvlist_t *props, const char *origin,
+ boolean_t force, boolean_t resumable, int fd)
{
/*
* The receive ioctl is still legacy, so we need to construct our own
char *atp;
char *packed = NULL;
size_t size;
- dmu_replay_record_t drr;
int error;
ASSERT3S(g_refcount, >, 0);
(void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string));
/* zc_begin_record is non-byteswapped BEGIN record */
- error = recv_read(fd, &drr, sizeof (drr));
+ error = recv_read(fd, &zc.zc_begin_record, sizeof (zc.zc_begin_record));
if (error != 0)
goto out;
- zc.zc_begin_record = drr.drr_u.drr_begin;
/* zc_cookie is fd to read from */
zc.zc_cookie = fd;
/* zc guid is force flag */
zc.zc_guid = force;
+ zc.zc_resumable = resumable;
+
/* zc_cleanup_fd is unused */
zc.zc_cleanup_fd = -1;
return (error);
}
+/*
+ * The simplest receive case: receive from the specified fd, creating the
+ * specified snapshot. Apply the specified properties as "received" properties
+ * (which can be overridden by locally-set properties). If the stream is a
+ * clone, its origin snapshot must be specified by 'origin'. The 'force'
+ * flag will cause the target filesystem to be rolled back or destroyed if
+ * necessary to receive.
+ *
+ * Return 0 on success or an errno on failure.
+ *
+ * Note: this interface does not work on dedup'd streams
+ * (those with DMU_BACKUP_FEATURE_DEDUP).
+ */
+int
+lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
+ boolean_t force, int fd)
+{
+ return (lzc_receive_impl(snapname, props, origin, force, B_FALSE, fd));
+}
+
+/*
+ * Like lzc_receive, but if the receive fails due to premature stream
+ * termination, the intermediate state will be preserved on disk. In this
+ * case, ECKSUM will be returned. The receive may subsequently be resumed
+ * with a resuming send stream generated by lzc_send_resume().
+ */
+int
+lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
+ boolean_t force, int fd)
+{
+ return (lzc_receive_impl(snapname, props, origin, force, B_TRUE, fd));
+}
+
/*
* Roll back this filesystem or volume to its most recent snapshot.
* If snapnamebuf is not NULL, it will be filled in with the name
return (0);
}
+void *zvol_tag = "zvol_tag";
+
void
zvol_create_minors(spa_t *spa, const char *name, boolean_t async)
{
.LP
.nf
-\fBzfs\fR \fBsend\fR [\fB-eL\fR] [\fB-i \fIsnapshot\fR|\fIbookmark\fR]\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
+\fBzfs\fR \fBsend\fR [\fB-Le\fR] [\fB-i \fIsnapshot\fR|\fIbookmark\fR]\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
.fi
.LP
.nf
-\fBzfs\fR \fBreceive\fR [\fB-vnFu\fR] [\fB-o origin\fR=\fIsnapshot\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
+\fBzfs\fR \fBsend\fR [\fB-Penv\fR] \fB-t\fR \fIreceive_resume_token\fR
.fi
.LP
.nf
-\fBzfs\fR \fBreceive\fR [\fB-vnFu\fR] [\fB-d\fR|\fB-e\fR] [\fB-o origin\fR=\fIsnapshot\fR] \fIfilesystem\fR
+\fBzfs\fR \fBreceive\fR [\fB-Fnsuv\fR] [\fB-o origin\fR=\fIsnapshot\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBreceive\fR [\fB-Fnsuv\fR] [\fB-d\fR|\fB-e\fR] [\fB-o origin\fR=\fIsnapshot\fR] \fIfilesystem\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBreceive\fR \fB-A\fR \fIfilesystem\fR|\fIvolume\fR
.fi
.LP
.sp
.ne 2
.na
+\fB\fBreceive_resume_token\fR\fR
+.ad
+.sp .6
+.RS 4n
+For filesystems or volumes which have saved partially-completed state from \fBzfs receive -s\fR , this opaque token can be provided to \fBzfs send -t\fR to resume and complete the \fBzfs receive\fR.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
\fB\fBreferenced\fR\fR
.ad
.sp .6
.sp
.ne 2
.na
-\fBzfs send\fR [\fB-eL\fR] [\fB-i\fR \fIsnapshot\fR|\fIbookmark\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
+\fBzfs send\fR [\fB-Le\fR] [\fB-i\fR \fIsnapshot\fR|\fIbookmark\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
.ad
.sp .6
.RS 4n
stream generated from a filesystem or volume is received, the default snapshot
name will be "--head--".
-.sp
-.ne 2
-.na
-\fB-i\fR \fIsnapshot\fR|\fIbookmark\fR
-.ad
-.sp .6
-.RS 4n
-Generate an incremental send stream. The incremental source must be an earlier
-snapshot in the destination's history. It will commonly be an earlier
-snapshot in the destination's filesystem, in which case it can be
-specified as the last component of the name (the \fB#\fR or \fB@\fR character
-and following).
-.sp
-If the incremental target is a clone, the incremental source can
-be the origin snapshot, or an earlier snapshot in the origin's filesystem,
-or the origin's origin, etc.
-.RE
-
.sp
.ne 2
.na
\fBembedded_data\fR feature.
.RE
+.sp
+.ne 2
+.na
+\fB-i\fR \fIsnapshot\fR|\fIbookmark\fR
+.ad
+.sp .6
+.RS 4n
+Generate an incremental send stream. The incremental source must be an earlier snapshot in the destination's history. It will commonly be an earlier snapshot in the destination's filesystem, in which case it can be specified as the last component of the name (the \fB#\fR or \fB@\fR character and following).
+.sp
+If the incremental target is a clone, the incremental source can be the origin snapshot, or an earlier snapshot in the origin's filesystem, or the origin's origin, etc.
+.RE
+
+.RE
+.sp
+.ne 2
+.na
+\fB\fBzfs send\fR [\fB-Penv\fR] \fB-t\fR \fIreceive_resume_token\fR\fR
+.ad
+.sp .6
+.RS 4n
+Creates a send stream which resumes an interrupted receive. The \fIreceive_resume_token\fR is the value of this property on the filesystem or volume that was being received into. See the documentation for \fBzfs receive -s\fR for more details.
+
+.RE
+
.RE
.sp
.ne 2
.na
-\fB\fBzfs receive\fR [\fB-vnFu\fR] [\fB-o origin\fR=\fIsnapshot\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR
+\fB\fBzfs receive\fR [\fB-Fnsuv\fR] [\fB-o origin\fR=\fIsnapshot\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR
.ad
.br
.na
-\fB\fBzfs receive\fR [\fB-vnFu\fR] [\fB-d\fR|\fB-e\fR] [\fB-o origin\fR=\fIsnapshot\fR] \fIfilesystem\fR\fR
+\fB\fBzfs receive\fR [\fB-Fnsuv\fR] [\fB-d\fR|\fB-e\fR] [\fB-o origin\fR=\fIsnapshot\fR] \fIfilesystem\fR\fR
.ad
.sp .6
.RS 4n
.sp
.ne 2
.na
-\fB\fB-d\fR\fR
+\fB\fB-F\fR\fR
.ad
.sp .6
.RS 4n
-Discard the first element of the sent snapshot's file system name, using the remaining elements to determine the name of the target file system for the new snapshot as described in the paragraph above.
+Force a rollback of the file system to the most recent snapshot before performing the receive operation. If receiving an incremental replication stream (for example, one generated by \fBzfs send -R -[iI]\fR), destroy snapshots and file systems that do not exist on the sending side.
.RE
.sp
.ne 2
+.mk
.na
-\fB\fB-e\fR\fR
+\fB\fB-n\fR\fR
.ad
.sp .6
.RS 4n
-Discard all but the last element of the sent snapshot's file system name, using that element to determine the name of the target file system for the new snapshot as described in the paragraph above.
+Do not actually receive the stream. This can be useful in conjunction with the \fB-v\fR option to verify the name the receive operation would use.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fB-s\fR\fR
+.ad
+.sp .6
+.RS 4n
+If the receive is interrupted, save the partially received state, rather than deleting it. Interruption may be due to premature termination of the stream (e.g. due to network failure or failure of the remote system if the stream is being read over a network connection), a checksum error in the stream, termination of the \fBzfs receive\fR process, or unclean shutdown of the system.
+.sp
+The receive can be resumed with a stream generated by \fBzfs send -t\fR token, where the \fItoken\fR is the value of the \fBreceive_resume_token\fR property of the filesystem or volume which is received into.
+.sp
+To use this flag, the storage pool must have the \fBextensible_dataset\fR feature enabled. See \fBzpool-features\fR(5) for details on ZFS feature flags.
.RE
.sp
.sp
.ne 2
.na
-\fB\fB-n\fR\fR
+\fB\fB-d\fR\fR
.ad
.sp .6
.RS 4n
-Do not actually receive the stream. This can be useful in conjunction with the \fB-v\fR option to verify the name the receive operation would use.
+Discard the first element of the sent snapshot's file system name, using the remaining elements to determine the name of the target file system for the new snapshot as described in the paragraph above.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fB-e\fR\fR
+.ad
+.sp .6
+.RS 4n
+Discard all but the last element of the sent snapshot's file system name, using that element to determine the name of the target file system for the new snapshot as described in the paragraph above.
.RE
.sp
Forces the stream to be received as a clone of the given snapshot. This is only valid if the stream is an incremental stream whose source is the same as the provided origin.
.RE
+.RE
+
.sp
.ne 2
.na
-\fB\fB-F\fR\fR
+\fB\fBzfs receive\fR [\fB-A\fR] \fIfilesystem\fR|\fIvolume\fR
.ad
.sp .6
.RS 4n
-Force a rollback of the file system to the most recent snapshot before performing the receive operation. If receiving an incremental replication stream (for example, one generated by \fBzfs send -R -[iI]\fR), destroy snapshots and file systems that do not exist on the sending side.
-.RE
+Abort an interrupted \fBzfs receive \fB-s\fR\fR, deleting its saved partially received state.
.RE
zprop_register_string(ZFS_PROP_SELINUX_ROOTCONTEXT, "rootcontext",
"none", PROP_DEFAULT, ZFS_TYPE_DATASET, "<selinux rootcontext>",
"ROOTCONTEXT");
+ zprop_register_string(ZFS_PROP_RECEIVE_RESUME_TOKEN,
+ "receive_resume_token",
+ NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+ "<string token>", "RESUMETOK");
/* readonly number properties */
zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY,
* checksum/compression/copies.
*/
if (ds != NULL) {
+ boolean_t needlock = B_FALSE;
+
+ /*
+ * Note: it's valid to open the objset if the dataset is
+ * long-held, in which case the pool_config lock will not
+ * be held.
+ */
+ if (!dsl_pool_config_held(dmu_objset_pool(os))) {
+ needlock = B_TRUE;
+ dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
+ }
err = dsl_prop_register(ds,
zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
primary_cache_changed_cb, os);
dnodesize_changed_cb, os);
}
}
+ if (needlock)
+ dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
if (err != 0) {
VERIFY(arc_buf_remove_ref(os->os_phys_buf,
&os->os_phys_buf));
{
int err = 0;
+ /*
+ * We shouldn't be doing anything with dsl_dataset_t's unless the
+ * pool_config lock is held, or the dataset is long-held.
+ */
+ ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool) ||
+ dsl_dataset_long_held(ds));
+
mutex_enter(&ds->ds_opening_lock);
if (ds->ds_objset == NULL) {
objset_t *os;
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2014 HybridCluster. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
int zfs_recv_queue_length = 16 * 1024 * 1024;
static char *dmu_recv_tag = "dmu_recv_tag";
-static const char *recv_clone_name = "%recv";
+const char *recv_clone_name = "%recv";
#define BP_SPAN(datablkszsec, indblkshift, level) \
(((uint64_t)datablkszsec) << (SPA_MINBLOCKSHIFT + \
(level) * (indblkshift - SPA_BLKPTRSHIFT)))
+static void byteswap_record(dmu_replay_record_t *drr);
+
struct send_thread_arg {
bqueue_t q;
dsl_dataset_t *ds; /* Dataset to traverse */
int flags; /* flags to pass to traverse_dataset */
int error_code;
boolean_t cancel;
+ zbookmark_phys_t resume;
};
struct send_block_record {
{
dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
dmu_sendarg_t *dsp = dbi->dbi_dsp;
- dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
+ dsl_dataset_t *ds = dmu_objset_ds(dsp->dsa_os);
ssize_t resid; /* have to get resid to get detailed errno */
ASSERT0(dbi->dbi_len % 8);
* that the receiving system doesn't have any dbufs in the range
* being freed. This is always true because there is a one-record
* constraint: we only send one WRITE record for any given
- * object+offset. We know that the one-record constraint is
+ * object,offset. We know that the one-record constraint is
* true because we always send data in increasing order by
* object,offset.
*
{
struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object);
+ if (object < dsp->dsa_resume_object) {
+ /*
+ * Note: when resuming, we will visit all the dnodes in
+ * the block of dnodes that we are resuming from. In
+ * this case it's unnecessary to send the dnodes prior to
+ * the one we are resuming from. We should be at most one
+ * block's worth of dnodes behind the resume point.
+ */
+ ASSERT3U(dsp->dsa_resume_object - object, <,
+ 1 << (DNODE_BLOCK_SHIFT - DNODE_SHIFT));
+ return (0);
+ }
+
if (dnp == NULL || dnp->dn_type == DMU_OT_NONE)
return (dump_freeobjects(dsp, object, 1));
uint64_t record_size;
int err = 0;
+ ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT ||
+ zb->zb_object >= sta->resume.zb_object);
+
if (sta->cancel)
return (SET_ERROR(EINTR));
struct send_block_record *data;
if (st_arg->ds != NULL) {
- err = traverse_dataset(st_arg->ds, st_arg->fromtxg,
- st_arg->flags, send_cb, arg);
+ err = traverse_dataset_resume(st_arg->ds,
+ st_arg->fromtxg, &st_arg->resume,
+ st_arg->flags, send_cb, st_arg);
+
if (err != EINTR)
st_arg->error_code = err;
}
ASSERT3U(zb->zb_level, >=, 0);
+ ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT ||
+ zb->zb_object >= dsa->dsa_resume_object);
+
if (zb->zb_object != DMU_META_DNODE_OBJECT &&
DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
return (0);
uint64_t offset;
ASSERT0(zb->zb_level);
+ ASSERT(zb->zb_object > dsa->dsa_resume_object ||
+ (zb->zb_object == dsa->dsa_resume_object &&
+ zb->zb_blkid * blksz >= dsa->dsa_resume_offset));
+
if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
&aflags, zb) != 0) {
*/
static int
dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
- zfs_bookmark_phys_t *ancestor_zb, boolean_t is_clone, boolean_t embedok,
- boolean_t large_block_ok, int outfd, vnode_t *vp, offset_t *off)
+ zfs_bookmark_phys_t *ancestor_zb,
+ boolean_t is_clone, boolean_t embedok, boolean_t large_block_ok, int outfd,
+ uint64_t resumeobj, uint64_t resumeoff,
+ vnode_t *vp, offset_t *off)
{
objset_t *os;
dmu_replay_record_t *drr;
uint64_t fromtxg = 0;
uint64_t featureflags = 0;
struct send_thread_arg to_arg;
+ void *payload = NULL;
+ size_t payload_len = 0;
struct send_block_record *to_data;
err = dmu_objset_from_ds(to_ds, &os);
DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo,
DMU_SUBSTREAM);
+ bzero(&to_arg, sizeof (to_arg));
+
#ifdef _KERNEL
if (dmu_objset_type(os) == DMU_OST_ZFS) {
uint64_t version;
featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA_LZ4;
}
+ if (resumeobj != 0 || resumeoff != 0) {
+ featureflags |= DMU_BACKUP_FEATURE_RESUMING;
+ }
+
DMU_SET_FEATUREFLAGS(drr->drr_u.drr_begin.drr_versioninfo,
featureflags);
dsp->dsa_pending_op = PENDING_NONE;
dsp->dsa_incremental = (ancestor_zb != NULL);
dsp->dsa_featureflags = featureflags;
+ dsp->dsa_resume_object = resumeobj;
+ dsp->dsa_resume_offset = resumeoff;
mutex_enter(&to_ds->ds_sendstream_lock);
list_insert_head(&to_ds->ds_sendstreams, dsp);
dsl_dataset_long_hold(to_ds, FTAG);
dsl_pool_rele(dp, tag);
- if (dump_record(dsp, NULL, 0) != 0) {
+ if (resumeobj != 0 || resumeoff != 0) {
+ dmu_object_info_t to_doi;
+ nvlist_t *nvl;
+ err = dmu_object_info(os, resumeobj, &to_doi);
+ if (err != 0)
+ goto out;
+ SET_BOOKMARK(&to_arg.resume, to_ds->ds_object, resumeobj, 0,
+ resumeoff / to_doi.doi_data_block_size);
+
+ nvl = fnvlist_alloc();
+ fnvlist_add_uint64(nvl, "resume_object", resumeobj);
+ fnvlist_add_uint64(nvl, "resume_offset", resumeoff);
+ payload = fnvlist_pack(nvl, &payload_len);
+ drr->drr_payloadlen = payload_len;
+ fnvlist_free(nvl);
+ }
+
+ err = dump_record(dsp, payload, payload_len);
+ fnvlist_pack_free(payload, payload_len);
+ if (err != 0) {
err = dsp->dsa_err;
goto out;
}
is_clone = (fromds->ds_dir != ds->ds_dir);
dsl_dataset_rele(fromds, FTAG);
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
- embedok, large_block_ok, outfd, vp, off);
+ embedok, large_block_ok, outfd, 0, 0, vp, off);
} else {
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
- embedok, large_block_ok, outfd, vp, off);
+ embedok, large_block_ok, outfd, 0, 0, vp, off);
}
dsl_dataset_rele(ds, FTAG);
return (err);
}
int
-dmu_send(const char *tosnap, const char *fromsnap,
- boolean_t embedok, boolean_t large_block_ok,
- int outfd, vnode_t *vp, offset_t *off)
+dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
+ boolean_t large_block_ok, int outfd, uint64_t resumeobj, uint64_t resumeoff,
+ vnode_t *vp, offset_t *off)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
return (err);
}
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
- embedok, large_block_ok, outfd, vp, off);
+ embedok, large_block_ok,
+ outfd, resumeobj, resumeoff, vp, off);
} else {
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
- embedok, large_block_ok, outfd, vp, off);
+ embedok, large_block_ok,
+ outfd, resumeobj, resumeoff, vp, off);
}
if (owned)
dsl_dataset_disown(ds, FTAG);
/* already checked */
ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
+ ASSERT(!(featureflags & DMU_BACKUP_FEATURE_RESUMING));
if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
DMU_COMPOUNDSTREAM ||
spa_version(dp->dp_spa) < SPA_VERSION_SA)
return (SET_ERROR(ENOTSUP));
+ if (drba->drba_cookie->drc_resumable &&
+ !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EXTENSIBLE_DATASET))
+ return (SET_ERROR(ENOTSUP));
+
/*
* The receiving code doesn't know how to translate a WRITE_EMBEDDED
* record to a plan WRITE record, so the pool must have the
{
dmu_recv_begin_arg_t *drba = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
+ objset_t *mos = dp->dp_meta_objset;
struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
const char *tofs = drba->drba_cookie->drc_tofs;
dsl_dataset_t *ds, *newds;
uint64_t dsobj;
int error;
- uint64_t crflags;
+ uint64_t crflags = 0;
- crflags = (drrb->drr_flags & DRR_FLAG_CI_DATA) ?
- DS_FLAG_CI_DATASET : 0;
+ if (drrb->drr_flags & DRR_FLAG_CI_DATA)
+ crflags |= DS_FLAG_CI_DATASET;
error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
if (error == 0) {
}
VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds));
+ if (drba->drba_cookie->drc_resumable) {
+ uint64_t one = 1;
+ uint64_t zero = 0;
+
+ dsl_dataset_zapify(newds, tx);
+ if (drrb->drr_fromguid != 0) {
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_FROMGUID,
+ 8, 1, &drrb->drr_fromguid, tx));
+ }
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_TOGUID,
+ 8, 1, &drrb->drr_toguid, tx));
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_TONAME,
+ 1, strlen(drrb->drr_toname) + 1, drrb->drr_toname, tx));
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_OBJECT,
+ 8, 1, &one, tx));
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_OFFSET,
+ 8, 1, &zero, tx));
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_BYTES,
+ 8, 1, &zero, tx));
+ if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
+ DMU_BACKUP_FEATURE_EMBED_DATA) {
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_EMBEDOK,
+ 8, 1, &one, tx));
+ }
+ }
+
dmu_buf_will_dirty(newds->ds_dbuf, tx);
dsl_dataset_phys(newds)->ds_flags |= DS_FLAG_INCONSISTENT;
spa_history_log_internal_ds(newds, "receive", tx, "");
}
+static int
+dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
+{
+ dmu_recv_begin_arg_t *drba = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
+ int error;
+ uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
+ dsl_dataset_t *ds;
+ const char *tofs = drba->drba_cookie->drc_tofs;
+ char recvname[ZFS_MAXNAMELEN];
+ uint64_t val;
+
+ /* already checked */
+ ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
+ ASSERT(featureflags & DMU_BACKUP_FEATURE_RESUMING);
+
+ if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
+ DMU_COMPOUNDSTREAM ||
+ drrb->drr_type >= DMU_OST_NUMTYPES)
+ return (SET_ERROR(EINVAL));
+
+ /* Verify pool version supports SA if SA_SPILL feature set */
+ if ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) &&
+ spa_version(dp->dp_spa) < SPA_VERSION_SA)
+ return (SET_ERROR(ENOTSUP));
+
+ /*
+ * The receiving code doesn't know how to translate a WRITE_EMBEDDED
+ * record to a plain WRITE record, so the pool must have the
+ * EMBEDDED_DATA feature enabled if the stream has WRITE_EMBEDDED
+ * records. Same with WRITE_EMBEDDED records that use LZ4 compression.
+ */
+ if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) &&
+ !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA))
+ return (SET_ERROR(ENOTSUP));
+ if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4) &&
+ !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
+ return (SET_ERROR(ENOTSUP));
+
+ (void) snprintf(recvname, sizeof (recvname), "%s/%s",
+ tofs, recv_clone_name);
+
+ if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) {
+ /* %recv does not exist; continue in tofs */
+ error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
+ if (error != 0)
+ return (error);
+ }
+
+ /* check that ds is marked inconsistent */
+ if (!DS_IS_INCONSISTENT(ds)) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EINVAL));
+ }
+
+ /* check that there is resuming data, and that the toguid matches */
+ if (!dsl_dataset_is_zapified(ds)) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EINVAL));
+ }
+ error = zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val);
+ if (error != 0 || drrb->drr_toguid != val) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EINVAL));
+ }
+
+ /*
+ * Check if the receive is still running. If so, it will be owned.
+ * Note that nothing else can own the dataset (e.g. after the receive
+ * fails) because it will be marked inconsistent.
+ */
+ if (dsl_dataset_has_owner(ds)) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EBUSY));
+ }
+
+ /* There should not be any snapshots of this fs yet. */
+ if (ds->ds_prev != NULL && ds->ds_prev->ds_dir == ds->ds_dir) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EINVAL));
+ }
+
+ /*
+ * Note: resume point will be checked when we process the first WRITE
+ * record.
+ */
+
+ /* check that the origin matches */
+ val = 0;
+ (void) zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val);
+ if (drrb->drr_fromguid != val) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EINVAL));
+ }
+
+ dsl_dataset_rele(ds, FTAG);
+ return (0);
+}
+
+static void
+dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx)
+{
+ dmu_recv_begin_arg_t *drba = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ const char *tofs = drba->drba_cookie->drc_tofs;
+ dsl_dataset_t *ds;
+ uint64_t dsobj;
+ char recvname[ZFS_MAXNAMELEN];
+
+ (void) snprintf(recvname, sizeof (recvname), "%s/%s",
+ tofs, recv_clone_name);
+
+ if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) {
+ /* %recv does not exist; continue in tofs */
+ VERIFY0(dsl_dataset_hold(dp, tofs, FTAG, &ds));
+ drba->drba_cookie->drc_newfs = B_TRUE;
+ }
+
+ /* clear the inconsistent flag so that we can own it */
+ ASSERT(DS_IS_INCONSISTENT(ds));
+ dmu_buf_will_dirty(ds->ds_dbuf, tx);
+ dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT;
+ dsobj = ds->ds_object;
+ dsl_dataset_rele(ds, FTAG);
+
+ VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &ds));
+
+ dmu_buf_will_dirty(ds->ds_dbuf, tx);
+ dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT;
+
+ ASSERT(!BP_IS_HOLE(dsl_dataset_get_blkptr(ds)));
+
+ drba->drba_cookie->drc_ds = ds;
+
+ spa_history_log_internal_ds(ds, "resume receive", tx, "");
+}
+
/*
* NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin()
* succeeds; otherwise we will leak the holds on the datasets.
*/
int
-dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
- boolean_t force, char *origin, dmu_recv_cookie_t *drc)
+dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin,
+ boolean_t force, boolean_t resumable, char *origin, dmu_recv_cookie_t *drc)
{
dmu_recv_begin_arg_t drba = { 0 };
- dmu_replay_record_t *drr;
bzero(drc, sizeof (dmu_recv_cookie_t));
- drc->drc_drrb = drrb;
+ drc->drc_drr_begin = drr_begin;
+ drc->drc_drrb = &drr_begin->drr_u.drr_begin;
drc->drc_tosnap = tosnap;
drc->drc_tofs = tofs;
drc->drc_force = force;
+ drc->drc_resumable = resumable;
drc->drc_cred = CRED();
- if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
+ if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
drc->drc_byteswap = B_TRUE;
- else if (drrb->drr_magic != DMU_BACKUP_MAGIC)
- return (SET_ERROR(EINVAL));
-
- drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
- drr->drr_type = DRR_BEGIN;
- drr->drr_u.drr_begin = *drc->drc_drrb;
- if (drc->drc_byteswap) {
- fletcher_4_incremental_byteswap(drr,
+ fletcher_4_incremental_byteswap(drr_begin,
sizeof (dmu_replay_record_t), &drc->drc_cksum);
- } else {
- fletcher_4_incremental_native(drr,
+ byteswap_record(drr_begin);
+ } else if (drc->drc_drrb->drr_magic == DMU_BACKUP_MAGIC) {
+ fletcher_4_incremental_native(drr_begin,
sizeof (dmu_replay_record_t), &drc->drc_cksum);
- }
- kmem_free(drr, sizeof (dmu_replay_record_t));
-
- if (drc->drc_byteswap) {
- drrb->drr_magic = BSWAP_64(drrb->drr_magic);
- drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
- drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
- drrb->drr_type = BSWAP_32(drrb->drr_type);
- drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
- drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
+ } else {
+ return (SET_ERROR(EINVAL));
}
drba.drba_origin = origin;
drba.drba_cookie = drc;
drba.drba_cred = CRED();
- return (dsl_sync_task(tofs, dmu_recv_begin_check, dmu_recv_begin_sync,
- &drba, 5, ZFS_SPACE_CHECK_NORMAL));
+ if (DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo) &
+ DMU_BACKUP_FEATURE_RESUMING) {
+ return (dsl_sync_task(tofs,
+ dmu_recv_resume_begin_check, dmu_recv_resume_begin_sync,
+ &drba, 5, ZFS_SPACE_CHECK_NORMAL));
+ } else {
+ return (dsl_sync_task(tofs,
+ dmu_recv_begin_check, dmu_recv_begin_sync,
+ &drba, 5, ZFS_SPACE_CHECK_NORMAL));
+ }
}
struct receive_record_arg {
*/
arc_buf_t *write_buf;
int payload_size;
+ uint64_t bytes_read; /* bytes read from stream when record created */
boolean_t eos_marker; /* Marks the end of the stream */
bqueue_node_t node;
};
objset_t *os;
boolean_t byteswap;
bqueue_t q;
+
/*
* These three args are used to signal to the main thread that we're
* done.
kmutex_t mutex;
kcondvar_t cv;
boolean_t done;
+
int err;
/* A map from guid to dataset to help handle dedup'd streams. */
avl_tree_t *guid_to_ds_map;
+ boolean_t resumable;
+ uint64_t last_object, last_offset;
+ uint64_t bytes_read; /* bytes read when current record created */
};
struct receive_arg {
objset_t *os;
vnode_t *vp; /* The vnode to read the stream from */
uint64_t voff; /* The current offset in the stream */
+ uint64_t bytes_read;
/*
* A record that has had its payload read in, but hasn't yet been handed
* off to the worker thread.
ra->voff, UIO_SYSSPACE, FAPPEND,
RLIM64_INFINITY, CRED(), &resid);
- if (resid == len - done)
- ra->err = SET_ERROR(EINVAL);
+ if (resid == len - done) {
+ /*
+ * Note: ECKSUM indicates that the receive
+ * was interrupted and can potentially be resumed.
+ */
+ ra->err = SET_ERROR(ECKSUM);
+ }
ra->voff += len - done - resid;
done = len - resid;
if (ra->err != 0)
return (ra->err);
}
+ ra->bytes_read += len;
+
ASSERT3U(done, ==, len);
return (0);
}
}
}
+static void
+save_resume_state(struct receive_writer_arg *rwa,
+ uint64_t object, uint64_t offset, dmu_tx_t *tx)
+{
+ int txgoff = dmu_tx_get_txg(tx) & TXG_MASK;
+
+ if (!rwa->resumable)
+ return;
+
+ /*
+ * We use ds_resume_bytes[] != 0 to indicate that we need to
+ * update this on disk, so it must not be 0.
+ */
+ ASSERT(rwa->bytes_read != 0);
+
+ /*
+ * We only resume from write records, which have a valid
+ * (non-meta-dnode) object number.
+ */
+ ASSERT(object != 0);
+
+ /*
+ * For resuming to work correctly, we must receive records in order,
+ * sorted by object,offset. This is checked by the callers, but
+ * assert it here for good measure.
+ */
+ ASSERT3U(object, >=, rwa->os->os_dsl_dataset->ds_resume_object[txgoff]);
+ ASSERT(object != rwa->os->os_dsl_dataset->ds_resume_object[txgoff] ||
+ offset >= rwa->os->os_dsl_dataset->ds_resume_offset[txgoff]);
+ ASSERT3U(rwa->bytes_read, >=,
+ rwa->os->os_dsl_dataset->ds_resume_bytes[txgoff]);
+
+ rwa->os->os_dsl_dataset->ds_resume_object[txgoff] = object;
+ rwa->os->os_dsl_dataset->ds_resume_offset[txgoff] = offset;
+ rwa->os->os_dsl_dataset->ds_resume_bytes[txgoff] = rwa->bytes_read;
+}
+
noinline static int
receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
void *data)
dmu_buf_rele(db, FTAG);
}
dmu_tx_commit(tx);
+
return (0);
}
!DMU_OT_IS_VALID(drrw->drr_type))
return (SET_ERROR(EINVAL));
+ /*
+ * For resuming to work, records must be in increasing order
+ * by (object, offset).
+ */
+ if (drrw->drr_object < rwa->last_object ||
+ (drrw->drr_object == rwa->last_object &&
+ drrw->drr_offset < rwa->last_offset)) {
+ return (SET_ERROR(EINVAL));
+ }
+ rwa->last_object = drrw->drr_object;
+ rwa->last_offset = drrw->drr_offset;
+
if (dmu_object_info(rwa->os, drrw->drr_object, NULL) != 0)
return (SET_ERROR(EINVAL));
if (dmu_bonus_hold(rwa->os, drrw->drr_object, FTAG, &bonus) != 0)
return (SET_ERROR(EINVAL));
dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx);
+
+ /*
+ * Note: If the receive fails, we want the resume stream to start
+ * with the same record that we last successfully received (as opposed
+ * to the next record), so that we can verify that we are
+ * resuming from the correct location.
+ */
+ save_resume_state(rwa, drrw->drr_object, drrw->drr_offset, tx);
dmu_tx_commit(tx);
dmu_buf_rele(bonus, FTAG);
+
return (0);
}
dmu_write(rwa->os, drrwbr->drr_object,
drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx);
dmu_buf_rele(dbp, FTAG);
+
+ /* See comment in restore_write. */
+ save_resume_state(rwa, drrwbr->drr_object, drrwbr->drr_offset, tx);
dmu_tx_commit(tx);
return (0);
}
static int
receive_write_embedded(struct receive_writer_arg *rwa,
- struct drr_write_embedded *drrwnp, void *data)
+ struct drr_write_embedded *drrwe, void *data)
{
dmu_tx_t *tx;
int err;
- if (drrwnp->drr_offset + drrwnp->drr_length < drrwnp->drr_offset)
+ if (drrwe->drr_offset + drrwe->drr_length < drrwe->drr_offset)
return (EINVAL);
- if (drrwnp->drr_psize > BPE_PAYLOAD_SIZE)
+ if (drrwe->drr_psize > BPE_PAYLOAD_SIZE)
return (EINVAL);
- if (drrwnp->drr_etype >= NUM_BP_EMBEDDED_TYPES)
+ if (drrwe->drr_etype >= NUM_BP_EMBEDDED_TYPES)
return (EINVAL);
- if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS)
+ if (drrwe->drr_compression >= ZIO_COMPRESS_FUNCTIONS)
return (EINVAL);
tx = dmu_tx_create(rwa->os);
- dmu_tx_hold_write(tx, drrwnp->drr_object,
- drrwnp->drr_offset, drrwnp->drr_length);
+ dmu_tx_hold_write(tx, drrwe->drr_object,
+ drrwe->drr_offset, drrwe->drr_length);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err != 0) {
dmu_tx_abort(tx);
return (err);
}
- dmu_write_embedded(rwa->os, drrwnp->drr_object,
- drrwnp->drr_offset, data, drrwnp->drr_etype,
- drrwnp->drr_compression, drrwnp->drr_lsize, drrwnp->drr_psize,
+ dmu_write_embedded(rwa->os, drrwe->drr_object,
+ drrwe->drr_offset, data, drrwe->drr_etype,
+ drrwe->drr_compression, drrwe->drr_lsize, drrwe->drr_psize,
rwa->byteswap ^ ZFS_HOST_BYTEORDER, tx);
+ /* See comment in restore_write. */
+ save_resume_state(rwa, drrwe->drr_object, drrwe->drr_offset, tx);
dmu_tx_commit(tx);
return (0);
}
static void
dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
{
- char name[MAXNAMELEN];
- dsl_dataset_name(drc->drc_ds, name);
- dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
- (void) dsl_destroy_head(name);
+ if (drc->drc_resumable) {
+ /* wait for our resume state to be written to disk */
+ txg_wait_synced(drc->drc_ds->ds_dir->dd_pool, 0);
+ dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
+ } else {
+ char name[MAXNAMELEN];
+ dsl_dataset_name(drc->drc_ds, name);
+ dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
+ (void) dsl_destroy_head(name);
+ }
}
static void
if (len != 0) {
ASSERT3U(len, <=, SPA_MAXBLOCKSIZE);
- ra->rrd->payload = buf;
- ra->rrd->payload_size = len;
- err = receive_read(ra, len, ra->rrd->payload);
+ err = receive_read(ra, len, buf);
if (err != 0)
return (err);
- receive_cksum(ra, len, ra->rrd->payload);
+ receive_cksum(ra, len, buf);
+
+ /* note: rrd is NULL when reading the begin record's payload */
+ if (ra->rrd != NULL) {
+ ra->rrd->payload = buf;
+ ra->rrd->payload_size = len;
+ ra->rrd->bytes_read = ra->bytes_read;
+ }
}
ra->prev_cksum = ra->cksum;
ra->next_rrd = kmem_zalloc(sizeof (*ra->next_rrd), KM_SLEEP);
err = receive_read(ra, sizeof (ra->next_rrd->header),
&ra->next_rrd->header);
+ ra->next_rrd->bytes_read = ra->bytes_read;
if (err != 0) {
kmem_free(ra->next_rrd, sizeof (*ra->next_rrd));
ra->next_rrd = NULL;
{
struct drr_end *drre = &ra->rrd->header.drr_u.drr_end;
if (!ZIO_CHECKSUM_EQUAL(ra->prev_cksum, drre->drr_checksum))
- return (SET_ERROR(EINVAL));
+ return (SET_ERROR(ECKSUM));
return (0);
}
case DRR_SPILL:
{
int err;
+ /* Processing in order, therefore bytes_read should be increasing. */
+ ASSERT3U(rrd->bytes_read, >=, rwa->bytes_read);
+ rwa->bytes_read = rrd->bytes_read;
+
switch (rrd->header.drr_type) {
case DRR_OBJECT:
{
mutex_exit(&rwa->mutex);
}
+static int
+resume_check(struct receive_arg *ra, nvlist_t *begin_nvl)
+{
+ uint64_t val;
+ objset_t *mos = dmu_objset_pool(ra->os)->dp_meta_objset;
+ uint64_t dsobj = dmu_objset_id(ra->os);
+ uint64_t resume_obj, resume_off;
+
+ if (nvlist_lookup_uint64(begin_nvl,
+ "resume_object", &resume_obj) != 0 ||
+ nvlist_lookup_uint64(begin_nvl,
+ "resume_offset", &resume_off) != 0) {
+ return (SET_ERROR(EINVAL));
+ }
+ VERIFY0(zap_lookup(mos, dsobj,
+ DS_FIELD_RESUME_OBJECT, sizeof (val), 1, &val));
+ if (resume_obj != val)
+ return (SET_ERROR(EINVAL));
+ VERIFY0(zap_lookup(mos, dsobj,
+ DS_FIELD_RESUME_OFFSET, sizeof (val), 1, &val));
+ if (resume_off != val)
+ return (SET_ERROR(EINVAL));
+
+ return (0);
+}
+
+
/*
* Read in the stream's records, one by one, and apply them to the pool. There
* are two threads involved; the thread that calls this function will spin up a
struct receive_writer_arg *rwa;
int featureflags;
struct receive_ign_obj_node *n;
+ uint32_t payloadlen;
+ void *payload;
+ nvlist_t *begin_nvl = NULL;
ra = kmem_zalloc(sizeof (*ra), KM_SLEEP);
rwa = kmem_zalloc(sizeof (*rwa), KM_SLEEP);
ra->cksum = drc->drc_cksum;
ra->vp = vp;
ra->voff = *voffp;
+
+ if (dsl_dataset_is_zapified(drc->drc_ds)) {
+ (void) zap_lookup(drc->drc_ds->ds_dir->dd_pool->dp_meta_objset,
+ drc->drc_ds->ds_object, DS_FIELD_RESUME_BYTES,
+ sizeof (ra->bytes_read), 1, &ra->bytes_read);
+ }
+
list_create(&ra->ignore_obj_list, sizeof (struct receive_ign_obj_node),
offsetof(struct receive_ign_obj_node, node));
drc->drc_guid_to_ds_map = rwa->guid_to_ds_map;
}
- err = receive_read_payload_and_next_header(ra, 0, NULL);
- if (err)
+ payloadlen = drc->drc_drr_begin->drr_payloadlen;
+ payload = NULL;
+ if (payloadlen != 0)
+ payload = kmem_alloc(payloadlen, KM_SLEEP);
+
+ err = receive_read_payload_and_next_header(ra, payloadlen, payload);
+ if (err != 0) {
+ if (payloadlen != 0)
+ kmem_free(payload, payloadlen);
goto out;
+ }
+ if (payloadlen != 0) {
+ err = nvlist_unpack(payload, payloadlen, &begin_nvl, KM_SLEEP);
+ kmem_free(payload, payloadlen);
+ if (err != 0)
+ goto out;
+ }
+
+ if (featureflags & DMU_BACKUP_FEATURE_RESUMING) {
+ err = resume_check(ra, begin_nvl);
+ if (err != 0)
+ goto out;
+ }
(void) bqueue_init(&rwa->q, zfs_recv_queue_length,
offsetof(struct receive_record_arg, node));
mutex_init(&rwa->mutex, NULL, MUTEX_DEFAULT, NULL);
rwa->os = ra->os;
rwa->byteswap = drc->drc_byteswap;
+ rwa->resumable = drc->drc_resumable;
(void) thread_create(NULL, 0, receive_writer_thread, rwa, 0, curproc,
TS_RUN, minclsyspri);
* We can leave this loop in 3 ways: First, if rwa->err is
* non-zero. In that case, the writer thread will free the rrd we just
* pushed. Second, if we're interrupted; in that case, either it's the
- * first loop and ra->rrd was never allocated, or it's later, and ra.rrd
+ * first loop and ra->rrd was never allocated, or it's later and ra->rrd
* has been handed off to the writer thread who will free it. Finally,
* if receive_read_record fails or we're at the end of the stream, then
* we free ra->rrd and exit.
err = rwa->err;
out:
+ nvlist_free(begin_nvl);
if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1))
zfs_onexit_fd_rele(cleanup_fd);
if (err != 0) {
/*
- * destroy what we created, so we don't leave it in the
- * inconsistent restoring state.
+ * Clean up references. If receive is not resumable,
+ * destroy what we created, so we don't leave it in
+ * the inconsistent state.
*/
dmu_recv_cleanup_ds(drc);
}
dmu_buf_will_dirty(ds->ds_dbuf, tx);
dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT;
+ if (dsl_dataset_has_resume_receive_state(ds)) {
+ (void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_FROMGUID, tx);
+ (void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_OBJECT, tx);
+ (void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_OFFSET, tx);
+ (void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_BYTES, tx);
+ (void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_TOGUID, tx);
+ (void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_TONAME, tx);
+ }
}
drc->drc_newsnapobj = dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj;
zvol_create_minors(dp->dp_spa, drc->drc_tofs, B_TRUE);
int pd_flags;
boolean_t pd_cancel;
boolean_t pd_exited;
+ zbookmark_phys_t pd_resume;
} prefetch_data_t;
typedef struct traverse_data {
uint32_t flags = ARC_FLAG_WAIT;
int32_t i;
int32_t epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
- dnode_phys_t *cdnp;
+ dnode_phys_t *child_dnp;
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err != 0)
goto post;
- cdnp = buf->b_data;
+ child_dnp = buf->b_data;
- for (i = 0; i < epb; i += cdnp[i].dn_extra_slots + 1) {
- prefetch_dnode_metadata(td, &cdnp[i], zb->zb_objset,
- zb->zb_blkid * epb + i);
+ for (i = 0; i < epb; i += child_dnp[i].dn_extra_slots + 1) {
+ prefetch_dnode_metadata(td, &child_dnp[i],
+ zb->zb_objset, zb->zb_blkid * epb + i);
}
/* recursively visitbp() blocks below this */
- for (i = 0; i < epb; i += cdnp[i].dn_extra_slots + 1) {
- err = traverse_dnode(td, &cdnp[i], zb->zb_objset,
- zb->zb_blkid * epb + i);
+ for (i = 0; i < epb; i += child_dnp[i].dn_extra_slots + 1) {
+ err = traverse_dnode(td, &child_dnp[i],
+ zb->zb_objset, zb->zb_blkid * epb + i);
if (err != 0)
break;
}
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
arc_flags_t flags = ARC_FLAG_WAIT;
objset_phys_t *osp;
- dnode_phys_t *mdnp, *gdnp, *udnp;
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
goto post;
osp = buf->b_data;
- mdnp = &osp->os_meta_dnode;
- gdnp = &osp->os_groupused_dnode;
- udnp = &osp->os_userused_dnode;
-
- prefetch_dnode_metadata(td, mdnp, zb->zb_objset,
+ prefetch_dnode_metadata(td, &osp->os_meta_dnode, zb->zb_objset,
DMU_META_DNODE_OBJECT);
/*
* See the block comment above for the goal of this variable.
td->td_realloc_possible = B_FALSE;
if (arc_buf_size(buf) >= sizeof (objset_phys_t)) {
- prefetch_dnode_metadata(td, gdnp, zb->zb_objset,
- DMU_GROUPUSED_OBJECT);
- prefetch_dnode_metadata(td, udnp, zb->zb_objset,
- DMU_USERUSED_OBJECT);
+ prefetch_dnode_metadata(td, &osp->os_groupused_dnode,
+ zb->zb_objset, DMU_GROUPUSED_OBJECT);
+ prefetch_dnode_metadata(td, &osp->os_userused_dnode,
+ zb->zb_objset, DMU_USERUSED_OBJECT);
}
- err = traverse_dnode(td, mdnp, zb->zb_objset,
+ err = traverse_dnode(td, &osp->os_meta_dnode, zb->zb_objset,
DMU_META_DNODE_OBJECT);
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
- err = traverse_dnode(td, gdnp, zb->zb_objset,
- DMU_GROUPUSED_OBJECT);
+ err = traverse_dnode(td, &osp->os_groupused_dnode,
+ zb->zb_objset, DMU_GROUPUSED_OBJECT);
}
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
- err = traverse_dnode(td, udnp, zb->zb_objset,
- DMU_USERUSED_OBJECT);
+ err = traverse_dnode(td, &osp->os_userused_dnode,
+ zb->zb_objset, DMU_USERUSED_OBJECT);
}
}
* Set the bookmark to the first level-0 block that we need
* to visit. This way, the resuming code does not need to
* deal with resuming from indirect blocks.
+ *
+ * Note, if zb_level <= 0, dnp may be NULL, so we don't want
+ * to dereference it.
*/
- td->td_resume->zb_blkid = zb->zb_blkid <<
- (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
+ td->td_resume->zb_blkid = zb->zb_blkid;
+ if (zb->zb_level > 0) {
+ td->td_resume->zb_blkid <<= zb->zb_level *
+ (dnp->dn_indblkshift - SPA_BLKPTRSHIFT);
+ }
td->td_paused = B_TRUE;
}
int j, err = 0;
zbookmark_phys_t czb;
+ if (object != DMU_META_DNODE_OBJECT && td->td_resume != NULL &&
+ object < td->td_resume->zb_object)
+ return (0);
+
if (td->td_flags & TRAVERSE_PRE) {
SET_BOOKMARK(&czb, objset, object, ZB_DNODE_LEVEL,
ZB_DNODE_BLKID);
td.td_func = traverse_prefetcher;
td.td_arg = td_main->td_pfd;
td.td_pfd = NULL;
+ td.td_resume = &td_main->td_pfd->pd_resume;
SET_BOOKMARK(&czb, td.td_objset,
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
ASSERT(ds == NULL || objset == ds->ds_object);
ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST));
- /*
- * The data prefetching mechanism (the prefetch thread) is incompatible
- * with resuming from a bookmark.
- */
- ASSERT(resume == NULL || !(flags & TRAVERSE_PREFETCH_DATA));
-
td = kmem_alloc(sizeof (traverse_data_t), KM_SLEEP);
pd = kmem_zalloc(sizeof (prefetch_data_t), KM_SLEEP);
czb = kmem_alloc(sizeof (zbookmark_phys_t), KM_SLEEP);
}
pd->pd_flags = flags;
+ if (resume != NULL)
+ pd->pd_resume = *resume;
mutex_init(&pd->pd_mtx, NULL, MUTEX_DEFAULT, NULL);
cv_init(&pd->pd_cv, NULL, CV_DEFAULT, NULL);
* in syncing context).
*/
int
-traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags,
- blkptr_cb_t func, void *arg)
+traverse_dataset_resume(dsl_dataset_t *ds, uint64_t txg_start,
+ zbookmark_phys_t *resume,
+ int flags, blkptr_cb_t func, void *arg)
{
return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object,
- &dsl_dataset_phys(ds)->ds_bp, txg_start, NULL, flags, func, arg));
+ &dsl_dataset_phys(ds)->ds_bp, txg_start, resume, flags, func, arg));
+}
+
+int
+traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start,
+ int flags, blkptr_cb_t func, void *arg)
+{
+ return (traverse_dataset_resume(ds, txg_start, NULL, flags, func, arg));
}
int
/* visit each dataset */
for (obj = 1; err == 0;
- err = dmu_object_next(mos, &obj, FALSE, txg_start)) {
+ err = dmu_object_next(mos, &obj, B_FALSE, txg_start)) {
dmu_object_info_t doi;
err = dmu_object_info(mos, obj, &doi);
#include <sys/dsl_userhold.h>
#include <sys/dsl_bookmark.h>
#include <sys/policy.h>
+#include <sys/dmu_send.h>
+#include <sys/zio_compress.h>
+#include <zfs_fletcher.h>
/*
* The SPA supports block sizes up to 16MB. However, very large blocks
{
boolean_t gotit = FALSE;
+ ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
mutex_enter(&ds->ds_lock);
if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) {
ds->ds_owner = tag;
return (gotit);
}
+boolean_t
+dsl_dataset_has_owner(dsl_dataset_t *ds)
+{
+ boolean_t rv;
+ mutex_enter(&ds->ds_lock);
+ rv = (ds->ds_owner != NULL);
+ mutex_exit(&ds->ds_lock);
+ return (rv);
+}
+
static void
dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx)
{
dmu_buf_will_dirty(ds->ds_dbuf, tx);
dsl_dataset_phys(ds)->ds_fsid_guid = ds->ds_fsid_guid;
+ if (ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] != 0) {
+ VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
+ ds->ds_object, DS_FIELD_RESUME_OBJECT, 8, 1,
+ &ds->ds_resume_object[tx->tx_txg & TXG_MASK], tx));
+ VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
+ ds->ds_object, DS_FIELD_RESUME_OFFSET, 8, 1,
+ &ds->ds_resume_offset[tx->tx_txg & TXG_MASK], tx));
+ VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
+ ds->ds_object, DS_FIELD_RESUME_BYTES, 8, 1,
+ &ds->ds_resume_bytes[tx->tx_txg & TXG_MASK], tx));
+ ds->ds_resume_object[tx->tx_txg & TXG_MASK] = 0;
+ ds->ds_resume_offset[tx->tx_txg & TXG_MASK] = 0;
+ ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] = 0;
+ }
+
dmu_objset_sync(ds->ds_objset, zio, tx);
for (f = 0; f < SPA_FEATURES; f++) {
nvlist_free(propval);
}
+static void
+get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv)
+{
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
+
+ if (dsl_dataset_has_resume_receive_state(ds)) {
+ char *str;
+ void *packed;
+ uint8_t *compressed;
+ uint64_t val;
+ nvlist_t *token_nv = fnvlist_alloc();
+ size_t packed_size, compressed_size;
+ zio_cksum_t cksum;
+ char *propval;
+ char buf[MAXNAMELEN];
+ int i;
+
+ if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val) == 0) {
+ fnvlist_add_uint64(token_nv, "fromguid", val);
+ }
+ if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_OBJECT, sizeof (val), 1, &val) == 0) {
+ fnvlist_add_uint64(token_nv, "object", val);
+ }
+ if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_OFFSET, sizeof (val), 1, &val) == 0) {
+ fnvlist_add_uint64(token_nv, "offset", val);
+ }
+ if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_BYTES, sizeof (val), 1, &val) == 0) {
+ fnvlist_add_uint64(token_nv, "bytes", val);
+ }
+ if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val) == 0) {
+ fnvlist_add_uint64(token_nv, "toguid", val);
+ }
+ if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_TONAME, 1, sizeof (buf), buf) == 0) {
+ fnvlist_add_string(token_nv, "toname", buf);
+ }
+ if (zap_contains(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_EMBEDOK) == 0) {
+ fnvlist_add_boolean(token_nv, "embedok");
+ }
+ packed = fnvlist_pack(token_nv, &packed_size);
+ fnvlist_free(token_nv);
+ compressed = kmem_alloc(packed_size, KM_SLEEP);
+
+ compressed_size = gzip_compress(packed, compressed,
+ packed_size, packed_size, 6);
+
+ fletcher_4_native(compressed, compressed_size, &cksum);
+
+ str = kmem_alloc(compressed_size * 2 + 1, KM_SLEEP);
+ for (i = 0; i < compressed_size; i++) {
+ (void) sprintf(str + i * 2, "%02x", compressed[i]);
+ }
+ str[compressed_size * 2] = '\0';
+ propval = kmem_asprintf("%u-%llx-%llx-%s",
+ ZFS_SEND_RESUME_TOKEN_VERSION,
+ (longlong_t)cksum.zc_word[0],
+ (longlong_t)packed_size, str);
+ dsl_prop_nvlist_add_string(nv,
+ ZFS_PROP_RECEIVE_RESUME_TOKEN, propval);
+ kmem_free(packed, packed_size);
+ kmem_free(str, compressed_size * 2 + 1);
+ kmem_free(compressed, packed_size);
+ strfree(propval);
+ }
+}
+
void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
}
}
+ if (!dsl_dataset_is_snapshot(ds)) {
+ dsl_dataset_t *recv_ds;
+ char recvname[ZFS_MAXNAMELEN];
+
+ /*
+ * A failed "newfs" (e.g. full) resumable receive leaves
+ * the stats set on this dataset. Check here for the prop.
+ */
+ get_receive_resume_stats(ds, nv);
+
+ /*
+ * A failed incremental resumable receive leaves the
+ * stats set on our child named "%recv". Check the child
+ * for the prop.
+ */
+ dsl_dataset_name(ds, recvname);
+ (void) strcat(recvname, "/");
+ (void) strcat(recvname, recv_clone_name);
+ if (dsl_dataset_hold(dp, recvname, FTAG, &recv_ds) == 0) {
+ get_receive_resume_stats(recv_ds, nv);
+ dsl_dataset_rele(recv_ds, FTAG);
+ }
+ }
}
void
* only one long hold on the dataset. We're not allowed to change anything here
* so we don't permanently release the long hold or regular hold here. We want
* to do this only when syncing to avoid the dataset unexpectedly going away
- * when we release the long hold.
+ * when we release the long hold. Allow a long hold to exist for volumes, this
+ * may occur when asynchronously registering the minor with the kernel.
*/
static int
dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx)
dsl_dataset_long_rele(ds, owner);
}
- held = dsl_dataset_long_held(ds);
+ held = (dsl_dataset_long_held(ds) && (ds->ds_owner != zvol_tag));
if (owner != NULL)
dsl_dataset_long_hold(ds, owner);
dmu_object_zapify(mos, ds->ds_object, DMU_OT_DSL_DATASET, tx);
}
+boolean_t
+dsl_dataset_is_zapified(dsl_dataset_t *ds)
+{
+ dmu_object_info_t doi;
+
+ dmu_object_info_from_db(ds->ds_dbuf, &doi);
+ return (doi.doi_type == DMU_OTN_ZAP_METADATA);
+}
+
+boolean_t
+dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds)
+{
+ return (dsl_dataset_is_zapified(ds) &&
+ zap_contains(ds->ds_dir->dd_pool->dp_meta_objset,
+ ds->ds_object, DS_FIELD_RESUME_TOGUID) == 0);
+}
+
#if defined(_KERNEL) && defined(HAVE_SPL)
#if defined(_LP64)
module_param(zfs_max_recordsize, int, 0644);
objset_t *os;
if (dmu_objset_hold(dsname, FTAG, &os) == 0) {
- boolean_t inconsistent = DS_IS_INCONSISTENT(dmu_objset_ds(os));
+ boolean_t need_destroy = DS_IS_INCONSISTENT(dmu_objset_ds(os));
+
+ /*
+ * If the dataset is inconsistent because a resumable receive
+ * has failed, then do not destroy it.
+ */
+ if (dsl_dataset_has_resume_receive_state(dmu_objset_ds(os)))
+ need_destroy = B_FALSE;
+
dmu_objset_rele(os, FTAG);
- if (inconsistent)
+ if (need_destroy)
(void) dsl_destroy_head(dsname);
}
return (0);
* zc_guid force flag
* zc_cleanup_fd cleanup-on-exit file descriptor
* zc_action_handle handle for this guid/ds mapping (or zero on first call)
+ * zc_resumable if data is incomplete assume sender will resume
*
* outputs:
* zc_cookie number of bytes read
return (SET_ERROR(EBADF));
}
- VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ errors = fnvlist_alloc();
if (zc->zc_string[0])
origin = zc->zc_string;
error = dmu_recv_begin(tofs, tosnap,
- &zc->zc_begin_record, force, origin, &drc);
+ &zc->zc_begin_record, force, zc->zc_resumable, origin, &drc);
if (error != 0)
goto out;
* indicates that blocks > 128KB are permitted
* (optional) "embedok" -> (value ignored)
* presence indicates DRR_WRITE_EMBEDDED records are permitted
+ * (optional) "resume_object" and "resume_offset" -> (uint64)
+ * if present, resume send stream from specified object and offset.
* }
*
* outnvl is unused
file_t *fp;
boolean_t largeblockok;
boolean_t embedok;
+ uint64_t resumeobj = 0;
+ uint64_t resumeoff = 0;
error = nvlist_lookup_int32(innvl, "fd", &fd);
if (error != 0)
largeblockok = nvlist_exists(innvl, "largeblockok");
embedok = nvlist_exists(innvl, "embedok");
+ (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
+ (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
+
if ((fp = getf(fd)) == NULL)
return (SET_ERROR(EBADF));
off = fp->f_offset;
- error = dmu_send(snapname, fromname, embedok, largeblockok,
- fd, fp->f_vnode, &off);
+ error = dmu_send(snapname, fromname, embedok, largeblockok, fd,
+ resumeobj, resumeoff, fp->f_vnode, &off);
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
fp->f_offset = off;
static kmutex_t zvol_state_lock;
static list_t zvol_state_list;
-static char *zvol_tag = "zvol_tag";
+void *zvol_tag = "zvol_tag";
/*
* The in-core state of each volume.
tests = []
# DISABLED:
-# zfs_receive_003_pos - needs investigation
-# zfs_receive_010_pos - needs investigation
-# zfs_receive_011_pos - needs investigation
-# zfs_receive_012_pos - needs investigation
+# zfs_receive_004_neg - Fails for OpenZFS on illumos
+# zfs_receive_011_pos - Requires port of OpenZFS 6562
+# zfs_receive_012_pos - Requires port of OpenZFS 6562
[tests/functional/cli_root/zfs_receive]
-tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_005_neg',
- 'zfs_receive_006_pos', 'zfs_receive_007_neg', 'zfs_receive_008_pos',
- 'zfs_receive_009_neg']
+tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos',
+ 'zfs_receive_005_neg', 'zfs_receive_006_pos',
+ 'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg']
# DISABLED:
# zfs_rename_002_pos - needs investigation
[tests/functional/cli_root/zfs_rollback]
tests = ['zfs_rollback_003_neg', 'zfs_rollback_004_neg']
-# DISABLED:
-# zfs_send_007_pos - needs investigation
[tests/functional/cli_root/zfs_send]
tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos',
- 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos']
+ 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos',
+ 'zfs_send_007_pos']
# DISABLED:
# mountpoint_003_pos - needs investigation
# DISABLED:
# zfs_snapshot_008_neg - nested pools
+# zfs_snapshot_009_pos - Fails for OpenZFS on illumos
[tests/functional/cli_root/zfs_snapshot]
tests = ['zfs_snapshot_001_neg', 'zfs_snapshot_002_neg',
'zfs_snapshot_003_neg', 'zfs_snapshot_004_neg', 'zfs_snapshot_005_neg',
- 'zfs_snapshot_006_pos', 'zfs_snapshot_007_neg', 'zfs_snapshot_009_pos']
+ 'zfs_snapshot_006_pos', 'zfs_snapshot_007_neg']
# DISABLED:
# zfs_unmount_005_pos - needs investigation
#[tests/functional/rootpool]
#tests = ['rootpool_002_neg', 'rootpool_003_neg', 'rootpool_007_neg']
-# DISABLED: Hangs on I/O for unclear reason.
-#[tests/functional/rsend]
-#tests = ['rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos',
-# 'rsend_005_pos', 'rsend_006_pos', 'rsend_007_pos', 'rsend_008_pos',
-# 'rsend_009_pos', 'rsend_010_pos', 'rsend_011_pos', 'rsend_012_pos',
-# 'rsend_013_pos']
+# DISABLED:
+# rsend_008_pos - Fails for OpenZFS on illumos
+# rsend_009_pos - Fails for OpenZFS on illumos
+# rsend_020_pos - ASSERTs in dump_record()
+[tests/functional/rsend]
+tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos',
+ 'rsend_005_pos', 'rsend_006_pos', 'rsend_007_pos',
+ 'rsend_010_pos', 'rsend_011_pos', 'rsend_012_pos',
+ 'rsend_013_pos', 'rsend_014_pos',
+ 'rsend_019_pos',
+ 'rsend_021_pos', 'rsend_022_pos', 'rsend_024_pos']
[tests/functional/scrub_mirror]
tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos',
'slog_009_neg', 'slog_010_neg', 'slog_011_neg']
# DISABLED:
+# clone_001_pos - nested pools
# rollback_003_pos - Hangs in unmount and spins.
-# snapshot_013_pos - Hangs on I/O for unclear reason.
-# snapshot_016_pos - .zfs mv/rmdir/mkdir disabled by default.
-#[tests/functional/snapshot]
-#tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos',
-# 'snapshot_001_pos', 'snapshot_002_pos',
-# 'snapshot_003_pos', 'snapshot_004_pos', 'snapshot_005_pos',
-# 'snapshot_006_pos', 'snapshot_007_pos', 'snapshot_008_pos',
-# 'snapshot_009_pos', 'snapshot_010_pos', 'snapshot_011_pos',
-# 'snapshot_012_pos', 'snapshot_014_pos',
-# 'snapshot_015_pos', 'snapshot_017_pos']
+# snapshot_016_pos - Problem with automount
+[tests/functional/snapshot]
+tests = ['rollback_001_pos', 'rollback_002_pos',
+ 'snapshot_001_pos', 'snapshot_002_pos',
+ 'snapshot_003_pos', 'snapshot_004_pos', 'snapshot_005_pos',
+ 'snapshot_006_pos', 'snapshot_007_pos', 'snapshot_008_pos',
+ 'snapshot_009_pos', 'snapshot_010_pos', 'snapshot_011_pos',
+ 'snapshot_012_pos', 'snapshot_013_pos', 'snapshot_014_pos',
+ 'snapshot_015_pos', 'snapshot_017_pos']
[tests/functional/snapused]
tests = ['snapused_001_pos', 'snapused_002_pos', 'snapused_003_pos',
'snapused_004_pos', 'snapused_005_pos']
exit(errno);
}
- if (fsetxattr(fd, "xattr", pbuf, 1024, 0) < 0) {
+ if (fsetxattr(fd, "user.xattr", pbuf, 1024, 0) < 0) {
(void) fprintf(stderr, "fsetxattr(fd, \"xattr\", pbuf, "
"1024, 0) failed.\n[%d]: %s.\n", errno, strerror(errno));
exit(errno);
export SHUF="@SHUF@"
export SLEEP="@SLEEP@"
export SORT="@SORT@"
+export STAT="@STAT@"
export STRINGS="@STRINGS@"
export SU="@SU@"
export SUM="@SUM@"
export TAR="@TAR@"
export TOUCH="@TOUCH@"
export TR="@TR@"
-export TRUE="@TRUE@"
export TRUNCATE="@TRUNCATE@"
+export TRUE="@TRUE@"
export UDEVADM="@UDEVADM@"
export UFSDUMP="@UFSDUMP@"
export UFSRESTORE="@UFSRESTORE@"
zfs_receive_006_pos.ksh \
zfs_receive_007_neg.ksh \
zfs_receive_008_pos.ksh \
- zfs_receive_009_neg.ksh
+ zfs_receive_009_neg.ksh \
+ zfs_receive_010_pos.ksh \
+ zfs_receive_011_pos.ksh \
+ zfs_receive_012_pos.ksh
first_object=$(ls -i $mntpnt | awk '{print $1}')
log_must $ZFS snapshot $POOL/fs@a
while true; do
- log_must $FIND $mntpnt -delete
+ log_must $FIND $mntpnt/* -delete
sync
log_must $MKFILES "$mntpnt/" 4000
FILE=$(ls -i $mntpnt | awk \
if is_global_zone ; then
log_must $ZFS create -V 16M $pool/vol
log_must $ZFS create -V 16M $pool/$FS/vol
+ block_device_wait
log_must $ZFS snapshot $pool/$FS/vol@vsnap
log_must $ZFS clone $pool/$FS/vol@vsnap $pool/$FS/vclone
+ block_device_wait
fi
log_must snapshot_tree $pool/$FS/fs1/fs2@fsnap
typeset dtst1=$1
typeset dtst2=$2
- for item in "type" "origin" "volblocksize" "aclinherit" "aclmode" \
+ for item in "type" "origin" "volblocksize" "aclinherit" "acltype" \
"atime" "canmount" "checksum" "compression" "copies" "devices" \
"dnodesize" "exec" "quota" "readonly" "recordsize" "reservation" \
- "setuid" "sharenfs" "snapdir" "version" "volsize" "xattr" "zoned" \
+ "setuid" "snapdir" "version" "volsize" "xattr" "zoned" \
"mountpoint";
do
$ZFS get -H -o property,value,source $item $dtst1 >> \
for ((i=0; i<$nfiles; i=i+1)); do
$DD if=/dev/urandom \
of=/$fs/file-$maxsize-$((i+$file_id_offset)) \
- bs=$(($RANDOM * $RANDOM % $maxsize)) \
+ bs=$((($RANDOM * $RANDOM % ($maxsize - 1)) + 1)) \
count=1 >/dev/null 2>&1 || log_fail \
"Failed to create /$fs/file-$maxsize-$((i+$file_id_offset))"
done
# write the same value that's already there.
#
log_must eval "$DD if=/dev/urandom of=$file conv=notrunc " \
- "bs=1 count=2 oseek=$offset >/dev/null 2>&1"
+ "bs=1 count=2 seek=$offset >/dev/null 2>&1"
else
log_must $TRUNCATE -s $offset $file
fi
mk_files 100 1048576 0 $sendfs &
mk_files 10 10485760 0 $sendfs &
mk_files 1 104857600 0 $sendfs &
- log_must $WAIT
+ wait
log_must $ZFS snapshot $sendfs@a
rm_files 200 256 0 $sendfs &
rm_files 200 131072 0 $sendfs &
rm_files 20 1048576 0 $sendfs &
rm_files 2 10485760 0 $sendfs &
- log_must $WAIT
+ wait
mk_files 400 256 0 $sendfs &
mk_files 400 131072 0 $sendfs &
mk_files 40 1048576 0 $sendfs &
mk_files 4 10485760 0 $sendfs &
- log_must $WAIT
+ wait
log_must $ZFS snapshot $sendfs@b
log_must eval "$ZFS send -v $sendfs@a >/$sendpool/initial.zsend"
log_must $ZFS inherit $prop $POOL2
done
- #if is_shared $POOL; then
- # log_must $ZFS set sharenfs=off $POOL
- #fi
log_must setup_test_model $POOL
if [[ -d $TESTDIR ]]; then
"$POOL/$FS/fs1/fs2" "$POOL/$FS/fs1/fclone" ; do
rand_set_prop $fs aclinherit "discard" "noallow" "secure" "passthrough"
rand_set_prop $fs checksum "on" "off" "fletcher2" "fletcher4" "sha256"
- rand_set_prop $fs aclmode "discard" "groupmask" "passthrough"
+ rand_set_prop $fs acltype "off" "noacl" "posixacl"
rand_set_prop $fs atime "on" "off"
rand_set_prop $fs checksum "on" "off" "fletcher2" "fletcher4" "sha256"
rand_set_prop $fs compression "on" "off" "lzjb" "gzip" \
# Verify inherited property can be received
-rand_set_prop $POOL sharenfs "on" "off" "rw"
+rand_set_prop $POOL redundant_metadata "all" "most"
+rand_set_prop $POOL sync "standard" "always" "disabled"
#
# Duplicate POOL2 for testing
log_must eval "$ZFS send -R $POOL/$FS@final > $BACKDIR/fs-final-R"
log_must eval "$ZFS receive -d $POOL2 < $BACKDIR/fs-final-R"
+block_device_wait
log_must eval "$ZPOOL export $POOL"
log_must eval "$ZPOOL import $POOL"
export LARGEST_FILE=${TESTSDIR}/zfs-tests/cmd/largest_file/largest_file
export MKBUSY=${TESTSDIR}/zfs-tests/cmd/mkbusy/mkbusy
export MKFILE=${TESTSDIR}/zfs-tests/cmd/mkfile/mkfile
-export MKFILES=${TESTSDIR}/zfs-tests/cmd/mkfile/mkfiles
+export MKFILES=${TESTSDIR}/zfs-tests/cmd/mkfiles/mkfiles
export MKTREE=${TESTSDIR}/zfs-tests/cmd/mktree/mktree
export MMAP_EXEC=${TESTSDIR}/zfs-tests/cmd/mmap_exec/mmap_exec
export MMAPWRITE=${TESTSDIR}/zfs-tests/cmd/mmapwrite/mmapwrite