/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
uint_t zl_prev_rotor; /* rotor for zl_prev[] */
txg_node_t zl_dirty_link; /* protected by dp_dirty_zilogs list */
uint64_t zl_dirty_max_txg; /* highest txg used to dirty zilog */
+ /*
+ * Max block size for this ZIL. Note that this cannot be changed
+ * while the ZIL is in use because consumers (ZPL/zvol) need to take
+ * this into account when deciding between WR_COPIED and WR_NEED_COPY
+ * (see zil_max_copied_data()).
+ */
+ uint64_t zl_max_block_size;
};
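/*
 * Editorial sketch, not part of this change: roughly how a consumer such
 * as zfs_log_write() takes zl_max_block_size into account when picking a
 * write state. The helper name below is hypothetical, and the real ZPL
 * code also weighs logbias, slog presence, and sync semantics.
 */
static itx_wr_state_t
zil_choose_wr_state(zilog_t *zilog, uint64_t resid)
{
	/*
	 * A WR_COPIED record embeds its data, so the whole record must
	 * fit in one max-sized log block; larger writes must fall back
	 * to WR_NEED_COPY, which may be split across log blocks.
	 */
	if (resid > zil_max_copied_data(zilog))
		return (WR_NEED_COPY);
	return (WR_COPIED);
}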
typedef struct zil_bp_node {
avl_node_t zn_node;
} zil_bp_node_t;
-/*
- * Maximum amount of write data that can be put into single log block.
- */
-#define ZIL_MAX_LOG_DATA (SPA_OLD_MAXBLOCKSIZE - sizeof (zil_chain_t) - \
- sizeof (lr_write_t))
-
-/*
- * Maximum amount of log space we agree to waste to reduce number of
- * WR_NEED_COPY chunks to reduce zl_get_data() overhead (~12%).
- */
-#define ZIL_MAX_WASTE_SPACE (ZIL_MAX_LOG_DATA / 8)
-
-/*
- * Maximum amount of write data for WR_COPIED. Fall back to WR_NEED_COPY
- * as more space efficient if we can't fit at least two log records into
- * maximum sized log block.
- */
-#define ZIL_MAX_COPIED_DATA ((SPA_OLD_MAXBLOCKSIZE - \
- sizeof (zil_chain_t)) / 2 - sizeof (lr_write_t))
-
#ifdef __cplusplus
}
#endif
UINT64_MAX
};
+/*
+ * Maximum block size used by the ZIL. This tunable is read once, when a
+ * ZIL is initialized; after that, consult the per-zilog copy in
+ * zl_max_block_size rather than this variable directly.
+ */
+int zil_maxblocksize = SPA_OLD_MAXBLOCKSIZE;
+
/*
* Start a log block write and advance to the next log block.
* Calls are serialized.
zil_blksz = zilog->zl_cur_used + sizeof (zil_chain_t);
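	/*
	 * Editorial note: the loop below selects the smallest entry in
	 * zil_block_buckets[] that can hold the expected data; the
	 * UINT64_MAX sentinel guarantees termination. The result is then
	 * clamped to this zilog's zl_max_block_size (replacing the old
	 * special case that capped the sentinel at SPA_OLD_MAXBLOCKSIZE),
	 * and finally raised to the largest of the recently used sizes
	 * in zl_prev_blks[] so the block size does not shrink too eagerly.
	 */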
for (i = 0; zil_blksz > zil_block_buckets[i]; i++)
continue;
- zil_blksz = zil_block_buckets[i];
- if (zil_blksz == UINT64_MAX)
- zil_blksz = SPA_OLD_MAXBLOCKSIZE;
+ zil_blksz = MIN(zil_block_buckets[i], zilog->zl_max_block_size);
zilog->zl_prev_blks[zilog->zl_prev_rotor] = zil_blksz;
for (i = 0; i < ZIL_PREV_BLKS; i++)
zil_blksz = MAX(zil_blksz, zilog->zl_prev_blks[i]);
return (nlwb);
}
+/*
+ * Maximum amount of write data that can be put into a single log block.
+ */
+uint64_t
+zil_max_log_data(zilog_t *zilog)
+{
+ return (zilog->zl_max_block_size -
+ sizeof (zil_chain_t) - sizeof (lr_write_t));
+}
+
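/*
 * Editorial worked example: with the default 128K zl_max_block_size
 * (SPA_OLD_MAXBLOCKSIZE == 131072), and assuming the typical struct
 * sizes sizeof (zil_chain_t) == 184 and sizeof (lr_write_t) == 192,
 * zil_max_log_data() returns 131072 - 184 - 192 = 130696 bytes.
 */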
+/*
+ * Maximum amount of log space we agree to waste in order to reduce the
+ * number of WR_NEED_COPY chunks and thereby cut zl_get_data() overhead
+ * (1/8 of the block, i.e. ~12%).
+ */
+static inline uint64_t
+zil_max_waste_space(zilog_t *zilog)
+{
+ return (zil_max_log_data(zilog) / 8);
+}
+
+/*
+ * Maximum amount of write data for WR_COPIED. For correctness, consumers
+ * must fall back to WR_NEED_COPY if we can't fit the entire record into one
+ * maximum sized log block, because each WR_COPIED record must fit in a
+ * single log block. For space efficiency, we want to fit two records into a
+ * max-sized log block.
+ */
+uint64_t
+zil_max_copied_data(zilog_t *zilog)
+{
+ return ((zilog->zl_max_block_size - sizeof (zil_chain_t)) / 2 -
+ sizeof (lr_write_t));
+}
+
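/*
 * Editorial worked example, under the same assumptions as above (128K
 * zl_max_block_size, sizeof (zil_chain_t) == 184,
 * sizeof (lr_write_t) == 192):
 *
 *	zil_max_copied_data() = (131072 - 184) / 2 - 192 = 65252
 *
 * Two such records, 2 * (192 + 65252) = 130888 bytes, exactly fill the
 * usable space of a max-sized log block.
 */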
static lwb_t *
zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
{
lr_t *lrcb, *lrc;
lr_write_t *lrwb, *lrw;
char *lr_buf;
- uint64_t dlen, dnow, lwb_sp, reclen, txg;
+ uint64_t dlen, dnow, lwb_sp, reclen, txg, max_log_data;
ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
ASSERT3P(lwb, !=, NULL);
* For WR_NEED_COPY optimize layout for minimal number of chunks.
*/
lwb_sp = lwb->lwb_sz - lwb->lwb_nused;
+ max_log_data = zil_max_log_data(zilog);
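	/*
	 * Editorial note: the test below issues the current lwb early if
	 * the record header alone no longer fits, or if the full record
	 * does not fit, the space that would be stranded is below
	 * zil_max_waste_space(), and starting the WR_NEED_COPY split in
	 * this lwb would not reduce the number of chunks (dlen %
	 * max_log_data is the size of the final partial chunk after
	 * splitting into max_log_data-sized pieces).
	 */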
if (reclen > lwb_sp || (reclen + dlen > lwb_sp &&
- lwb_sp < ZIL_MAX_WASTE_SPACE && (dlen % ZIL_MAX_LOG_DATA == 0 ||
- lwb_sp < reclen + dlen % ZIL_MAX_LOG_DATA))) {
+ lwb_sp < zil_max_waste_space(zilog) &&
+ (dlen % max_log_data == 0 ||
+ lwb_sp < reclen + dlen % max_log_data))) {
lwb = zil_lwb_write_issue(zilog, lwb);
if (lwb == NULL)
return (NULL);
zil_lwb_write_open(zilog, lwb);
ASSERT(LWB_EMPTY(lwb));
lwb_sp = lwb->lwb_sz - lwb->lwb_nused;
+
+ /*
+ * There must be enough space in the new, empty log block to
+ * hold reclen. For WR_COPIED, we need to fit the whole
+ * record in one block, and reclen is the header size + the
+ * data size. For WR_NEED_COPY, we can create multiple
+ * records, splitting the data into multiple blocks, so we
+ * only need to fit one word of data per block; in this case
+ * reclen is just the header size (no data).
+ */
ASSERT3U(reclen + MIN(dlen, sizeof (uint64_t)), <=, lwb_sp);
}
zilog->zl_dirty_max_txg = 0;
zilog->zl_last_lwb_opened = NULL;
zilog->zl_last_lwb_latency = 0;
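	/*
	 * Editorial note: the tunable is sampled once per zilog, below;
	 * each zilog keeps its own copy for its lifetime, which is what
	 * lets consumers rely on zl_max_block_size not changing while
	 * the ZIL is in use (see the comment in zil_impl.h).
	 */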
+ zilog->zl_max_block_size = zil_maxblocksize;
mutex_init(&zilog->zl_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&zilog->zl_issuer_lock, NULL, MUTEX_DEFAULT, NULL);
module_param(zil_slog_bulk, ulong, 0644);
MODULE_PARM_DESC(zil_slog_bulk, "Limit in bytes slog sync writes per commit");
+
+module_param(zil_maxblocksize, int, 0644);
+MODULE_PARM_DESC(zil_maxblocksize, "Limit in bytes of ZIL log block size");
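/*
 * Editorial example: with the 0644 permissions above, the tunable can be
 * set at module load time or adjusted at runtime through sysfs, e.g.:
 *
 *	# modprobe zfs zil_maxblocksize=65536
 *	# echo 65536 > /sys/module/zfs/parameters/zil_maxblocksize
 *
 * A new value only affects zilogs allocated after the change, since it
 * is copied into zl_max_block_size at allocation time.
 */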
/* END CSTYLED */
#endif