From ad434473ebd2d24dcf400896ac1539676009af08 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Fri, 7 Mar 2008 23:20:21 +0000
Subject: [PATCH] This patch addresses some issues in TOAST compression
 strategy that were discussed last year, but we felt it was too late in the
 8.3 cycle to change the code immediately.  Specifically, the patch:

* Reduces the minimum datum size to be considered for compression from
256 to 32 bytes, as suggested by Greg Stark.

* Increases the required compression rate for compressed storage from
20% to 25%, again per Greg's suggestion.

* Replaces force_input_size (size above which compression is forced)
with a maximum size to be considered for compression.  It was agreed
that allowing large inputs to escape the minimum-compression-rate
requirement was not bright, and that indeed we'd rather have a knob
that acted in the other direction.  I set this value to 1MB for the
moment, but it could use some performance studies to tune it.

* Adds an early-failure path to the compressor as suggested by Jan:
if it's been unable to find even one compressible substring in the
first 1KB (parameterizable), assume we're looking at incompressible
input and give up.  (Possibly this logic can be improved, but I'll
commit it as-is for now.)

* Improves the toasting heuristics so that when we have very large
fields with attstorage 'x' or 'e', we will push those out to toast
storage before considering inline compression of shorter fields.
This also responds to a suggestion of Greg's, though my original
proposal for a solution was a bit off base because it didn't fix
the problem for large 'e' fields.

There was some discussion in the earlier threads of exposing some
of the compression knobs to users, perhaps even on a per-column
basis.  I have not done anything about that here.  It seems to me
that if we are changing around the parameters, we'd better get some
experience and be sure we are happy with the design before we set
things in stone by providing user-visible knobs.
---
 src/backend/access/heap/tuptoaster.c  |  92 +++++++++++++++-------
 src/backend/utils/adt/pg_lzcompress.c | 106 ++++++++++++++------------
 src/include/utils/pg_lzcompress.h     |  45 +++++------
 3 files changed, 141 insertions(+), 102 deletions(-)

diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index 3ace204249..40f5d72056 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.83 2008/02/29 17:47:41 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.84 2008/03/07 23:20:21 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -576,7 +576,8 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 	/* ----------
 	 * Compress and/or save external until data fits into target length
 	 *
-	 *	1: Inline compress attributes with attstorage 'x'
+	 *	1: Inline compress attributes with attstorage 'x', and store very
+	 *	   large attributes with attstorage 'x' or 'e' external immediately
 	 *	2: Store attributes with attstorage 'x' or 'e' external
 	 *	3: Inline compress attributes with attstorage 'm'
 	 *	4: Store attributes with attstorage 'm' external
@@ -595,7 +596,8 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 	maxDataLen = TOAST_TUPLE_TARGET - hoff;
 
 	/*
-	 * Look for attributes with attstorage 'x' to compress
+	 * Look for attributes with attstorage 'x' to compress.  Also find large
+	 * attributes with attstorage 'x' or 'e', and store them external.
 	 */
 	while (heap_compute_data_size(tupleDesc,
 								  toast_values, toast_isnull) > maxDataLen)
@@ -606,7 +608,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 		Datum		new_value;
 
 		/*
-		 * Search for the biggest yet uncompressed internal attribute
+		 * Search for the biggest yet unprocessed internal attribute
 		 */
 		for (i = 0; i < numAttrs; i++)
 		{
@@ -616,7 +618,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 				continue;		/* can't happen, toast_action would be 'p' */
 			if (VARATT_IS_COMPRESSED(toast_values[i]))
 				continue;
-			if (att[i]->attstorage != 'x')
+			if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
 				continue;
 			if (toast_sizes[i] > biggest_size)
 			{
@@ -629,30 +631,58 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 			break;
 
 		/*
-		 * Attempt to compress it inline
+		 * Attempt to compress it inline, if it has attstorage 'x'
 		 */
 		i = biggest_attno;
-		old_value = toast_values[i];
-		new_value = toast_compress_datum(old_value);
+		if (att[i]->attstorage == 'x')
+		{
+			old_value = toast_values[i];
+			new_value = toast_compress_datum(old_value);
 
-		if (DatumGetPointer(new_value) != NULL)
+			if (DatumGetPointer(new_value) != NULL)
+			{
+				/* successful compression */
+				if (toast_free[i])
+					pfree(DatumGetPointer(old_value));
+				toast_values[i] = new_value;
+				toast_free[i] = true;
+				toast_sizes[i] = VARSIZE(toast_values[i]);
+				need_change = true;
+				need_free = true;
+			}
+			else
+			{
+				/* incompressible, ignore on subsequent compression passes */
+				toast_action[i] = 'x';
+			}
+		}
+		else
 		{
-			/* successful compression */
+			/* has attstorage 'e', ignore on subsequent compression passes */
+			toast_action[i] = 'x';
+		}
+
+		/*
+		 * If this value is by itself more than maxDataLen (after compression
+		 * if any), push it out to the toast table immediately, if possible.
+		 * This avoids uselessly compressing other fields in the common case
+		 * where we have one long field and several short ones.
+		 *
+		 * XXX maybe the threshold should be less than maxDataLen?
+		 */
+		if (toast_sizes[i] > maxDataLen &&
+			rel->rd_rel->reltoastrelid != InvalidOid)
+		{
+			old_value = toast_values[i];
+			toast_action[i] = 'p';
+			toast_values[i] = toast_save_datum(rel, toast_values[i],
+											   use_wal, use_fsm);
 			if (toast_free[i])
 				pfree(DatumGetPointer(old_value));
-			toast_values[i] = new_value;
 			toast_free[i] = true;
-			toast_sizes[i] = VARSIZE(toast_values[i]);
 			need_change = true;
 			need_free = true;
 		}
-		else
-		{
-			/*
-			 * incompressible data, ignore on subsequent compression passes
-			 */
-			toast_action[i] = 'x';
-		}
 	}
 
 	/*
@@ -761,9 +791,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 		}
 		else
 		{
-			/*
-			 * incompressible data, ignore on subsequent compression passes
-			 */
+			/* incompressible, ignore on subsequent compression passes */
 			toast_action[i] = 'x';
 		}
 	}
@@ -1047,16 +1075,28 @@ toast_compress_datum(Datum value)
 	Assert(!VARATT_IS_COMPRESSED(value));
 
 	/*
-	 * No point in wasting a palloc cycle if value is too short for
-	 * compression
+	 * No point in wasting a palloc cycle if value size is out of the
+	 * allowed range for compression
 	 */
-	if (valsize < PGLZ_strategy_default->min_input_size)
+	if (valsize < PGLZ_strategy_default->min_input_size ||
+		valsize > PGLZ_strategy_default->max_input_size)
 		return PointerGetDatum(NULL);
 
 	tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize));
+
+	/*
+	 * We recheck the actual size even if pglz_compress() reports success,
+	 * because it might be satisfied with having saved as little as one byte
+	 * in the compressed data --- which could turn into a net loss once you
+	 * consider header and alignment padding.  Worst case, the compressed
+	 * format might require three padding bytes (plus header, which is included
+	 * in VARSIZE(tmp)), whereas the uncompressed format would take only one
+	 * header byte and no padding if the value is short enough.  So we insist
+	 * on a savings of more than 2 bytes to ensure we have a gain.
+	 */
 	if (pglz_compress(VARDATA_ANY(value), valsize,
 					  (PGLZ_Header *) tmp, PGLZ_strategy_default) &&
-		VARSIZE(tmp) < VARSIZE_ANY(value))
+		VARSIZE(tmp) < valsize - 2)
 	{
 		/* successful compression */
 		return PointerGetDatum(tmp);
diff --git a/src/backend/utils/adt/pg_lzcompress.c b/src/backend/utils/adt/pg_lzcompress.c
index a7cd8f6eae..0716b25298 100644
--- a/src/backend/utils/adt/pg_lzcompress.c
+++ b/src/backend/utils/adt/pg_lzcompress.c
@@ -4,7 +4,7 @@
  *		This is an implementation of LZ compression for PostgreSQL.
  *		It uses a simple history table and generates 2-3 byte tags
  *		capable of backward copy information for 3-273 bytes with
- *		an offset of max. 4095.
+ *		a max offset of 4095.
  *
  *		Entry routines:
  *
@@ -166,13 +166,12 @@
  *
  * Copyright (c) 1999-2008, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/backend/utils/adt/pg_lzcompress.c,v 1.29 2008/01/01 19:45:52 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/pg_lzcompress.c,v 1.30 2008/03/07 23:20:21 tgl Exp $
  * ----------
  */
 #include "postgres.h"
 
-#include <unistd.h>
-#include <fcntl.h>
+#include <limits.h>
 
 #include "utils/pg_lzcompress.h"
 
@@ -211,27 +210,23 @@ typedef struct PGLZ_HistEntry
  * ----------
  */
 static const PGLZ_Strategy strategy_default_data = {
-	256,						/* Data chunks less than 256 bytes are not
-								 * compressed */
-	6144,						/* Data chunks >= 6K force compression, unless
-								 * compressed output is larger than input */
-	20,							/* Below 6K, compression rates below 20% mean
-								 * fallback to uncompressed */
-	128,						/* Stop history lookup if a match of 128 bytes
-								 * is found */
-	10							/* Lower good match size by 10% at every
-								 * lookup loop iteration */
+	32,				/* Data chunks less than 32 bytes are not compressed */
+	1024 * 1024,	/* Data chunks over 1MB are not compressed either */
+	25,				/* Require 25% compression rate, or not worth it */
+	1024,			/* Give up if no compression in the first 1KB */
+	128,			/* Stop history lookup if a match of 128 bytes is found */
+	10				/* Lower good match size by 10% at every loop iteration */
 };
 const PGLZ_Strategy *const PGLZ_strategy_default = &strategy_default_data;
 
 
 static const PGLZ_Strategy strategy_always_data = {
-	0,							/* Chunks of any size are compressed */
-	0,
-	0,							/* It's enough to save one single byte */
-	128,						/* Stop history lookup if a match of 128 bytes
-								 * is found */
-	6							/* Look harder for a good match */
+	0,				/* Chunks of any size are compressed */
+	INT_MAX,
+	0,				/* It's enough to save one single byte */
+	INT_MAX,		/* Never give up early */
+	128,			/* Stop history lookup if a match of 128 bytes is found */
+	6				/* Look harder for a good match */
 };
 const PGLZ_Strategy *const PGLZ_strategy_always = &strategy_always_data;
 
@@ -491,6 +486,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 	unsigned char *ctrlp = &ctrl_dummy;
 	unsigned char ctrlb = 0;
 	unsigned char ctrl = 0;
+	bool		found_match = false;
 	int32		match_len;
 	int32		match_off;
 	int32		good_match;
@@ -506,11 +502,12 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 		strategy = PGLZ_strategy_default;
 
 	/*
-	 * If the strategy forbids compression (at all or if source chunk too
-	 * small), fail.
+	 * If the strategy forbids compression (at all or if source chunk size
+	 * out of range), fail.
 	 */
 	if (strategy->match_size_good <= 0 ||
-		slen < strategy->min_input_size)
+		slen < strategy->min_input_size ||
+		slen > strategy->max_input_size)
 		return false;
 
 	/*
@@ -519,41 +516,44 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 	dest->rawsize = slen;
 
 	/*
-	 * Limit the match size to the maximum implementation allowed value
+	 * Limit the match parameters to the supported range.
 	 */
-	if ((good_match = strategy->match_size_good) > PGLZ_MAX_MATCH)
+	good_match = strategy->match_size_good;
+	if (good_match > PGLZ_MAX_MATCH)
 		good_match = PGLZ_MAX_MATCH;
-	if (good_match < 17)
+	else if (good_match < 17)
 		good_match = 17;
 
-	if ((good_drop = strategy->match_size_drop) < 0)
+	good_drop = strategy->match_size_drop;
+	if (good_drop < 0)
 		good_drop = 0;
-	if (good_drop > 100)
+	else if (good_drop > 100)
 		good_drop = 100;
 
-	/*
-	 * Initialize the history lists to empty.  We do not need to zero the
-	 * hist_entries[] array; its entries are initialized as they are used.
-	 */
-	memset((void *) hist_start, 0, sizeof(hist_start));
+	need_rate = strategy->min_comp_rate;
+	if (need_rate < 0)
+		need_rate = 0;
+	else if (need_rate > 99)
+		need_rate = 99;
 
 	/*
-	 * Compute the maximum result size allowed by the strategy. If the input
-	 * size exceeds force_input_size, the max result size is the input size
-	 * itself. Otherwise, it is the input size minus the minimum wanted
-	 * compression rate.
+	 * Compute the maximum result size allowed by the strategy, namely
+	 * the input size minus the minimum wanted compression rate.  This had
+	 * better be <= slen, else we might overrun the provided output buffer.
 	 */
-	if (slen >= strategy->force_input_size)
-		result_max = slen;
-	else
+	if (slen > (INT_MAX/100))
 	{
-		need_rate = strategy->min_comp_rate;
-		if (need_rate < 0)
-			need_rate = 0;
-		else if (need_rate > 99)
-			need_rate = 99;
-		result_max = slen - ((slen * need_rate) / 100);
+		/* Approximate to avoid overflow */
+		result_max = (slen / 100) * (100 - need_rate);
 	}
+	else
+		result_max = (slen * (100 - need_rate)) / 100;
+
+	/*
+	 * Initialize the history lists to empty.  We do not need to zero the
+	 * hist_entries[] array; its entries are initialized as they are used.
+	 */
+	memset(hist_start, 0, sizeof(hist_start));
 
 	/*
 	 * Compress the source directly into the output buffer.
@@ -570,6 +570,15 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 		if (bp - bstart >= result_max)
 			return false;
 
+		/*
+		 * If we've emitted more than first_success_by bytes without finding
+		 * anything compressible at all, fail.  This lets us fall out
+		 * reasonably quickly when looking at incompressible input (such as
+		 * pre-compressed data).
+		 */
+		if (!found_match && bp - bstart >= strategy->first_success_by)
+			return false;
+
 		/*
 		 * Try to find a match in the history
 		 */
@@ -586,9 +595,10 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 				pglz_hist_add(hist_start, hist_entries,
 							  hist_next, hist_recycle,
 							  dp, dend);
-				dp++;			/* Do not do this ++ in the line above!		*/
+				dp++;			/* Do not do this ++ in the line above! */
 				/* The macro would do it four times - Jan.	*/
 			}
+			found_match = true;
 		}
 		else
 		{
@@ -599,7 +609,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 			pglz_hist_add(hist_start, hist_entries,
 						  hist_next, hist_recycle,
 						  dp, dend);
-			dp++;				/* Do not do this ++ in the line above!		*/
+			dp++;				/* Do not do this ++ in the line above! */
 			/* The macro would do it four times - Jan.	*/
 		}
 	}
diff --git a/src/include/utils/pg_lzcompress.h b/src/include/utils/pg_lzcompress.h
index a3c49ae7a7..e81ae0d5ca 100644
--- a/src/include/utils/pg_lzcompress.h
+++ b/src/include/utils/pg_lzcompress.h
@@ -3,7 +3,7 @@
  *
  *	Definitions for the builtin LZ compressor
  *
- * $PostgreSQL: pgsql/src/include/utils/pg_lzcompress.h,v 1.16 2007/11/15 21:14:45 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/pg_lzcompress.h,v 1.17 2008/03/07 23:20:21 tgl Exp $
  * ----------
  */
 
@@ -14,7 +14,7 @@
 /* ----------
  * PGLZ_Header -
  *
- *		The information at the top of the compressed data.
+ *		The information at the start of the compressed data.
  * ----------
  */
 typedef struct PGLZ_Header
@@ -48,19 +48,17 @@ typedef struct PGLZ_Header
  *
  *		Some values that control the compression algorithm.
  *
- *		min_input_size		Minimum input data size to start compression.
+ *		min_input_size		Minimum input data size to consider compression.
  *
- *		force_input_size	Minimum input data size to force compression
- *							even if the compression rate drops below
- *							min_comp_rate.	But in any case the output
- *							must be smaller than the input.  If that isn't
- *							the case, the compressor will throw away its
- *							output and copy the original, uncompressed data
- *							to the output buffer.
+ *		max_input_size		Maximum input data size to consider compression.
  *
- *		min_comp_rate		Minimum compression rate (0-99%) to require for
- *							inputs smaller than force_input_size.  If not
- *							achieved, the output will be uncompressed.
+ *		min_comp_rate		Minimum compression rate (0-99%) to require.
+ *							Regardless of min_comp_rate, the output must be
+ *							smaller than the input, else we don't store
+ *							compressed.
+ *
+ *		first_success_by	Abandon compression if we find no compressible
+ *							data within the first this-many bytes.
  *
  *		match_size_good		The initial GOOD match size when starting history
  *							lookup. When looking up the history to find a
@@ -81,8 +79,9 @@ typedef struct PGLZ_Header
 typedef struct PGLZ_Strategy
 {
 	int32		min_input_size;
-	int32		force_input_size;
+	int32		max_input_size;
 	int32		min_comp_rate;
+	int32		first_success_by;
 	int32		match_size_good;
 	int32		match_size_drop;
 } PGLZ_Strategy;
@@ -91,21 +90,11 @@ typedef struct PGLZ_Strategy
 /* ----------
  * The standard strategies
  *
- *		PGLZ_strategy_default		Starts compression only if input is
- *									at least 256 bytes large. Stores output
- *									uncompressed if compression does not
- *									gain at least 20% size reducture but
- *									input does not exceed 6K. Stops history
- *									lookup if at least a 128 byte long
- *									match has been found.
- *
- *									This is the default strategy if none
- *									is given to pglz_compress().
+ *		PGLZ_strategy_default		Recommended default strategy for TOAST.
  *
- *		PGLZ_strategy_always		Starts compression on any infinitely
- *									small input and does fallback to
- *									uncompressed storage only if output
- *									would be larger than input.
+ *		PGLZ_strategy_always		Try to compress inputs of any length.
+ *									Fallback to uncompressed storage only if
+ *									output would be larger than input.
  * ----------
  */
 extern const PGLZ_Strategy *const PGLZ_strategy_default;
-- 
2.40.0