From ffae5cc5a6024b4e25ec920ed5c4dfac649605f8 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 25 Sep 2006 22:01:10 +0000
Subject: [PATCH] Add a check to prevent overwriting valid data if
 smgrnblocks() gives a wrong answer, as has been seen to occur with a buggy
 Linux kernel.  Not really our bug, but it's a simple test in a seldom-used
 control path, so might as well have a defense.

---
 src/backend/storage/buffer/bufmgr.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 147b2ef8d8..aaa0dff684 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.210 2006/09/17 22:16:22 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.211 2006/09/25 22:01:10 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -175,9 +175,26 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
 		/*
 		 * We get here only in the corner case where we are trying to extend
 		 * the relation but we found a pre-existing buffer marked BM_VALID.
-		 * (This can happen because mdread doesn't complain about reads
-		 * beyond EOF --- which is arguably bogus, but changing it seems
-		 * tricky.)  We *must* do smgrextend before succeeding, else the
+		 * This can happen because mdread doesn't complain about reads beyond
+		 * EOF --- which is arguably bogus, but changing it seems tricky ---
+		 * and so a previous attempt to read a block just beyond EOF could
+		 * have left a "valid" zero-filled buffer.  Unfortunately, we have
+		 * also seen this case occurring because of buggy Linux kernels that
+		 * sometimes return an lseek(SEEK_END) result that doesn't account for
+		 * a recent write.  In that situation, the pre-existing buffer would
+		 * contain valid data that we don't want to overwrite.  Since the
+		 * legitimate cases should always have left a zero-filled buffer,
+		 * complain if not PageIsNew.
+		 */
+		bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
+		if (!PageIsNew((PageHeader) bufBlock))
+			ereport(ERROR,
+					(errmsg("unexpected data beyond EOF in block %u of relation \"%s\"",
+							blockNum, RelationGetRelationName(reln)),
+					 errhint("This has been seen to occur with buggy kernels; consider updating your system.")));
+
+		/*
+		 * We *must* do smgrextend before succeeding, else the
 		 * page will not be reserved by the kernel, and the next P_NEW call
 		 * will decide to return the same page.  Clear the BM_VALID bit,
 		 * do the StartBufferIO call that BufferAlloc didn't, and proceed.
-- 
2.49.0