1 /*-------------------------------------------------------------------------
4 * This code manages relations that reside on magnetic disk.
6 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.116 2005/06/20 18:37:01 tgl Exp $
13 *-------------------------------------------------------------------------
22 #include "catalog/catalog.h"
23 #include "miscadmin.h"
24 #include "postmaster/bgwriter.h"
25 #include "storage/fd.h"
26 #include "storage/smgr.h"
27 #include "utils/hsearch.h"
28 #include "utils/memutils.h"
32 * The magnetic disk storage manager keeps track of open file
33 * descriptors in its own descriptor pool. This is done to make it
34 * easier to support relations that are larger than the operating
35 * system's file size limit (often 2GBytes). In order to do that,
36 * we break relations up into chunks of < 2GBytes and store one chunk
37 * in each of several files that represent the relation. See the
38 * BLCKSZ and RELSEG_SIZE configuration constants in pg_config_manual.h.
39 * All chunks except the last MUST have size exactly equal to RELSEG_SIZE
40 * blocks --- see mdnblocks() and mdtruncate().
42 * The file descriptor pointer (md_fd field) stored in the SMgrRelation
43 * cache is, therefore, just the head of a list of MdfdVec objects.
44 * But note the md_fd pointer can be NULL, indicating relation not open.
46 * Note that mdfd_chain == NULL does not necessarily mean the relation
47 * doesn't have another segment after this one; we may just not have
48 * opened the next segment yet. (We could not have "all segments are
49 * in the chain" as an invariant anyway, since another backend could
50 * extend the relation when we weren't looking.)
52 * All MdfdVec objects are palloc'd in the MdCxt memory context.
/*
 * MdfdVec: descriptor for one open segment file of a relation.  Each entry
 * carries the fd.c file handle and the segment number (counted from 0);
 * when LET_OS_MANAGE_FILESIZE is not defined, entries also carry a link to
 * the entry for the next segment, or NULL.
 */
55 typedef struct _MdfdVec
57 File mdfd_vfd; /* fd number in fd.c's pool */
58 BlockNumber mdfd_segno; /* segment number, from 0 */
59 #ifndef LET_OS_MANAGE_FILESIZE /* for large relations */
60 struct _MdfdVec *mdfd_chain; /* next segment, or NULL */
64 static MemoryContext MdCxt; /* context for all md.c allocations */
68 * In some contexts (currently, standalone backends and the bgwriter process)
69 * we keep track of pending fsync operations: we need to remember all relation
70 * segments that have been written since the last checkpoint, so that we can
71 * fsync them down to disk before completing the next checkpoint. This hash
72 * table remembers the pending operations. We use a hash table not because
73 * we want to look up individual operations, but simply as a convenient way
74 * of eliminating duplicate requests.
76 * (Regular backends do not track pending operations locally, but forward
77 * them to the bgwriter.)
79 * XXX for WIN32, may want to expand this to track pending deletes, too.
/*
 * PendingOperationEntry: names one relation segment that has an fsync
 * pending.  mdinit sets both keysize and entrysize of the pending-ops hash
 * table to sizeof(PendingOperationEntry), so the whole struct serves as the
 * hash key; code that builds an entry zeroes it first for that reason.
 */
83 RelFileNode rnode; /* the targeted relation */
84 BlockNumber segno; /* which segment */
85 } PendingOperationEntry;
87 static HTAB *pendingOpsTable = NULL;
91 static MdfdVec *mdopen(SMgrRelation reln, bool allowNotFound);
92 static bool register_dirty_segment(SMgrRelation reln, MdfdVec *seg);
93 static MdfdVec *_fdvec_alloc(void);
95 #ifndef LET_OS_MANAGE_FILESIZE
96 static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno,
99 static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno,
101 static BlockNumber _mdnblocks(File file, Size blcksz);
/*
 * Review summary for mdinit():
 * Creates the MdCxt memory context as a child of TopMemoryContext using the
 * default allocation-set sizes.  When the process is not under a postmaster,
 * or is in bootstrap processing mode, it also creates pendingOpsTable with
 * its storage in MdCxt.  The key size equals the entry size, i.e. the whole
 * PendingOperationEntry is the key, and tag_hash is installed as the hash
 * function.
 */
105 * mdinit() -- Initialize private state for magnetic disk storage manager.
110 MdCxt = AllocSetContextCreate(TopMemoryContext,
112 ALLOCSET_DEFAULT_MINSIZE,
113 ALLOCSET_DEFAULT_INITSIZE,
114 ALLOCSET_DEFAULT_MAXSIZE);
117 * Create pending-operations hashtable if we need it. Currently, we
118 * need it if we are standalone (not under a postmaster) OR if we are
119 * a bootstrap-mode subprocess of a postmaster (that is, a startup or
122 if (!IsUnderPostmaster || IsBootstrapProcessingMode())
126 MemSet(&hash_ctl, 0, sizeof(hash_ctl));
127 hash_ctl.keysize = sizeof(PendingOperationEntry);
128 hash_ctl.entrysize = sizeof(PendingOperationEntry);
129 hash_ctl.hash = tag_hash;
130 hash_ctl.hcxt = MdCxt;
131 pendingOpsTable = hash_create("Pending Ops Table",
134 HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
/*
 * Review summary for mdcreate(reln, isRedo):
 *   reln   - relation whose first segment file is to be created; its md_fd
 *            is asserted to be NULL unless the isRedo shortcut applies.
 *   isRedo - when true, a relation that is already open returns true at
 *            once, and an already-existing file is accepted.
 * The file is first opened with O_CREAT | O_EXCL.  errno from that attempt
 * is saved; in redo or bootstrap mode a plain open without those flags is
 * then tried, and the saved errno is the one meant to reach the caller.
 * On success a new MdfdVec for segment 0 is allocated with _fdvec_alloc and
 * stored in reln->md_fd.
 */
141 * mdcreate() -- Create a new relation on magnetic disk.
143 * If isRedo is true, it's okay for the relation to exist already.
146 mdcreate(SMgrRelation reln, bool isRedo)
151 if (isRedo && reln->md_fd != NULL)
152 return true; /* created and opened already... */
154 Assert(reln->md_fd == NULL);
156 path = relpath(reln->smgr_rnode);
158 fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
162 int save_errno = errno;
165 * During bootstrap, there are cases where a system relation will
166 * be accessed (by internal backend processes) before the
167 * bootstrap script nominally creates it. Therefore, allow the
168 * file to exist already, even if isRedo is not set. (See also
171 if (isRedo || IsBootstrapProcessingMode())
172 fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600);
176 /* be sure to return the error reported by create, not open */
185 reln->md_fd = _fdvec_alloc();
187 reln->md_fd->mdfd_vfd = fd;
188 reln->md_fd->mdfd_segno = 0;
189 #ifndef LET_OS_MANAGE_FILESIZE
190 reln->md_fd->mdfd_chain = NULL;
/*
 * Review summary for mdunlink(rnode, isRedo):
 *   rnode  - identifies the relation whose files are removed.
 *   isRedo - when true, ENOENT on the first file is not treated as a
 *            failure.
 * The base file is unlinked first.  In the segmented build the code then
 * unlinks path.1, path.2, ... in an open-ended loop; the loop exit is not
 * visible in this extract, but the in-body comment indicates that ENOENT
 * marks the end.
 * The segpath buffer of strlen(path) + 12 bytes has room for the dot, up to
 * ten decimal digits and the terminating NUL, which suffices for %u as long
 * as segno is at most 32 bits wide -- TODO confirm.
 */
197 * mdunlink() -- Unlink a relation.
199 * Note that we're passed a RelFileNode --- by the time this is called,
200 * there won't be an SMgrRelation hashtable entry anymore.
202 * If isRedo is true, it's okay for the relation to be already gone.
205 mdunlink(RelFileNode rnode, bool isRedo)
211 path = relpath(rnode);
213 /* Delete the first segment, or only segment if not doing segmenting */
214 if (unlink(path) < 0)
216 if (!isRedo || errno != ENOENT)
223 #ifndef LET_OS_MANAGE_FILESIZE
224 /* Get the additional segments, if any */
227 char *segpath = (char *) palloc(strlen(path) + 12);
230 for (segno = 1;; segno++)
232 sprintf(segpath, "%s.%u", path, segno);
233 if (unlink(segpath) < 0)
235 /* ENOENT is expected after the last segment... */
/*
 * Review summary for mdextend(reln, blocknum, buffer, isTemp):
 *   reln     - relation being extended.
 *   blocknum - block position to write at; expected to be the current EOF.
 *   buffer   - source of the BLCKSZ bytes that are written.
 *   isTemp   - not referenced in the lines visible in this extract.
 * Visible steps: locate the segment with _mdfd_getseg, compute the byte
 * offset inside that file, seek, and write one block.  On a short write the
 * code saves errno, truncates the file back to seekpos and seeks there
 * again; the results of that cleanup FileTruncate and FileSeek are not
 * checked (best effort).  Afterwards the segment is passed to
 * register_dirty_segment.
 * NOTE(review): in both seekpos assignments the multiplication by BLCKSZ
 * happens before the cast to long.  The second assignment does not reduce
 * blocknum modulo RELSEG_SIZE, so its product could wrap if the
 * multiplication is carried out in 32 bits -- TODO confirm the types of
 * BLCKSZ and BlockNumber.
 */
255 * mdextend() -- Add a block to the specified relation.
257 * The semantics are basically the same as mdwrite(): write at the
258 * specified position. However, we are expecting to extend the
259 * relation (ie, blocknum is the current EOF), and so in case of
260 * failure we clean up by truncating.
262 * This routine returns true or false, with errno set as appropriate.
264 * Note: this routine used to call mdnblocks() to get the block position
265 * to write at, but that's pretty silly since the caller needs to know where
266 * the block will be written, and accordingly must have done mdnblocks()
267 * already. Might as well pass in the position and save a seek.
270 mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
276 v = _mdfd_getseg(reln, blocknum, false);
278 #ifndef LET_OS_MANAGE_FILESIZE
279 seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
280 Assert(seekpos < BLCKSZ * RELSEG_SIZE);
282 seekpos = (long) (BLCKSZ * (blocknum));
286 * Note: because caller obtained blocknum by calling _mdnblocks, which
287 * did a seek(SEEK_END), this seek is often redundant and will be
288 * optimized away by fd.c. It's not redundant, however, if there is a
289 * partial page at the end of the file. In that case we want to try
290 * to overwrite the partial page with a full page. It's also not
291 * redundant if bufmgr.c had to dump another buffer of the same file
292 * to make room for the new page's buffer.
294 if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
297 if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
301 int save_errno = errno;
303 /* Remove the partially-written page */
304 FileTruncate(v->mdfd_vfd, seekpos);
305 FileSeek(v->mdfd_vfd, seekpos, SEEK_SET);
313 if (!register_dirty_segment(reln, v))
317 #ifndef LET_OS_MANAGE_FILESIZE
318 Assert(_mdnblocks(v->mdfd_vfd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
/*
 * Review summary for mdopen(reln, allowNotFound):
 *   reln          - relation to open; only its first segment file is opened.
 *   allowNotFound - when true, an ENOENT failure is tolerated rather than
 *                   reported through ereport (the header comment says NULL
 *                   is returned in that case).
 * The file is first opened without O_CREAT.  In bootstrap processing mode a
 * second open with O_CREAT | O_EXCL is attempted, so that mdopen can stand
 * in for mdcreate.  On success a fresh MdfdVec for segment 0 is allocated
 * with _fdvec_alloc and stored in reln->md_fd; in the segmented build the
 * file is asserted to hold at most RELSEG_SIZE blocks.
 */
325 * mdopen() -- Open the specified relation. ereport's on failure.
326 * (Optionally, can return NULL instead of ereport for ENOENT.)
328 * Note we only open the first segment, when there are multiple segments.
331 mdopen(SMgrRelation reln, bool allowNotFound)
337 /* No work if already open */
341 path = relpath(reln->smgr_rnode);
343 fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600);
348 * During bootstrap, there are cases where a system relation will
349 * be accessed (by internal backend processes) before the
350 * bootstrap script nominally creates it. Therefore, accept
351 * mdopen() as a substitute for mdcreate() in bootstrap mode only.
354 if (IsBootstrapProcessingMode())
355 fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
359 if (allowNotFound && errno == ENOENT)
362 (errcode_for_file_access(),
363 errmsg("could not open relation %u/%u/%u: %m",
364 reln->smgr_rnode.spcNode,
365 reln->smgr_rnode.dbNode,
366 reln->smgr_rnode.relNode)));
372 reln->md_fd = mdfd = _fdvec_alloc();
375 mdfd->mdfd_segno = 0;
376 #ifndef LET_OS_MANAGE_FILESIZE
377 mdfd->mdfd_chain = NULL;
378 Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
/*
 * Review summary for mdclose(reln):
 *   reln - relation whose open segment files are to be closed.
 * reln->md_fd is captured in v and then reset to NULL before any file is
 * closed, so that an error part-way through cannot leave the relation
 * pointing at entries that are being released.  A file handle is closed
 * only when it is non-negative.  Two copies of the close logic appear
 * below, selected by LET_OS_MANAGE_FILESIZE.
 */
385 * mdclose() -- Close the specified relation, if it isn't closed already.
387 * Returns true or false with errno set as appropriate.
390 mdclose(SMgrRelation reln)
392 MdfdVec *v = reln->md_fd;
394 /* No work if already closed */
398 reln->md_fd = NULL; /* prevent dangling pointer after error */
400 #ifndef LET_OS_MANAGE_FILESIZE
405 /* if not closed already */
406 if (v->mdfd_vfd >= 0)
407 FileClose(v->mdfd_vfd);
408 /* Now free vector */
413 if (v->mdfd_vfd >= 0)
414 FileClose(v->mdfd_vfd);
/*
 * Review summary for mdread(reln, blocknum, buffer):
 *   reln     - relation to read from.
 *   blocknum - block to fetch.
 *   buffer   - destination; BLCKSZ bytes are read into it.
 * The segment is located with _mdfd_getseg (allowNotFound = false), the
 * byte offset is computed and sought to, and one block is read.  When the
 * read returns something other than BLCKSZ bytes, the buffer is zero-filled
 * in the cases described by the in-body comment; the visible half of that
 * test (nbytes > 0 and mdnblocks(reln) == blocknum) is the partial block at
 * EOF case.
 * NOTE(review): in both seekpos assignments the multiplication by BLCKSZ
 * happens before the cast to long.  The second assignment does not reduce
 * blocknum modulo RELSEG_SIZE, so its product could wrap if the
 * multiplication is carried out in 32 bits -- TODO confirm.
 */
422 * mdread() -- Read the specified block from a relation.
425 mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
432 v = _mdfd_getseg(reln, blocknum, false);
434 #ifndef LET_OS_MANAGE_FILESIZE
435 seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
436 Assert(seekpos < BLCKSZ * RELSEG_SIZE);
438 seekpos = (long) (BLCKSZ * (blocknum));
441 if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
445 if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
448 * If we are at or past EOF, return zeroes without complaining.
449 * Also substitute zeroes if we found a partial block at EOF.
451 * XXX this is really ugly, bad design. However the current
452 * implementation of hash indexes requires it, because hash index
453 * pages are initialized out-of-order.
456 (nbytes > 0 && mdnblocks(reln) == blocknum))
457 MemSet(buffer, 0, BLCKSZ);
/*
 * Review summary for mdwrite(reln, blocknum, buffer, isTemp):
 *   reln     - relation to write to.
 *   blocknum - block position to overwrite.
 *   buffer   - source of the BLCKSZ bytes written.
 *   isTemp   - not referenced in the lines visible in this extract.
 * Locates the segment with _mdfd_getseg (allowNotFound = false), seeks to
 * the computed byte offset, writes one block and then passes the segment to
 * register_dirty_segment.  The seek, the write and the registration are
 * each tested for failure.
 * NOTE(review): in both seekpos assignments the multiplication by BLCKSZ
 * happens before the cast to long.  The second assignment does not reduce
 * blocknum modulo RELSEG_SIZE, so its product could wrap if the
 * multiplication is carried out in 32 bits -- TODO confirm.
 */
466 * mdwrite() -- Write the supplied block at the appropriate location.
469 mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
474 v = _mdfd_getseg(reln, blocknum, false);
476 #ifndef LET_OS_MANAGE_FILESIZE
477 seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
478 Assert(seekpos < BLCKSZ * RELSEG_SIZE);
480 seekpos = (long) (BLCKSZ * (blocknum));
483 if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
486 if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
491 if (!register_dirty_segment(reln, v))
/*
 * Review summary for mdnblocks(reln):
 *   reln - relation to measure; it is opened via mdopen(reln, false).
 * Returns the number of blocks, or InvalidBlockNumber when opening a
 * further segment fails.
 * Segmented variant: skips along mdfd_chain past segments that are not the
 * last one, then measures the file with _mdnblocks.  A file longer than
 * RELSEG_SIZE blocks raises a FATAL error; a shorter one ends the search
 * and the result is segno * RELSEG_SIZE + nblocks; a file of exactly
 * RELSEG_SIZE blocks makes the code open the following segment with O_CREAT
 * when it is not yet in the chain.
 * Non-segmented variant: returns _mdnblocks of the single file.
 * NOTE(review): the in-body comment spells the constant REL_SEGSIZE; the
 * code uses RELSEG_SIZE throughout.
 */
499 * mdnblocks() -- Get the number of blocks stored in a relation.
501 * Important side effect: all segments of the relation are opened
502 * and added to the mdfd_chain list. If this routine has not been
503 * called, then only segments up to the last one actually touched
504 * are present in the chain...
506 * Returns # of blocks, or InvalidBlockNumber on error.
509 mdnblocks(SMgrRelation reln)
511 MdfdVec *v = mdopen(reln, false);
513 #ifndef LET_OS_MANAGE_FILESIZE
515 BlockNumber segno = 0;
518 * Skip through any segments that aren't the last one, to avoid
519 * redundant seeks on them. We have previously verified that these
520 * segments are exactly RELSEG_SIZE long, and it's useless to recheck
521 * that each time. (NOTE: this assumption could only be wrong if
522 * another backend has truncated the relation. We rely on higher code
523 * levels to handle that scenario by closing and re-opening the md
526 while (v->mdfd_chain != NULL)
534 nblocks = _mdnblocks(v->mdfd_vfd, BLCKSZ);
535 if (nblocks > ((BlockNumber) RELSEG_SIZE))
536 elog(FATAL, "segment too big");
537 if (nblocks < ((BlockNumber) RELSEG_SIZE))
538 return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;
541 * If segment is exactly RELSEG_SIZE, advance to next one.
545 if (v->mdfd_chain == NULL)
548 * Because we pass O_CREAT, we will create the next segment
549 * (with zero length) immediately, if the last segment is of
550 * length REL_SEGSIZE. This is unnecessary but harmless, and
551 * testing for the case would take more cycles than it seems
554 v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT);
555 if (v->mdfd_chain == NULL)
556 return InvalidBlockNumber; /* failed? */
562 return _mdnblocks(v->mdfd_vfd, BLCKSZ);
/*
 * Review summary for mdtruncate(reln, nblocks, isTemp):
 *   reln    - relation to shorten.
 *   nblocks - desired length in blocks.
 *   isTemp  - not referenced in the lines visible in this extract.
 * Returns nblocks without further work when the relation already has that
 * length, and InvalidBlockNumber when mdnblocks fails, when nblocks exceeds
 * the current length, or when a checked FileTruncate or
 * register_dirty_segment call fails.
 * Segmented variant, per segment: a segment lying wholly past the new end
 * is truncated to zero length and unlinked; the segment that contains the
 * new end is truncated to lastsegblocks blocks, registered as dirty, and
 * the chain link to later segments is cleared; earlier segments are left
 * alone.  priorblocks advances by RELSEG_SIZE per segment.
 * NOTE(review): the result of FileTruncate(..., 0) ahead of FileUnlink is
 * not checked.  Also, lastsegblocks * BLCKSZ and nblocks * BLCKSZ are
 * multiplied before being handed to FileTruncate -- TODO confirm that the
 * nblocks * BLCKSZ product cannot wrap in the build without segmenting.
 */
567 * mdtruncate() -- Truncate relation to specified number of blocks.
569 * Returns # of blocks or InvalidBlockNumber on error.
572 mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
577 #ifndef LET_OS_MANAGE_FILESIZE
578 BlockNumber priorblocks;
582 * NOTE: mdnblocks makes sure we have opened all existing segments, so
583 * that truncate/delete loop will get them all!
585 curnblk = mdnblocks(reln);
586 if (curnblk == InvalidBlockNumber)
587 return InvalidBlockNumber; /* mdnblocks failed */
588 if (nblocks > curnblk)
589 return InvalidBlockNumber; /* bogus request */
590 if (nblocks == curnblk)
591 return nblocks; /* no work */
593 v = mdopen(reln, false);
595 #ifndef LET_OS_MANAGE_FILESIZE
601 if (priorblocks > nblocks)
604 * This segment is no longer wanted at all (and has already
605 * been unlinked from the mdfd_chain). We truncate the file
606 * before deleting it because if other backends are holding
607 * the file open, the unlink will fail on some platforms.
608 * Better a zero-size file gets left around than a big file...
610 FileTruncate(v->mdfd_vfd, 0);
611 FileUnlink(v->mdfd_vfd);
613 Assert(ov != reln->md_fd); /* we never drop the 1st segment */
616 else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
619 * This is the last segment we want to keep. Truncate the file
620 * to the right length, and clear chain link that points to
621 * any remaining segments (which we shall zap). NOTE: if
622 * nblocks is exactly a multiple K of RELSEG_SIZE, we will
623 * truncate the K+1st segment to 0 length but keep it. This is
624 * mainly so that the right thing happens if nblocks==0.
626 BlockNumber lastsegblocks = nblocks - priorblocks;
628 if (FileTruncate(v->mdfd_vfd, lastsegblocks * BLCKSZ) < 0)
629 return InvalidBlockNumber;
632 if (!register_dirty_segment(reln, v))
633 return InvalidBlockNumber;
636 ov->mdfd_chain = NULL;
641 * We still need this segment and 0 or more blocks beyond it,
642 * so nothing to do here.
646 priorblocks += RELSEG_SIZE;
649 if (FileTruncate(v->mdfd_vfd, nblocks * BLCKSZ) < 0)
650 return InvalidBlockNumber;
653 if (!register_dirty_segment(reln, v))
654 return InvalidBlockNumber;
/*
 * Review summary for mdimmedsync(reln):
 *   reln - relation to flush.
 * Calls mdnblocks first, returning false if that fails, so that every
 * existing segment is open; then obtains the first segment with mdopen and
 * calls FileSync.  Two FileSync call sites appear below, selected by
 * LET_OS_MANAGE_FILESIZE; each treats a negative result as failure.
 */
662 * mdimmedsync() -- Immediately sync a relation to stable storage.
664 * Note that only writes already issued are synced; this routine knows
665 * nothing of dirty buffers that may exist inside the buffer manager.
668 mdimmedsync(SMgrRelation reln)
674 * NOTE: mdnblocks makes sure we have opened all existing segments, so
675 * that fsync loop will get them all!
677 curnblk = mdnblocks(reln);
678 if (curnblk == InvalidBlockNumber)
679 return false; /* mdnblocks failed */
681 v = mdopen(reln, false);
683 #ifndef LET_OS_MANAGE_FILESIZE
686 if (FileSync(v->mdfd_vfd) < 0)
691 if (FileSync(v->mdfd_vfd) < 0)
/*
 * Review summary for mdsync():
 * Checks pendingOpsTable first (the action taken when it is missing is not
 * visible in this extract).  After AbsorbFsyncRequests it scans the table
 * with hash_seq_search.  For each entry it opens the relation with
 * smgropen, locates the segment with _mdfd_getseg using block number
 * entry->segno * RELSEG_SIZE, and calls FileSync on it.  A FileSync failure
 * is reported through ereport with the could-not-fsync message, subject to
 * an additional condition whose line is not visible here.  Each processed
 * entry is then removed with HASH_REMOVE, and a failed removal raises an
 * ERROR.
 * NOTE(review): entries are removed while the sequential scan is still in
 * progress -- presumably the hash table implementation permits deleting the
 * entry most recently returned by the scan; TODO confirm.
 */
699 * mdsync() -- Sync previous writes to stable storage.
701 * This is only called during checkpoints, and checkpoints should only
702 * occur in processes that have created a pendingOpsTable.
707 HASH_SEQ_STATUS hstat;
708 PendingOperationEntry *entry;
710 if (!pendingOpsTable)
714 * If we are in the bgwriter, the sync had better include all fsync
715 * requests that were queued by backends before the checkpoint REDO
716 * point was determined. We go that a little better by accepting all
717 * requests queued up to the point where we start fsync'ing.
719 AbsorbFsyncRequests();
721 hash_seq_init(&hstat, pendingOpsTable);
722 while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
725 * If fsync is off then we don't have to bother opening the file
726 * at all. (We delay checking until this point so that changing
727 * fsync on the fly behaves sensibly.)
735 * Find or create an smgr hash entry for this relation. This
736 * may seem a bit unclean -- md calling smgr? But it's really
737 * the best solution. It ensures that the open file reference
738 * isn't permanently leaked if we get an error here. (You may
739 * say "but an unreferenced SMgrRelation is still a leak!" Not
740 * really, because the only case in which a checkpoint is done
741 * by a process that isn't about to shut down is in the
742 * bgwriter, and it will periodically do smgrcloseall(). This
743 * fact justifies our not closing the reln in the success path
744 * either, which is a good thing since in non-bgwriter cases
745 * we couldn't safely do that.) Furthermore, in many cases
746 * the relation will have been dirtied through this same smgr
747 * relation, and so we can save a file open/close cycle.
749 reln = smgropen(entry->rnode);
752 * It is possible that the relation has been dropped or
753 * truncated since the fsync request was entered. Therefore,
754 * we have to allow file-not-found errors. This applies both
755 * during _mdfd_getseg() and during FileSync, since fd.c might
756 * have closed the file behind our back.
758 seg = _mdfd_getseg(reln,
759 entry->segno * ((BlockNumber) RELSEG_SIZE),
763 if (FileSync(seg->mdfd_vfd) < 0 &&
767 (errcode_for_file_access(),
768 errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
770 entry->rnode.spcNode,
772 entry->rnode.relNode)));
778 /* Okay, delete this entry */
779 if (hash_search(pendingOpsTable, entry,
780 HASH_REMOVE, NULL) == NULL)
781 elog(ERROR, "pendingOpsTable corrupted");
/*
 * Review summary for register_dirty_segment(reln, seg):
 *   reln - relation that owns the segment.
 *   seg  - MdfdVec of the segment that was written.
 * Three strategies appear below, in this order: insert a PendingOperationEntry
 * into pendingOpsTable with HASH_ENTER; hand the request to
 * ForwardFsyncRequest; or call FileSync on the segment directly.  The entry
 * is cleared with MemSet before its fields are assigned because the whole
 * struct, padding included, is the hash key.  The hash_search result is
 * deliberately discarded.
 */
788 * register_dirty_segment() -- Mark a relation segment as needing fsync
790 * If there is a local pending-ops table, just make an entry in it for
791 * mdsync to process later. Otherwise, try to pass off the fsync request
792 * to the background writer process. If that fails, just do the fsync
793 * locally before returning (we expect this will not happen often enough
794 * to be a performance problem).
796 * A false result implies I/O failure during local fsync. errno will be
797 * valid for error reporting.
800 register_dirty_segment(SMgrRelation reln, MdfdVec *seg)
804 PendingOperationEntry entry;
806 /* ensure any pad bytes in the struct are zeroed */
807 MemSet(&entry, 0, sizeof(entry));
808 entry.rnode = reln->smgr_rnode;
809 entry.segno = seg->mdfd_segno;
811 (void) hash_search(pendingOpsTable, &entry, HASH_ENTER, NULL);
816 if (ForwardFsyncRequest(reln->smgr_rnode, seg->mdfd_segno))
820 if (FileSync(seg->mdfd_vfd) < 0)
/*
 * Review summary for RememberFsyncRequest(rnode, segno):
 *   rnode - relation the request refers to.
 *   segno - segment number within that relation.
 * Asserts that pendingOpsTable exists, clears a local PendingOperationEntry
 * with MemSet so that pad bytes are zero, and inserts it with HASH_ENTER.
 * The hash_search result is deliberately discarded.  The lines that copy
 * rnode and segno into the entry are not visible in this extract.
 */
826 * RememberFsyncRequest() -- callback from bgwriter side of fsync request
828 * We stuff the fsync request into the local hash table for execution
829 * during the bgwriter's next checkpoint.
832 RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
834 PendingOperationEntry entry;
836 Assert(pendingOpsTable);
838 /* ensure any pad bytes in the struct are zeroed */
839 MemSet(&entry, 0, sizeof(entry));
843 (void) hash_search(pendingOpsTable, &entry, HASH_ENTER, NULL);
/*
 * Review summary for _fdvec_alloc(): returns a chunk of sizeof(MdfdVec)
 * bytes obtained from MemoryContextAlloc in MdCxt.  No field is assigned
 * here; the callers visible in this file set the fields themselves.
 */
847 * _fdvec_alloc() -- Make a MdfdVec object.
852 return (MdfdVec *) MemoryContextAlloc(MdCxt, sizeof(MdfdVec));
/*
 * Review summary for _mdfd_openseg(reln, segno, oflags):
 *   reln   - relation the segment belongs to.
 *   segno  - number of the segment to open.
 *   oflags - extra open flags ORed into O_RDWR | PG_BINARY; the callers
 *            visible in this file pass O_CREAT or 0.
 * The visible code builds the file name as the relation path followed by a
 * dot and the segment number.  The buffer of strlen(path) + 12 bytes has
 * room for the dot, up to ten decimal digits and the terminating NUL, which
 * suffices for %u as long as segno is at most 32 bits wide -- TODO confirm
 * the width of BlockNumber.  The new MdfdVec receives the segment number
 * and a NULL chain link, and the file is asserted to hold at most
 * RELSEG_SIZE blocks.
 */
855 #ifndef LET_OS_MANAGE_FILESIZE
858 * Open the specified segment of the relation,
859 * and make a MdfdVec object for it. Returns NULL on failure.
862 _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
869 path = relpath(reln->smgr_rnode);
873 /* be sure we have enough space for the '.segno' */
874 fullpath = (char *) palloc(strlen(path) + 12);
875 sprintf(fullpath, "%s.%u", path, segno);
882 fd = FileNameOpenFile(fullpath, O_RDWR | PG_BINARY | oflags, 0600);
889 /* allocate an mdfdvec entry for it */
894 v->mdfd_segno = segno;
895 v->mdfd_chain = NULL;
896 Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
901 #endif /* LET_OS_MANAGE_FILESIZE */
/*
 * Review summary for _mdfd_getseg(reln, blkno, allowNotFound):
 *   reln          - relation to search.
 *   blkno         - block whose containing segment is wanted.
 *   allowNotFound - passed on to mdopen; it also makes an ENOENT failure
 *                   while opening a later segment tolerated instead of
 *                   reported through ereport.
 * Starts from the first segment returned by mdopen.  In the segmented
 * variant segstogo starts at blkno / RELSEG_SIZE and is decremented while
 * nextsegno is incremented; any segment not yet in mdfd_chain is opened
 * with _mdfd_openseg.  O_CREAT is passed only when segstogo == 1 or while
 * InRecovery is set.
 */
904 * _mdfd_getseg() -- Find the segment of the relation holding the
905 * specified block. ereport's on failure.
906 * (Optionally, can return NULL instead of ereport for ENOENT.)
909 _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool allowNotFound)
911 MdfdVec *v = mdopen(reln, allowNotFound);
913 #ifndef LET_OS_MANAGE_FILESIZE
914 BlockNumber segstogo;
915 BlockNumber nextsegno;
918 return NULL; /* only possible if allowNotFound */
920 for (segstogo = blkno / ((BlockNumber) RELSEG_SIZE), nextsegno = 1;
922 nextsegno++, segstogo--)
924 if (v->mdfd_chain == NULL)
927 * We will create the next segment only if the target block is
928 * within it. This prevents Sorcerer's Apprentice syndrome if
929 * a bug at higher levels causes us to be handed a
930 * ridiculously large blkno --- otherwise we could create many
931 * thousands of empty segment files before reaching the
932 * "target" block. We should never need to create more than
933 * one new segment per call, so this restriction seems
936 * BUT: when doing WAL recovery, disable this logic and create
937 * segments unconditionally. In this case it seems better
938 * to assume the given blkno is good (it presumably came from
939 * a CRC-checked WAL record); furthermore this lets us cope
940 * in the case where we are replaying WAL data that has a write
941 * into a high-numbered segment of a relation that was later
942 * deleted. We want to go ahead and create the segments so
943 * we can finish out the replay.
945 v->mdfd_chain = _mdfd_openseg(reln,
947 (segstogo == 1 || InRecovery) ? O_CREAT : 0);
948 if (v->mdfd_chain == NULL)
950 if (allowNotFound && errno == ENOENT)
953 (errcode_for_file_access(),
954 errmsg("could not open segment %u of relation %u/%u/%u (target block %u): %m",
956 reln->smgr_rnode.spcNode,
957 reln->smgr_rnode.dbNode,
958 reln->smgr_rnode.relNode,
/*
 * Review summary for _mdnblocks(file, blcksz):
 *   file   - fd.c file handle to measure.
 *   blcksz - block size in bytes, used as the divisor.
 * Seeks to the end of the file to learn its length and returns
 * len / blcksz converted to BlockNumber; with integer division a trailing
 * partial block is not counted.  The early return of 0 treats a failed seek
 * as an empty file.
 * NOTE(review): callers therefore cannot tell a seek failure apart from an
 * empty file.
 */
970 * Get number of blocks present in a single disk file
973 _mdnblocks(File file, Size blcksz)
977 len = FileSeek(file, 0L, SEEK_END);
979 return 0; /* on failure, assume file is empty */
980 return (BlockNumber) (len / blcksz);