1 /*-------------------------------------------------------------------------
5 * Implements the custom output format.
7 * The comments with the routines in this code are a good place to
8 * understand how to write a new format.
10 * See the headers to pg_restore for more details.
12 * Copyright (c) 2000, Philip Warner
13 * Rights are granted to use this software in any way so long
14 * as this notice is not removed.
16 * The author is not responsible for loss or damages that may
17 * and any liability will be limited to the time taken to fix any
22 * src/bin/pg_dump/pg_backup_custom.c
24 *-------------------------------------------------------------------------
27 #include "compress_io.h"
30 * Routines in the format interface
34 static void _ArchiveEntry(ArchiveHandle *AH, TocEntry *te);
35 static void _StartData(ArchiveHandle *AH, TocEntry *te);
36 static size_t _WriteData(ArchiveHandle *AH, const void *data, size_t dLen);
37 static void _EndData(ArchiveHandle *AH, TocEntry *te);
38 static int _WriteByte(ArchiveHandle *AH, const int i);
39 static int _ReadByte(ArchiveHandle *);
40 static size_t _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len);
41 static size_t _ReadBuf(ArchiveHandle *AH, void *buf, size_t len);
42 static void _CloseArchive(ArchiveHandle *AH);
43 static void _ReopenArchive(ArchiveHandle *AH);
44 static void _PrintTocData(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt);
45 static void _WriteExtraToc(ArchiveHandle *AH, TocEntry *te);
46 static void _ReadExtraToc(ArchiveHandle *AH, TocEntry *te);
47 static void _PrintExtraToc(ArchiveHandle *AH, TocEntry *te);
49 static void _PrintData(ArchiveHandle *AH);
50 static void _skipData(ArchiveHandle *AH);
51 static void _skipBlobs(ArchiveHandle *AH);
53 static void _StartBlobs(ArchiveHandle *AH, TocEntry *te);
54 static void _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
55 static void _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
56 static void _EndBlobs(ArchiveHandle *AH, TocEntry *te);
57 static void _LoadBlobs(ArchiveHandle *AH, bool drop);
58 static void _Clone(ArchiveHandle *AH);
59 static void _DeClone(ArchiveHandle *AH);
80 static void _readBlockHeader(ArchiveHandle *AH, int *type, int *id);
81 static pgoff_t _getFilePos(ArchiveHandle *AH, lclContext *ctx);
83 static size_t _CustomWriteFunc(ArchiveHandle *AH, const char *buf, size_t len);
84 static size_t _CustomReadFunc(ArchiveHandle *AH, char **buf, size_t *buflen);
86 static const char *modulename = gettext_noop("custom archiver");
91 * Init routine required by ALL formats. This is a global routine
92 * and should be declared in pg_backup_archiver.h
94 * Its task is to create any extra archive context (using AH->formatData),
95 * and to initialize the supported function pointers.
97 * It should also prepare whatever its input source is for reading/writing,
98 * and in the case of a read mode connection, it should load the Header & TOC.
101 InitArchiveFmt_Custom(ArchiveHandle *AH)
105 /* Assuming static functions, this can be copied for each format. */
106 AH->ArchiveEntryPtr = _ArchiveEntry;
107 AH->StartDataPtr = _StartData;
108 AH->WriteDataPtr = _WriteData;
109 AH->EndDataPtr = _EndData;
110 AH->WriteBytePtr = _WriteByte;
111 AH->ReadBytePtr = _ReadByte;
112 AH->WriteBufPtr = _WriteBuf;
113 AH->ReadBufPtr = _ReadBuf;
114 AH->ClosePtr = _CloseArchive;
115 AH->ReopenPtr = _ReopenArchive;
116 AH->PrintTocDataPtr = _PrintTocData;
117 AH->ReadExtraTocPtr = _ReadExtraToc;
118 AH->WriteExtraTocPtr = _WriteExtraToc;
119 AH->PrintExtraTocPtr = _PrintExtraToc;
121 AH->StartBlobsPtr = _StartBlobs;
122 AH->StartBlobPtr = _StartBlob;
123 AH->EndBlobPtr = _EndBlob;
124 AH->EndBlobsPtr = _EndBlobs;
125 AH->ClonePtr = _Clone;
126 AH->DeClonePtr = _DeClone;
128 /* Set up a private area. */
129 ctx = (lclContext *) calloc(1, sizeof(lclContext));
131 die_horribly(AH, modulename, "out of memory\n");
132 AH->formatData = (void *) ctx;
134 /* Initialize LO buffering */
135 AH->lo_buf_size = LOBBUFSIZE;
136 AH->lo_buf = (void *) malloc(LOBBUFSIZE);
137 if (AH->lo_buf == NULL)
138 die_horribly(AH, modulename, "out of memory\n");
145 if (AH->mode == archModeWrite)
147 if (AH->fSpec && strcmp(AH->fSpec, "") != 0)
149 AH->FH = fopen(AH->fSpec, PG_BINARY_W);
151 die_horribly(AH, modulename, "could not open output file \"%s\": %s\n",
152 AH->fSpec, strerror(errno));
158 die_horribly(AH, modulename, "could not open output file: %s\n",
162 ctx->hasSeek = checkSeek(AH->FH);
166 if (AH->fSpec && strcmp(AH->fSpec, "") != 0)
168 AH->FH = fopen(AH->fSpec, PG_BINARY_R);
170 die_horribly(AH, modulename, "could not open input file \"%s\": %s\n",
171 AH->fSpec, strerror(errno));
177 die_horribly(AH, modulename, "could not open input file: %s\n",
181 ctx->hasSeek = checkSeek(AH->FH);
185 ctx->dataStart = _getFilePos(AH, ctx);
191 * Called by the Archiver when the dumper creates a new TOC entry.
195 * Set up extra format-related TOC data.
198 _ArchiveEntry(ArchiveHandle *AH, TocEntry *te)
202 ctx = (lclTocEntry *) calloc(1, sizeof(lclTocEntry));
204 ctx->dataState = K_OFFSET_POS_NOT_SET;
206 ctx->dataState = K_OFFSET_NO_DATA;
208 te->formatData = (void *) ctx;
212 * Called by the Archiver to save any extra format-related TOC entry
217 * Use the Archiver routines to write data - they are non-endian, and
218 * maintain other important file information.
221 _WriteExtraToc(ArchiveHandle *AH, TocEntry *te)
223 lclTocEntry *ctx = (lclTocEntry *) te->formatData;
225 WriteOffset(AH, ctx->dataPos, ctx->dataState);
229 * Called by the Archiver to read any extra format-related TOC data.
233 * Needs to match the order defined in _WriteExtraToc, and should also
234 * use the Archiver input routines.
237 _ReadExtraToc(ArchiveHandle *AH, TocEntry *te)
239 lclTocEntry *ctx = (lclTocEntry *) te->formatData;
243 ctx = (lclTocEntry *) calloc(1, sizeof(lclTocEntry));
244 te->formatData = (void *) ctx;
247 ctx->dataState = ReadOffset(AH, &(ctx->dataPos));
250 * Prior to V1.7 (pg7.3), we dumped the data size as an int; now we don't
253 if (AH->version < K_VERS_1_7)
258 * Called by the Archiver when restoring an archive to output a comment
259 * that includes useful information about the TOC entry.
265 _PrintExtraToc(ArchiveHandle *AH, TocEntry *te)
267 lclTocEntry *ctx = (lclTocEntry *) te->formatData;
269 if (AH->public.verbose)
270 ahprintf(AH, "-- Data Pos: " INT64_FORMAT "\n",
271 (int64) ctx->dataPos);
275 * Called by the archiver when saving TABLE DATA (not schema). This routine
276 * should save whatever format-specific information is needed to read
279 * It is called just prior to the dumper's 'DataDumper' routine being called.
281 * Optional, but strongly recommended.
285 _StartData(ArchiveHandle *AH, TocEntry *te)
287 lclContext *ctx = (lclContext *) AH->formatData;
288 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
290 tctx->dataPos = _getFilePos(AH, ctx);
291 tctx->dataState = K_OFFSET_POS_SET;
293 _WriteByte(AH, BLK_DATA); /* Block type */
294 WriteInt(AH, te->dumpId); /* For sanity check */
296 ctx->cs = AllocateCompressor(AH->compression, _CustomWriteFunc);
300 * Called by archiver when dumper calls WriteData. This routine is
301 * called for both BLOB and TABLE data; it is the responsibility of
302 * the format to manage each kind of data using StartBlob/StartData.
304 * It should only be called from within a DataDumper routine.
309 _WriteData(ArchiveHandle *AH, const void *data, size_t dLen)
311 lclContext *ctx = (lclContext *) AH->formatData;
312 CompressorState *cs = ctx->cs;
317 return WriteDataToArchive(AH, cs, data, dLen);
321 * Called by the archiver when a dumper's 'DataDumper' routine has
328 _EndData(ArchiveHandle *AH, TocEntry *te)
330 lclContext *ctx = (lclContext *) AH->formatData;
332 EndCompressor(AH, ctx->cs);
333 /* Send the end marker */
338 * Called by the archiver when starting to save all BLOB DATA (not schema).
339 * This routine should save whatever format-specific information is needed
340 * to read the BLOBs back into memory.
342 * It is called just prior to the dumper's DataDumper routine.
344 * Optional, but strongly recommended.
347 _StartBlobs(ArchiveHandle *AH, TocEntry *te)
349 lclContext *ctx = (lclContext *) AH->formatData;
350 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
352 tctx->dataPos = _getFilePos(AH, ctx);
353 tctx->dataState = K_OFFSET_POS_SET;
355 _WriteByte(AH, BLK_BLOBS); /* Block type */
356 WriteInt(AH, te->dumpId); /* For sanity check */
360 * Called by the archiver when the dumper calls StartBlob.
364 * Must save the passed OID for retrieval at restore-time.
367 _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
369 lclContext *ctx = (lclContext *) AH->formatData;
372 die_horribly(AH, modulename, "invalid OID for large object\n");
376 ctx->cs = AllocateCompressor(AH->compression, _CustomWriteFunc);
380 * Called by the archiver when the dumper calls EndBlob.
385 _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
387 lclContext *ctx = (lclContext *) AH->formatData;
389 EndCompressor(AH, ctx->cs);
390 /* Send the end marker */
395 * Called by the archiver when finishing saving all BLOB DATA.
400 _EndBlobs(ArchiveHandle *AH, TocEntry *te)
402 /* Write out a fake zero OID to mark end-of-blobs. */
407 * Print data for a given TOC entry
410 _PrintTocData(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt)
412 lclContext *ctx = (lclContext *) AH->formatData;
413 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
417 if (tctx->dataState == K_OFFSET_NO_DATA)
420 if (!ctx->hasSeek || tctx->dataState == K_OFFSET_POS_NOT_SET)
423 * We cannot seek directly to the desired block. Instead, skip over
424 * block headers until we find the one we want. This could fail if we
425 * are asked to restore items out-of-order.
427 _readBlockHeader(AH, &blkType, &id);
429 while (blkType != EOF && id != te->dumpId)
441 default: /* Always have a default */
442 die_horribly(AH, modulename,
443 "unrecognized data block type (%d) while searching archive\n",
447 _readBlockHeader(AH, &blkType, &id);
452 /* We can just seek to the place we need to be. */
453 if (fseeko(AH->FH, tctx->dataPos, SEEK_SET) != 0)
454 die_horribly(AH, modulename, "error during file seek: %s\n",
457 _readBlockHeader(AH, &blkType, &id);
460 /* Produce suitable failure message if we fell off end of file */
463 if (tctx->dataState == K_OFFSET_POS_NOT_SET)
464 die_horribly(AH, modulename, "could not find block ID %d in archive -- "
465 "possibly due to out-of-order restore request, "
466 "which cannot be handled due to lack of data offsets in archive\n",
468 else if (!ctx->hasSeek)
469 die_horribly(AH, modulename, "could not find block ID %d in archive -- "
470 "possibly due to out-of-order restore request, "
471 "which cannot be handled due to non-seekable input file\n",
473 else /* huh, the dataPos led us to EOF? */
474 die_horribly(AH, modulename, "could not find block ID %d in archive -- "
475 "possibly corrupt archive\n",
480 if (id != te->dumpId)
481 die_horribly(AH, modulename, "found unexpected block ID (%d) when reading data -- expected %d\n",
491 _LoadBlobs(AH, ropt->dropSchema);
494 default: /* Always have a default */
495 die_horribly(AH, modulename, "unrecognized data block type %d while restoring archive\n",
502 * Print data from current file position.
505 _PrintData(ArchiveHandle *AH)
507 ReadDataFromArchive(AH, AH->compression, _CustomReadFunc);
511 _LoadBlobs(ArchiveHandle *AH, bool drop)
515 StartRestoreBlobs(AH);
520 StartRestoreBlob(AH, oid, drop);
522 EndRestoreBlob(AH, oid);
530 * Skip the BLOBs from the current file position.
531 * BLOBS are written sequentially as data blocks (see below).
532 * Each BLOB is preceded by its original OID.
533 * A zero OID indicates the end of the BLOBs.
536 _skipBlobs(ArchiveHandle *AH)
549 * Skip data from current file position.
550 * Data blocks are formatted as an integer length, followed by data.
551 * A zero length denotes the end of the block.
554 _skipData(ArchiveHandle *AH)
556 lclContext *ctx = (lclContext *) AH->formatData;
562 blkLen = ReadInt(AH);
569 buf = (char *) malloc(blkLen);
572 cnt = fread(buf, 1, blkLen, AH->FH);
576 die_horribly(AH, modulename,
577 "could not read from input file: end of file\n");
579 die_horribly(AH, modulename,
580 "could not read from input file: %s\n", strerror(errno));
583 ctx->filePos += blkLen;
585 blkLen = ReadInt(AH);
593 * Write a byte of data to the archive.
597 * Called by the archiver to do integer & byte output to the archive.
600 _WriteByte(ArchiveHandle *AH, const int i)
602 lclContext *ctx = (lclContext *) AH->formatData;
605 res = fputc(i, AH->FH);
609 die_horribly(AH, modulename, "could not write byte: %s\n", strerror(errno));
614 * Read a byte of data from the archive.
618 * Called by the archiver to read bytes & integers from the archive.
619 * EOF should be treated as a fatal error.
622 _ReadByte(ArchiveHandle *AH)
624 lclContext *ctx = (lclContext *) AH->formatData;
629 die_horribly(AH, modulename, "unexpected end of file\n");
635 * Write a buffer of data to the archive.
639 * Called by the archiver to write a block of bytes to the archive.
642 _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len)
644 lclContext *ctx = (lclContext *) AH->formatData;
647 res = fwrite(buf, 1, len, AH->FH);
650 die_horribly(AH, modulename,
651 "could not write to output file: %s\n", strerror(errno));
658 * Read a block of bytes from the archive.
662 * Called by the archiver to read a block of bytes from the archive
665 _ReadBuf(ArchiveHandle *AH, void *buf, size_t len)
667 lclContext *ctx = (lclContext *) AH->formatData;
670 res = fread(buf, 1, len, AH->FH);
681 * When writing the archive, this is the routine that actually starts
682 * the process of saving it to files. No data should be written prior
683 * to this point, since the user could sort the TOC after creating it.
685 * If an archive is to be written, this routine must call:
686 * WriteHead to save the archive header
687 * WriteToc to save the TOC entries
688 * WriteDataChunks to save all DATA & BLOBs.
692 _CloseArchive(ArchiveHandle *AH)
694 lclContext *ctx = (lclContext *) AH->formatData;
697 if (AH->mode == archModeWrite)
700 tpos = ftello(AH->FH);
702 ctx->dataStart = _getFilePos(AH, ctx);
706 * If possible, re-write the TOC in order to update the data offset
707 * information. This is not essential, as pg_restore can cope in most
708 * cases without it; but it can make pg_restore significantly faster
709 * in some situations (especially parallel restore).
712 fseeko(AH->FH, tpos, SEEK_SET) == 0)
716 if (fclose(AH->FH) != 0)
717 die_horribly(AH, modulename, "could not close archive file: %s\n", strerror(errno));
723 * Reopen the archive's file handle.
725 * We close the original file handle, except on Windows. (The difference
726 * is because on Windows, this is used within a multithreading context,
727 * and we don't want a thread closing the parent file handle.)
730 _ReopenArchive(ArchiveHandle *AH)
732 lclContext *ctx = (lclContext *) AH->formatData;
735 if (AH->mode == archModeWrite)
736 die_horribly(AH, modulename, "can only reopen input archives\n");
739 * These two cases are user-facing errors since they represent unsupported
740 * (but not invalid) use-cases. Word the error messages appropriately.
742 if (AH->fSpec == NULL || strcmp(AH->fSpec, "") == 0)
743 die_horribly(AH, modulename, "parallel restore from stdin is not supported\n");
745 die_horribly(AH, modulename, "parallel restore from non-seekable file is not supported\n");
748 tpos = ftello(AH->FH);
750 die_horribly(AH, modulename, "could not determine seek position in archive file: %s\n",
754 if (fclose(AH->FH) != 0)
755 die_horribly(AH, modulename, "could not close archive file: %s\n",
759 AH->FH = fopen(AH->fSpec, PG_BINARY_R);
761 die_horribly(AH, modulename, "could not open input file \"%s\": %s\n",
762 AH->fSpec, strerror(errno));
764 if (fseeko(AH->FH, tpos, SEEK_SET) != 0)
765 die_horribly(AH, modulename, "could not set seek position in archive file: %s\n",
770 * Clone format-specific fields during parallel restoration.
773 _Clone(ArchiveHandle *AH)
775 lclContext *ctx = (lclContext *) AH->formatData;
777 AH->formatData = (lclContext *) malloc(sizeof(lclContext));
778 if (AH->formatData == NULL)
779 die_horribly(AH, modulename, "out of memory\n");
780 memcpy(AH->formatData, ctx, sizeof(lclContext));
781 ctx = (lclContext *) AH->formatData;
783 /* sanity check, shouldn't happen */
785 die_horribly(AH, modulename, "compressor active\n");
788 * Note: we do not make a local lo_buf because we expect at most one BLOBS
789 * entry per archive, so no parallelism is possible. Likewise,
790 * TOC-entry-local state isn't an issue because any one TOC entry is
791 * touched by just one worker child.
796 _DeClone(ArchiveHandle *AH)
798 lclContext *ctx = (lclContext *) AH->formatData;
803 /*--------------------------------------------------
804 * END OF FORMAT CALLBACKS
805 *--------------------------------------------------
809 * Get the current position in the archive file.
812 _getFilePos(ArchiveHandle *AH, lclContext *ctx)
818 pos = ftello(AH->FH);
819 if (pos != ctx->filePos)
821 write_msg(modulename, "WARNING: ftell mismatch with expected position -- ftell used\n");
824 * Prior to 1.7 (pg7.3) we relied on the internally maintained
825 * pointer. Now we rely on ftello() always, unless the file has
826 * been found to not support it.
836 * Read a data block header. The format changed in V1.3, so we
837 * centralize the code here for simplicity. Returns *type = EOF
841 _readBlockHeader(ArchiveHandle *AH, int *type, int *id)
843 lclContext *ctx = (lclContext *) AH->formatData;
847 * Note: if we are at EOF with a pre-1.3 input file, we'll die_horribly
848 * inside ReadInt rather than returning EOF. It doesn't seem worth
849 * jumping through hoops to deal with that case better, because no such
850 * files are likely to exist in the wild: only some 7.1 development
851 * versions of pg_dump ever generated such files.
853 if (AH->version < K_VERS_1_3)
861 *id = 0; /* don't return an uninitialized value */
871 * Callback function for WriteDataToArchive. Writes one block of (compressed)
872 * data to the archive.
875 _CustomWriteFunc(ArchiveHandle *AH, const char *buf, size_t len)
877 /* never write 0-byte blocks (this should not happen) */
882 return _WriteBuf(AH, buf, len);
886 * Callback function for ReadDataFromArchive. To keep things simple, we
887 * always read one compressed block at a time.
890 _CustomReadFunc(ArchiveHandle *AH, char **buf, size_t *buflen)
896 blkLen = ReadInt(AH);
900 /* If the caller's buffer is not large enough, allocate a bigger one */
901 if (blkLen > *buflen)
904 *buf = (char *) malloc(blkLen);
906 die_horribly(AH, modulename, "out of memory\n");
910 cnt = _ReadBuf(AH, *buf, blkLen);
914 die_horribly(AH, modulename,
915 "could not read from input file: end of file\n");
917 die_horribly(AH, modulename,
918 "could not read from input file: %s\n", strerror(errno));