1 /*-------------------------------------------------------------------------
5 * Implements the custom output format.
7 * The comments with the routines in this code are a good place to
8 * understand how to write a new format.
10 * See the headers to pg_restore for more details.
12 * Copyright (c) 2000, Philip Warner
13 * Rights are granted to use this software in any way so long
14 * as this notice is not removed.
16 * The author is not responsible for loss or damages that may
17 * and any liability will be limited to the time taken to fix any
22 * $Header: /cvsroot/pgsql/src/bin/pg_dump/pg_backup_custom.c,v 1.25 2003/08/04 00:43:27 momjian Exp $
24 *-------------------------------------------------------------------------
27 #include "pg_backup.h"
28 #include "pg_backup_archiver.h"
33 * Routines in the format interface
37 static void _ArchiveEntry(ArchiveHandle *AH, TocEntry *te);
38 static void _StartData(ArchiveHandle *AH, TocEntry *te);
39 static size_t _WriteData(ArchiveHandle *AH, const void *data, size_t dLen);
40 static void _EndData(ArchiveHandle *AH, TocEntry *te);
41 static int _WriteByte(ArchiveHandle *AH, const int i);
42 static int _ReadByte(ArchiveHandle *);
43 static size_t _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len);
44 static size_t _ReadBuf(ArchiveHandle *AH, void *buf, size_t len);
45 static void _CloseArchive(ArchiveHandle *AH);
46 static void _PrintTocData(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt);
47 static void _WriteExtraToc(ArchiveHandle *AH, TocEntry *te);
48 static void _ReadExtraToc(ArchiveHandle *AH, TocEntry *te);
49 static void _PrintExtraToc(ArchiveHandle *AH, TocEntry *te);
51 static void _PrintData(ArchiveHandle *AH);
52 static void _skipData(ArchiveHandle *AH);
53 static void _skipBlobs(ArchiveHandle *AH);
55 static void _StartBlobs(ArchiveHandle *AH, TocEntry *te);
56 static void _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
57 static void _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
58 static void _EndBlobs(ArchiveHandle *AH, TocEntry *te);
59 static void _LoadBlobs(ArchiveHandle *AH);
62 * Buffers used in zlib compression and extra data stored in archive and
66 #define zlibOutSize 4096
67 #define zlibInSize 4096
91 static void _readBlockHeader(ArchiveHandle *AH, int *type, int *id);
92 static void _StartDataCompressor(ArchiveHandle *AH, TocEntry *te);
93 static void _EndDataCompressor(ArchiveHandle *AH, TocEntry *te);
94 static off_t _getFilePos(ArchiveHandle *AH, lclContext *ctx);
95 static int _DoDeflate(ArchiveHandle *AH, lclContext *ctx, int flush);
97 static char *modulename = gettext_noop("custom archiver");
102 * Init routine required by ALL formats. This is a global routine
103 * and should be declared in pg_backup_archiver.h
105 * Its task is to create any extra archive context (using AH->formatData),
106 * and to initialize the supported function pointers.
108 * It should also prepare whatever its input source is for reading/writing,
109 * and in the case of a read mode connection, it should load the Header & TOC.
112 InitArchiveFmt_Custom(ArchiveHandle *AH)
116 /* Assuming static functions, this can be copied for each format. */
117 AH->ArchiveEntryPtr = _ArchiveEntry;
118 AH->StartDataPtr = _StartData;
119 AH->WriteDataPtr = _WriteData;
120 AH->EndDataPtr = _EndData;
121 AH->WriteBytePtr = _WriteByte;
122 AH->ReadBytePtr = _ReadByte;
123 AH->WriteBufPtr = _WriteBuf;
124 AH->ReadBufPtr = _ReadBuf;
125 AH->ClosePtr = _CloseArchive;
126 AH->PrintTocDataPtr = _PrintTocData;
127 AH->ReadExtraTocPtr = _ReadExtraToc;
128 AH->WriteExtraTocPtr = _WriteExtraToc;
129 AH->PrintExtraTocPtr = _PrintExtraToc;
131 AH->StartBlobsPtr = _StartBlobs;
132 AH->StartBlobPtr = _StartBlob;
133 AH->EndBlobPtr = _EndBlob;
134 AH->EndBlobsPtr = _EndBlobs;
137 * Set up some special context used in compressing data.
139 ctx = (lclContext *) malloc(sizeof(lclContext));
141 die_horribly(AH, modulename, "out of memory\n");
142 AH->formatData = (void *) ctx;
144 ctx->zp = (z_streamp) malloc(sizeof(z_stream));
146 die_horribly(AH, modulename, "out of memory\n");
148 /* Initialize LO buffering */
149 AH->lo_buf_size = LOBBUFSIZE;
150 AH->lo_buf = (void *) malloc(LOBBUFSIZE);
151 if (AH->lo_buf == NULL)
152 die_horribly(AH, modulename, "out of memory\n");
155 * zlibOutSize is the buffer size we tell zlib it can output to. We
156 * actually allocate one extra byte because some routines want to
157 * append a trailing zero byte to the zlib output. The input buffer
158 * is expansible and is always of size ctx->inSize; zlibInSize is just
159 * the initial default size for it.
161 ctx->zlibOut = (char *) malloc(zlibOutSize + 1);
162 ctx->zlibIn = (char *) malloc(zlibInSize);
163 ctx->inSize = zlibInSize;
166 if (ctx->zlibOut == NULL || ctx->zlibIn == NULL)
167 die_horribly(AH, modulename, "out of memory\n");
172 if (AH->mode == archModeWrite)
174 if (AH->fSpec && strcmp(AH->fSpec, "") != 0)
175 AH->FH = fopen(AH->fSpec, PG_BINARY_W);
180 die_horribly(AH, modulename, "could not open archive file \"%s\": %s\n", AH->fSpec, strerror(errno));
182 ctx->hasSeek = checkSeek(AH->FH);
186 if (AH->fSpec && strcmp(AH->fSpec, "") != 0)
187 AH->FH = fopen(AH->fSpec, PG_BINARY_R);
191 die_horribly(AH, modulename, "could not open archive file \"%s\": %s\n", AH->fSpec, strerror(errno));
193 ctx->hasSeek = checkSeek(AH->FH);
197 ctx->dataStart = _getFilePos(AH, ctx);
203 * Called by the Archiver when the dumper creates a new TOC entry.
207 * Set up extra format-related TOC data.
210 _ArchiveEntry(ArchiveHandle *AH, TocEntry *te)
214 ctx = (lclTocEntry *) calloc(1, sizeof(lclTocEntry));
216 ctx->dataState = K_OFFSET_POS_NOT_SET;
218 ctx->dataState = K_OFFSET_NO_DATA;
220 te->formatData = (void *) ctx;
224 * Called by the Archiver to save any extra format-related TOC entry
229 * Use the Archiver routines to write data - they are non-endian, and
230 * maintain other important file information.
233 _WriteExtraToc(ArchiveHandle *AH, TocEntry *te)
235 lclTocEntry *ctx = (lclTocEntry *) te->formatData;
237 WriteOffset(AH, ctx->dataPos, ctx->dataState);
241 * Called by the Archiver to read any extra format-related TOC data.
245 * Needs to match the order defined in _WriteExtraToc, and should also
246 * use the Archiver input routines.
249 _ReadExtraToc(ArchiveHandle *AH, TocEntry *te)
252 lclTocEntry *ctx = (lclTocEntry *) te->formatData;
256 ctx = (lclTocEntry *) malloc(sizeof(lclTocEntry));
257 te->formatData = (void *) ctx;
260 ctx->dataState = ReadOffset(AH, &(ctx->dataPos));
263 * Prior to V1.7 (pg7.3), we dumped the data size as an int; now we
264 * don't dump it at all.
266 if (AH->version < K_VERS_1_7)
271 * Called by the Archiver when restoring an archive to output a comment
272 * that includes useful information about the TOC entry.
278 _PrintExtraToc(ArchiveHandle *AH, TocEntry *te)
280 lclTocEntry *ctx = (lclTocEntry *) te->formatData;
282 ahprintf(AH, "-- Data Pos: " INT64_FORMAT "\n",
283 (int64) ctx->dataPos);
287 * Called by the archiver when saving TABLE DATA (not schema). This routine
288 * should save whatever format-specific information is needed to read
291 * It is called just prior to the dumper's 'DataDumper' routine being called.
293 * Optional, but strongly recommended.
297 _StartData(ArchiveHandle *AH, TocEntry *te)
299 lclContext *ctx = (lclContext *) AH->formatData;
300 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
302 tctx->dataPos = _getFilePos(AH, ctx);
303 tctx->dataState = K_OFFSET_POS_SET;
305 _WriteByte(AH, BLK_DATA); /* Block type */
306 WriteInt(AH, te->id); /* For sanity check */
308 _StartDataCompressor(AH, te);
312 * Called by archiver when dumper calls WriteData. This routine is
313 * called for both BLOB and TABLE data; it is the responsibility of
314 * the format to manage each kind of data using StartBlob/StartData.
316 * It should only be called from within a DataDumper routine.
322 _WriteData(ArchiveHandle *AH, const void *data, size_t dLen)
324 lclContext *ctx = (lclContext *) AH->formatData;
325 z_streamp zp = ctx->zp;
327 zp->next_in = (void *) data;
330 while (zp->avail_in != 0)
332 /* printf("Deflating %lu bytes\n", (unsigned long) dLen); */
333 _DoDeflate(AH, ctx, 0);
339 * Called by the archiver when a dumper's 'DataDumper' routine has
346 _EndData(ArchiveHandle *AH, TocEntry *te)
348 /* lclContext *ctx = (lclContext *) AH->formatData; */
349 /* lclTocEntry *tctx = (lclTocEntry *) te->formatData; */
351 _EndDataCompressor(AH, te);
355 * Called by the archiver when starting to save all BLOB DATA (not schema).
356 * This routine should save whatever format-specific information is needed
357 * to read the BLOBs back into memory.
359 * It is called just prior to the dumper's DataDumper routine.
361 * Optional, but strongly recommended.
365 _StartBlobs(ArchiveHandle *AH, TocEntry *te)
367 lclContext *ctx = (lclContext *) AH->formatData;
368 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
370 tctx->dataPos = _getFilePos(AH, ctx);
371 tctx->dataState = K_OFFSET_POS_SET;
373 _WriteByte(AH, BLK_BLOBS); /* Block type */
374 WriteInt(AH, te->id); /* For sanity check */
378 * Called by the archiver when the dumper calls StartBlob.
382 * Must save the passed OID for retrieval at restore-time.
385 _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
388 die_horribly(AH, modulename, "invalid OID for large object\n");
391 _StartDataCompressor(AH, te);
395 * Called by the archiver when the dumper calls EndBlob.
401 _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
403 _EndDataCompressor(AH, te);
407 * Called by the archiver when finishing saving all BLOB DATA.
413 _EndBlobs(ArchiveHandle *AH, TocEntry *te)
415 /* Write out a fake zero OID to mark end-of-blobs. */
420 * Print data for a given TOC entry
423 _PrintTocData(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt)
425 lclContext *ctx = (lclContext *) AH->formatData;
427 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
431 if (tctx->dataState == K_OFFSET_NO_DATA)
434 if (!ctx->hasSeek || tctx->dataState == K_OFFSET_POS_NOT_SET)
436 /* Skip over unnecessary blocks until we get the one we want. */
440 _readBlockHeader(AH, &blkType, &id);
444 if ((TocIDRequired(AH, id, ropt) & 2) != 0)
445 die_horribly(AH, modulename,
446 "Dumping a specific TOC data block out of order is not supported"
447 " without ID on this input stream (fseek required)\n");
459 default: /* Always have a default */
460 die_horribly(AH, modulename,
461 "unrecognized data block type (%d) while searching archive\n",
465 _readBlockHeader(AH, &blkType, &id);
471 if (fseeko(AH->FH, tctx->dataPos, SEEK_SET) != 0)
472 die_horribly(AH, modulename, "error during file seek: %s\n", strerror(errno));
474 _readBlockHeader(AH, &blkType, &id);
479 die_horribly(AH, modulename, "found unexpected block ID (%d) when reading data -- expected %d\n",
490 die_horribly(AH, modulename, "large objects cannot be loaded without a database connection\n");
495 default: /* Always have a default */
496 die_horribly(AH, modulename, "unrecognized data block type %d while restoring archive\n",
501 ahprintf(AH, "\n\n");
505 * Print data from current file position.
508 _PrintData(ArchiveHandle *AH)
510 lclContext *ctx = (lclContext *) AH->formatData;
511 z_streamp zp = ctx->zp;
513 char *in = ctx->zlibIn;
518 char *out = ctx->zlibOut;
525 if (AH->compression != 0)
531 if (inflateInit(zp) != Z_OK)
532 die_horribly(AH, modulename, "could not initialize compression library: %s\n", zp->msg);
536 blkLen = ReadInt(AH);
539 if (blkLen + 1 > ctx->inSize)
543 ctx->zlibIn = (char *) malloc(blkLen + 1);
545 die_horribly(AH, modulename, "out of memory\n");
547 ctx->inSize = blkLen + 1;
551 cnt = fread(in, 1, blkLen, AH->FH);
553 die_horribly(AH, modulename,
554 "could not read data block -- expected %lu, got %lu\n",
555 (unsigned long) blkLen, (unsigned long) cnt);
557 ctx->filePos += blkLen;
560 zp->avail_in = blkLen;
564 if (AH->compression != 0)
566 while (zp->avail_in != 0)
569 zp->avail_out = zlibOutSize;
570 res = inflate(zp, 0);
571 if (res != Z_OK && res != Z_STREAM_END)
572 die_horribly(AH, modulename, "could not uncompress data: %s\n", zp->msg);
574 out[zlibOutSize - zp->avail_out] = '\0';
575 ahwrite(out, 1, zlibOutSize - zp->avail_out, AH);
581 in[zp->avail_in] = '\0';
582 ahwrite(in, 1, zp->avail_in, AH);
588 blkLen = ReadInt(AH);
592 if (AH->compression != 0)
596 while (res != Z_STREAM_END)
599 zp->avail_out = zlibOutSize;
600 res = inflate(zp, 0);
601 if (res != Z_OK && res != Z_STREAM_END)
602 die_horribly(AH, modulename, "could not uncompress data: %s\n", zp->msg);
604 out[zlibOutSize - zp->avail_out] = '\0';
605 ahwrite(out, 1, zlibOutSize - zp->avail_out, AH);
607 if (inflateEnd(zp) != Z_OK)
608 die_horribly(AH, modulename, "could not close compression library: %s\n", zp->msg);
614 _LoadBlobs(ArchiveHandle *AH)
618 StartRestoreBlobs(AH);
623 StartRestoreBlob(AH, oid);
625 EndRestoreBlob(AH, oid);
633 * Skip the BLOBs from the current file position.
634 * BLOBS are written sequentially as data blocks (see below).
635 * Each BLOB is preceded by its original OID.
636 * A zero OID indicates the end of the BLOBS
639 _skipBlobs(ArchiveHandle *AH)
652 * Skip data from current file position.
653 * Data blocks are formatted as an integer length, followed by data.
654 * A zero length denotes the end of the block.
657 _skipData(ArchiveHandle *AH)
659 lclContext *ctx = (lclContext *) AH->formatData;
661 char *in = ctx->zlibIn;
664 blkLen = ReadInt(AH);
667 if (blkLen > ctx->inSize)
670 ctx->zlibIn = (char *) malloc(blkLen);
671 ctx->inSize = blkLen;
674 cnt = fread(in, 1, blkLen, AH->FH);
676 die_horribly(AH, modulename,
677 "could not read data block -- expected %lu, got %lu\n",
678 (unsigned long) blkLen, (unsigned long) cnt);
680 ctx->filePos += blkLen;
682 blkLen = ReadInt(AH);
687 * Write a byte of data to the archive.
691 * Called by the archiver to do integer & byte output to the archive.
692 * These routines are only used to read & write headers & TOC.
696 _WriteByte(ArchiveHandle *AH, const int i)
698 lclContext *ctx = (lclContext *) AH->formatData;
701 res = fputc(i, AH->FH);
705 die_horribly(AH, modulename, "could not write byte: %s\n", strerror(errno));
710 * Read a byte of data from the archive.
714 * Called by the archiver to read bytes & integers from the archive.
715 * These routines are only used to read & write headers & TOC.
719 _ReadByte(ArchiveHandle *AH)
721 lclContext *ctx = (lclContext *) AH->formatData;
731 * Write a buffer of data to the archive.
735 * Called by the archiver to write a block of bytes to the archive.
736 * These routines are only used to read & write headers & TOC.
740 _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len)
742 lclContext *ctx = (lclContext *) AH->formatData;
745 res = fwrite(buf, 1, len, AH->FH);
748 die_horribly(AH, modulename,
749 "write error in _WriteBuf (%lu != %lu)\n",
750 (unsigned long) res, (unsigned long) len);
757 * Read a block of bytes from the archive.
761 * Called by the archiver to read a block of bytes from the archive
762 * These routines are only used to read & write headers & TOC.
766 _ReadBuf(ArchiveHandle *AH, void *buf, size_t len)
768 lclContext *ctx = (lclContext *) AH->formatData;
771 res = fread(buf, 1, len, AH->FH);
782 * When writing the archive, this is the routine that actually starts
783 * the process of saving it to files. No data should be written prior
784 * to this point, since the user could sort the TOC after creating it.
786 * If an archive is to be written, this routine must call:
787 * WriteHead to save the archive header
788 * WriteToc to save the TOC entries
789 * WriteDataChunks to save all DATA & BLOBs.
793 _CloseArchive(ArchiveHandle *AH)
795 lclContext *ctx = (lclContext *) AH->formatData;
798 if (AH->mode == archModeWrite)
801 tpos = ftello(AH->FH);
803 ctx->dataStart = _getFilePos(AH, ctx);
807 * This is not an essential operation - it is really only needed
808 * if we expect to be doing seeks to read the data back - it may
809 * be ok to just use the existing self-consistent block
814 fseeko(AH->FH, tpos, SEEK_SET);
819 if (fclose(AH->FH) != 0)
820 die_horribly(AH, modulename, "could not close archive file: %s\n", strerror(errno));
825 /*--------------------------------------------------
826 * END OF FORMAT CALLBACKS
827 *--------------------------------------------------
831 * Get the current position in the archive file.
834 _getFilePos(ArchiveHandle *AH, lclContext *ctx)
840 pos = ftello(AH->FH);
841 if (pos != ctx->filePos)
843 write_msg(modulename, "WARNING: ftell mismatch with expected position -- ftell used\n");
846 * Prior to 1.7 (pg7.3) we relied on the internally maintained
847 * pointer. Now we rely on off_t always. pos = ctx->filePos;
857 * Read a data block header. The format changed in V1.3, so we
858 * put the code here for simplicity.
861 _readBlockHeader(ArchiveHandle *AH, int *type, int *id)
863 if (AH->version < K_VERS_1_3)
866 *type = _ReadByte(AH);;
872 * If zlib is available, then start it up. This is called from
873 * StartData & StartBlob. The buffers are set up in the Init routine.
877 _StartDataCompressor(ArchiveHandle *AH, TocEntry *te)
879 lclContext *ctx = (lclContext *) AH->formatData;
880 z_streamp zp = ctx->zp;
884 if (AH->compression < 0 || AH->compression > 9)
885 AH->compression = Z_DEFAULT_COMPRESSION;
887 if (AH->compression != 0)
893 if (deflateInit(zp, AH->compression) != Z_OK)
894 die_horribly(AH, modulename, "could not initialize compression library: %s\n", zp->msg);
902 /* Just be paranoid - maybe End is called after Start, with no Write */
903 zp->next_out = ctx->zlibOut;
904 zp->avail_out = zlibOutSize;
908 * Send compressed data to the output stream (via ahwrite).
909 * Each data chunk is preceded by its length.
910 * In the case of Z0, or no zlib, just write the raw data.
914 _DoDeflate(ArchiveHandle *AH, lclContext *ctx, int flush)
916 z_streamp zp = ctx->zp;
919 char *out = ctx->zlibOut;
922 if (AH->compression != 0)
924 res = deflate(zp, flush);
925 if (res == Z_STREAM_ERROR)
926 die_horribly(AH, modulename, "could not compress data: %s\n", zp->msg);
928 if (((flush == Z_FINISH) && (zp->avail_out < zlibOutSize))
929 || (zp->avail_out == 0)
930 || (zp->avail_in != 0)
934 * Extra paranoia: avoid zero-length chunks since a zero
935 * length chunk is the EOF marker. This should never happen
938 if (zp->avail_out < zlibOutSize)
941 * printf("Wrote %lu byte deflated chunk\n", (unsigned
942 * long) (zlibOutSize - zp->avail_out));
944 WriteInt(AH, zlibOutSize - zp->avail_out);
945 if (fwrite(out, 1, zlibOutSize - zp->avail_out, AH->FH) != (zlibOutSize - zp->avail_out))
946 die_horribly(AH, modulename, "could not write compressed chunk\n");
947 ctx->filePos += zlibOutSize - zp->avail_out;
950 zp->avail_out = zlibOutSize;
956 if (zp->avail_in > 0)
958 WriteInt(AH, zp->avail_in);
959 if (fwrite(zp->next_in, 1, zp->avail_in, AH->FH) != zp->avail_in)
960 die_horribly(AH, modulename, "could not write uncompressed chunk\n");
961 ctx->filePos += zp->avail_in;
967 if (flush == Z_FINISH)
981 * Terminate zlib context and flush its buffers. If no zlib
986 _EndDataCompressor(ArchiveHandle *AH, TocEntry *te)
990 lclContext *ctx = (lclContext *) AH->formatData;
991 z_streamp zp = ctx->zp;
994 if (AH->compression != 0)
1001 /* printf("Ending data output\n"); */
1002 res = _DoDeflate(AH, ctx, Z_FINISH);
1003 } while (res != Z_STREAM_END);
1005 if (deflateEnd(zp) != Z_OK)
1006 die_horribly(AH, modulename, "could not close compression stream: %s\n", zp->msg);
1010 /* Send the end marker */