1 /*-------------------------------------------------------------------------
5 * Implements the custom output format.
7 * The comments with the routines in this code are a good place to
8 * understand how to write a new format.
10 * See the headers to pg_restore for more details.
12 * Copyright (c) 2000, Philip Warner
13 * Rights are granted to use this software in any way so long
14 * as this notice is not removed.
16 * The author is not responsible for loss or damages that may
17 * and any liability will be limited to the time taken to fix any
22 * $Header: /cvsroot/pgsql/src/bin/pg_dump/pg_backup_custom.c,v 1.18 2002/04/24 02:21:04 momjian Exp $
24 * Modifications - 28-Jun-2000 - pjw@rhyme.com.au
28 * Modifications - 04-Jan-2001 - pjw@rhyme.com.au
30 * - Check results of IO routines more carefully.
32 *-------------------------------------------------------------------------
35 #include "pg_backup.h"
36 #include "pg_backup_archiver.h"
41 * Routines in the format interface
45 static void _ArchiveEntry(ArchiveHandle *AH, TocEntry *te);
46 static void _StartData(ArchiveHandle *AH, TocEntry *te);
47 static int _WriteData(ArchiveHandle *AH, const void *data, int dLen);
48 static void _EndData(ArchiveHandle *AH, TocEntry *te);
49 static int _WriteByte(ArchiveHandle *AH, const int i);
50 static int _ReadByte(ArchiveHandle *);
51 static int _WriteBuf(ArchiveHandle *AH, const void *buf, int len);
52 static int _ReadBuf(ArchiveHandle *AH, void *buf, int len);
53 static void _CloseArchive(ArchiveHandle *AH);
54 static void _PrintTocData(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt);
55 static void _WriteExtraToc(ArchiveHandle *AH, TocEntry *te);
56 static void _ReadExtraToc(ArchiveHandle *AH, TocEntry *te);
57 static void _PrintExtraToc(ArchiveHandle *AH, TocEntry *te);
59 static void _PrintData(ArchiveHandle *AH);
60 static void _skipData(ArchiveHandle *AH);
61 static void _skipBlobs(ArchiveHandle *AH);
63 static void _StartBlobs(ArchiveHandle *AH, TocEntry *te);
64 static void _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
65 static void _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
66 static void _EndBlobs(ArchiveHandle *AH, TocEntry *te);
67 static void _LoadBlobs(ArchiveHandle *AH);
70 * Buffers used in zlib compression and extra data stored in archive and
74 #define zlibOutSize 4096
75 #define zlibInSize 4096
99 static void _readBlockHeader(ArchiveHandle *AH, int *type, int *id);
100 static void _StartDataCompressor(ArchiveHandle *AH, TocEntry *te);
101 static void _EndDataCompressor(ArchiveHandle *AH, TocEntry *te);
102 static int _getFilePos(ArchiveHandle *AH, lclContext *ctx);
103 static int _DoDeflate(ArchiveHandle *AH, lclContext *ctx, int flush);
105 static char *modulename = gettext_noop("custom archiver");
110 * Init routine required by ALL formats. This is a global routine
111 * and should be declared in pg_backup_archiver.h
113 * Its task is to create any extra archive context (using AH->formatData),
114 * and to initialize the supported function pointers.
116 * It should also prepare whatever its input source is for reading/writing,
117 * and in the case of a read mode connection, it should load the Header & TOC.
120 InitArchiveFmt_Custom(ArchiveHandle *AH)
124 /* Assuming static functions, this can be copied for each format. */
125 AH->ArchiveEntryPtr = _ArchiveEntry;
126 AH->StartDataPtr = _StartData;
127 AH->WriteDataPtr = _WriteData;
128 AH->EndDataPtr = _EndData;
129 AH->WriteBytePtr = _WriteByte;
130 AH->ReadBytePtr = _ReadByte;
131 AH->WriteBufPtr = _WriteBuf;
132 AH->ReadBufPtr = _ReadBuf;
133 AH->ClosePtr = _CloseArchive;
134 AH->PrintTocDataPtr = _PrintTocData;
135 AH->ReadExtraTocPtr = _ReadExtraToc;
136 AH->WriteExtraTocPtr = _WriteExtraToc;
137 AH->PrintExtraTocPtr = _PrintExtraToc;
139 AH->StartBlobsPtr = _StartBlobs;
140 AH->StartBlobPtr = _StartBlob;
141 AH->EndBlobPtr = _EndBlob;
142 AH->EndBlobsPtr = _EndBlobs;
145 * Set up some special context used in compressing data.
147 ctx = (lclContext *) malloc(sizeof(lclContext));
149 die_horribly(AH, modulename, "out of memory\n");
150 AH->formatData = (void *) ctx;
152 ctx->zp = (z_streamp) malloc(sizeof(z_stream));
154 die_horribly(AH, modulename, "out of memory\n");
156 /* Initialize LO buffering */
157 AH->lo_buf_size = LOBBUFSIZE;
158 AH->lo_buf = (void *)malloc(LOBBUFSIZE);
159 if(AH->lo_buf == NULL)
160 die_horribly(AH, modulename, "out of memory\n");
163 * zlibOutSize is the buffer size we tell zlib it can output to. We
164 * actually allocate one extra byte because some routines want to
165 * append a trailing zero byte to the zlib output. The input buffer
166 * is expansible and is always of size ctx->inSize; zlibInSize is just
167 * the initial default size for it.
169 ctx->zlibOut = (char *) malloc(zlibOutSize + 1);
170 ctx->zlibIn = (char *) malloc(zlibInSize);
171 ctx->inSize = zlibInSize;
174 if (ctx->zlibOut == NULL || ctx->zlibIn == NULL)
175 die_horribly(AH, modulename, "out of memory\n");
180 if (AH->mode == archModeWrite)
183 if (AH->fSpec && strcmp(AH->fSpec, "") != 0)
184 AH->FH = fopen(AH->fSpec, PG_BINARY_W);
189 die_horribly(AH, modulename, "could not open archive file %s: %s\n", AH->fSpec, strerror(errno));
191 ctx->hasSeek = (fseek(AH->FH, 0, SEEK_CUR) == 0);
197 if (AH->fSpec && strcmp(AH->fSpec, "") != 0)
198 AH->FH = fopen(AH->fSpec, PG_BINARY_R);
202 die_horribly(AH, modulename, "could not open archive file %s: %s\n", AH->fSpec, strerror(errno));
204 ctx->hasSeek = (fseek(AH->FH, 0, SEEK_CUR) == 0);
208 ctx->dataStart = _getFilePos(AH, ctx);
214 * Called by the Archiver when the dumper creates a new TOC entry.
218 * Set up extra format-related TOC data.
221 _ArchiveEntry(ArchiveHandle *AH, TocEntry *te)
225 ctx = (lclTocEntry *) calloc(1, sizeof(lclTocEntry));
231 te->formatData = (void *) ctx;
236 * Called by the Archiver to save any extra format-related TOC entry
241 * Use the Archiver routines to write data - they are non-endian, and
242 * maintain other important file information.
245 _WriteExtraToc(ArchiveHandle *AH, TocEntry *te)
247 lclTocEntry *ctx = (lclTocEntry *) te->formatData;
249 WriteInt(AH, ctx->dataPos);
250 WriteInt(AH, ctx->dataLen);
254 * Called by the Archiver to read any extra format-related TOC data.
258 * Needs to match the order defined in _WriteExtraToc, and should also
259 * use the Archiver input routines.
262 _ReadExtraToc(ArchiveHandle *AH, TocEntry *te)
264 lclTocEntry *ctx = (lclTocEntry *) te->formatData;
268 ctx = (lclTocEntry *) malloc(sizeof(lclTocEntry));
269 te->formatData = (void *) ctx;
272 ctx->dataPos = ReadInt(AH);
273 ctx->dataLen = ReadInt(AH);
277 * Called by the Archiver when restoring an archive to output a comment
278 * that includes useful information about the TOC entry.
284 _PrintExtraToc(ArchiveHandle *AH, TocEntry *te)
286 lclTocEntry *ctx = (lclTocEntry *) te->formatData;
288 ahprintf(AH, "-- Data Pos: %d (Length %d)\n", ctx->dataPos, ctx->dataLen);
292 * Called by the archiver when saving TABLE DATA (not schema). This routine
293 * should save whatever format-specific information is needed to read
296 * It is called just prior to the dumper's 'DataDumper' routine being called.
298 * Optional, but strongly recommended.
302 _StartData(ArchiveHandle *AH, TocEntry *te)
304 lclContext *ctx = (lclContext *) AH->formatData;
305 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
307 tctx->dataPos = _getFilePos(AH, ctx);
309 _WriteByte(AH, BLK_DATA); /* Block type */
310 WriteInt(AH, te->id); /* For sanity check */
312 _StartDataCompressor(AH, te);
317 * Called by archiver when dumper calls WriteData. This routine is
318 * called for both BLOB and TABLE data; it is the responsibility of
319 * the format to manage each kind of data using StartBlob/StartData.
321 * It should only be called from within a DataDumper routine.
327 _WriteData(ArchiveHandle *AH, const void *data, int dLen)
329 lclContext *ctx = (lclContext *) AH->formatData;
330 z_streamp zp = ctx->zp;
332 zp->next_in = (void *) data;
335 while (zp->avail_in != 0)
337 /* printf("Deflating %d bytes\n", dLen); */
338 _DoDeflate(AH, ctx, 0);
344 * Called by the archiver when a dumper's 'DataDumper' routine has
351 _EndData(ArchiveHandle *AH, TocEntry *te)
353 lclContext *ctx = (lclContext *) AH->formatData;
354 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
356 _EndDataCompressor(AH, te);
358 tctx->dataLen = _getFilePos(AH, ctx) - tctx->dataPos;
362 * Called by the archiver when starting to save all BLOB DATA (not schema).
363 * This routine should save whatever format-specific information is needed
364 * to read the BLOBs back into memory.
366 * It is called just prior to the dumper's DataDumper routine.
368 * Optional, but strongly recommended.
372 _StartBlobs(ArchiveHandle *AH, TocEntry *te)
374 lclContext *ctx = (lclContext *) AH->formatData;
375 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
377 tctx->dataPos = _getFilePos(AH, ctx);
379 _WriteByte(AH, BLK_BLOBS); /* Block type */
380 WriteInt(AH, te->id); /* For sanity check */
385 * Called by the archiver when the dumper calls StartBlob.
389 * Must save the passed OID for retrieval at restore-time.
392 _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
395 die_horribly(AH, modulename, "invalid OID for large object\n");
398 _StartDataCompressor(AH, te);
402 * Called by the archiver when the dumper calls EndBlob.
408 _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
410 _EndDataCompressor(AH, te);
414 * Called by the archiver when finishing saving all BLOB DATA.
420 _EndBlobs(ArchiveHandle *AH, TocEntry *te)
422 /* Write out a fake zero OID to mark end-of-blobs. */
427 * Print data for a given TOC entry
430 _PrintTocData(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt)
432 lclContext *ctx = (lclContext *) AH->formatData;
434 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
438 if (tctx->dataPos == 0)
441 if (!ctx->hasSeek || tctx->dataPos < 0)
444 /* Skip over unnecessary blocks until we get the one we want. */
448 _readBlockHeader(AH, &blkType, &id);
453 if ((TocIDRequired(AH, id, ropt) & 2) != 0)
454 die_horribly(AH, modulename,
455 "Dumping a specific TOC data block out of order is not supported"
456 " without id on this input stream (fseek required)\n");
471 default: /* Always have a default */
473 die_horribly(AH, modulename,
474 "unrecognized data block type (%d) while searching archive\n",
479 _readBlockHeader(AH, &blkType, &id);
489 if (fseek(AH->FH, tctx->dataPos, SEEK_SET) != 0)
490 die_horribly(AH, modulename, "error during file seek: %s\n", strerror(errno));
492 _readBlockHeader(AH, &blkType, &id);
498 die_horribly(AH, modulename, "found unexpected block ID (%d) when reading data - expected %d\n",
512 die_horribly(AH, modulename, "large objects cannot be loaded without a database connection\n");
517 default: /* Always have a default */
519 die_horribly(AH, modulename, "unrecognized data block type %d while restoring archive\n",
524 ahprintf(AH, "\n\n");
528 * Print data from current file position.
531 _PrintData(ArchiveHandle *AH)
533 lclContext *ctx = (lclContext *) AH->formatData;
534 z_streamp zp = ctx->zp;
536 char *in = ctx->zlibIn;
541 char *out = ctx->zlibOut;
548 if (AH->compression != 0)
554 if (inflateInit(zp) != Z_OK)
555 die_horribly(AH, modulename, "could not initialize compression library: %s\n", zp->msg);
559 blkLen = ReadInt(AH);
562 if (blkLen + 1 > ctx->inSize)
566 ctx->zlibIn = (char *) malloc(blkLen + 1);
568 die_horribly(AH, modulename, "out of memory\n");
570 ctx->inSize = blkLen + 1;
574 cnt = fread(in, 1, blkLen, AH->FH);
576 die_horribly(AH, modulename, "could not read data block - expected %d, got %d\n", blkLen, cnt);
578 ctx->filePos += blkLen;
581 zp->avail_in = blkLen;
585 if (AH->compression != 0)
588 while (zp->avail_in != 0)
591 zp->avail_out = zlibOutSize;
592 res = inflate(zp, 0);
593 if (res != Z_OK && res != Z_STREAM_END)
594 die_horribly(AH, modulename, "unable to uncompress data: %s\n", zp->msg);
596 out[zlibOutSize - zp->avail_out] = '\0';
597 ahwrite(out, 1, zlibOutSize - zp->avail_out, AH);
603 in[zp->avail_in] = '\0';
604 ahwrite(in, 1, zp->avail_in, AH);
611 blkLen = ReadInt(AH);
616 if (AH->compression != 0)
620 while (res != Z_STREAM_END)
623 zp->avail_out = zlibOutSize;
624 res = inflate(zp, 0);
625 if (res != Z_OK && res != Z_STREAM_END)
626 die_horribly(AH, modulename, "unable to uncompress data: %s\n", zp->msg);
628 out[zlibOutSize - zp->avail_out] = '\0';
629 ahwrite(out, 1, zlibOutSize - zp->avail_out, AH);
631 if (inflateEnd(zp) != Z_OK)
632 die_horribly(AH, modulename, "could not close compression library: %s\n", zp->msg);
639 _LoadBlobs(ArchiveHandle *AH)
643 StartRestoreBlobs(AH);
648 StartRestoreBlob(AH, oid);
650 EndRestoreBlob(AH, oid);
659 * Skip the BLOBs from the current file position.
660 * BLOBS are written sequentially as data blocks (see below).
661 * Each BLOB is preceded by its original OID.
662 * A zero OID indicates the end of the BLOBS
665 _skipBlobs(ArchiveHandle *AH)
678 * Skip data from current file position.
679 * Data blocks are formatted as an integer length, followed by data.
680 * A zero length denotes the end of the block.
683 _skipData(ArchiveHandle *AH)
685 lclContext *ctx = (lclContext *) AH->formatData;
687 char *in = ctx->zlibIn;
690 blkLen = ReadInt(AH);
693 if (blkLen > ctx->inSize)
696 ctx->zlibIn = (char *) malloc(blkLen);
697 ctx->inSize = blkLen;
700 cnt = fread(in, 1, blkLen, AH->FH);
702 die_horribly(AH, modulename, "could not read data block - expected %d, got %d\n", blkLen, cnt);
704 ctx->filePos += blkLen;
706 blkLen = ReadInt(AH);
712 * Write a byte of data to the archive.
716 * Called by the archiver to do integer & byte output to the archive.
717 * These routines are only used to read & write headers & TOC.
721 _WriteByte(ArchiveHandle *AH, const int i)
723 lclContext *ctx = (lclContext *) AH->formatData;
726 res = fputc(i, AH->FH);
730 die_horribly(AH, modulename, "could not write byte: %s\n", strerror(errno));
735 * Read a byte of data from the archive.
739 * Called by the archiver to read bytes & integers from the archive.
740 * These routines are only used to read & write headers & TOC.
744 _ReadByte(ArchiveHandle *AH)
746 lclContext *ctx = (lclContext *) AH->formatData;
756 * Write a buffer of data to the archive.
760 * Called by the archiver to write a block of bytes to the archive.
761 * These routines are only used to read & write headers & TOC.
765 _WriteBuf(ArchiveHandle *AH, const void *buf, int len)
767 lclContext *ctx = (lclContext *) AH->formatData;
770 res = fwrite(buf, 1, len, AH->FH);
773 die_horribly(AH, modulename, "write error in _WriteBuf (%d != %d)\n", res, len);
780 * Read a block of bytes from the archive.
784 * Called by the archiver to read a block of bytes from the archive
785 * These routines are only used to read & write headers & TOC.
789 _ReadBuf(ArchiveHandle *AH, void *buf, int len)
791 lclContext *ctx = (lclContext *) AH->formatData;
794 res = fread(buf, 1, len, AH->FH);
805 * When writing the archive, this is the routine that actually starts
806 * the process of saving it to files. No data should be written prior
807 * to this point, since the user could sort the TOC after creating it.
809 * If an archive is to be written, this routine must call:
810 * WriteHead to save the archive header
811 * WriteToc to save the TOC entries
812 * WriteDataChunks to save all DATA & BLOBs.
816 _CloseArchive(ArchiveHandle *AH)
818 lclContext *ctx = (lclContext *) AH->formatData;
821 if (AH->mode == archModeWrite)
824 tpos = ftell(AH->FH);
826 ctx->dataStart = _getFilePos(AH, ctx);
830 * This is not an essential operation - it is really only needed
831 * if we expect to be doing seeks to read the data back - it may
832 * be ok to just use the existing self-consistent block
837 fseek(AH->FH, tpos, SEEK_SET);
842 if (fclose(AH->FH) != 0)
843 die_horribly(AH, modulename, "could not close archive file: %s\n", strerror(errno));
848 /*--------------------------------------------------
849 * END OF FORMAT CALLBACKS
850 *--------------------------------------------------
854 * Get the current position in the archive file.
857 _getFilePos(ArchiveHandle *AH, lclContext *ctx)
864 if (pos != ctx->filePos)
866 write_msg(modulename, "WARNING: ftell mismatch with expected position -- ftell ignored\n");
876 * Read a data block header. The format changed in V1.3, so we
877 * put the code here for simplicity.
880 _readBlockHeader(ArchiveHandle *AH, int *type, int *id)
882 if (AH->version < K_VERS_1_3)
885 *type = _ReadByte(AH);;
891 * If zlib is available, then start it up. This is called from
892 * StartData & StartBlob. The buffers are setup in the Init routine.
896 _StartDataCompressor(ArchiveHandle *AH, TocEntry *te)
898 lclContext *ctx = (lclContext *) AH->formatData;
899 z_streamp zp = ctx->zp;
903 if (AH->compression < 0 || AH->compression > 9)
904 AH->compression = Z_DEFAULT_COMPRESSION;
906 if (AH->compression != 0)
912 if (deflateInit(zp, AH->compression) != Z_OK)
913 die_horribly(AH, modulename, "could not initialize compression library: %s\n", zp->msg);
921 /* Just be paranoid - maybe End is called after Start, with no Write */
922 zp->next_out = ctx->zlibOut;
923 zp->avail_out = zlibOutSize;
927 * Send compressed data to the output stream (via ahwrite).
928 * Each data chunk is preceded by its length.
929 * In the case of Z0, or no zlib, just write the raw data.
933 _DoDeflate(ArchiveHandle *AH, lclContext *ctx, int flush)
935 z_streamp zp = ctx->zp;
938 char *out = ctx->zlibOut;
941 if (AH->compression != 0)
943 res = deflate(zp, flush);
944 if (res == Z_STREAM_ERROR)
945 die_horribly(AH, modulename, "could not compress data: %s\n", zp->msg);
947 if (((flush == Z_FINISH) && (zp->avail_out < zlibOutSize))
948 || (zp->avail_out == 0)
949 || (zp->avail_in != 0)
953 * Extra paranoia: avoid zero-length chunks since a zero
954 * length chunk is the EOF marker. This should never happen
957 if (zp->avail_out < zlibOutSize)
960 * printf("Wrote %d byte deflated chunk\n", zlibOutSize -
963 WriteInt(AH, zlibOutSize - zp->avail_out);
964 if (fwrite(out, 1, zlibOutSize - zp->avail_out, AH->FH) != (zlibOutSize - zp->avail_out))
965 die_horribly(AH, modulename, "could not write compressed chunk\n");
966 ctx->filePos += zlibOutSize - zp->avail_out;
969 zp->avail_out = zlibOutSize;
975 if (zp->avail_in > 0)
977 WriteInt(AH, zp->avail_in);
978 if (fwrite(zp->next_in, 1, zp->avail_in, AH->FH) != zp->avail_in)
979 die_horribly(AH, modulename, "could not write uncompressed chunk\n");
980 ctx->filePos += zp->avail_in;
986 if (flush == Z_FINISH)
1003 * Terminate zlib context and flush its buffers. If no zlib
1008 _EndDataCompressor(ArchiveHandle *AH, TocEntry *te)
1012 lclContext *ctx = (lclContext *) AH->formatData;
1013 z_streamp zp = ctx->zp;
1016 if (AH->compression != 0)
1023 /* printf("Ending data output\n"); */
1024 res = _DoDeflate(AH, ctx, Z_FINISH);
1025 } while (res != Z_STREAM_END);
1027 if (deflateEnd(zp) != Z_OK)
1028 die_horribly(AH, modulename, "could not close compression stream: %s\n", zp->msg);
1032 /* Send the end marker */