1 /*-------------------------------------------------------------------------
3 * pg_backup_directory.c
5 * A directory format dump is a directory, which contains a "toc.dat" file
6 * for the TOC, and a separate file for each data entry, named "<oid>.dat".
7 * Large objects (BLOBs) are stored in separate files named "blob_<uid>.dat",
8 * and there's a plain-text TOC file for them called "blobs.toc". If
9 * compression is used, each data file is individually compressed and the
10 * ".gz" suffix is added to the filenames. The TOC files are never
11 * compressed by pg_dump, however they are accepted with the .gz suffix too,
12 * in case the user has manually compressed them with 'gzip'.
14 * NOTE: This format is identical to the files written in the tar file in
15 * the 'tar' format, except that we don't write the restore.sql file (TODO),
16 * and the tar format doesn't support compression. Please keep the formats in
20 * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
21 * Portions Copyright (c) 1994, Regents of the University of California
22 * Portions Copyright (c) 2000, Philip Warner
24 * Rights are granted to use this software in any way so long
25 * as this notice is not removed.
27 * The author is not responsible for loss or damages that may
28 * result from it's use.
31 * src/bin/pg_dump/pg_backup_directory.c
33 *-------------------------------------------------------------------------
36 #include "compress_io.h"
38 #include "dumputils.h"
46 * Our archive location. This is basically what the user specified as his
47 * backup file but of course here it is a directory.
51 cfp *dataFH; /* currently open data file */
53 cfp *blobsTocFH; /* file handle for blobs.toc */
58 char *filename; /* filename excluding the directory (basename) */
61 static const char *modulename = gettext_noop("directory archiver");
63 /* prototypes for private functions */
64 static void _ArchiveEntry(ArchiveHandle *AH, TocEntry *te);
65 static void _StartData(ArchiveHandle *AH, TocEntry *te);
66 static void _EndData(ArchiveHandle *AH, TocEntry *te);
67 static size_t _WriteData(ArchiveHandle *AH, const void *data, size_t dLen);
68 static int _WriteByte(ArchiveHandle *AH, const int i);
69 static int _ReadByte(ArchiveHandle *);
70 static size_t _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len);
71 static size_t _ReadBuf(ArchiveHandle *AH, void *buf, size_t len);
72 static void _CloseArchive(ArchiveHandle *AH);
73 static void _PrintTocData(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt);
75 static void _WriteExtraToc(ArchiveHandle *AH, TocEntry *te);
76 static void _ReadExtraToc(ArchiveHandle *AH, TocEntry *te);
77 static void _PrintExtraToc(ArchiveHandle *AH, TocEntry *te);
79 static void _StartBlobs(ArchiveHandle *AH, TocEntry *te);
80 static void _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
81 static void _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
82 static void _EndBlobs(ArchiveHandle *AH, TocEntry *te);
83 static void _LoadBlobs(ArchiveHandle *AH, RestoreOptions *ropt);
85 static char *prependDirectory(ArchiveHandle *AH, const char *relativeFilename);
87 static void createDirectory(const char *dir);
91 * Init routine required by ALL formats. This is a global routine
92 * and should be declared in pg_backup_archiver.h
94 * Its task is to create any extra archive context (using AH->formatData),
95 * and to initialize the supported function pointers.
97 * It should also prepare whatever its input source is for reading/writing,
98 * and in the case of a read mode connection, it should load the Header & TOC.
101 InitArchiveFmt_Directory(ArchiveHandle *AH)
105 /* Assuming static functions, this can be copied for each format. */
106 AH->ArchiveEntryPtr = _ArchiveEntry;
107 AH->StartDataPtr = _StartData;
108 AH->WriteDataPtr = _WriteData;
109 AH->EndDataPtr = _EndData;
110 AH->WriteBytePtr = _WriteByte;
111 AH->ReadBytePtr = _ReadByte;
112 AH->WriteBufPtr = _WriteBuf;
113 AH->ReadBufPtr = _ReadBuf;
114 AH->ClosePtr = _CloseArchive;
115 AH->ReopenPtr = NULL;
116 AH->PrintTocDataPtr = _PrintTocData;
117 AH->ReadExtraTocPtr = _ReadExtraToc;
118 AH->WriteExtraTocPtr = _WriteExtraToc;
119 AH->PrintExtraTocPtr = _PrintExtraToc;
121 AH->StartBlobsPtr = _StartBlobs;
122 AH->StartBlobPtr = _StartBlob;
123 AH->EndBlobPtr = _EndBlob;
124 AH->EndBlobsPtr = _EndBlobs;
127 AH->DeClonePtr = NULL;
129 /* Set up our private context */
130 ctx = (lclContext *) pg_calloc(1, sizeof(lclContext));
131 AH->formatData = (void *) ctx;
134 ctx->blobsTocFH = NULL;
136 /* Initialize LO buffering */
137 AH->lo_buf_size = LOBBUFSIZE;
138 AH->lo_buf = (void *) pg_malloc(LOBBUFSIZE);
141 * Now open the TOC file
144 if (!AH->fSpec || strcmp(AH->fSpec, "") == 0)
145 exit_horribly(modulename, "no output directory specified\n");
147 ctx->directory = AH->fSpec;
149 if (AH->mode == archModeWrite)
151 /* Create the directory, errors are caught there */
152 createDirectory(ctx->directory);
159 fname = prependDirectory(AH, "toc.dat");
161 tocFH = cfopen_read(fname, PG_BINARY_R);
163 exit_horribly(modulename,
164 "could not open input file \"%s\": %s\n",
165 fname, strerror(errno));
170 * The TOC of a directory format dump shares the format code of the
173 AH->format = archTar;
175 AH->format = archDirectory;
178 /* Nothing else in the file, so close it again... */
179 if (cfclose(tocFH) != 0)
180 exit_horribly(modulename, "could not close TOC file: %s\n",
187 * Called by the Archiver when the dumper creates a new TOC entry.
189 * We determine the filename for this entry.
192 _ArchiveEntry(ArchiveHandle *AH, TocEntry *te)
197 tctx = (lclTocEntry *) pg_calloc(1, sizeof(lclTocEntry));
200 snprintf(fn, MAXPGPATH, "%d.dat", te->dumpId);
201 tctx->filename = pg_strdup(fn);
203 else if (strcmp(te->desc, "BLOBS") == 0)
204 tctx->filename = pg_strdup("blobs.toc");
206 tctx->filename = NULL;
208 te->formatData = (void *) tctx;
212 * Called by the Archiver to save any extra format-related TOC entry
215 * Use the Archiver routines to write data - they are non-endian, and
216 * maintain other important file information.
219 _WriteExtraToc(ArchiveHandle *AH, TocEntry *te)
221 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
224 * A dumpable object has set tctx->filename, any other object has not.
225 * (see _ArchiveEntry).
228 WriteStr(AH, tctx->filename);
234 * Called by the Archiver to read any extra format-related TOC data.
236 * Needs to match the order defined in _WriteExtraToc, and should also
237 * use the Archiver input routines.
240 _ReadExtraToc(ArchiveHandle *AH, TocEntry *te)
242 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
246 tctx = (lclTocEntry *) pg_calloc(1, sizeof(lclTocEntry));
247 te->formatData = (void *) tctx;
250 tctx->filename = ReadStr(AH);
251 if (strlen(tctx->filename) == 0)
253 free(tctx->filename);
254 tctx->filename = NULL;
259 * Called by the Archiver when restoring an archive to output a comment
260 * that includes useful information about the TOC entry.
263 _PrintExtraToc(ArchiveHandle *AH, TocEntry *te)
265 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
267 if (AH->public.verbose && tctx->filename)
268 ahprintf(AH, "-- File: %s\n", tctx->filename);
272 * Called by the archiver when saving TABLE DATA (not schema). This routine
273 * should save whatever format-specific information is needed to read
276 * It is called just prior to the dumper's 'DataDumper' routine being called.
278 * We create the data file for writing.
281 _StartData(ArchiveHandle *AH, TocEntry *te)
283 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
284 lclContext *ctx = (lclContext *) AH->formatData;
287 fname = prependDirectory(AH, tctx->filename);
289 ctx->dataFH = cfopen_write(fname, PG_BINARY_W, AH->compression);
290 if (ctx->dataFH == NULL)
291 exit_horribly(modulename, "could not open output file \"%s\": %s\n",
292 fname, strerror(errno));
296 * Called by archiver when dumper calls WriteData. This routine is
297 * called for both BLOB and TABLE data; it is the responsibility of
298 * the format to manage each kind of data using StartBlob/StartData.
300 * It should only be called from within a DataDumper routine.
302 * We write the data to the open data file.
305 _WriteData(ArchiveHandle *AH, const void *data, size_t dLen)
307 lclContext *ctx = (lclContext *) AH->formatData;
312 return cfwrite(data, dLen, ctx->dataFH);
316 * Called by the archiver when a dumper's 'DataDumper' routine has
319 * We close the data file.
322 _EndData(ArchiveHandle *AH, TocEntry *te)
324 lclContext *ctx = (lclContext *) AH->formatData;
327 cfclose(ctx->dataFH);
333 * Print data for a given file (can be a BLOB as well)
336 _PrintFileData(ArchiveHandle *AH, char *filename, RestoreOptions *ropt)
346 cfp = cfopen_read(filename, PG_BINARY_R);
349 exit_horribly(modulename, "could not open input file \"%s\": %s\n",
350 filename, strerror(errno));
352 buf = pg_malloc(ZLIB_OUT_SIZE);
353 buflen = ZLIB_OUT_SIZE;
355 while ((cnt = cfread(buf, buflen, cfp)))
356 ahwrite(buf, 1, cnt, AH);
359 if (cfclose(cfp) !=0)
360 exit_horribly(modulename, "could not close data file: %s\n",
365 * Print data for a given TOC entry
368 _PrintTocData(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt)
370 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
375 if (strcmp(te->desc, "BLOBS") == 0)
376 _LoadBlobs(AH, ropt);
379 char *fname = prependDirectory(AH, tctx->filename);
381 _PrintFileData(AH, fname, ropt);
386 _LoadBlobs(ArchiveHandle *AH, RestoreOptions *ropt)
389 lclContext *ctx = (lclContext *) AH->formatData;
391 char line[MAXPGPATH];
393 StartRestoreBlobs(AH);
395 fname = prependDirectory(AH, "blobs.toc");
397 ctx->blobsTocFH = cfopen_read(fname, PG_BINARY_R);
399 if (ctx->blobsTocFH == NULL)
400 exit_horribly(modulename, "could not open large object TOC file \"%s\" for input: %s\n",
401 fname, strerror(errno));
403 /* Read the blobs TOC file line-by-line, and process each blob */
404 while ((cfgets(ctx->blobsTocFH, line, MAXPGPATH)) != NULL)
406 char fname[MAXPGPATH];
407 char path[MAXPGPATH];
409 if (sscanf(line, "%u %s\n", &oid, fname) != 2)
410 exit_horribly(modulename, "invalid line in large object TOC file \"%s\": \"%s\"\n",
413 StartRestoreBlob(AH, oid, ropt->dropSchema);
414 snprintf(path, MAXPGPATH, "%s/%s", ctx->directory, fname);
415 _PrintFileData(AH, path, ropt);
416 EndRestoreBlob(AH, oid);
418 if (!cfeof(ctx->blobsTocFH))
419 exit_horribly(modulename, "error reading large object TOC file \"%s\"\n",
422 if (cfclose(ctx->blobsTocFH) != 0)
423 exit_horribly(modulename, "could not close large object TOC file \"%s\": %s\n",
424 fname, strerror(errno));
426 ctx->blobsTocFH = NULL;
433 * Write a byte of data to the archive.
434 * Called by the archiver to do integer & byte output to the archive.
435 * These routines are only used to read & write the headers & TOC.
438 _WriteByte(ArchiveHandle *AH, const int i)
440 unsigned char c = (unsigned char) i;
441 lclContext *ctx = (lclContext *) AH->formatData;
443 if (cfwrite(&c, 1, ctx->dataFH) != 1)
444 exit_horribly(modulename, "could not write byte\n");
450 * Read a byte of data from the archive.
451 * Called by the archiver to read bytes & integers from the archive.
452 * These routines are only used to read & write headers & TOC.
453 * EOF should be treated as a fatal error.
456 _ReadByte(ArchiveHandle *AH)
458 lclContext *ctx = (lclContext *) AH->formatData;
461 res = cfgetc(ctx->dataFH);
463 exit_horribly(modulename, "unexpected end of file\n");
469 * Write a buffer of data to the archive.
470 * Called by the archiver to write a block of bytes to the TOC or a data file.
473 _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len)
475 lclContext *ctx = (lclContext *) AH->formatData;
478 res = cfwrite(buf, len, ctx->dataFH);
480 exit_horribly(modulename, "could not write to output file: %s\n",
487 * Read a block of bytes from the archive.
489 * Called by the archiver to read a block of bytes from the archive
492 _ReadBuf(ArchiveHandle *AH, void *buf, size_t len)
494 lclContext *ctx = (lclContext *) AH->formatData;
497 res = cfread(buf, len, ctx->dataFH);
505 * When writing the archive, this is the routine that actually starts
506 * the process of saving it to files. No data should be written prior
507 * to this point, since the user could sort the TOC after creating it.
509 * If an archive is to be written, this routine must call:
510 * WriteHead to save the archive header
511 * WriteToc to save the TOC entries
512 * WriteDataChunks to save all DATA & BLOBs.
515 _CloseArchive(ArchiveHandle *AH)
517 lclContext *ctx = (lclContext *) AH->formatData;
519 if (AH->mode == archModeWrite)
522 char *fname = prependDirectory(AH, "toc.dat");
524 /* The TOC is always created uncompressed */
525 tocFH = cfopen_write(fname, PG_BINARY_W, 0);
527 exit_horribly(modulename, "could not open output file \"%s\": %s\n",
528 fname, strerror(errno));
532 * Write 'tar' in the format field of the toc.dat file. The directory
533 * is compatible with 'tar', so there's no point having a different
534 * format code for it.
536 AH->format = archTar;
538 AH->format = archDirectory;
540 if (cfclose(tocFH) != 0)
541 exit_horribly(modulename, "could not close TOC file: %s\n",
554 * Called by the archiver when starting to save all BLOB DATA (not schema).
555 * It is called just prior to the dumper's DataDumper routine.
557 * We open the large object TOC file here, so that we can append a line to
561 _StartBlobs(ArchiveHandle *AH, TocEntry *te)
563 lclContext *ctx = (lclContext *) AH->formatData;
566 fname = prependDirectory(AH, "blobs.toc");
568 /* The blob TOC file is never compressed */
569 ctx->blobsTocFH = cfopen_write(fname, "ab", 0);
570 if (ctx->blobsTocFH == NULL)
571 exit_horribly(modulename, "could not open output file \"%s\": %s\n",
572 fname, strerror(errno));
576 * Called by the archiver when we're about to start dumping a blob.
578 * We create a file to write the blob to.
581 _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
583 lclContext *ctx = (lclContext *) AH->formatData;
584 char fname[MAXPGPATH];
586 snprintf(fname, MAXPGPATH, "%s/blob_%u.dat", ctx->directory, oid);
588 ctx->dataFH = cfopen_write(fname, PG_BINARY_W, AH->compression);
590 if (ctx->dataFH == NULL)
591 exit_horribly(modulename, "could not open output file \"%s\": %s\n",
592 fname, strerror(errno));
596 * Called by the archiver when the dumper is finished writing a blob.
598 * We close the blob file and write an entry to the blob TOC file for it.
601 _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
603 lclContext *ctx = (lclContext *) AH->formatData;
607 /* Close the BLOB data file itself */
608 cfclose(ctx->dataFH);
611 /* register the blob in blobs.toc */
612 len = snprintf(buf, sizeof(buf), "%u blob_%u.dat\n", oid, oid);
613 if (cfwrite(buf, len, ctx->blobsTocFH) != len)
614 exit_horribly(modulename, "could not write to blobs TOC file\n");
618 * Called by the archiver when finishing saving all BLOB DATA.
620 * We close the blobs TOC file.
623 _EndBlobs(ArchiveHandle *AH, TocEntry *te)
625 lclContext *ctx = (lclContext *) AH->formatData;
627 cfclose(ctx->blobsTocFH);
628 ctx->blobsTocFH = NULL;
632 createDirectory(const char *dir)
636 /* the directory must not exist yet. */
637 if (stat(dir, &st) == 0)
639 if (S_ISDIR(st.st_mode))
640 exit_horribly(modulename,
641 "cannot create directory %s, it exists already\n",
644 exit_horribly(modulename,
645 "cannot create directory %s, a file with this name "
646 "exists already\n", dir);
650 * Now we create the directory. Note that for some race condition we could
651 * also run into the situation that the directory has been created just
652 * between our two calls.
654 if (mkdir(dir, 0700) < 0)
655 exit_horribly(modulename, "could not create directory %s: %s\n",
656 dir, strerror(errno));
661 prependDirectory(ArchiveHandle *AH, const char *relativeFilename)
663 lclContext *ctx = (lclContext *) AH->formatData;
664 static char buf[MAXPGPATH];
667 dname = ctx->directory;
669 if (strlen(dname) + 1 + strlen(relativeFilename) + 1 > MAXPGPATH)
670 exit_horribly(modulename, "file name too long: \"%s\"\n", dname);
674 strcat(buf, relativeFilename);